Commit f6139c17 authored by sbl1996@126.com's avatar sbl1996@126.com

Add todo for action v2

parent 6752ba72
Pipeline #28497 failed with stages
# Change log
## 0.2.0 (March 12, 2024)
- A `negated` feature is added to cards. It indicates whether a card is negated or not.
- Positional encoding is added to history actions. Without it, the model could not distinguish the order of history actions.
- The multi-select action is removed and is now implemented as multiple single-select actions. This means that the number of selections is now unlimited.
# Features # Features
## Definitions ## Definitions
### Float transform
- float transform: max 65535 -> 2 bytes - float transform: max 65535 -> 2 bytes
- count
### Card ID
The card id is the index of the card code in `code_list.txt`.
## Card ## Card
- 0,1: card id, uint16 -> 2 uint8, name+desc - 0,1: card id, uint16 -> 2 uint8, name+desc
...@@ -11,7 +15,7 @@ ...@@ -11,7 +15,7 @@
- 4: owner, discrete, 0: me, 1: oppo (2) - 4: owner, discrete, 0: me, 1: oppo (2)
- 5: position, discrete, 0: N/A, 1+: same as position2str - 5: position, discrete, 0: N/A, 1+: same as position2str
- 6: overlay, discrete, 0: not, 1: xyz material - 6: overlay, discrete, 0: not, 1: xyz material
- 7: attribute, discrete, 0: N/A, 1+: same as attribute2str[2:] - 7: attribute, discrete, 0: N/A, 1+: same as attribute2str
- 8: race, discrete, 0: N/A, 1+: same as race2str - 8: race, discrete, 0: N/A, 1+: same as race2str
- 9: level, discrete, 0: N/A - 9: level, discrete, 0: N/A
- 10: counter, discrete, 0: N/A - 10: counter, discrete, 0: N/A
......
# LSTM Implementations
## Original PPO + LSTM in CleanRL
```python
not_done = (~done.reshape((-1, batch_size))).float()
new_hidden = []
for i in range(hidden.shape[0]):
h, lstm_state = self.lstm(
hidden[i].unsqueeze(0),
(
not_done[i].view(1, -1, 1) * lstm_state[0],
not_done[i].view(1, -1, 1) * lstm_state[1],
),
)
new_hidden += [h]
new_hidden = torch.cat(new_hidden)
# new_hidden, lstm_state = self.lstm(hidden, lstm_state)
```
The loop runs for `num_steps` iterations (typically 128), so it is slow (even with `torch.compile`). Compared with the original LSTM, the overall training time is 4x slower.
## Custom LSTM with triton
```python
```
\ No newline at end of file
...@@ -3700,6 +3700,7 @@ private: ...@@ -3700,6 +3700,7 @@ private:
pl->notify("Battle menu:"); pl->notify("Battle menu:");
} }
for (const auto [code, spec, data] : activatable) { for (const auto [code, spec, data] : activatable) {
// TODO: Add effect description to indicate which effect is being activated
options_.push_back("v " + spec); options_.push_back("v " + spec);
if (verbose_) { if (verbose_) {
auto [loc, seq, pos] = spec_to_ls(spec); auto [loc, seq, pos] = spec_to_ls(spec);
...@@ -3710,18 +3711,27 @@ private: ...@@ -3710,18 +3711,27 @@ private:
} }
} }
for (const auto [code, spec, data] : attackable) { for (const auto [code, spec, data] : attackable) {
// TODO: add this as feature
bool direct_attackable = data & 0x1;
options_.push_back("a " + spec); options_.push_back("a " + spec);
if (verbose_) { if (verbose_) {
auto [loc, seq, pos] = spec_to_ls(spec); auto [loc, seq, pos] = spec_to_ls(spec);
auto c = get_card(player, loc, seq); auto c = get_card(player, loc, seq);
std::string s;
if (c.type_ & TYPE_LINK) { if (c.type_ & TYPE_LINK) {
pl->notify("a " + spec + ": " + c.name_ + " (" + s = "a " + spec + ": " + c.name_ + " (" +
std::to_string(c.attack_) + ") attack"); std::to_string(c.attack_) + ")";
} else { } else {
pl->notify("a " + spec + ": " + c.name_ + " (" + s = "a " + spec + ": " + c.name_ + " (" +
std::to_string(c.attack_) + "/" + std::to_string(c.attack_) + "/" +
std::to_string(c.defense_) + ") attack"); std::to_string(c.defense_) + ")";
}
if (direct_attackable) {
s += " direct attack";
} else {
s += " attack";
} }
pl->notify(s);
} }
} }
if (to_m2) { if (to_m2) {
...@@ -3756,6 +3766,7 @@ private: ...@@ -3756,6 +3766,7 @@ private:
} }
}; };
} else if (msg_ == MSG_SELECT_UNSELECT_CARD) { } else if (msg_ == MSG_SELECT_UNSELECT_CARD) {
// TODO: add feature of selected cards (also for multi select)
auto player = read_u8(); auto player = read_u8();
bool finishable = read_u8(); bool finishable = read_u8();
bool cancelable = read_u8(); bool cancelable = read_u8();
...@@ -4171,6 +4182,7 @@ private: ...@@ -4171,6 +4182,7 @@ private:
auto cs = code_to_spec(spec_code); auto cs = code_to_spec(spec_code);
auto chain_count = chain_counts[spec_code]; auto chain_count = chain_counts[spec_code];
if (chain_count > 1) { if (chain_count > 1) {
// TODO: should use desc to indicate which effect to activate
cs.push_back('a' + chain_orders[spec_code]); cs.push_back('a' + chain_orders[spec_code]);
} }
chain_orders[spec_code]++; chain_orders[spec_code]++;
...@@ -4207,7 +4219,12 @@ private: ...@@ -4207,7 +4219,12 @@ private:
to_play_ = player; to_play_ = player;
callback_ = [this, forced](int idx) { callback_ = [this, forced](int idx) {
const auto &option = options_[idx]; const auto &option = options_[idx];
if ((option == "c") && (!forced)) { if (option == "c") {
if (forced) {
fmt::print("cancel not allowed in forced chain\n");
YGO_SetResponsei(pduel_, 0);
return;
}
YGO_SetResponsei(pduel_, -1); YGO_SetResponsei(pduel_, -1);
return; return;
} }
...@@ -4437,6 +4454,7 @@ private: ...@@ -4437,6 +4454,7 @@ private:
} }
ankerl::unordered_dense::map<std::string, int> activate_count; ankerl::unordered_dense::map<std::string, int> activate_count;
for (const auto &[code, spec, data] : idle_activate_) { for (const auto &[code, spec, data] : idle_activate_) {
// TODO: use effect description to indicate which effect to activate
std::string option = "v " + spec; std::string option = "v " + spec;
int count = idle_activate_count[spec]; int count = idle_activate_count[spec];
activate_count[spec]++; activate_count[spec]++;
...@@ -4698,6 +4716,7 @@ private: ...@@ -4698,6 +4716,7 @@ private:
}; };
} else if (msg_ == MSG_SELECT_POSITION) { } else if (msg_ == MSG_SELECT_POSITION) {
// TODO: add card as feature
auto player = read_u8(); auto player = read_u8();
auto code = read_u32(); auto code = read_u32();
auto valid_pos = read_u8(); auto valid_pos = read_u8();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment