Add todo for action v2

f6139c17 · sbl1996@126.com · 6752ba72 · 6752ba72 · f6139c17 · 6752ba72
Commit f6139c17 authored May 19, 2024 by sbl1996@126.com
4 changed files
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
-# Change log
-## 0.2.0 (March 12, 2024)
- A `negated` feature is added to cards. It indicates whether a card is currently negated.
- Positional encoding is added to history actions. Without it, the model could not distinguish the order of history actions.
- The multi-select action is removed and is now implemented as multiple single-select actions. This means the number of selections is now unlimited.
--- a/docs/feature_engineering.md
+++ b/docs/feature_engineering.md
 # Features
 ## Definitions
+### Float transform
 - float transform: max 65535 -> 2 bytes
- count
+### Card ID
+The card id is the index of the card code in `code_list.txt`.
 ## Card
 - 0,1: card id, uint16 -> 2 uint8, name+desc
@@ -11,7 +15,7 @@
 - 4: owner, discrete, 0: me, 1: oppo (2)
 - 5: position, discrete, 0: N/A, 1+: same as position2str
 - 6: overlay, discrete, 0: not, 1: xyz material
- 7: attribute, discrete, 0: N/A, 1+: same as attribute2str[2:]
+- 7: attribute, discrete, 0: N/A, 1+: same as attribute2str
 - 8: race, discrete, 0: N/A, 1+: same as race2str
 - 9: level, discrete, 0: N/A
 - 10: counter, discrete, 0: N/A

--- a/docs/lstm_implementations.md
+++ b/docs/lstm_implementations.md
-# LSTM Implementations
-## Original PPO + LSTM in CleanRL
-```python
-not_done = (~done.reshape((-1, batch_size))).float()
-new_hidden = []
-for i in range(hidden.shape[0]):
-    h, lstm_state = self.lstm(
-        hidden[i].unsqueeze(0),
-        (
-            not_done[i].view(1, -1, 1) * lstm_state[0],
-            not_done[i].view(1, -1, 1) * lstm_state[1],
-        ),
-    )
-    new_hidden += [h]
-new_hidden = torch.cat(new_hidden)
-# new_hidden, lstm_state = self.lstm(hidden, lstm_state)
-```
-The loop runs for `num_steps` iterations (typically 128), so it is slow (even with torch.compile). Compared with the original LSTM, the overall training time is 4x slower.
-## Custom LSTM with triton
-```python
-```
\ No newline at end of file
--- a/ygoenv/ygoenv/ygopro/ygopro.h
+++ b/ygoenv/ygoenv/ygopro/ygopro.h
@@ -3700,6 +3700,7 @@ private:
        pl->notify("Battle menu:");
      }
      for (const auto [code, spec, data] : activatable) {
+        // TODO: Add effect description to indicate which effect is being activated
        options_.push_back("v " + spec);
        if (verbose_) {
          auto [loc, seq, pos] = spec_to_ls(spec);
@@ -3710,18 +3711,27 @@ private:
        }
      }
      for (const auto [code, spec, data] : attackable) {
+        // TODO: add this as feature
+        bool direct_attackable = data & 0x1;
        options_.push_back("a " + spec);
        if (verbose_) {
          auto [loc, seq, pos] = spec_to_ls(spec);
          auto c = get_card(player, loc, seq);
+          std::string s;
          if (c.type_ & TYPE_LINK) {
-            pl->notify("a " + spec + ": " + c.name_ + " (" +
+            s = "a " + spec + ": " + c.name_ + " (" +
-                       std::to_string(c.attack_) + ") attack");
+                std::to_string(c.attack_) + ")";
+          } else {
+            s = "a " + spec + ": " + c.name_ + " (" +
+                std::to_string(c.attack_) + "/" +
+                std::to_string(c.defense_) + ")";
+          }
+          if (direct_attackable) {
+            s += " direct attack";
          } else {
-            pl->notify("a " + spec + ": " + c.name_ + " (" +
+            s += " attack";
-                       std::to_string(c.attack_) + "/" +
-                       std::to_string(c.defense_) + ") attack");
          }
+          pl->notify(s);
        }
      }
      if (to_m2) {
@@ -3756,6 +3766,7 @@ private:
        }
      };
    } else if (msg_ == MSG_SELECT_UNSELECT_CARD) {
+      // TODO: add feature of selected cards (also for multi select)
      auto player = read_u8();
      bool finishable = read_u8();
      bool cancelable = read_u8();
@@ -4171,6 +4182,7 @@ private:
        auto cs = code_to_spec(spec_code);
        auto chain_count = chain_counts[spec_code];
        if (chain_count > 1) {
+          // TODO: should use desc to indicate activate which effect
          cs.push_back('a' + chain_orders[spec_code]);
        }
        chain_orders[spec_code]++;
@@ -4207,7 +4219,12 @@ private:
      to_play_ = player;
      callback_ = [this, forced](int idx) {
        const auto &option = options_[idx];
-        if ((option == "c") && (!forced)) {
+        if (option == "c") {
+          if (forced) {
+            fmt::print("cancel not allowed in forced chain\n");
+            YGO_SetResponsei(pduel_, 0);
+            return;
+          }
          YGO_SetResponsei(pduel_, -1);
          return;
        }
@@ -4437,6 +4454,7 @@ private:
      }
      ankerl::unordered_dense::map<std::string, int> activate_count;
      for (const auto &[code, spec, data] : idle_activate_) {
+        // TODO: use effect description to indicate which effect to activate
        std::string option = "v " + spec;
        int count = idle_activate_count[spec];
        activate_count[spec]++;
@@ -4698,6 +4716,7 @@ private:
      };
    } else if (msg_ == MSG_SELECT_POSITION) {
+      // TODO: add card as feature
      auto player = read_u8();
      auto code = read_u32();
      auto valid_pos = read_u8();