Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Y
ygo-agent
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Biluo Shen
ygo-agent
Commits
77492ca0
Commit
77492ca0
authored
Jun 07, 2024
by
sbl1996@126.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add oppo_info
parent
3dfee5f5
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
146 additions
and
39 deletions
+146
-39
scripts/cleanba.py
scripts/cleanba.py
+1
-0
ygoai/rl/jax/agent.py
ygoai/rl/jax/agent.py
+108
-34
ygoenv/ygoenv/ygopro/ygopro.h
ygoenv/ygoenv/ygopro/ygopro.h
+37
-5
No files found.
scripts/cleanba.py
View file @
77492ca0
...
...
@@ -211,6 +211,7 @@ def make_env(args, seed, num_envs, num_threads, mode='self', thread_affinity_off
greedy_reward
=
args
.
greedy_reward
if
not
eval
else
True
,
play_mode
=
mode
,
timeout
=
args
.
timeout
,
oppo_info
=
args
.
m2
.
oppo_info
if
eval
else
args
.
m1
.
oppo_info
,
)
envs
.
num_envs
=
num_envs
return
envs
...
...
ygoai/rl/jax/agent.py
View file @
77492ca0
This diff is collapsed.
Click to expand it.
ygoenv/ygoenv/ygopro/ygopro.h
View file @
77492ca0
...
...
@@ -1527,7 +1527,8 @@ public:
"verbose"_.Bind(false), "max_options"_.Bind(16),
"max_cards"_.Bind(80), "n_history_actions"_.Bind(16),
"record"_.Bind(false), "async_reset"_.Bind(false),
"greedy_reward"_.Bind(true), "timeout"_.Bind(600));
"greedy_reward"_.Bind(true), "timeout"_.Bind(600),
"oppo_info"_.Bind(false));
}
template <typename Config>
static decltype(auto) StateSpec(const Config &conf) {
...
...
@@ -1539,6 +1540,7 @@ public:
Spec<uint8_t>({conf["max_options"_], n_action_feats})),
"obs:h_actions_"_.Bind(
Spec<uint8_t>({conf["n_history_actions"_], n_action_feats + 2})),
"obs:g_cards_"_.Bind(Spec<uint8_t>({conf["max_cards"_] * 2, 41})),
"info:num_options"_.Bind(Spec<int>({}, {0, conf["max_options"_] - 1})),
"info:to_play"_.Bind(Spec<int>({}, {0, 1})),
"info:is_selfplay"_.Bind(Spec<int>({}, {0, 1})),
...
...
@@ -2259,8 +2261,13 @@ public:
}
if (play_mode_ == kSelfPlay) {
// to_play_ is the previous player
reward = winner_ == player ? base_reward : -base_reward;
// if (spec_.config["oppo_info"_]) {
if (false) {
reward = winner_ == 0 ? base_reward : -base_reward;
} else {
// to_play_ is the previous player
reward = winner_ == player ? base_reward : -base_reward;
}
} else {
reward = winner_ == ai_player_ ? base_reward : -base_reward;
}
...
...
@@ -2331,6 +2338,9 @@ public:
}
auto [spec_infos, loc_n_cards] = _set_obs_cards(state["obs:cards_"_], to_play_);
if (spec_.config["oppo_info"_]) {
_set_obs_g_cards(state["obs:g_cards_"_]);
}
_set_obs_global(state["obs:global_"_], to_play_, loc_n_cards);
...
...
@@ -2438,8 +2448,30 @@ private:
return {spec_infos, loc_n_cards};
}
void _set_obs_g_cards(TArray<uint8_t> &f_cards) {
int offset = 0;
for (auto pi = 0; pi < 2; pi++) {
std::vector<uint8_t> configs = {
LOCATION_DECK, LOCATION_HAND, LOCATION_MZONE,
LOCATION_SZONE, LOCATION_GRAVE, LOCATION_REMOVED,
LOCATION_EXTRA,
};
for (auto location : configs) {
std::vector<Card> cards = get_cards_in_location(pi, location);
int n_cards = cards.size();
for (int i = 0; i < n_cards; ++i) {
const auto &c = cards[i];
CardId card_id = c_get_card_id(c.code_);
_set_obs_card_(f_cards, offset, c, false, card_id, false);
offset++;
}
}
}
}
void _set_obs_card_(TArray<uint8_t> &f_cards, int offset, const Card &c,
bool hide, CardId card_id = 0) {
bool hide, CardId card_id = 0
, bool global = false
) {
// check offset exceeds max_cards
uint8_t location = c.location_;
bool overlay = location & LOCATION_OVERLAY;
...
...
@@ -2462,7 +2494,7 @@ private:
seq = c.sequence_ + 1;
}
f_cards(offset, 3) = seq;
f_cards(offset, 4) =
(c.controler_ != to_play_) ? 1 : 0
;
f_cards(offset, 4) =
global ? c.controler_ : ((c.controler_ != to_play_) ? 1 : 0)
;
if (overlay) {
f_cards(offset, 5) = position_to_id(POS_FACEUP);
f_cards(offset, 6) = 1;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment