Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Y
ygo-agent
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Biluo Shen
ygo-agent
Commits
722dd65a
Commit
722dd65a
authored
Feb 28, 2024
by
biluo.shen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add nccl timeout of 30min
parent
385bd1cb
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
700 additions
and
30 deletions
+700
-30
scripts/eval.py
scripts/eval.py
+3
-3
scripts/ppo_sp.py
scripts/ppo_sp.py
+639
-0
ygoai/rl/agent.py
ygoai/rl/agent.py
+1
-1
ygoai/rl/dist.py
ygoai/rl/dist.py
+11
-1
ygoenv/ygoenv/ygopro/ygopro.h
ygoenv/ygoenv/ygopro/ygopro.h
+46
-25
No files found.
scripts/eval.py
View file @
722dd65a
...
...
@@ -195,9 +195,9 @@ if __name__ == "__main__":
logits
,
values
,
_valid
=
agent
(
obs
)
probs
=
torch
.
softmax
(
logits
,
dim
=-
1
)
probs
=
probs
.
cpu
()
.
numpy
()
if
args
.
play
:
print
(
probs
[
probs
!=
0
]
.
tolist
()
)
print
(
values
)
if
args
.
verbose
:
print
(
[
f
"{p:.4f}"
for
p
in
probs
[
probs
!=
0
]
.
tolist
()]
)
print
(
f
"{values[0].item():.4f}"
)
actions
=
probs
.
argmax
(
axis
=
1
)
model_time
+=
time
.
time
()
-
_start
else
:
...
...
scripts/ppo_sp.py
0 → 100644
View file @
722dd65a
This diff is collapsed.
Click to expand it.
ygoai/rl/agent.py
View file @
722dd65a
...
...
@@ -374,7 +374,7 @@ class Actor(nn.Module):
self
.
use_transformer
=
use_transformer
if
use_transformer
:
self
.
transformer
=
nn
.
TransformerEncoderLayer
(
c
,
4
,
c
*
4
,
dropout
=
0.0
,
batch_first
=
True
,
norm_first
=
True
,
bias
=
Fals
e
)
c
,
4
,
c
*
4
,
dropout
=
0.0
,
batch_first
=
True
,
norm_first
=
True
,
bias
=
Tru
e
)
self
.
head
=
nn
.
Sequential
(
nn
.
Linear
(
c
,
c
//
4
),
nn
.
ReLU
(),
...
...
ygoai/rl/dist.py
View file @
722dd65a
import
os
import
sys
import
datetime
import
torch
import
torch.distributed
as
dist
import
torch.multiprocessing
as
mp
...
...
@@ -25,7 +27,9 @@ def reduce_gradidents(params, world_size):
def
setup
(
backend
,
rank
,
world_size
,
port
):
os
.
environ
[
'MASTER_ADDR'
]
=
'127.0.0.1'
os
.
environ
[
'MASTER_PORT'
]
=
str
(
port
)
dist
.
init_process_group
(
backend
,
rank
=
rank
,
world_size
=
world_size
)
dist
.
init_process_group
(
backend
,
rank
=
rank
,
world_size
=
world_size
,
timeout
=
datetime
.
timedelta
(
seconds
=
60
*
30
))
# manual init nccl
x
=
torch
.
rand
(
4
,
device
=
f
'cuda:{rank}'
)
...
...
@@ -49,3 +53,9 @@ def mp_start(run):
for
i
in
range
(
world_size
):
children
[
i
]
.
join
()
def
fprint
(
msg
):
sys
.
stdout
.
flush
()
sys
.
stdout
.
write
(
msg
+
os
.
linesep
)
sys
.
stdout
.
flush
()
ygoenv/ygoenv/ygopro/ygopro.h
View file @
722dd65a
...
...
@@ -81,8 +81,8 @@ combinations_with_weight(const std::vector<int> &weights, int r) {
return
results
;
}
inline
bool
sum_to2
(
const
std
::
vector
<
std
::
vector
<
uint32_
t
>>
&
w
,
const
std
::
vector
<
int
>
ind
,
int
i
,
uint32_
t
r
)
{
inline
bool
sum_to2
(
const
std
::
vector
<
std
::
vector
<
in
t
>>
&
w
,
const
std
::
vector
<
int
>
ind
,
int
i
,
in
t
r
)
{
if
(
r
<=
0
)
{
return
false
;
}
...
...
@@ -103,14 +103,14 @@ inline bool sum_to2(const std::vector<std::vector<uint32_t>> &w,
}
}
inline
bool
sum_to2
(
const
std
::
vector
<
std
::
vector
<
uint32_
t
>>
&
w
,
const
std
::
vector
<
int
>
ind
,
uint32_
t
r
)
{
inline
bool
sum_to2
(
const
std
::
vector
<
std
::
vector
<
in
t
>>
&
w
,
const
std
::
vector
<
int
>
ind
,
in
t
r
)
{
return
sum_to2
(
w
,
ind
,
0
,
r
);
}
inline
std
::
vector
<
std
::
vector
<
int
>>
combinations_with_weight2
(
const
std
::
vector
<
std
::
vector
<
uint32_
t
>>
&
weights
,
uint32_
t
r
)
{
combinations_with_weight2
(
const
std
::
vector
<
std
::
vector
<
in
t
>>
&
weights
,
in
t
r
)
{
int
n
=
weights
.
size
();
std
::
vector
<
std
::
vector
<
int
>>
results
;
...
...
@@ -492,7 +492,7 @@ ankerl::unordered_dense::map<K, uint8_t>
make_ids
(
const
std
::
map
<
K
,
std
::
string
>
&
m
,
int
id_offset
=
0
,
int
m_offset
=
0
)
{
ankerl
::
unordered_dense
::
map
<
K
,
uint8_t
>
m2
;
auto
i
=
0
;
int
i
=
0
;
for
(
const
auto
&
[
k
,
v
]
:
m
)
{
if
(
i
<
m_offset
)
{
i
++
;
...
...
@@ -549,6 +549,14 @@ static const std::map<uint8_t, std::string> location2str = {
static
const
ankerl
::
unordered_dense
::
map
<
uint8_t
,
uint8_t
>
location2id
=
make_ids
(
location2str
,
1
);
inline
uint8_t
location_to_id
(
uint8_t
location
)
{
auto
it
=
location2id
.
find
(
location
);
if
(
it
!=
location2id
.
end
())
{
return
it
->
second
;
}
return
0
;
}
#define POS_NONE 0x0 // xyz materials (overlay)
static
const
std
::
map
<
uint8_t
,
std
::
string
>
position2str
=
{
...
...
@@ -1538,8 +1546,8 @@ public:
ReplayWriteInt32
(
code
);
}
ReplayWriteInt32
(
extra_deck
.
size
());
for
(
int
i
=
extra_deck
.
size
()
-
1
;
i
>=
0
;
--
i
)
{
ReplayWriteInt32
(
extra_deck
[
i
]);
for
(
int
j
=
int
(
extra_deck
.
size
())
-
1
;
j
>=
0
;
--
j
)
{
ReplayWriteInt32
(
extra_deck
[
j
]);
}
}
...
...
@@ -1813,7 +1821,21 @@ private:
const
std
::
string
&
spec
,
const
SpecIndex
&
spec2index
,
const
std
::
vector
<
CardId
>
&
card_ids
)
{
uint16_t
idx
=
spec2index
.
empty
()
?
card_ids
[
j
]
:
spec2index
.
at
(
spec
);
uint16_t
idx
;
if
(
spec2index
.
empty
())
{
idx
=
card_ids
[
j
];
}
else
{
auto
it
=
spec2index
.
find
(
spec
);
if
(
it
==
spec2index
.
end
())
{
// print spec2index
fmt
::
println
(
"Spec2index:"
);
for
(
auto
&
[
k
,
v
]
:
spec2index
)
{
fmt
::
println
(
"{}: {}"
,
k
,
v
);
}
throw
std
::
runtime_error
(
"Spec not found: "
+
spec
);
}
idx
=
it
->
second
;
}
feat
(
i
,
2
*
j
)
=
static_cast
<
uint8_t
>
(
idx
>>
8
);
feat
(
i
,
2
*
j
+
1
)
=
static_cast
<
uint8_t
>
(
idx
&
0xff
);
}
...
...
@@ -1877,7 +1899,7 @@ private:
auto
act
=
option
[
0
];
auto
spec
=
option
.
substr
(
2
);
uint8_t
offset
=
0
;
auto
n
=
spec
.
size
();
int
n
=
spec
.
size
();
if
(
act
==
'v'
&&
std
::
isalpha
(
spec
[
n
-
1
]))
{
offset
=
spec
[
n
-
1
]
-
'a'
;
spec
=
spec
.
substr
(
0
,
n
-
1
);
...
...
@@ -2225,7 +2247,7 @@ private:
}
// add extra deck in reverse order following ygopro
for
(
int
i
=
extra_deck
.
size
(
)
-
1
;
i
>=
0
;
--
i
)
{
for
(
int
i
=
int
(
extra_deck
.
size
()
)
-
1
;
i
>=
0
;
--
i
)
{
OCG_NewCard
(
pduel_
,
extra_deck
[
i
],
player
,
player
,
LOCATION_EXTRA
,
0
,
POS_FACEDOWN_DEFENSE
);
}
...
...
@@ -2697,7 +2719,6 @@ private:
auto
c
=
card
.
controler_
;
auto
cpl
=
players_
[
c
];
auto
opl
=
players_
[
1
-
c
];
auto
x
=
1u
-
c
;
cpl
->
notify
(
fmt
::
format
(
"You set {} ({}) in {} position."
,
card
.
name_
,
card
.
get_spec
(
c
),
card
.
get_position
()));
opl
->
notify
(
fmt
::
format
(
"{} sets {} in {} position."
,
cpl
->
nickname_
,
...
...
@@ -3612,7 +3633,7 @@ private:
std
::
string
option
=
""
;
for
(
int
j
=
0
;
j
<
min
;
++
j
)
{
option
+=
specs
[
comb
[
j
]];
if
(
j
<
min
-
1
)
{
if
(
j
<
int
(
min
)
-
1
)
{
option
+=
" "
;
}
}
...
...
@@ -3632,8 +3653,8 @@ private:
auto
mode
=
read_u8
();
auto
player
=
read_u8
();
auto
val
=
read_u32
();
auto
min
=
read_u8
();
auto
max
=
read_u8
();
int
min
=
read_u8
();
int
max
=
read_u8
();
auto
must_select_size
=
read_u8
();
if
(
mode
==
0
)
{
...
...
@@ -3655,7 +3676,7 @@ private:
must_select_params
.
reserve
(
must_select_size
);
must_select_specs
.
reserve
(
must_select_size
);
uint32_
t
expected
;
in
t
expected
;
if
(
verbose_
)
{
std
::
vector
<
Card
>
must_select
;
must_select
.
reserve
(
must_select_size
);
...
...
@@ -3669,7 +3690,7 @@ private:
must_select
.
push_back
(
card
);
must_select_params
.
push_back
(
param
);
}
expected
=
val
-
(
must_select_params
[
0
]
&
0xff
);
expected
=
int
(
val
)
-
(
must_select_params
[
0
]
&
0xff
);
auto
pl
=
players_
[
player
];
pl
->
notify
(
"Select cards with a total value of "
+
std
::
to_string
(
expected
)
+
", seperated by spaces."
);
...
...
@@ -3691,7 +3712,7 @@ private:
must_select_specs
.
push_back
(
spec
);
must_select_params
.
push_back
(
param
);
}
expected
=
val
-
(
must_select_params
[
0
]
&
0xff
);
expected
=
int
(
val
)
-
(
must_select_params
[
0
]
&
0xff
);
}
uint8_t
select_size
=
read_u8
();
...
...
@@ -3731,11 +3752,11 @@ private:
}
}
std
::
vector
<
std
::
vector
<
uint32_
t
>>
card_levels
;
std
::
vector
<
std
::
vector
<
in
t
>>
card_levels
;
for
(
int
i
=
0
;
i
<
select_size
;
++
i
)
{
std
::
vector
<
uint32_
t
>
levels
;
uint32_
t
level1
=
select_params
[
i
]
&
0xff
;
uint32_
t
level2
=
(
select_params
[
i
]
>>
16
);
std
::
vector
<
in
t
>
levels
;
in
t
level1
=
select_params
[
i
]
&
0xff
;
in
t
level2
=
(
select_params
[
i
]
>>
16
);
if
(
level1
>
0
)
{
levels
.
push_back
(
level1
);
}
...
...
@@ -4240,7 +4261,7 @@ private:
};
}
else
if
(
msg_
==
MSG_ANNOUNCE_NUMBER
)
{
auto
player
=
read_u8
();
auto
count
=
read_u8
();
int
count
=
read_u8
();
std
::
vector
<
int
>
numbers
;
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
int
number
=
read_u32
();
...
...
@@ -4269,7 +4290,7 @@ private:
};
}
else
if
(
msg_
==
MSG_ANNOUNCE_ATTRIB
)
{
auto
player
=
read_u8
();
auto
count
=
read_u8
();
int
count
=
read_u8
();
auto
flag
=
read_u32
();
int
n_attrs
=
7
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment