Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Y
ygo-agent
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Biluo Shen
ygo-agent
Commits
af60d012
Commit
af60d012
authored
Feb 27, 2024
by
biluo.shen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add Actor and Critic
parent
32aa61bf
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
51 additions
and
7 deletions
+51
-7
scripts/ppo.py
scripts/ppo.py
+2
-4
ygoai/rl/agent.py
ygoai/rl/agent.py
+46
-0
ygoai/rl/dist.py
ygoai/rl/dist.py
+3
-3
No files found.
scripts/ppo.py
View file @
af60d012
...
...
@@ -95,10 +95,8 @@ class Args:
backend
:
Literal
[
"gloo"
,
"nccl"
,
"mpi"
]
=
"nccl"
"""the backend for distributed training"""
compile
:
bool
=
True
"""whether to use torch.compile to compile the model and functions"""
compile_mode
:
Optional
[
str
]
=
None
"""the mode to use for torch.compile"""
compile
:
Optional
[
str
]
=
None
"""Compile mode of torch.compile, None for no compilation"""
torch_threads
:
Optional
[
int
]
=
None
"""the number of threads to use for torch, defaults to ($OMP_NUM_THREADS or 2) * world_size"""
env_threads
:
Optional
[
int
]
=
None
...
...
ygoai/rl/agent.py
View file @
af60d012
...
...
@@ -24,6 +24,7 @@ class Encoder(nn.Module):
def
__init__
(
self
,
channels
=
128
,
num_card_layers
=
2
,
num_action_layers
=
2
,
num_history_action_layers
=
2
,
embedding_shape
=
None
,
bias
=
False
,
affine
=
True
):
super
(
Encoder
,
self
)
.
__init__
()
self
.
channels
=
channels
self
.
num_history_action_layers
=
num_history_action_layers
c
=
channels
...
...
@@ -319,6 +320,51 @@ class Encoder(nn.Module):
return
f_actions
,
f_state
,
mask
,
valid
class PPOCritic(nn.Module):
    """Value head for PPO: maps a state embedding to a scalar value estimate.

    The input is expected to be ``2 * channels`` wide (see the first Linear
    layer); the output has a trailing dimension of 1.
    """

    def __init__(self, channels):
        super().__init__()
        hidden = channels // 2
        # Two-layer MLP: (2c) -> (c/2) -> 1.
        self.net = nn.Sequential(
            nn.Linear(channels * 2, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
        )

    def forward(self, f_state):
        """Return the value prediction for ``f_state`` (shape (..., 2c) -> (..., 1))."""
        return self.net(f_state)
class
PPOActor
(
nn
.
Module
):
def
__init__
(
self
,
channels
):
super
(
PPOActor
,
self
)
.
__init__
()
c
=
channels
self
.
trans
=
nn
.
TransformerEncoderLayer
(
c
,
4
,
c
*
4
,
dropout
=
0.0
,
batch_first
=
True
,
norm_first
=
True
,
bias
=
False
)
self
.
head
=
nn
.
Sequential
(
nn
.
Linear
(
c
,
c
//
4
),
nn
.
ReLU
(),
nn
.
Linear
(
c
//
4
,
1
),
)
def
forward
(
self
,
f_actions
,
mask
,
action
):
f_actions
=
self
.
trans
(
f_actions
,
src_key_padding_mask
=
mask
)
logits
=
self
.
head
(
f_actions
)[
...
,
0
]
logits
=
logits
.
float
()
logits
=
logits
.
masked_fill
(
mask
,
float
(
"-inf"
))
probs
=
Categorical
(
logits
=
logits
)
return
probs
.
log_prob
(
action
),
probs
.
entropy
()
def
predict
(
self
,
f_actions
,
mask
):
f_actions
=
self
.
trans
(
f_actions
,
src_key_padding_mask
=
mask
)
logits
=
self
.
head
(
f_actions
)[
...
,
0
]
logits
=
logits
.
float
()
logits
=
logits
.
masked_fill
(
mask
,
float
(
"-inf"
))
return
logits
class
PPOAgent
(
nn
.
Module
):
def
__init__
(
self
,
channels
=
128
,
num_card_layers
=
2
,
num_action_layers
=
2
,
...
...
ygoai/rl/dist.py
View file @
af60d012
...
...
@@ -4,17 +4,17 @@ import torch.distributed as dist
import
torch.multiprocessing
as
mp
def
reduce_gradidents
(
model
,
world_size
):
def
reduce_gradidents
(
params
,
world_size
):
if
world_size
==
1
:
return
all_grads_list
=
[]
for
param
in
model
.
parameters
()
:
for
param
in
params
:
if
param
.
grad
is
not
None
:
all_grads_list
.
append
(
param
.
grad
.
view
(
-
1
))
all_grads
=
torch
.
cat
(
all_grads_list
)
dist
.
all_reduce
(
all_grads
,
op
=
dist
.
ReduceOp
.
SUM
)
offset
=
0
for
param
in
model
.
parameters
()
:
for
param
in
params
:
if
param
.
grad
is
not
None
:
param
.
grad
.
data
.
copy_
(
all_grads
[
offset
:
offset
+
param
.
numel
()]
.
view_as
(
param
.
grad
.
data
)
/
world_size
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment