Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Y
ygo-agent
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Locked Files
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Security & Compliance
Security & Compliance
Dependency List
License Compliance
Packages
Packages
List
Container Registry
Analytics
Analytics
CI / CD
Code Review
Insights
Issues
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Biluo Shen
ygo-agent
Commits
892c7364
Commit
892c7364
authored
Apr 07, 2024
by
sbl1996@126.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactor PPO
parent
2bf8ce6a
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
181 additions
and
979 deletions
+181
-979
scripts/jax/ppo.py
scripts/jax/ppo.py
+77
-75
scripts/jax/ppo2.py
scripts/jax/ppo2.py
+0
-780
scripts/jax/ppo_lstm.py
scripts/jax/ppo_lstm.py
+85
-107
ygoai/rl/jax/__init__.py
ygoai/rl/jax/__init__.py
+19
-17
No files found.
scripts/jax/ppo.py
View file @
892c7364
This diff is collapsed.
Click to expand it.
scripts/jax/ppo2.py
deleted
100644 → 0
View file @
2bf8ce6a
This diff is collapsed.
Click to expand it.
scripts/jax/ppo_lstm.py
View file @
892c7364
This diff is collapsed.
Click to expand it.
ygoai/rl/jax/__init__.py
View file @
892c7364
...
...
@@ -101,21 +101,22 @@ def compute_gae_2p0s(
gamma
,
gae_lambda
,
):
def
body_fn
(
carry
,
inp
):
pred_values
,
next_values
,
lastgaelam
=
carry
next_done
,
curvalues
,
reward
,
switch
=
inp
nextnonterminal
=
1.0
-
next_done
boot_value
,
boot_done
,
next_value
,
lastgaelam
=
carry
next_done
,
cur_value
,
reward
,
switch
=
inp
next_values
=
jnp
.
where
(
switch
,
-
pred_values
,
next_values
)
next_done
=
jnp
.
where
(
switch
,
boot_done
,
next_done
)
next_value
=
jnp
.
where
(
switch
,
-
boot_value
,
next_value
)
lastgaelam
=
jnp
.
where
(
switch
,
0
,
lastgaelam
)
delta
=
reward
+
gamma
*
next_values
*
nextnonterminal
-
curvalues
lastgaelam
=
delta
+
gamma
*
gae_lambda
*
nextnonterminal
*
lastgaelam
return
(
pred_values
,
curvalues
,
lastgaelam
),
lastgaelam
gamma_
=
gamma
*
(
1.0
-
next_done
)
delta
=
reward
+
gamma_
*
next_value
-
cur_value
lastgaelam
=
delta
+
gae_lambda
*
gamma_
*
lastgaelam
return
(
boot_value
,
boot_done
,
cur_value
,
lastgaelam
),
lastgaelam
dones
=
jnp
.
concatenate
([
dones
,
next_done
[
None
,
:]],
axis
=
0
)
lastgaelam
=
jnp
.
zeros_like
(
next_value
)
carry
=
next_value
,
next_value
,
lastgaelam
carry
=
next_value
,
next_
done
,
next_
value
,
lastgaelam
_
,
advantages
=
jax
.
lax
.
scan
(
body_fn
,
carry
,
(
dones
[
1
:],
values
,
rewards
,
switch
),
reverse
=
True
...
...
@@ -130,28 +131,29 @@ def compute_gae_upgo_2p0s(
gamma
,
gae_lambda
,
):
def
body_fn
(
carry
,
inp
):
pred_value
,
next_value
,
next_q
,
last_return
,
lastgaelam
=
carry
next_done
,
curvalues
,
reward
,
switch
=
inp
gamma_
=
gamma
*
(
1.0
-
next_done
)
boot_value
,
boot_done
,
next_value
,
next_q
,
last_return
,
lastgaelam
=
carry
next_done
,
cur_value
,
reward
,
switch
=
inp
next_value
=
jnp
.
where
(
switch
,
-
pred_value
,
next_value
)
next_q
=
jnp
.
where
(
switch
,
-
pred_value
,
next_q
)
last_return
=
jnp
.
where
(
switch
,
-
pred_value
,
last_return
)
next_done
=
jnp
.
where
(
switch
,
boot_done
,
next_done
)
next_value
=
jnp
.
where
(
switch
,
-
boot_value
,
next_value
)
next_q
=
jnp
.
where
(
switch
,
-
boot_value
*
gamma
,
next_q
)
last_return
=
jnp
.
where
(
switch
,
-
boot_value
,
last_return
)
lastgaelam
=
jnp
.
where
(
switch
,
0
,
lastgaelam
)
gamma_
=
gamma
*
(
1.0
-
next_done
)
last_return
=
reward
+
gamma_
*
jnp
.
where
(
next_q
>=
next_value
,
last_return
,
next_value
)
next_q
=
reward
+
gamma_
*
next_value
delta
=
next_q
-
cur
values
delta
=
next_q
-
cur
_value
lastgaelam
=
delta
+
gae_lambda
*
gamma_
*
lastgaelam
carry
=
pred_value
,
next
_value
,
next_q
,
last_return
,
lastgaelam
carry
=
boot_value
,
boot_done
,
cur
_value
,
next_q
,
last_return
,
lastgaelam
return
carry
,
(
lastgaelam
,
last_return
)
dones
=
jnp
.
concatenate
([
dones
,
next_done
[
None
,
:]],
axis
=
0
)
lastgaelam
=
jnp
.
zeros_like
(
next_value
)
carry
=
next_value
,
next_value
,
next_value
,
next_value
,
lastgaelam
carry
=
next_value
,
next_
done
,
next_
value
,
next_value
,
next_value
,
lastgaelam
_
,
(
advantages
,
returns
)
=
jax
.
lax
.
scan
(
body_fn
,
carry
,
(
dones
[
1
:],
values
,
rewards
,
switch
),
reverse
=
True
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment