Commit 15833b80 authored by biluo.shen

Add discard_hand

parent 2957a343
...@@ -51,7 +51,7 @@ class Args: ...@@ -51,7 +51,7 @@ class Args:
n_history_actions: int = 8 n_history_actions: int = 8
"""the number of history actions to use""" """the number of history actions to use"""
play_mode: str = "self" play_mode: str = "self"
"""the play mode, can be combination of 'self', 'bot', 'greedy', like 'self+bot'""" """the play mode, can be combination of 'self', 'bot', 'random', like 'self+bot'"""
num_layers: int = 2 num_layers: int = 2
"""the number of layers for the agent""" """the number of layers for the agent"""
...@@ -163,6 +163,7 @@ if __name__ == "__main__": ...@@ -163,6 +163,7 @@ if __name__ == "__main__":
optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5) optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5)
avg_win_rates = [] avg_win_rates = []
avg_ep_returns = []
elo = Elo() elo = Elo()
selfplay = "self" in args.play_mode selfplay = "self" in args.play_mode
...@@ -233,11 +234,12 @@ if __name__ == "__main__": ...@@ -233,11 +234,12 @@ if __name__ == "__main__":
avg_win_rates.append(1 - winner) avg_win_rates.append(1 - winner)
else: else:
# win rate of agent # win rate of agent
winner = 0 if episode_reward == 1 else 1 winner = 0 if episode_reward > 0 else 1
elo.update(winner) elo.update(winner)
writer.add_scalar("charts/elo_rating", elo.r0, global_step) writer.add_scalar("charts/elo_rating", elo.r0, global_step)
else: else:
winner = 0 if episode_reward == 1 else 1 avg_ep_returns.append(episode_reward)
winner = 0 if episode_reward > 0 else 1
avg_win_rates.append(1 - winner) avg_win_rates.append(1 - winner)
elo.update(winner) elo.update(winner)
writer.add_scalar("charts/elo_rating", elo.r0, global_step) writer.add_scalar("charts/elo_rating", elo.r0, global_step)
...@@ -245,7 +247,9 @@ if __name__ == "__main__": ...@@ -245,7 +247,9 @@ if __name__ == "__main__":
if len(avg_win_rates) > 100: if len(avg_win_rates) > 100:
writer.add_scalar("charts/avg_win_rate", np.mean(avg_win_rates), global_step) writer.add_scalar("charts/avg_win_rate", np.mean(avg_win_rates), global_step)
writer.add_scalar("charts/avg_ep_return", np.mean(avg_ep_returns), global_step)
avg_win_rates = [] avg_win_rates = []
avg_ep_returns = []
to_play = infos['to_play'] if selfplay else None to_play = infos['to_play'] if selfplay else None
obs = next_obs obs = next_obs
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment