Single Agent Games¶
The Griddly RLlibEnv wrapper allows any of the single-agent games to be trained with many of the single-agent RLlib algorithms.
register_env('my-single-agent-environment', RLlibEnv)
Full Example¶
The example below uses IMPALA to train on the Partially Observable Clusters Environment.
The agent in the Partially Observable Clusters environment has a 5x5 partially observable ego-centric view.
By default the agent sees a VECTOR view of the environment. This view is passed to a Global Average Pooling Agent to produce the policy.
See also
To use a different game, or a specific level, just change the yaml_file or set a level parameter in the env_config. Other options can be found here.
import os
import sys
import ray
from ray import tune
from ray.rllib.agents.impala import ImpalaTrainer
from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env
from griddly import gd
from griddly.util.rllib.torch import GAPAgent
from griddly.util.rllib.wrappers.core import RLlibEnv
if __name__ == '__main__':
    # Reserve one GPU for the IMPALA learner process.
    ray.init(num_gpus=1)

    env_name = "ray-griddly-env"

    # Register the Griddly RLlib wrapper as a named environment and the
    # Global Average Pooling network as a custom model.
    register_env(env_name, RLlibEnv)
    ModelCatalog.register_custom_model("GAP", GAPAgent)

    max_training_steps = 100000000

    # Griddly-specific options: the partially observable Clusters game,
    # a random level on every reset, and periodic video recording.
    env_config = {
        'record_video_config': {
            'frequency': 100000
        },
        'random_level_on_reset': True,
        'yaml_file': 'Single-Player/GVGAI/clusters_partially_observable.yaml',
        'global_observer_type': gd.ObserverType.SPRITE_2D,
        'max_steps': 1000,
    }

    # Both the entropy coefficient and the learning rate anneal linearly
    # to zero over the course of training.
    config = {
        'framework': 'torch',
        'num_workers': 8,
        'num_envs_per_worker': 4,
        'model': {
            'custom_model': 'GAP',
            'custom_model_config': {}
        },
        'env': env_name,
        'env_config': env_config,
        'entropy_coeff_schedule': [[0, 0.01], [max_training_steps, 0.0]],
        'lr_schedule': [[0, 0.0005], [max_training_steps, 0.0]],
    }

    # Stop the Tune trial once the step budget is exhausted.
    stop = {
        "timesteps_total": max_training_steps,
    }

    result = tune.run(ImpalaTrainer, config=config, stop=stop)