Single Agent Games
The Griddly RLlibEnv wrapper allows any of the single-agent games to be trained with many of the single-agent RLlib algorithms.
# Register the Griddly wrapper class under a string name; that name is what the trainer config's 'env' key refers to.
register_env('my-single-agent-environment', RLlibEnv)
Full Example
The example below uses IMPALA to train on the Partially Observable Clusters Environment.
The agent in the Partially Observable Clusters environment has a 5x5 partially observable ego-centric view.
By default the agent sees a VECTOR view of the environment. This view is passed to a Global Average Pooling Agent to produce the policy.
See also
To use a different game or a specific level, change the yaml_file or set a level parameter in the env_config. Other options can be found here.
The Clusters environment as seen from the “Global Observer” view.
import os
import sys
import ray
from ray import tune
from ray.rllib.agents.impala import ImpalaTrainer
from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env
from griddly import gd
from griddly.util.rllib.torch import GAPAgent
from griddly.util.rllib.wrappers.core import RLlibEnv
if __name__ == '__main__':
    # Example entry point: train IMPALA on the partially observable Clusters
    # game through the Griddly RLlibEnv wrapper.

    # Start Ray, declaring one GPU for this run.
    ray.init(num_gpus=1)

    # The wrapper is registered under this name, and the same string is passed
    # as 'env' in the trainer config below.
    env_name = "ray-griddly-env"
    register_env(env_name, RLlibEnv)

    # Make GAPAgent selectable through 'custom_model': 'GAP'.
    ModelCatalog.register_custom_model("GAP", GAPAgent)

    # Training budget in timesteps; also the final point of both schedules and
    # the stop criterion.
    max_training_steps = 100000000

    config = {
        'framework': 'torch',
        'num_workers': 8,
        'num_envs_per_worker': 4,

        'model': {
            'custom_model': 'GAP',
            'custom_model_config': {}
        },

        'env': env_name,
        # Options forwarded to the registered environment.
        'env_config': {
            # NOTE(review): unit of 'frequency' (steps vs. episodes) is not
            # visible here -- confirm against the wrapper.
            'record_video_config': {
                'frequency': 100000
            },
            'random_level_on_reset': True,
            'yaml_file': 'Single-Player/GVGAI/clusters_partially_observable.yaml',
            'global_observer_type': gd.ObserverType.SPRITE_2D,
            'max_steps': 1000,
        },

        # Schedules are [timestep, value] pairs: both start at their initial
        # value at step 0 and reach 0.0 at max_training_steps.
        'entropy_coeff_schedule': [
            [0, 0.01],
            [max_training_steps, 0.0]
        ],
        'lr_schedule': [
            [0, 0.0005],
            [max_training_steps, 0.0]
        ]
    }

    # Stop once the total number of sampled timesteps reaches the budget.
    stop = {
        "timesteps_total": max_training_steps,
    }

    result = tune.run(ImpalaTrainer, config=config, stop=stop)