Single Agent Games

The Griddly RLlibEnv wrapper allows any of the single-agent games to be trained with many of RLlib's single-agent algorithms.

register_env('my-single-agent-environment', RLlibEnv)
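
RLlib calls the registered creator with the env_config dictionary from the algorithm configuration, so the registration above is equivalent to supplying an explicit creator function:

register_env('my-single-agent-environment', lambda env_config: RLlibEnv(env_config))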

Full Example

The example below uses PPO to train on the “GridMan” environment.

The agent in the “GridMan” environment has a 7x7 partially observable ego-centric view.

By default the agent sees a Vector view of the environment. This observation is passed to the SimpleConvAgent model to produce the policy.
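
The actual SimpleConvAgent lives in the example repository linked at the bottom of this page. As a rough orientation, a minimal sketch of such a custom model might look like the following, assuming channels-first (C, W, H) Vector observations and RLlib's TorchModelV2 custom-model interface (the class name MinimalConvAgent is hypothetical):

from torch import nn

from ray.rllib.models.torch.torch_modelv2 import TorchModelV2


class MinimalConvAgent(TorchModelV2, nn.Module):
    # Hypothetical stand-in for the repository's SimpleConvAgent:
    # a small conv encoder feeding separate policy and value heads.

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        # Assumes channels-first (C, W, H) observations
        channels, width, height = obs_space.shape
        self._encoder = nn.Sequential(
            nn.Conv2d(channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten(),
        )
        flat_size = 32 * width * height
        self._policy_head = nn.Linear(flat_size, num_outputs)
        self._value_head = nn.Linear(flat_size, 1)
        self._features = None

    def forward(self, input_dict, state, seq_lens):
        self._features = self._encoder(input_dict["obs"].float())
        return self._policy_head(self._features), state

    def value_function(self):
        return self._value_head(self._features).squeeze(1)

A model like this is registered with ModelCatalog.register_custom_model, exactly as in the full example below.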

See also

To use a different game, or a specific level, change the yaml_file or set the level parameter in the env_config. Other options can be found here.
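
For example, to train on a fixed level of a different game (the YAML path below is illustrative; any game definition shipped with Griddly works):

env_config = {
    'yaml_file': 'Single-Player/GVGAI/sokoban.yaml',  # illustrative path to another game
    'level': 0,  # train on a single fixed level instead of random levels
}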

[Image: The GridMan environment as seen from the “Global Observer” view.]

import os
import sys

from griddly.util.rllib.callbacks import VideoCallbacks
from griddly.util.rllib.environment.core import RLlibEnv
from ray.air.callbacks.wandb import WandbLoggerCallback
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.models import ModelCatalog
from ray import tune
from ray.tune.registry import register_env

from rllib_single_agent_example.gap_agent import GAPAgent
from rllib_single_agent_example.simple_conv_agent import SimpleConvAgent

# This must be set so that RLlib can find the Griddly libraries when it starts new workers
sep = os.pathsep
os.environ["PYTHONPATH"] = sep.join(sys.path)

environment_name = "GridMan"
environment_yaml = "gridman/gridman.yaml"
model_name = "SimpleConvAgent"

# Register the environment with RLlib
register_env(environment_name, lambda config: RLlibEnv(config))

model_class = None
if model_name == "SimpleConvAgent":
    model_class = SimpleConvAgent
elif model_name == "GlobalAveragePoolingAgent":
    model_class = GAPAgent

# Register the model with RLlib
ModelCatalog.register_custom_model(model_name, model_class)

test_dir = f"./results/{environment_name}"
video_dir = "videos"

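# Configure PPO: 8 rollout workers, each running 16 vectorized copies of the environment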
config = (
    PPOConfig()
    .rollouts(num_rollout_workers=8, num_envs_per_worker=16, rollout_fragment_length=128)
    .callbacks(VideoCallbacks)
    .training(
        model={
            "custom_model": model_name
        },
        train_batch_size=16384,
        lr=1e-4,
        gamma=0.95,
        lambda_=0.9,
        use_gae=True,
        clip_param=0.4,
        grad_clip=None,
        entropy_coeff=0.1,
        vf_loss_coeff=0.5,
        sgd_minibatch_size=2048,
        num_sgd_iter=4,
    )
    .environment(
        env_config={
            # Record a video every 1000 steps
            'record_video_config': {
                'frequency': 1000,
                'directory': video_dir,

                # Will record a video of the global observations
                'include_global': True,

                # Set to True to also record videos from the agent's perspective
                'include_agents': False,
            },
            'random_level_on_reset': True,
            'yaml_file': environment_yaml,
            'global_observer_type': "GlobalSpriteObserver",
            'player_observer_type': "Vector",
            'max_steps': 2000,
        },
        env=environment_name,
        clip_actions=True,
    )
    .debugging(log_level="ERROR")
    .framework(framework="torch")
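    # Use a GPU if available; override the count with the RLLIB_NUM_GPUS environment variable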
    .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "1")))
)

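# Train for 10M environment steps, writing results and checkpoints under
# test_dir and streaming metrics to Weights & Biases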
result = tune.run(
    "PPO",
    name="PPO",
    stop={"timesteps_total": 10000000},
    local_dir=test_dir,
    config=config.to_dict(),
    callbacks=[
        WandbLoggerCallback(project="RLLib Gridman", group="griddlyai")
    ]
)
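
With this configuration, videos of the global observer are written to the videos directory during training, and results are stored under ./results/GridMan. Note that the WandbLoggerCallback requires a Weights & Biases account (run wandb login first); remove it from the callbacks list to train without experiment tracking.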

GitHub Repository

You can find a full working example here: https://github.com/GriddlyAI/rllib_single_agent_example