diff --git a/sim-gameworld-demo/.env.template b/sim-gameworld-demo/.env.template new file mode 100644 index 0000000..df37c73 --- /dev/null +++ b/sim-gameworld-demo/.env.template @@ -0,0 +1,25 @@ +# ── Inference API credentials ───────────────────────────────────── +# Your NemoClaw / NVIDIA inference API key. +# Required — install.sh will fail if this is not set. +INFERENCE_API_KEY= + +# ── Inference provider configuration ────────────────────────────── +# Defaults below target the NVIDIA build API with Claude Opus 4.6. +# Override any of these to use a different provider, model, or endpoint. +INFERENCE_PROVIDER_TYPE= +INFERENCE_PROVIDER_NAME= +INFERENCE_BASE_URL= +INFERENCE_MODEL= + +# ── ALFWorld data path (required) ───────────────────────────────── +# Absolute path to the ALFWorld dataset on the host machine. +# On Brev machines the data is pre-downloaded here: +ALFWORLD_DATA=/ephemeral/cache/alfworld +# If you downloaded the data yourself, set this to your download path: +# ALFWORLD_DATA=/home/youruser/alfworld_data + +# ── MCP server configuration (optional overrides) ───────────────── +# Uncomment and change only if you need a non-default port or path. +# MCP_ALFWORLD_HOST=0.0.0.0 +# MCP_ALFWORLD_PORT=9001 +# MCP_ALFWORLD_PATH=/mcp diff --git a/sim-gameworld-demo/alfworld_env_mcp_server_visual.py b/sim-gameworld-demo/alfworld_env_mcp_server_visual.py new file mode 100644 index 0000000..615d2a0 --- /dev/null +++ b/sim-gameworld-demo/alfworld_env_mcp_server_visual.py @@ -0,0 +1,471 @@ +#!/usr/bin/env python3 +""" +alfworld_env_mcp_server_visual.py +---------------------------------- +FastMCP server that exposes the ALFWorld THOR 3D visual environment as MCP tools. +At every step the agent receives both a first-person RGB frame and text feedback. + +The sandbox agent (which has its own LLM with tool-calling capability) decides +which tool to call and which action to take. No secondary LLM or action-picker +runs here — this server is pure environment I/O. 
+ +Tools +----- + reset_env() – Start / restart a THOR episode. + step_env(action) – Execute an action, save frame, log step. + get_admissible_commands() – Return currently valid action strings. + get_current_state() – Text + visual state snapshot. + get_current_frame_info() – Path / shape of the latest saved frame. + upload_frame_to_sandbox(sandbox, step) – Push a frame PNG to a sandbox via openshell. + get_game_log(last_n) – Return last N step blocks from game_log_visual.md. + search_game_log(pattern) – grep game_log_visual.md for a pattern. + +Run +--- + DISPLAY=:1 ALFWORLD_DATA=/ephemeral/cache/alfworld \\ + python alfworld_env_mcp_server_visual.py # listens on 0.0.0.0:9001/mcp + python alfworld_env_mcp_server_visual.py --port 9002 + +Default URL: http://0.0.0.0:9001/mcp + +Environment +----------- + ALFWORLD_DATA – path to downloaded ALFWorld data + MCP_ALFWORLD_HOST – bind host (default 0.0.0.0) + MCP_ALFWORLD_PORT – bind port (default 9001) + MCP_ALFWORLD_PATH – URL path (default /mcp) + +Prerequisites +------------- + * Xvfb running on DISPLAY=:1 (sudo apt-get install xvfb && Xvfb :1 -screen 0 1024x768x24 &) + * ALFWORLD_DATA env var pointing to downloaded data +""" + +from __future__ import annotations + +import argparse +import asyncio +import glob +import json +import os +import re +import subprocess +from datetime import datetime +from pathlib import Path + +import numpy as np +import yaml +from PIL import Image +from dotenv import load_dotenv +from colorama import Fore, init as colorama_init +from fastmcp import FastMCP + +load_dotenv() +colorama_init(autoreset=True) + +# ── Paths ───────────────────────────────────────────────────────────────────── +_SCRIPT_DIR = Path(__file__).resolve().parent +_DEFAULT_CONFIG = _SCRIPT_DIR / "configs" / "base_config.yaml" +_FRAMES_DIR = _SCRIPT_DIR / "visual_frames" +_LOG_FILE = _SCRIPT_DIR / "game_log_visual.md" + +# Destination path inside the sandbox for uploaded frames — change if needed +_SANDBOX_DEST = 
"/sandbox/.openclaw/workspace/skills/alfworld-game-viz/assets/" + + +# ── Frame helpers ───────────────────────────────────────────────────────────── + +def _save_frame(frame: np.ndarray, step: int) -> str: + _FRAMES_DIR.mkdir(parents=True, exist_ok=True) + path = _FRAMES_DIR / f"step_{step:04d}.png" + Image.fromarray(frame.astype(np.uint8)).save(path) + return str(path) + + +# ── Step logger ─────────────────────────────────────────────────────────────── +# Each block is grep/sed/awk friendly: +# ## STEP:0001 | ACTION:open fridge 1 | DONE:False | GC:0.00 +# OBS: You open the fridge 1. ... +# FRAME: visual_frames/step_0001.png + +def _init_log(task: str) -> None: + with open(_LOG_FILE, "w") as fh: + fh.write("# ALFWorld Visual Game Log\n") + fh.write(f"STARTED: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") + fh.write(f"TASK: {task}\n") + fh.write("ENV: AlfredThorEnv\n\n---\n\n") + fh.write("## STEP:0000 | ACTION:(initial) | DONE:False | GC:0.00\n") + + +def _log_step(step: int, action: str, obs: str, done: bool, + gc_sr: float, frame_path: str) -> None: + obs_single = obs.replace("\n", " ").strip() + with open(_LOG_FILE, "a") as fh: + fh.write(f"\n## STEP:{step:04d} | ACTION:{action} | DONE:{done} | GC:{gc_sr:.2f}\n") + fh.write(f"OBS: {obs_single}\n") + fh.write(f"FRAME: {frame_path}\n") + + +def _clear_frames() -> int: + """Delete all step_*.png files from the visual_frames folder. 
Returns count removed.""" + removed = 0 + for f in glob.glob(str(_FRAMES_DIR / "step_*.png")): + os.remove(f) + removed += 1 + if removed: + print(f"[alfworld_mcp] Cleared {removed} frame(s) from {_FRAMES_DIR}/") + return removed + + +# ── Global environment state ────────────────────────────────────────────────── + +_env = None +_current_obs: str = "" +_current_done: bool = False +_current_gc_sr: float = 0.0 +_admissible_commands: list = [] +_task_desc: str = "" +_step_count: int = 0 +_current_frame_path: str = "" + + +def _init_env() -> None: + global _env + if _env is not None: + return + from alfworld.agents.environment import get_environment + + if not os.path.isfile(_DEFAULT_CONFIG): + raise FileNotFoundError(f"ALFWorld config not found: {_DEFAULT_CONFIG}") + with open(_DEFAULT_CONFIG) as fh: + config = yaml.safe_load(fh) + + config["env"]["type"] = "AlfredThorEnv" + config["controller"]["type"] = "oracle" + config["env"]["thor"]["save_frames_to_disk"] = False + + raw = get_environment("AlfredThorEnv")(config, train_eval="eval_in_distribution") + _env = raw.init_env(batch_size=1) + _clear_frames() + print("[alfworld_mcp] AlfredThorEnv initialised.") + + +def _extract_task(obs: str) -> str: + for line in obs.splitlines(): + if line.strip().lower().startswith("your task is"): + return line.strip() + return "" + + +# ── FastMCP app ─────────────────────────────────────────────────────────────── +mcp = FastMCP("AlfWorldVisualEnvMCP") + + +@mcp.tool() +async def reset_env() -> str: + """ + Reset the ALFWorld THOR environment and start a new game episode. + Saves the initial frame and initialises the game log. 
+ + Returns JSON: + task (str) + observation (str) + admissible_commands (list[str]) + frame_path (str) – path to step_0000.png + step (int) – 0 + """ + global _current_obs, _current_done, _current_gc_sr + global _admissible_commands, _task_desc, _step_count, _current_frame_path + + _init_env() + _clear_frames() + obs, info = _env.reset() + frames = _env.get_frames() # (1, H, W, 3) uint8 RGB + + _current_obs = obs[0] + _current_done = False + _current_gc_sr = 0.0 + _admissible_commands = list(info["admissible_commands"][0]) + _task_desc = _extract_task(_current_obs) + _step_count = 0 + _current_frame_path = _save_frame(frames[0], step=0) + + _init_log(_task_desc) + + return json.dumps({ + "task": _task_desc, + "observation": _current_obs, + "admissible_commands": _admissible_commands, + "frame_path": _current_frame_path, + "step": _step_count, + }) + + +@mcp.tool() +async def step_env(action: str) -> str: + """ + Execute an action in the THOR environment. + Saves the resulting frame and appends a line to game_log_visual.md. + + Args: + action (str): One of the currently admissible action strings. + + Returns JSON: + observation (str) + goal_condition_success (float) – fraction of goal conditions met (0–1) + done (bool) + won (bool) + admissible_commands (list[str]) + frame_path (str) – path to newly saved frame PNG + step (int) + action_taken (str) + """ + global _current_obs, _current_done, _current_gc_sr + global _admissible_commands, _step_count, _current_frame_path + + if _env is None: + return json.dumps({"error": "Environment not initialised. Call reset_env first."}) + if _current_done: + return json.dumps({"error": "Episode already done. 
Call reset_env.", "done": True}) + + # AlfredThorEnv.step → (obs, None, dones, infos) + obs, _scores, dones, infos = _env.step([action]) + frames = _env.get_frames() + + _step_count += 1 + _current_obs = obs[0] + _current_done = bool(dones[0]) + won = bool(infos.get("won", [False])[0]) + _current_gc_sr = float(infos.get("goal_condition_success_rate", [0.0])[0]) + _admissible_commands = (list(infos["admissible_commands"][0]) + if not _current_done else []) + _current_frame_path = _save_frame(frames[0], step=_step_count) + + _log_step(_step_count, action, _current_obs, + _current_done, _current_gc_sr, _current_frame_path) + + return json.dumps({ + "observation": _current_obs, + "goal_condition_success": _current_gc_sr, + "done": _current_done, + "won": won, + "admissible_commands": _admissible_commands, + "frame_path": _current_frame_path, + "step": _step_count, + "action_taken": action, + }) + + +@mcp.tool() +async def get_admissible_commands() -> str: + """ + Return the list of currently valid action strings. + + Returns JSON: + admissible_commands (list[str]) + step (int) + """ + return json.dumps({ + "admissible_commands": _admissible_commands, + "step": _step_count, + }) + + +@mcp.tool() +async def get_current_state() -> str: + """ + Return a full snapshot of the current game state (text + frame). + + Returns JSON: + task (str) + observation (str) + goal_condition_success (float) + done (bool) + step (int) + admissible_commands (list[str]) + frame_path (str) + log_file (str) + """ + return json.dumps({ + "task": _task_desc, + "observation": _current_obs, + "goal_condition_success": _current_gc_sr, + "done": _current_done, + "step": _step_count, + "admissible_commands": _admissible_commands, + "frame_path": _current_frame_path, + "log_file": str(_LOG_FILE), + }) + + +@mcp.tool() +async def get_current_frame_info() -> str: + """ + Return metadata about the most recently saved visual frame. 
+ + Returns JSON: + frame_path (str) + step (int) + exists (bool) + size_bytes (int) + width (int) + height (int) + """ + path = Path(_current_frame_path) if _current_frame_path else None + exists = path is not None and path.exists() + w = h = size = 0 + if exists: + size = path.stat().st_size + with Image.open(path) as img: + w, h = img.size + return json.dumps({ + "frame_path": _current_frame_path, + "step": _step_count, + "exists": exists, + "size_bytes": size, + "width": w, + "height": h, + }) + + +@mcp.tool() +async def upload_frame_to_sandbox(sandbox_name: str, step: int | None = None) -> str: + """ + Upload a saved frame PNG to a sandbox via openshell. + + Uses the command: + openshell sandbox upload + + The destination on the sandbox is always: + /sandbox/.openclaw/workspace/skills/alfworld-game-viz/assets/ + + Args: + sandbox_name (str) – Name of the target sandbox (e.g. "my-sandbox"). + step (int) – Step number to upload (defaults to latest step). + + Returns JSON: + sandbox_name (str) + frame_path (str) + dest_path (str) + returncode (int) – 0 means success + stdout (str) + stderr (str) + """ + target_step = step if step is not None else _step_count + frame_path = str(_FRAMES_DIR / f"step_{target_step:04d}.png") + + if not Path(frame_path).exists(): + return json.dumps({ + "error": f"Frame not found: {frame_path}. Run reset_env / step_env first." + }) + + cmd = ["openshell", "sandbox", "upload", "--no-git-ignore", + sandbox_name, frame_path, _SANDBOX_DEST] + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + return json.dumps({ + "sandbox_name": sandbox_name, + "frame_path": frame_path, + "dest_path": _SANDBOX_DEST, + "returncode": result.returncode, + "stdout": result.stdout.strip(), + "stderr": result.stderr.strip(), + }) + except FileNotFoundError: + return json.dumps({ + "error": "'openshell' command not found. Is openshell installed and on PATH?" 
+ }) + except subprocess.TimeoutExpired: + return json.dumps({"error": "openshell upload timed out after 30s."}) + + +@mcp.tool() +async def get_game_log(last_n: int = 10) -> str: + """ + Return the last N step blocks from game_log_visual.md. + + Args: + last_n (int) – Number of recent steps to return (default 10). + + Returns JSON: + log_file (str) + steps_found (int) + content (str) – raw text of the last N step blocks + """ + if not _LOG_FILE.exists(): + return json.dumps({"error": f"Log file not found: {_LOG_FILE}"}) + + text = _LOG_FILE.read_text() + blocks = re.split(r"(?=^## STEP:)", text, flags=re.MULTILINE) + step_blocks = [b.strip() for b in blocks if b.startswith("## STEP:")] + recent = step_blocks[-last_n:] + + return json.dumps({ + "log_file": str(_LOG_FILE), + "steps_found": len(step_blocks), + "content": "\n\n".join(recent), + }) + + +@mcp.tool() +async def search_game_log(pattern: str) -> str: + """ + Search game_log_visual.md for lines matching a pattern (case-insensitive grep). + + Args: + pattern (str) – Plain text or regex pattern to search for. 
+ + Returns JSON: + pattern (str) + log_file (str) + match_count (int) + matches (list[str]) – matching lines with their line numbers + """ + if not _LOG_FILE.exists(): + return json.dumps({"error": f"Log file not found: {_LOG_FILE}"}) + + try: + result = subprocess.run( + ["grep", "-in", pattern, str(_LOG_FILE)], + capture_output=True, text=True, timeout=10 + ) + lines = [l for l in result.stdout.splitlines() if l] + except subprocess.TimeoutExpired: + lines = [] + + return json.dumps({ + "pattern": pattern, + "log_file": str(_LOG_FILE), + "match_count": len(lines), + "matches": lines, + }) + + +# ── Entry point ─────────────────────────────────────────────────────────────── + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="ALFWorld Visual Environment MCP Server", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("--host", default=os.environ.get("MCP_ALFWORLD_HOST", "0.0.0.0")) + parser.add_argument("--port", type=int, default=int(os.environ.get("MCP_ALFWORLD_PORT", "9001"))) + parser.add_argument("--path", default=os.environ.get("MCP_ALFWORLD_PATH", "/mcp")) + args = parser.parse_args() + + print( + Fore.GREEN + + f"[mcp-server] AlfWorldVisualEnvMCP → " + f"http://{args.host}:{args.port}{args.path}" + ) + print(Fore.CYAN + f"[mcp-server] Config : {_DEFAULT_CONFIG}") + print(Fore.CYAN + f"[mcp-server] Frames : {_FRAMES_DIR}") + print(Fore.YELLOW + "[mcp-server] Reachable from sandbox via host's LAN/bridge IP on that port.") + mcp.run( + transport="streamable-http", + host=args.host, + port=args.port, + path=args.path, + show_banner=False, + ) diff --git a/sim-gameworld-demo/configs/base_config.yaml b/sim-gameworld-demo/configs/base_config.yaml new file mode 100644 index 0000000..460c080 --- /dev/null +++ b/sim-gameworld-demo/configs/base_config.yaml @@ -0,0 +1,145 @@ +dataset: + data_path: '$ALFWORLD_DATA/json_2.1.1/train' + eval_id_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_seen' # null/None to 
disable + eval_ood_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_unseen' # null/None to disable + num_train_games: -1 # max training games (<=0 indicates full dataset) + num_eval_games: -1 # max evaluation games (<=0 indicates full dataset) + +logic: + domain: '$ALFWORLD_DATA/logic/alfred.pddl' # PDDL domain file that defines the world dynamics + grammar: '$ALFWORLD_DATA/logic/alfred.twl2' # Grammar file that defines the text feedbacks + +env: + type: 'AlfredTWEnv' # 'AlfredTWEnv' or 'AlfredThorEnv' or 'AlfredHybrid' + # regen_game_files: False # [Deprecated] Use script `alfworld-generate` instead. + domain_randomization: False # shuffle Textworld print order and object id nums + task_types: [1, 2, 3, 4, 5, 6] # task-type ids: 1 - Pick & Place, 2 - Examine in Light, 3 - Clean & Place, 4 - Heat & Place, 5 - Cool & Place, 6 - Pick Two & Place + expert_timeout_steps: 150 # max steps before timeout for expert to solve the task + expert_type: "handcoded" # 'handcoded' or 'planner'. Note: the planner is very slow for real-time use + goal_desc_human_anns_prob: 0.0 # prob of using human-annotated goal language instead of templated goals (1.0 indicates all human annotations from ALFRED) + + hybrid: + start_eps: 100000 # starting episode of hybrid training, tw-only training upto this point + thor_prob: 0.5 # prob of AlfredThorEnv during hybrid training + eval_mode: "tw" # 'tw' or 'thor' - env used for evaluation during hybrid training + + thor: + screen_width: 300 # width of THOR window + screen_height: 300 # height of THOR window + smooth_nav: False # smooth rotations, looks, and translations during navigation (very slow) + save_frames_to_disk: False # save frame PNGs to disk (useful for making videos) + save_frames_path: './videos/' # path to save frame PNGs + +controller: + type: 'oracle' # 'oracle' or 'oracle_astar' or 'mrcnn' or 'mrcnn_astar' (aka BUTLER) + debug: False + load_receps: True # load receptacle locations from precomputed dict (if available) + +mask_rcnn: + 
pretrained_model_path: '$ALFWORLD_DATA/detectors/mrcnn.pth' + +general: + random_seed: 42 + use_cuda: True # disable this when running on machine without cuda + visdom: False # plot training/eval curves, run with visdom server + task: 'alfred' + training_method: 'dagger' # 'dqn' or 'dagger' + save_path: './training/' # path to save pytorch models + observation_pool_capacity: 3 # k-size queue, 0 indicates no observation + hide_init_receptacles: False # remove initial observation containing navigable receptacles + + training: + batch_size: 10 + max_episode: 50000 + smoothing_eps: 0.1 + optimizer: + learning_rate: 0.001 + clip_grad_norm: 5 + + evaluate: + run_eval: True + batch_size: 10 + env: + type: "AlfredTWEnv" + + checkpoint: + report_frequency: 1000 # report every N episode + experiment_tag: 'test' # name of experiment + load_pretrained: False # during test, enable this so that the agent load your pretrained model + load_from_tag: 'not loading anything' # name of pre-trained model to load in save_path + + model: + encoder_layers: 1 + decoder_layers: 1 + encoder_conv_num: 5 + block_hidden_dim: 64 + n_heads: 1 + dropout: 0.1 + block_dropout: 0.1 + recurrent: True + +rl: + action_space: "admissible" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'beam_search_choice' or 'exhaustive' (not working) + max_target_length: 20 # max token length for seq2seq generation + beam_width: 10 # 1 means greedy + generate_top_k: 3 + + training: + max_nb_steps_per_episode: 50 # terminate after this many steps + learn_start_from_this_episode: 0 # delay updates until this epsiode + target_net_update_frequency: 500 # sync target net with online net per this many epochs + + replay: + accumulate_reward_from_final: True + count_reward_lambda: 0.0 # 0 to disable + novel_object_reward_lambda: 0.0 # 0 to disable + discount_gamma_game_reward: 0.9 + discount_gamma_count_reward: 0.5 + discount_gamma_novel_object_reward: 0.5 + replay_memory_capacity: 
500000 # adjust this depending on your RAM size + replay_memory_priority_fraction: 0.5 + update_per_k_game_steps: 5 + replay_batch_size: 64 + multi_step: 3 + replay_sample_history_length: 4 + replay_sample_update_from: 2 + + epsilon_greedy: + noisy_net: False # if this is true, then epsilon greedy is disabled + epsilon_anneal_episodes: 1000 # -1 if not annealing + epsilon_anneal_from: 0.3 + epsilon_anneal_to: 0.1 + +dagger: + action_space: "generation" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'exhaustive' (not working) + max_target_length: 20 # max token length for seq2seq generation + beam_width: 10 # 1 means greedy + generate_top_k: 5 + unstick_by_beam_search: False # use beam-search for failed actions, set True during evaluation + + training: + max_nb_steps_per_episode: 50 # terminate after this many steps + + fraction_assist: + fraction_assist_anneal_episodes: 50000 + fraction_assist_anneal_from: 1.0 + fraction_assist_anneal_to: 0.01 + + fraction_random: + fraction_random_anneal_episodes: 0 + fraction_random_anneal_from: 0.0 + fraction_random_anneal_to: 0.0 + + replay: + replay_memory_capacity: 500000 + update_per_k_game_steps: 5 + replay_batch_size: 64 + replay_sample_history_length: 4 + replay_sample_update_from: 2 + +vision_dagger: + model_type: "resnet" # 'resnet' (whole image features) or 'maskrcnn_whole' (whole image MaskRCNN feats) or 'maskrcnn' (top k MaskRCNN detection feats) or 'no_vision' (zero vision input) + resnet_fc_dim: 64 + maskrcnn_top_k_boxes: 10 # top k box features + use_exploration_frame_feats: False # append feats from initial exploration (memory intensive!) 
+ sequence_aggregation_method: "average" # 'sum' or 'average' or 'rnn' diff --git a/sim-gameworld-demo/game-alfworld-openclaw-guide.md b/sim-gameworld-demo/game-alfworld-openclaw-guide.md new file mode 100644 index 0000000..66d5d21 --- /dev/null +++ b/sim-gameworld-demo/game-alfworld-openclaw-guide.md @@ -0,0 +1,355 @@ +# Playing ALFWorld with an OpenClaw Agent via MCP (Visual THOR 3D) + +This guide walks you through connecting the [ALFWorld](https://github.com/alfworld/alfworld) 3D household task simulation to an OpenClaw agent running inside an OpenShell sandbox. By the end, your agent will be able to start game episodes, choose actions, display first-person RGB frames, and track goal progress — all through natural language. + +The connection uses **MCP (Model Context Protocol)**. A Python server runs on the host, exposes the AI2-THOR environment as tools over HTTP, and does no LLM reasoning of its own. The OpenClaw agent inside the sandbox calls MCP tools directly and uses its configured inference model for all action selection and reasoning. The sandbox talks to the host MCP server through an egress-approved network policy. + +## Prerequisites + +| Requirement | Details | +|-------------|---------| +| NemoClaw | `nemoclaw` and `openshell` CLIs must be installed. See [NemoClaw setup](https://github.com/NVIDIA/NemoClaw). | +| Inference API key | Required by `nemoclaw onboard` to configure the OpenClaw agent's inference provider. Set `INFERENCE_API_KEY` in `.env` before running `install.sh` (see below). | +| Python 3.10 | Required by `alfworld` / `ai2thor` — `install.sh` pins the venv to 3.10 automatically via `uv`. | +| `uv` | Installed automatically by `install.sh` if missing. | + +--- + +## One-Command Setup + +### 1. Configure `.env` + +Copy the template and fill in your values: + +```bash +cd nemoclaw-demos/sim-gameworld-demo +cp .env.template .env +``` + +Open `.env` and set your API key, inference configuration, and ALFWorld data path. 
The block below is **an example only** — replace the provider, base URL, and model with whichever combination suits your setup. See the full list of NemoClaw-compatible inference options at [https://docs.nvidia.com/nemoclaw/latest/inference/inference-options.html](https://docs.nvidia.com/nemoclaw/latest/inference/inference-options.html). + +```bash +# Inference API credentials +INFERENCE_API_KEY=nvapi-your-key + +# Inference provider configuration — example using NVIDIA Endpoints with Claude Opus 4.6. +# Replace these values with your chosen provider, base URL, and model. +INFERENCE_PROVIDER_TYPE=nvidia +INFERENCE_PROVIDER_NAME=nvidia +INFERENCE_BASE_URL=https://inference-api.nvidia.com/v1 +INFERENCE_MODEL=aws/anthropic/bedrock-claude-opus-4-6 + +# ALFWorld data path — on Brev machines this is pre-downloaded: +ALFWORLD_DATA=/ephemeral/cache/alfworld +# On other machines leave this as-is; install.sh will download the data automatically. +``` + +`install.sh` loads `.env` automatically — values in `.env` do not override variables already exported in your shell, so you can also override any of them inline: + +```bash +INFERENCE_MODEL=nvidia/llama-3.3-70b-instruct bash install.sh +``` + +`INFERENCE_API_KEY` is also cached to `~/.nemoclaw/credentials.json` after the first run, so future re-runs pick it up without needing `.env` in place. + +--- + +### 2. Run the installer + +```bash +cd nemoclaw-demos/sim-gameworld-demo +bash install.sh +``` + +The script will: +1. Clean up any stale MCP server processes and tmux sessions +2. Install system OpenGL/X11 libraries required by AI2-THOR (`xvfb`, `libgl1`, etc.) +3. Load `.env` and resolve `INFERENCE_API_KEY`, provider, base URL, model, and `ALFWORLD_DATA` +4. Validate `ALFWORLD_DATA` — if the directory is missing or incomplete, download the dataset automatically via `alfworld-download` +5. Pre-configure the openshell inference provider and model +6. 
Run `nemoclaw onboard` if no sandbox exists — provider and model are already set, so you only need to confirm the sandbox name +7. Install all Python dependencies on the host in a Python 3.10 venv (`fastmcp`, `colorama`, `alfworld[vis]`) +8. Start Xvfb on `:1` if not already running (AI2-THOR requires a virtual display) +9. Start the MCP server in a persistent `tmux` session with `DISPLAY=:1` and `ALFWORLD_DATA` in scope +10. Apply the sandbox network policy (`policy/sandbox_policy.yaml`) +11. Upload the `alfworld-game-viz` skill into the sandbox +12. Bootstrap the skill's Python venv inside the sandbox (`fastmcp`, `colorama`) +13. Restart the OpenClaw gateway so it discovers the new skill +14. Verify the installation (MCP server reachable, skill present, venv imports OK) + +You can also pass a sandbox name directly to skip the interactive prompt: + +```bash +bash install.sh +``` + +> **First-run note:** on first run AI2-THOR downloads its Unity binary (~390 MB). The server may take 60–90 seconds to respond after the tmux session starts. The installer prints a progress indicator and warns you if the timeout is exceeded — the server is usually still starting in the background. + +> **After a reboot:** Xvfb does not persist across reboots. Re-run `install.sh` to restart everything, or manually: +> ```bash +> Xvfb :1 -screen 0 1024x768x24 & +> bash install.sh +> ``` + +--- + +## Trying It Out + +Connect to the sandbox and launch the OpenClaw TUI: + +```bash +# From your host terminal +nemoclaw connect + +# Inside the sandbox +openclaw tui +``` + +Once connected, verify the skill is loaded by asking the agent directly: + +``` +openclaw tui - ws://127.0.0.1:18789 - agent main - session main + + do you have a skill to play the alfworld game? + + Yes, I have the alfworld-game-viz skill available. It gives me direct access to the + ALFWorld THOR 3D environment running on the host machine via MCP. 
+ + I can: + - Start a new game episode and receive the task description + - Read the current game state (observation text + visual frame) + - List the admissible actions at each step + - Execute actions and track goal condition progress + - Upload first-person RGB frames into this sandbox so you can see them + - Query and search the game log + + What would you like to do? I can start a fresh game, continue from where we left off, + or show you the current frame. + + connected | idle + agent main | session main (openclaw-tui) | inference/nvidia/bedrock-claude-opus-4-6 | tokens 12k/131k (9%) +``` + +Try these prompts: + +- "reset the game and let's play" +- "What task do I need to complete?" +- "What actions can I take right now?" +- "Take the next best action to progress toward the goal." +- "Play 3 more steps and report the goal condition score." +- "Show me the game history." + +--- + +## Automating Game Play with a Cron Job and Heartbeat + +Instead of driving the agent manually, ask OpenClaw to set up a cron job and a heartbeat for you in a single prompt inside the TUI. + +### Step 1 — Set up the cron job and heartbeat together + +``` +openclaw tui - ws://127.0.0.1:18789 - agent main - session main + + please set up a cron job for me that progresses the game every 30 seconds by calling + the alfworld-game-viz skill, remember to also set up a heartbeat for me to check in + on the game status quo as well as summarize into a markdown table. + + All set! Here's what I've configured: + + 1. Cron Job — Auto-Step every 30 seconds + - Job ID: 07677bdd-24a8-4f4c-8d64-d0c76eb98e5b + - Runs every 30s in an isolated session + - Each run reads the current game state, picks the best action from admissible + commands, executes it, and announces the result + - First run fires in ~30s + + 2. 
Heartbeat — Game Status Dashboard + - Updated HEARTBEAT.md to check game state + game log on every heartbeat + - Outputs a markdown table summarizing all recent steps: step number, action + taken, observation, goal progress, and done status + - Also shows the current task, available actions, and overall game status + + Current game state (step 0): + - Task: Put some handtowel on sinkbasin + - Location: Middle of a bathroom with bathtub, cabinets, sink, countertops, + handtowelholder, shelves, toilet, etc. + - Goal progress: 0% — game just started + + The cron job will start working toward the goal autonomously. The next heartbeat + will give you the full progress table. Want me to adjust the interval or anything else? + + connected | idle + agent main | session main (openclaw-tui) | inference/nvidia/nemotron-3-super-120b-a12b | tokens 37k/131k (28%) +``` + +### Step 2 — Refine the cron job stop conditions + +Ask the agent to add a step cap and auto-disable on task completion: + +``` +openclaw tui - ws://127.0.0.1:18789 - agent main - session main + + modify the cron job to stop/disable when the game goal/task is reached, therefore the + game is completed. or max 30 steps as upper cap + + Done. Here's what changed: + + Updated cron job (07677bdd): + - Switched to main session (systemEvent) — this fixes the delivery channel error + the isolated runs were hitting + - Stop conditions added: + 🎉 Game won — if done is true or goal_condition_success reaches 1.0 + → auto-disables the cron job + ⏱️ Step cap — if step count reaches 30 → auto-disables the cron job + - Still runs every 30 seconds, still picks the best action each turn + + The job will self-terminate when the task is completed or the 30-step limit is hit. + No infinite loops. 
+ + connected | idle + agent main | session main (openclaw-tui) | inference/nvidia/nemotron-3-super-120b-a12b | tokens 38k/131k (29%) +``` + +### Step 3 — Check game status via heartbeat + +At any point ask the agent for a full status report: + +``` +openclaw tui - ws://127.0.0.1:18789 - agent main - session main + + now check the game status for me + + 🎉 Game Complete! The task has been won in 14 steps. + + Task: Put some handtowel on sinkbasin + + | Step | Action | Observation | Goal | Done | + |-----:|-----------------------------------------|-----------------------------------------------|:----:|:----:| + | 0 | (initial) | Spawned in bathroom | 0% | ❌ | + | 1 | go to handtowelholder 1 | Found handtowel 1, tissuebox, spraybottle | 0% | ❌ | + | 2 | take handtowel 1 from handtowelholder 1 | Picked up handtowel 1 | 0% | ❌ | + | 3 | go to sinkbasin 1 | Arrived at sinkbasin — cloth, soapbar, faucet | 0% | ❌ | + | 4 | move handtowel 1 to sinkbasin 1 | Nothing happens (wrong verb) | 0% | ❌ | + | 5 | clean handtowel 1 with sinkbasin 1 | Cleaned the handtowel | 0% | ❌ | + | 6 | move handtowel 1 to sinkbasin 1 | Nothing happens | 0% | ❌ | + | 7 | inventory | Carrying: handtowel 1 | 0% | ❌ | + | 8 | examine sinkbasin 1 | Inspected sinkbasin contents | 0% | ❌ | + | 9 | move handtowel 1 to sinkbasin 1 | Nothing happens | 0% | ❌ | + | 10 | inventory | Carrying: handtowel 1 | 0% | ❌ | + | 11 | examine sinkbasin 1 | Inspected sinkbasin contents | 0% | ❌ | + | 12 | go to sink 1 | Arrived at sink 1 — faucet, cloth, towel | 0% | ❌ | + | 13 | go to sinkbasin 1 | Back at sinkbasin | 0% | ❌ | + | 14 | put handtowel 1 in/on sinkbasin 1 | Handtowel placed on sinkbasin! | 100% | ✅ | + + Summary: The agent found the handtowel on step 1, picked it up on step 2, but then + struggled with the correct "put" verb for several steps (trying "move" and "clean" + instead). It finally used the correct put handtowel 1 in/on sinkbasin 1 command on + step 14 to win. 
+ + I've also disabled the cron job since the game is complete. + Want me to reset the environment for a new game? + + connected | idle + agent main | session main (openclaw-tui) | inference/nvidia/nemotron-3-super-120b-a12b | tokens 39k/131k (30%) +``` + +### What each part does + +| Feature | How it works | +|---------|-------------| +| **Cron job** | Fires every 30 seconds. Calls `get_current_state`, selects the best action from `admissible_commands` using the agent's own LLM, calls `step_env`, and uploads the resulting frame. Auto-disables when `done` is `true` or step count reaches 30. | +| **Heartbeat** | Triggered on demand or on a schedule. Reads `get_game_log` and `get_current_state`, renders a markdown table of all steps, and reports overall game status. | + +--- + +## Available MCP Tools + +The OpenClaw agent's LLM decides which tool to call and which action to take. The MCP server exposes raw environment operations — it does no LLM reasoning of its own. + +| Tool | Arguments | What it does | +|------|-----------|-------------| +| `reset_env` | — | Start a new game episode; returns task, observation, admissible commands, and initial frame path | +| `step_env` | `action` | Execute one admissible action; returns new observation, goal condition score, done flag, and frame path | +| `get_admissible_commands` | — | List currently valid action strings and current step number | +| `get_current_state` | — | Full snapshot: task, observation, score, done, step, admissible commands, frame path, log file | +| `get_current_frame_info` | — | Metadata for the latest saved frame (path, dimensions, file size) | +| `upload_frame_to_sandbox` | `sandbox_name`, `step` | Push a frame PNG into the sandbox `assets/` folder via `openshell sandbox upload` | +| `get_game_log` | `last_n` | Return the last N step blocks from `game_log_visual.md` | +| `search_game_log` | `pattern` | Search the game log for a regex/text pattern | + +--- + +## How the Skill Client Works + +Inside the 
sandbox the agent invokes tools via:

```bash
SKILL_DIR=~/.openclaw/workspace/skills/alfworld-game-viz
$SKILL_DIR/venv/bin/python3 $SKILL_DIR/scripts/sandbox_client_vis.py [args]
```

Each call executes exactly one MCP tool and prints the JSON result to stdout. The agent's LLM reads the result and decides the next tool to call — there is no game loop or action-selection logic inside the client script.

Example calls the agent makes:

```bash
SKILL_DIR=~/.openclaw/workspace/skills/alfworld-game-viz
CLIENT="$SKILL_DIR/venv/bin/python3 $SKILL_DIR/scripts/sandbox_client_vis.py"

# Start a new game
$CLIENT reset_env

# Get the current state after resuming
$CLIENT get_current_state

# Execute a chosen action
$CLIENT step_env --action "open fridge 1"

# Upload the resulting frame so it can be displayed in chat
$CLIENT upload_frame_to_sandbox --sandbox-name my-assistant

# Review recent game history
$CLIENT get_game_log --last-n 5
```

---

## Troubleshooting

| Issue | Fix |
|-------|-----|
| `INFERENCE_API_KEY is not set` | Add `INFERENCE_API_KEY=your-key` to `.env`, or run `export INFERENCE_API_KEY=...` before `install.sh`. |
| `502 Bad Gateway` from sandbox | The MCP server isn't running. Check: `tmux ls` and `curl http://127.0.0.1:9001/mcp`. Restart with `bash install.sh <sandbox-name>`. |
| `ast.Str` error from server | The host venv is using Python 3.12+. Recreate with `rm -rf .venv && uv venv --python 3.10 && uv pip install "alfworld[vis]"`, then restart the tmux session. |
| `ALFWORLD_DATA not found` | Set `ALFWORLD_DATA` to a writable path in `.env` and re-run `install.sh` — it will download the data automatically. |
| Server slow / 90s wait on first run | AI2-THOR downloads its Unity binary (~390 MB) on first run. Check progress: `tmux attach -t alfworld-mcp`. |
| No Xvfb after reboot | Xvfb doesn't persist. Run `Xvfb :1 -screen 0 1024x768x24 &` then `bash install.sh <sandbox-name>`. |
| No frames in `assets/` | `upload_frame_to_sandbox` failed. 
Check that `openshell` is on PATH on the host and the sandbox name is correct. | +| `l7_decision=deny` in OpenShell logs | The sandbox policy wasn't applied or the connecting binary isn't listed. Re-run `openshell policy set` and check `policy/sandbox_policy.yaml`. | +| Agent doesn't know about ALFWorld | Confirm the skill is at `/sandbox/.openclaw/workspace/skills/alfworld-game-viz/SKILL.md` and reconnect. | +| `ModuleNotFoundError: No module named 'fastmcp'` | Run with the skill venv Python, not bare `python3`. See `/sandbox/.openclaw/workspace/skills/alfworld-game-viz/SKILL.md` Troubleshooting section. | +| `NVIDIA Endpoints endpoint validation failed` / `spawnSync curl ETIMEDOUT` during `nemoclaw onboard` | See below. | + +### nemoclaw onboard: validation timeout + +During `nemoclaw onboard`, step **[3/8] Configuring inference** makes a live inference call to validate the pre-configured model. Thinking/reasoning models can occasionally exceed the 30-second internal timeout, producing: + +``` +NVIDIA Endpoints endpoint validation failed. +Chat Completions API: curl failed (exit -110): spawnSync curl ETIMEDOUT +``` + +If this happens, type `retry` at the prompt. If the problem persists, set `INFERENCE_MODEL` in `.env` to a faster model such as `mistralai/mistral-small-3.1-24b-instruct-2503`. 
+ +### Restarting after a fresh reset + +```bash +# Clean up everything on the host +tmux kill-session -t alfworld-mcp +pkill -f alfworld_env_mcp_server_visual || true +pkill -f "Xvfb :1" || true +rm -rf .venv + +# Re-run +bash install.sh +``` diff --git a/sim-gameworld-demo/imgs/cron_job_in_action_openclaw.png b/sim-gameworld-demo/imgs/cron_job_in_action_openclaw.png new file mode 100644 index 0000000..c39f994 Binary files /dev/null and b/sim-gameworld-demo/imgs/cron_job_in_action_openclaw.png differ diff --git a/sim-gameworld-demo/imgs/heartbeat_on_demand_query_game_status.png b/sim-gameworld-demo/imgs/heartbeat_on_demand_query_game_status.png new file mode 100644 index 0000000..c9e87ca Binary files /dev/null and b/sim-gameworld-demo/imgs/heartbeat_on_demand_query_game_status.png differ diff --git a/sim-gameworld-demo/imgs/setup_cron_job.png b/sim-gameworld-demo/imgs/setup_cron_job.png new file mode 100644 index 0000000..938d597 Binary files /dev/null and b/sim-gameworld-demo/imgs/setup_cron_job.png differ diff --git a/sim-gameworld-demo/install.sh b/sim-gameworld-demo/install.sh new file mode 100644 index 0000000..5cf758c --- /dev/null +++ b/sim-gameworld-demo/install.sh @@ -0,0 +1,465 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +CREDS_PATH="$HOME/.nemoclaw/credentials.json" +MCP_PORT=9001 +TMUX_SESSION="alfworld-mcp" +MCP_LOG_FILE="/tmp/alfworld-mcp.log" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +NC='\033[0m' + +info() { echo -e "${CYAN} ▸ $1${NC}"; } +ok() { echo -e "${GREEN} ✓ $1${NC}"; } +warn() { echo -e "${YELLOW} ⚠ $1${NC}"; } +fail() { echo -e "${RED} ✗ $1${NC}"; exit 1; } + +echo "" +echo -e "${CYAN} ╔══════════════════════════════════════════════════════════╗${NC}" +echo -e "${CYAN} ║ ALFWorld Visual Game MCP Demo Installer for NemoClaw ║${NC}" +echo -e "${CYAN} ║ AI2-THOR 3D Environment via MCP + OpenClaw Skill ║${NC}" +echo -e "${CYAN} 
╚══════════════════════════════════════════════════════════╝${NC}" +echo "" + +# ── Step 0: Clean up stale environment ─────────────────────────── +info "Cleaning up stale environment..." +if tmux has-session -t "$TMUX_SESSION" 2>/dev/null; then + tmux kill-session -t "$TMUX_SESSION" + ok "Killed existing tmux session '$TMUX_SESSION'" +fi +STALE=$(pgrep -f "alfworld_env_mcp_server_visual" 2>/dev/null || true) +if [ -n "$STALE" ]; then + kill $STALE 2>/dev/null || true + ok "Killed stale MCP server process(es)" +fi +ok "Environment clean" +echo "" + +# ── Step 1: Check prerequisites ────────────────────────────────── +info "Checking prerequisites..." +command -v openshell >/dev/null 2>&1 || fail "openshell CLI not found. Is NemoClaw installed?" +command -v nemoclaw >/dev/null 2>&1 || fail "nemoclaw CLI not found. Is NemoClaw installed?" +command -v python3 >/dev/null 2>&1 || fail "python3 not found." + +if ! command -v uv >/dev/null 2>&1; then + warn "uv not found — installing..." + curl -LsSf https://astral.sh/uv/install.sh | sh + export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH" + command -v uv >/dev/null 2>&1 || fail "uv install failed. Add ~/.local/bin to PATH and retry." + ok "uv installed" +fi + +# AI2-THOR requires OpenGL and X11 libraries for headless rendering. +# Check for the key library and install the full set only if absent. +if ! ldconfig -p 2>/dev/null | grep -q "libGL.so"; then + warn "OpenGL/X11 libraries not found — installing system dependencies..." + sudo apt-get update -qq + sudo apt-get install -y -qq \ + xvfb \ + libgl1-mesa-glx \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender1 \ + libgl1 \ + || fail "apt-get install failed. Run with sudo or install the packages manually." + ok "System OpenGL/X11 libraries installed" +else + ok "OpenGL/X11 libraries already present" +fi + +command -v Xvfb >/dev/null 2>&1 || fail "Xvfb not found after apt install. 
Try: sudo apt-get install -y xvfb" +ok "Prerequisites OK" +echo "" + +# ── Step 2: Load .env and resolve inference config ─────────────── +info "Loading configuration..." + +# Load .env if present (values do NOT override existing env vars). +# The user fills in .env with real keys before running this script. +if [ -f "$SCRIPT_DIR/.env" ]; then + while IFS= read -r line || [ -n "$line" ]; do + [[ "$line" =~ ^[[:space:]]*# ]] && continue + [[ -z "${line// }" ]] && continue + key="${line%%=*}" + val="${line#*=}" + val="${val#\"}" ; val="${val%\"}" ; val="${val#\'}" ; val="${val%\'}" + [ -z "${!key+x}" ] && export "$key"="$val" + done < "$SCRIPT_DIR/.env" + ok "Loaded .env from $SCRIPT_DIR/.env" +else + warn ".env not found at $SCRIPT_DIR/.env" + warn "Copy .env.template to .env and fill in your keys, then re-run." + echo "" + fail "Missing .env — see README for setup instructions." +fi + +# Fall back to credentials.json for INFERENCE_API_KEY +if [ -z "${INFERENCE_API_KEY:-}" ] && [ -f "$CREDS_PATH" ]; then + INFERENCE_API_KEY=$(python3 -c " +import json +print(json.load(open('$CREDS_PATH')).get('INFERENCE_API_KEY','')) +" 2>/dev/null || true) + [ -n "${INFERENCE_API_KEY:-}" ] && ok "INFERENCE_API_KEY loaded from $CREDS_PATH" +fi + +[ -z "${INFERENCE_API_KEY:-}" ] && fail "INFERENCE_API_KEY is not set. 
Add it to $SCRIPT_DIR/.env" + +# Apply defaults for optional inference config vars +INFERENCE_PROVIDER_TYPE="${INFERENCE_PROVIDER_TYPE:-nvidia}" +INFERENCE_PROVIDER_NAME="${INFERENCE_PROVIDER_NAME:-nvidia}" +INFERENCE_BASE_URL="${INFERENCE_BASE_URL:-https://inference-api.nvidia.com/v1}" +INFERENCE_MODEL="${INFERENCE_MODEL:-aws/anthropic/bedrock-claude-opus-4-6}" + +# ALFWorld data path — required +ALFWORLD_DATA="${ALFWORLD_DATA:-/ephemeral/cache/alfworld}" +MCP_PORT="${MCP_ALFWORLD_PORT:-$MCP_PORT}" + +ok "INFERENCE_API_KEY : found" +ok "INFERENCE_PROVIDER : $INFERENCE_PROVIDER_NAME (type=$INFERENCE_PROVIDER_TYPE)" +ok "INFERENCE_BASE_URL : $INFERENCE_BASE_URL" +ok "INFERENCE_MODEL : $INFERENCE_MODEL" +ok "ALFWORLD_DATA : $ALFWORLD_DATA" +ok "MCP_PORT : $MCP_PORT" + +# Validate that ALFWORLD_DATA exists and looks like a real dataset +NEED_ALFWORLD_DOWNLOAD=false +if [ ! -d "$ALFWORLD_DATA" ]; then + warn "ALFWORLD_DATA directory not found: $ALFWORLD_DATA" + warn "Data will be downloaded automatically after the Python venv is set up." + NEED_ALFWORLD_DOWNLOAD=true +elif [ ! -d "$ALFWORLD_DATA/json_2.1.1" ]; then + warn "Expected subdirectory 'json_2.1.1' not found under $ALFWORLD_DATA" + warn "Data appears incomplete — will re-download." + NEED_ALFWORLD_DOWNLOAD=true +else + ok "ALFWORLD_DATA validated: $ALFWORLD_DATA" +fi +echo "" + +# ── Step 3: Onboard if no sandbox exists ───────────────────────── +live_sandboxes() { + openshell sandbox list 2>/dev/null | grep -v "^No sandboxes" | grep -v "^NAME" | awk '{print $1}' | grep -v '^$' || true +} + +LIVE_COUNT=$(live_sandboxes | wc -l | tr -d ' ') + +if [ "${LIVE_COUNT:-0}" -eq 0 ]; then + echo -e " ${YELLOW}No sandbox found — running 'nemoclaw onboard'...${NC}" + echo "" + nemoclaw onboard + echo "" + ok "Onboarding complete" + echo "" + + info "Waiting for sandbox to become ready..." 
+ for i in $(seq 1 20); do + LIVE_COUNT=$(live_sandboxes | wc -l | tr -d ' ') + [ "${LIVE_COUNT:-0}" -gt 0 ] && break + sleep 1 + done + [ "${LIVE_COUNT:-0}" -eq 0 ] && fail "No sandbox appeared after onboarding. Run 'openshell sandbox list' to check." +fi + +# Enforce correct provider + inference model (idempotent — runs every time) +info "Ensuring inference provider '$INFERENCE_PROVIDER_NAME' (${INFERENCE_BASE_URL})..." +openshell provider create \ + --type "$INFERENCE_PROVIDER_TYPE" \ + --name "$INFERENCE_PROVIDER_NAME" \ + --credential INFERENCE_API_KEY \ + --config "NVIDIA_BASE_URL=$INFERENCE_BASE_URL" \ + 2>/dev/null \ + && ok "Provider '$INFERENCE_PROVIDER_NAME' created" \ + || ok "Provider '$INFERENCE_PROVIDER_NAME' already exists" + +info "Setting inference model to $INFERENCE_MODEL..." +openshell inference set \ + --provider "$INFERENCE_PROVIDER_NAME" \ + --model "$INFERENCE_MODEL" \ + && ok "Inference set: $INFERENCE_PROVIDER_NAME / $INFERENCE_MODEL" \ + || fail "Could not set inference model. Run manually: openshell inference set --provider $INFERENCE_PROVIDER_NAME --model $INFERENCE_MODEL" +echo "" + +# ── Step 4: Resolve sandbox name ───────────────────────────────── +if [ -n "${1:-}" ]; then + SANDBOX_NAME="$1" +else + LIVE_NAMES=$(live_sandboxes) + LIVE_COUNT=$(echo "$LIVE_NAMES" | grep -c . || true) + + if [ "${LIVE_COUNT:-0}" -eq 1 ]; then + SANDBOX_NAME=$(echo "$LIVE_NAMES" | head -1) + else + JSON_DEFAULT=$(python3 -c " +import json +try: + d = json.load(open('$HOME/.nemoclaw/sandboxes.json')) + print(d.get('defaultSandbox') or '') +except: pass +" 2>/dev/null || true) + + if [ -n "${JSON_DEFAULT:-}" ] && echo "$LIVE_NAMES" | grep -qx "$JSON_DEFAULT"; then + SANDBOX_NAME="$JSON_DEFAULT" + else + echo "" + echo -e " ${YELLOW}Multiple sandboxes found:${NC}" + echo "$LIVE_NAMES" | while read -r n; do echo " - $n"; done + echo "" + echo -n " Which sandbox should be used? 
" + read -r SANDBOX_NAME + fi + fi +fi + +[ -z "${SANDBOX_NAME:-}" ] && fail "Could not determine sandbox name. Usage: ./install.sh " + +if ! live_sandboxes | grep -qx "$SANDBOX_NAME"; then + echo "" + echo -e " ${RED} ✗ Sandbox '$SANDBOX_NAME' not found. Live sandboxes:${NC}" + live_sandboxes | while read -r n; do echo " - $n"; done + echo "" + fail "Re-run with: bash install.sh " +fi + +info "Target sandbox: $SANDBOX_NAME" +echo "" + +# Persist inference config in credentials.json (mode 600) +mkdir -p "$(dirname "$CREDS_PATH")" +python3 -c " +import json, os +path = '$CREDS_PATH' +try: d = json.load(open(path)) +except: d = {} +d['INFERENCE_API_KEY'] = '$INFERENCE_API_KEY' +d['INFERENCE_PROVIDER_TYPE'] = '$INFERENCE_PROVIDER_TYPE' +d['INFERENCE_PROVIDER_NAME'] = '$INFERENCE_PROVIDER_NAME' +d['INFERENCE_BASE_URL'] = '$INFERENCE_BASE_URL' +d['INFERENCE_MODEL'] = '$INFERENCE_MODEL' +with open(path, 'w') as f: json.dump(d, f, indent=2) +os.chmod(path, 0o600) +" 2>/dev/null || true + +# ── Step 5: Install host Python dependencies ───────────────────── +info "Installing Python dependencies for MCP server (host)..." +info "This includes alfworld[vis] (ai2thor, cv2, torch) — may take several minutes..." 
+cd "$SCRIPT_DIR" +uv venv --python 3.10 --quiet +# Force install into the venv (not ~/.local) to avoid import errors at runtime +# Python 3.10 is required — alfworld/ai2thor use ast.Str which was removed in 3.12 +uv pip install --quiet --upgrade \ + fastmcp \ + colorama \ + python-dotenv \ + "alfworld[vis]" +ok "Dependencies installed in .venv" + +# Verify the critical visual imports work inside the venv +IMPORT_CHECK=$(.venv/bin/python3 -c " +from ai2thor.controller import Controller +import cv2 +from alfworld.agents.environment import get_environment +print('ok') +" 2>/dev/null || true) +if [ "$IMPORT_CHECK" = "ok" ]; then + ok "Visual import check passed (ai2thor, cv2, alfworld)" +else + warn "Visual import check failed — the venv may need a manual fix:" + warn " rm -rf $SCRIPT_DIR/.venv" + warn " uv venv --python 3.10 && uv pip install --no-user 'alfworld[vis]'" + warn "Continuing — the server may still work if the packages are importable." +fi +echo "" + +# ── Step 5b: Download ALFWorld data if needed ──────────────────── +if [ "${NEED_ALFWORLD_DOWNLOAD:-false}" = "true" ]; then + info "Downloading ALFWorld game data to $ALFWORLD_DATA ..." + info "This is a one-time download (~1-2 GB). Please be patient." + # Try to create the directory; fall back to ~/alfworld_data if permission denied + if ! mkdir -p "$ALFWORLD_DATA" 2>/dev/null; then + warn "Cannot create $ALFWORLD_DATA (permission denied)." + ALFWORLD_DATA="$HOME/alfworld_data" + warn "Falling back to $ALFWORLD_DATA" + mkdir -p "$ALFWORLD_DATA" + warn "Update ALFWORLD_DATA=$ALFWORLD_DATA in your .env to make this permanent." + fi + # alfworld-download reads ALFWORLD_DATA from the environment + export ALFWORLD_DATA + if .venv/bin/alfworld-download; then + ok "ALFWorld data downloaded to $ALFWORLD_DATA" + # Confirm the key subdirectory appeared + if [ -d "$ALFWORLD_DATA/json_2.1.1" ]; then + ok "Dataset validated (json_2.1.1 present)" + else + warn "Download finished but 'json_2.1.1' still not found." 
+ warn "Check $ALFWORLD_DATA manually before starting the server." + fi + else + echo "" + warn "alfworld-download exited with an error." + warn "You can retry manually:" + warn " export ALFWORLD_DATA=$ALFWORLD_DATA" + warn " $SCRIPT_DIR/.venv/bin/alfworld-download" + warn "Continuing — set a valid ALFWORLD_DATA in .env and re-run install.sh." + fi + echo "" +fi + +# ── Step 6: Start Xvfb virtual display ─────────────────────────── +info "Checking virtual display (Xvfb :1)..." +if pgrep -f "Xvfb :1" >/dev/null 2>&1; then + ok "Xvfb :1 already running" +else + info "Starting Xvfb :1 -screen 0 1024x768x24 ..." + Xvfb :1 -screen 0 1024x768x24 & + sleep 2 + if pgrep -f "Xvfb :1" >/dev/null 2>&1; then + ok "Xvfb :1 started" + else + fail "Xvfb failed to start. Try manually: Xvfb :1 -screen 0 1024x768x24 &" + fi +fi +# Confirm the X11 socket exists +if [ -S /tmp/.X11-unix/X1 ]; then + ok "X11 socket /tmp/.X11-unix/X1 confirmed" +else + warn "X11 socket /tmp/.X11-unix/X1 not found — AI2-THOR may fail to render" +fi +export DISPLAY=:1 +echo "" + +# ── Step 7: Start MCP server in tmux ──────────────────────────── +info "Starting ALFWorld MCP server in tmux session '$TMUX_SESSION'..." +info "Note: on first run AI2-THOR downloads its Unity binary (~390 MB)." +info " The server will be slow to respond until the download completes." + +tmux new-session -d -s "$TMUX_SESSION" \ + "cd '$SCRIPT_DIR' && \ + export DISPLAY=:1 && \ + export ALFWORLD_DATA='$ALFWORLD_DATA' && \ + export MCP_ALFWORLD_PORT='$MCP_PORT' && \ + source .venv/bin/activate && \ + python alfworld_env_mcp_server_visual.py --port '$MCP_PORT' 2>&1 | tee '$MCP_LOG_FILE'" + +ok "tmux session '$TMUX_SESSION' launched" + +# Wait up to 90 s — first run needs to download the AI2-THOR binary (~390 MB) +info "Waiting for server to respond on port $MCP_PORT (up to 90 s)..." 
+SERVER_UP=false +for i in $(seq 1 90); do + sleep 1 + if curl -s --max-time 1 "http://127.0.0.1:${MCP_PORT}/mcp" >/dev/null 2>&1; then + SERVER_UP=true + ok "MCP server is up on port $MCP_PORT (after ${i}s)" + break + fi + # Print a dot every 10 s so the user can see progress + [ $((i % 10)) -eq 0 ] && echo -e "${CYAN} ... still waiting (${i}s)${NC}" +done + +if [ "$SERVER_UP" = false ]; then + if tmux has-session -t "$TMUX_SESSION" 2>/dev/null; then + warn "Server not responding after 90s — but tmux session is alive." + warn "This is normal on first run while AI2-THOR binary downloads." + warn "Check progress: tmux attach -t $TMUX_SESSION (Ctrl-B D to detach)" + warn "Or watch log: tail -f $MCP_LOG_FILE" + else + fail "tmux session died immediately. Check: cat $MCP_LOG_FILE" + fi +fi +echo "" + +# ── Step 8: Apply sandbox network policy ──────────────────────── +info "Applying sandbox network policy..." +openshell policy set "$SANDBOX_NAME" \ + --policy "$SCRIPT_DIR/policy/sandbox_policy.yaml" \ + --wait +ok "Policy applied" +echo "" + +# ── Step 9: Upload alfworld-game-viz skill ─────────────────────── +info "Uploading alfworld-game-viz skill to sandbox..." +openshell sandbox upload "$SANDBOX_NAME" \ + "$SCRIPT_DIR/sandbox_alfword_viz_skills" \ + /sandbox/.openclaw/workspace/skills/alfworld-game-viz +ok "Skill uploaded to /sandbox/.openclaw/workspace/skills/alfworld-game-viz/" +echo "" + +# ── Step 9b: Bootstrap skill venv with required deps ──────────── +info "Setting up skill Python venv (fastmcp + colorama)..." +SKILL_VENV=/sandbox/.openclaw/workspace/skills/alfworld-game-viz/venv +openshell sandbox exec -n "$SANDBOX_NAME" -- \ + python3 -m venv "$SKILL_VENV" \ + || fail "Failed to create skill venv at $SKILL_VENV inside sandbox '$SANDBOX_NAME'." +openshell sandbox exec -n "$SANDBOX_NAME" -- \ + "$SKILL_VENV/bin/pip" install -q fastmcp colorama \ + || fail "pip install failed inside the skill venv. Check sandbox connectivity." 
+VENV_CHECK=$(openshell sandbox exec -n "$SANDBOX_NAME" -- \ + "$SKILL_VENV/bin/python3" -c "import fastmcp, colorama; print('ok')" 2>/dev/null || true) +[ "$VENV_CHECK" = "ok" ] \ + && ok "Skill venv ready ($SKILL_VENV)" \ + || fail "Skill venv verification failed — 'import fastmcp, colorama' returned no output. Re-run install.sh." +echo "" + +# ── Step 9c: Restart the OpenClaw gateway ─────────────────────── +info "Restarting OpenClaw gateway in sandbox '$SANDBOX_NAME'..." +openshell sandbox exec -n "$SANDBOX_NAME" -- \ + openclaw gateway restart 2>/dev/null \ + || openshell sandbox exec -n "$SANDBOX_NAME" -- \ + sh -c "openclaw gateway stop 2>/dev/null; sleep 1; openclaw gateway start" \ + || warn "Could not restart OpenClaw gateway — you may need to reconnect manually." +ok "Gateway restarted" +echo "" + +# ── Step 10: Verify ────────────────────────────────────────────── +info "Verifying installation..." + +MCP_UP=$(curl -s --max-time 3 "http://127.0.0.1:${MCP_PORT}/mcp" 2>&1 | wc -c || true) +[ "${MCP_UP:-0}" -gt 0 ] \ + && ok "MCP server responding on http://127.0.0.1:${MCP_PORT}/mcp" \ + || warn "MCP server not responding yet — check: tmux attach -t $TMUX_SESSION" + +SKILL_CHECK=$(openshell sandbox exec -n "$SANDBOX_NAME" -- \ + sh -c "test -f /sandbox/.openclaw/workspace/skills/alfworld-game-viz/SKILL.md && echo ok" \ + 2>/dev/null || true) +[ "$SKILL_CHECK" = "ok" ] \ + && ok "Skill confirmed in sandbox" \ + || warn "Skill not yet visible in sandbox — try reconnecting" + +FASTMCP_CHECK=$(openshell sandbox exec -n "$SANDBOX_NAME" -- \ + "$SKILL_VENV/bin/python3" -c "import fastmcp, colorama; print('ok')" \ + 2>/dev/null || true) +[ "$FASTMCP_CHECK" = "ok" ] \ + && ok "fastmcp + colorama reachable via skill venv" \ + || warn "Skill venv import check failed — try re-running install.sh" + +echo "" +echo -e "${GREEN} ╔══════════════════════════════════════════════════════════╗${NC}" +echo -e "${GREEN} ║ Installation complete! 
║${NC}" +echo -e "${GREEN} ╚══════════════════════════════════════════════════════════╝${NC}" +echo "" +echo " MCP server : http://127.0.0.1:${MCP_PORT}/mcp (tmux: $TMUX_SESSION)" +echo " Sandbox URL: http://host.openshell.internal:${MCP_PORT}/mcp" +echo " Server logs: tmux attach -t $TMUX_SESSION (Ctrl-B D to detach)" +echo " Log file : tail -f $MCP_LOG_FILE" +echo "" +echo " Next steps:" +echo " 1. Connect: nemoclaw $SANDBOX_NAME connect" +echo " 2. Try: \"Start a new ALFWorld game\"" +echo " 3. Try: \"What task do I need to complete?\"" +echo " 4. Try: \"Show me the current game frame\"" +echo " 5. Try: \"What actions can I take right now?\"" +echo " 6. Try: \"Take the next action to progress toward the goal\"" +echo "" +echo " If the agent doesn't find the skill, disconnect and reconnect." +echo -e " ${YELLOW}Note: on first run AI2-THOR downloads ~390 MB. The server may take${NC}" +echo -e " ${YELLOW} 60-90s to become ready. Check: tmux attach -t $TMUX_SESSION${NC}" +echo "" +echo -e " ${YELLOW}After reboot: Xvfb is gone — restart it before re-running:${NC}" +echo -e " ${YELLOW} Xvfb :1 -screen 0 1024x768x24 &${NC}" +echo -e " ${YELLOW} bash $SCRIPT_DIR/install.sh $SANDBOX_NAME${NC}" +echo "" diff --git a/sim-gameworld-demo/policy/sandbox_policy.yaml b/sim-gameworld-demo/policy/sandbox_policy.yaml new file mode 100644 index 0000000..ad42fbc --- /dev/null +++ b/sim-gameworld-demo/policy/sandbox_policy.yaml @@ -0,0 +1,144 @@ +version: 1 +filesystem_policy: + include_workdir: true + read_only: + - /usr + - /lib + - /proc + - /dev/urandom + - /app + - /etc + - /var/log + - /sandbox/.openclaw + read_write: + - /sandbox + - /sandbox/.openclaw-data + - /tmp + - /dev/null +landlock: + compatibility: best_effort +process: + run_as_user: sandbox + run_as_group: sandbox +network_policies: + claude_code: + name: claude_code + endpoints: + - host: api.anthropic.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + access: full + - host: 
statsig.anthropic.com + port: 443 + - host: sentry.io + port: 443 + - host: raw.githubusercontent.com + port: 443 + - host: platform.claude.com + port: 443 + binaries: + - path: /usr/local/bin/claude + - path: /usr/local/bin/node + - path: /usr/bin/node + mcp_server_host: + name: mcp_server_host + endpoints: + - host: host.openshell.internal + port: 9001 + allowed_ips: + - 172.17.0.1 + - host: 127.0.0.1 + port: 9001 + binaries: + - { path: /usr/local/bin/claude } + - { path: /usr/local/bin/node } + - { path: /usr/bin/node } + - { path: /usr/bin/curl } + - { path: /usr/bin/python3 } + - { path: /usr/bin/python3.11 } + - { path: /sandbox/.venv/bin/python } + - { path: /sandbox/.venv/bin/python3 } + - { path: /sandbox/test_mcp_client/.venv/bin/python } + - { path: /sandbox/test_mcp_client/.venv/bin/python3 } + - { path: "/sandbox/.uv/python/**" } + - { path: "/sandbox/.openclaw/workspace/skills/*/venv/bin/python" } + - { path: "/sandbox/.openclaw/workspace/skills/*/venv/bin/python3" } + - { path: "/sandbox/.openclaw/workspace/skills/*/venv/bin/python3.11" } + nvidia: + name: nvidia + endpoints: + - { host: integrate.api.nvidia.com, port: 443 } + binaries: + - { path: /usr/bin/curl } + - { path: /bin/bash } + - { path: /usr/local/bin/opencode } + nvidia_web: + name: nvidia_web + endpoints: + - { host: nvidia.com, port: 443 } + - { host: www.nvidia.com, port: 443 } + binaries: + - { path: /usr/bin/curl } + github: + name: github + endpoints: + - host: github.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + rules: + - allow: + method: GET + path: "/**/info/refs*" + - allow: + method: POST + path: "/**/git-upload-pack" + binaries: + - { path: /usr/bin/git } + github_rest_api: + name: github-rest-api + endpoints: + - host: api.github.com + port: 443 + protocol: rest + tls: terminate + enforcement: enforce + rules: + - allow: + method: GET + path: "/**" + - allow: + method: HEAD + path: "/**" + - allow: + method: OPTIONS + path: "/**" + binaries: + 
- { path: /usr/local/bin/claude } + - { path: /usr/bin/gh } + pypi: + name: pypi + endpoints: + - { host: pypi.org, port: 443 } + - { host: files.pythonhosted.org, port: 443 } + - { host: github.com, port: 443 } + - { host: objects.githubusercontent.com, port: 443 } + - { host: api.github.com, port: 443 } + - { host: downloads.python.org, port: 443 } + binaries: + - { path: /usr/bin/python3 } + - { path: /usr/bin/python3.11 } + - { path: /sandbox/.venv/bin/python } + - { path: /sandbox/.venv/bin/python3 } + - { path: /sandbox/.venv/bin/pip } + - { path: /app/.venv/bin/python } + - { path: /app/.venv/bin/python3 } + - { path: /app/.venv/bin/pip } + - { path: /usr/local/bin/uv } + - { path: "/sandbox/.uv/python/**" } + - { path: "/sandbox/.openclaw/workspace/skills/*/venv/bin/python" } + - { path: "/sandbox/.openclaw/workspace/skills/*/venv/bin/python3" } + - { path: "/sandbox/.openclaw/workspace/skills/*/venv/bin/pip" } diff --git a/sim-gameworld-demo/sandbox_alfword_viz_skills/SKILL.md b/sim-gameworld-demo/sandbox_alfword_viz_skills/SKILL.md new file mode 100644 index 0000000..5436d65 --- /dev/null +++ b/sim-gameworld-demo/sandbox_alfword_viz_skills/SKILL.md @@ -0,0 +1,196 @@ +--- +name: alfworld-game-viz +description: Interact with the ALFWorld THOR 3D visual environment via MCP. Provides direct tool access to reset the game, execute actions, inspect game state, view visual frames, upload frames to the sandbox, and query the game log. You (the agent) decide which tool to call and which action to take — no secondary LLM is involved. Trigger keywords — alfworld, game, thor, household task, game step, game frame, game viz, game visualization, embodied agent, reset game, take action. +--- + +# ALFWorld Visual Game Skills + +## Overview + +Direct tool interface to the ALFWorld THOR 3D visual environment running on the **host machine** via MCP. 
You decide which tool to invoke and which action to take based on the current game state, the visual frame, and the list of admissible actions. The MCP server exposes raw environment operations — call them with the specific parameters that match the intended action. + +## IMPORTANT — The environment runs on the host, not in the sandbox + +The ALFWorld THOR environment and all frame files are managed by the MCP server on the **host machine**. Frames are pushed into the sandbox `assets/` folder via `upload_frame_to_sandbox`. + +- **Never** search the sandbox filesystem for the environment or config files. +- **Always** call tools without extra path arguments — the server uses its configured defaults. +- The server must be running before any tool call will succeed. If you get connection errors, the host server may still be warming up (can take 60+ seconds); retry with delays. + +## Invocation + +Always use the skill venv's Python (required by the sandbox network policy): + +```bash +SKILL_DIR=~/.openclaw/workspace/skills/alfworld-game-viz +$SKILL_DIR/venv/bin/python3 $SKILL_DIR/scripts/sandbox_client_vis.py [args] +``` + +Do **not** use bare `python3` — the system Python is not permitted to reach the MCP server on port 9001. + +## Available Tools + +### `reset_env` +Reset the ALFWorld THOR environment and start a fresh game episode. Clears old frames and initialises the game log. +**Use when:** user wants to start a new game, restart, or the previous episode has ended. +```bash +python3 sandbox_client_vis.py reset_env +``` +Returns JSON with `task`, `observation`, `admissible_commands`, `frame_path`, `step`. + +--- + +### `step_env` +Execute one action in the THOR environment. Saves the resulting frame and appends an entry to the game log. +**Use when:** you have chosen an action from the admissible list and want to advance the game. 
+```bash +python3 sandbox_client_vis.py step_env --action ACTION +``` +| Argument | Type | Required | Description | +|---|---|---|---| +| `--action` | str | yes | Exact action string from the admissible commands list | + +**Example:** +```bash +python3 sandbox_client_vis.py step_env --action "open fridge 1" +``` +Returns JSON with `observation`, `goal_condition_success`, `done`, `won`, `admissible_commands`, `frame_path`, `step`, `action_taken`. + +--- + +### `get_admissible_commands` +Return the list of currently valid action strings and the current step number. +**Use when:** you need to know what actions are available before choosing one. +```bash +python3 sandbox_client_vis.py get_admissible_commands +``` +Returns JSON with `admissible_commands` (list) and `step`. + +--- + +### `get_current_state` +Return a full snapshot of the current game state including task, observation, frame path, goal progress, and admissible commands. +**Use when:** you want a complete picture of the game at the current step (e.g. after resuming). +```bash +python3 sandbox_client_vis.py get_current_state +``` +Returns JSON with `task`, `observation`, `goal_condition_success`, `done`, `step`, `admissible_commands`, `frame_path`, `log_file`. + +--- + +### `get_current_frame_info` +Return metadata about the most recently saved visual frame (path, dimensions, file size). +**Use when:** you need to locate or verify the latest frame before displaying or uploading it. +```bash +python3 sandbox_client_vis.py get_current_frame_info +``` +Returns JSON with `frame_path`, `step`, `exists`, `size_bytes`, `width`, `height`. + +--- + +### `upload_frame_to_sandbox` +Upload a saved frame PNG from the host to this sandbox via openshell. +The frame lands at `/sandbox/.openclaw/workspace/skills/alfworld-game-viz/assets/`. +**Use when:** you want to display a game frame in the chat or after each `step_env` call. 
+```bash +python3 sandbox_client_vis.py upload_frame_to_sandbox --sandbox-name NAME [--step N] +``` +| Argument | Type | Default | Description | +|---|---|---|---| +| `--sandbox-name` | str | *(required)* | Name of this sandbox (e.g. `lasting-gorilla`) | +| `--step` | int | *(latest step)* | Step number of the frame to upload | + +**Example:** +```bash +python3 sandbox_client_vis.py upload_frame_to_sandbox --sandbox-name lasting-gorilla --step 3 +``` +After upload, display the frame: +```bash +# Find the uploaded file and read it to show in chat +ls -t ~/.openclaw/workspace/skills/alfworld-game-viz/assets/step_*.png | head -1 +``` + +--- + +### `get_game_log` +Return the last N step blocks from the host-side `game_log_visual.md`. +**Use when:** you want to review recent game history — actions taken, observations, goal progress. +```bash +python3 sandbox_client_vis.py get_game_log [--last-n N] +``` +| Argument | Type | Default | Description | +|---|---|---|---| +| `--last-n` | int | `10` | Number of recent step blocks to return | + +**Example:** +```bash +python3 sandbox_client_vis.py get_game_log --last-n 5 +``` + +--- + +### `search_game_log` +Search `game_log_visual.md` for lines matching a pattern (case-insensitive). +**Use when:** you want to find specific actions, observations, or keywords in the game history. +```bash +python3 sandbox_client_vis.py search_game_log --pattern PATTERN +``` +| Argument | Type | Required | Description | +|---|---|---|---| +| `--pattern` | str | yes | Plain text or regex to match against log lines | + +**Example:** +```bash +python3 sandbox_client_vis.py search_game_log --pattern "fridge" +``` + +--- + +## How to Play a Game Step + +At each step you (the agent) are responsible for: + +1. **Read the state** — call `get_current_state` to get the task, observation, and admissible commands. +2. 
**Inspect the frame** — call `upload_frame_to_sandbox` (or `get_current_frame_info` to locate it), then read the PNG file to see the visual scene.
3. **Choose an action** — based on the task, the observation text, and what you see in the image, pick the single best action from the `admissible_commands` list.
4. **Execute** — call `step_env --action "<chosen action>"`.
5. **Repeat** until `done` is `true`.

### Initialize a fresh game:
```bash
python3 sandbox_client_vis.py reset_env
python3 sandbox_client_vis.py upload_frame_to_sandbox --sandbox-name <sandbox-name>
```

### Continue an in-progress game:
```bash
python3 sandbox_client_vis.py get_current_state
# inspect frame, choose action, then:
python3 sandbox_client_vis.py step_env --action "put apple 1 in fridge 1"
python3 sandbox_client_vis.py upload_frame_to_sandbox --sandbox-name <sandbox-name>
```

## Frame Location (in sandbox)

After upload, frames are at:
```
~/.openclaw/workspace/skills/alfworld-game-viz/assets/step_NNNN.png
```
where `NNNN` is the zero-padded step number (e.g. `step_0000.png`, `step_0003.png`).

## Server URL

The client connects to `http://host.openshell.internal:9001/mcp` by default.
Override with `--server-url URL` or the `MCP_SERVER_URL` environment variable.

## Troubleshooting

If a tool call fails with a connection error:
1. Check the MCP server is running on the host: `curl http://host.openshell.internal:9001/mcp`
2. Confirm the sandbox policy is applied (allows egress to port 9001)
3. The THOR environment can take 60+ seconds to initialise after server start — retry with delays
4. 
If the venv is missing, recreate it: + ```bash + python3 -m venv $SKILL_DIR/venv + $SKILL_DIR/venv/bin/pip install -q fastmcp + ``` diff --git a/sim-gameworld-demo/sandbox_alfword_viz_skills/scripts/sandbox_client_vis.py b/sim-gameworld-demo/sandbox_alfword_viz_skills/scripts/sandbox_client_vis.py new file mode 100644 index 0000000..50fc402 --- /dev/null +++ b/sim-gameworld-demo/sandbox_alfword_viz_skills/scripts/sandbox_client_vis.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +""" +Direct MCP tool caller for the ALFWorld Visual Environment server. + +The OpenClaw agent (which already has an LLM with tool-calling capability) +decides which tool to invoke and with what arguments. This script executes +that one tool call against the remote MCP server and prints the result to +stdout. No secondary LLM or game-loop logic runs here. + +Usage: + /venv/bin/python3 sandbox_client_vis.py [options] + +Tools: + reset_env + step_env --action ACTION + get_admissible_commands + get_current_state + get_current_frame_info + upload_frame_to_sandbox --sandbox-name NAME [--step N] + get_game_log [--last-n N] + search_game_log --pattern PATTERN + +Server URL (resolved in order): + 1. --server-url flag + 2. MCP_SERVER_URL env var + 3. Default: http://host.openshell.internal:9001/mcp + +Always run with the skill venv's Python so the sandbox policy allows the +outbound connection to port 9001. Do NOT use bare python3. 
+""" +from __future__ import annotations + +import asyncio +import os +import sys +import argparse + +try: + from fastmcp import Client +except ImportError as _e: + _skill_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + _venv_python = os.path.join(_skill_dir, "venv", "bin", "python3") + print( + f"\nMissing dependency: {_e}\n" + "Run this script with the skill venv's Python, not bare python3:\n\n" + f" {_venv_python} {__file__} [args]\n\n" + "If the venv doesn't exist yet, create it with:\n\n" + f" python3 -m venv {_skill_dir}/venv\n" + f" {_skill_dir}/venv/bin/pip install -q fastmcp\n", + file=sys.stderr, + ) + sys.exit(1) + +_DEFAULT_URL = "http://host.openshell.internal:9001/mcp" + + +async def call_tool(server_url: str, tool: str, args: dict) -> str: + async with Client(server_url) as client: + result = await client.call_tool(tool, args) + return result.content[0].text + + +def main() -> None: + root = argparse.ArgumentParser( + description="Call a specific ALFWorld Visual MCP tool directly.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + root.add_argument( + "--server-url", + default=os.environ.get("MCP_SERVER_URL", _DEFAULT_URL), + help="Full URL of the MCP server.", + ) + + sub = root.add_subparsers(dest="tool", metavar="", required=True) + + # reset_env + sub.add_parser("reset_env", + help="Reset the ALFWorld THOR environment and start a new episode.") + + # step_env + p_step = sub.add_parser("step_env", + help="Execute an action in the THOR environment.") + p_step.add_argument("--action", required=True, + help="One of the currently admissible action strings.") + + # get_admissible_commands + sub.add_parser("get_admissible_commands", + help="Return the list of currently valid action strings.") + + # get_current_state + sub.add_parser("get_current_state", + help="Return a full snapshot of the current game state (text + frame).") + + # get_current_frame_info + sub.add_parser("get_current_frame_info", + help="Return 
metadata about the most recently saved visual frame.") + + # upload_frame_to_sandbox + p_upload = sub.add_parser("upload_frame_to_sandbox", + help="Upload a saved frame PNG to a sandbox via openshell.") + p_upload.add_argument("--sandbox-name", required=True, + help="Name of the target sandbox (e.g. 'my-sandbox').") + p_upload.add_argument("--step", type=int, default=None, + help="Step number to upload (defaults to latest step).") + + # get_game_log + p_log = sub.add_parser("get_game_log", + help="Return the last N step blocks from game_log_visual.md.") + p_log.add_argument("--last-n", type=int, default=10, + help="Number of recent steps to return.") + + # search_game_log + p_search = sub.add_parser("search_game_log", + help="Search game_log_visual.md for lines matching a pattern.") + p_search.add_argument("--pattern", required=True, + help="Plain text or regex pattern to search for.") + + parsed = root.parse_args() + server_url = parsed.server_url + tool = parsed.tool + + # Build args dict for the tool call + tool_args: dict = {} + + if tool == "reset_env": + tool_args = {} + + elif tool == "step_env": + tool_args = {"action": parsed.action} + + elif tool == "get_admissible_commands": + tool_args = {} + + elif tool == "get_current_state": + tool_args = {} + + elif tool == "get_current_frame_info": + tool_args = {} + + elif tool == "upload_frame_to_sandbox": + tool_args = {"sandbox_name": parsed.sandbox_name} + if parsed.step is not None: + tool_args["step"] = parsed.step + + elif tool == "get_game_log": + tool_args = {"last_n": parsed.last_n} + + elif tool == "search_game_log": + tool_args = {"pattern": parsed.pattern} + + try: + result = asyncio.run(call_tool(server_url, tool, tool_args)) + print(result) + except Exception as exc: + print(f"Error calling '{tool}': {exc}", file=sys.stderr) + print(f"Is the server reachable at {server_url}?", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git 
a/sim-gameworld-demo/sandbox_alfword_viz_skills/scripts/shrink_frame.py b/sim-gameworld-demo/sandbox_alfword_viz_skills/scripts/shrink_frame.py new file mode 100644 index 0000000..c05bb21 --- /dev/null +++ b/sim-gameworld-demo/sandbox_alfword_viz_skills/scripts/shrink_frame.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +""" +shrink_frame.py — Shrink ALFWorld game step PNGs for inline chat display. + +Converts a step PNG to a smaller JPEG (or optionally base64 data URI) +that can be embedded in chat UIs with limited image support. + +Usage: + # Shrink a specific frame (saves _thumb.jpg next to it) + python shrink_frame.py step_0001.png + + # Shrink with custom max dimension and quality + python shrink_frame.py step_0001.png --max-dim 320 --quality 60 + + # Output as base64 data URI (for markdown embedding) + python shrink_frame.py step_0001.png --base64 + + # Shrink the latest frame in assets/ + python shrink_frame.py --latest + + # Shrink ALL frames in assets/ + python shrink_frame.py --all + + # Custom output path + python shrink_frame.py step_0001.png -o /tmp/preview.jpg +""" + +import argparse +import base64 +import glob +import os +import sys + +from PIL import Image + +# Defaults +DEFAULT_MAX_DIM = 400 # max width or height in pixels +DEFAULT_QUALITY = 65 # JPEG quality (1-95) +ASSETS_DIR = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "assets", +) + + +def shrink( + input_path: str, + output_path: str | None = None, + max_dim: int = DEFAULT_MAX_DIM, + quality: int = DEFAULT_QUALITY, + as_base64: bool = False, +) -> str: + """ + Shrink a PNG frame to a smaller JPEG. + + Args: + input_path: Path to the source PNG. + output_path: Where to save the JPEG. Defaults to _thumb.jpg + in the same directory. + max_dim: Maximum width or height in pixels. + quality: JPEG quality (1-95, lower = smaller file). + as_base64: If True, return a data URI string instead of saving. 
+ + Returns: + The output file path, or the data URI string if as_base64=True. + """ + img = Image.open(input_path) + + # Convert RGBA -> RGB (JPEG doesn't support alpha) + if img.mode in ("RGBA", "P"): + img = img.convert("RGB") + + # Resize preserving aspect ratio + w, h = img.size + if max(w, h) > max_dim: + scale = max_dim / max(w, h) + new_w = int(w * scale) + new_h = int(h * scale) + img = img.resize((new_w, new_h), Image.LANCZOS) + + if as_base64: + import io + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=quality, optimize=True) + b64 = base64.b64encode(buf.getvalue()).decode("ascii") + return f"data:image/jpeg;base64,{b64}" + + if output_path is None: + stem, _ = os.path.splitext(input_path) + output_path = f"{stem}_thumb.jpg" + + img.save(output_path, format="JPEG", quality=quality, optimize=True) + + orig_size = os.path.getsize(input_path) + new_size = os.path.getsize(output_path) + ratio = (1 - new_size / orig_size) * 100 + print( + f"{os.path.basename(input_path)}: " + f"{orig_size:,}B → {new_size:,}B " + f"({ratio:.0f}% smaller) → {output_path}" + ) + return output_path + + +def find_latest() -> str | None: + """Find the most recent step_*.png in assets/.""" + pngs = sorted(glob.glob(os.path.join(ASSETS_DIR, "step_*.png"))) + return pngs[-1] if pngs else None + + +def find_all() -> list[str]: + """Find all step_*.png in assets/.""" + return sorted(glob.glob(os.path.join(ASSETS_DIR, "step_*.png"))) + + +def main(): + parser = argparse.ArgumentParser(description="Shrink ALFWorld step frames") + parser.add_argument("input", nargs="?", help="Path to PNG file") + parser.add_argument("--latest", action="store_true", help="Shrink latest frame") + parser.add_argument("--all", action="store_true", help="Shrink all frames") + parser.add_argument("--max-dim", type=int, default=DEFAULT_MAX_DIM, + help=f"Max width/height (default: {DEFAULT_MAX_DIM})") + parser.add_argument("--quality", "-q", type=int, default=DEFAULT_QUALITY, + help=f"JPEG quality 1-95 
(default: {DEFAULT_QUALITY})") + parser.add_argument("--base64", "-b", action="store_true", + help="Output base64 data URI to stdout") + parser.add_argument("-o", "--output", help="Output file path") + args = parser.parse_args() + + targets: list[str] = [] + + if args.all: + targets = find_all() + if not targets: + print(f"No step_*.png found in {ASSETS_DIR}", file=sys.stderr) + sys.exit(1) + elif args.latest: + latest = find_latest() + if not latest: + print(f"No step_*.png found in {ASSETS_DIR}", file=sys.stderr) + sys.exit(1) + targets = [latest] + elif args.input: + # Resolve relative to assets dir if not absolute + p = args.input + if not os.path.isabs(p) and not os.path.exists(p): + p = os.path.join(ASSETS_DIR, p) + if not os.path.exists(p): + print(f"File not found: {args.input}", file=sys.stderr) + sys.exit(1) + targets = [p] + else: + parser.print_help() + sys.exit(1) + + for t in targets: + result = shrink( + t, + output_path=args.output if len(targets) == 1 else None, + max_dim=args.max_dim, + quality=args.quality, + as_base64=args.base64, + ) + if args.base64: + print(result) + + +if __name__ == "__main__": + main()