STET

run_metadata

runs/2026-02-27__21-30-28__gpt-5-3-codex/run_metadata.json

1674 bytes

Back to adjudication
{
  "run_id": "2026-02-27__21-30-28__gpt-5-3-codex",
  "uuid": "686f5909-4340-469b-843a-963fdaa7396e",
  "dataset_path": "/Users/ben/dev/flux/.tmp/validation-zod-cleaned50-r7-p1-20260208-170124",
  "dataset_name": null,
  "dataset_version": null,
  "output_path": "/Users/ben/dev/flux/.tmp/h2h-zod-w2/runs",
  "agent_name": "codex",
  "no_rebuild": false,
  "cleanup": true,
  "log_level": 20,
  "task_ids": [
    "flux-pr-5519",
    "flux-pr-5575",
    "flux-pr-5409",
    "flux-commit-a8580f2b",
    "flux-pr-3850",
    "flux-pr-4807",
    "flux-pr-4680",
    "flux-pr-4567",
    "flux-pr-4811",
    "flux-pr-5187",
    "flux-pr-3535",
    "flux-commit-64a54b07",
    "flux-pr-3712",
    "flux-pr-4568",
    "flux-pr-5316",
    "flux-pr-4843",
    "flux-pr-4672",
    "flux-pr-4970",
    "flux-commit-7af773c0",
    "flux-pr-4861",
    "flux-pr-3820",
    "flux-pr-5574",
    "flux-pr-5578",
    "flux-pr-5156",
    "flux-pr-4539",
    "flux-commit-0064304a",
    "flux-pr-5222",
    "flux-commit-fc48a85d"
  ],
  "exclude_task_ids": null,
  "n_tasks": null,
  "n_concurrent_trials": 4,
  "n_attempts": 1,
  "dataset_size": 28,
  "accuracy": 0.6071428571428571,
  "model_name": "gpt-5.3-codex",
  "start_time": "2026-02-27T21:30:30.308011+00:00",
  "end_time": "2026-02-27T22:07:54.553917+00:00",
  "commit_hash": "99ae882e59281de74856a3e7eeb1b7c986b466af",
  "username": "benredmond",
  "s3_bucket": null,
  "reasoning_effort": "medium",
  "agent_kwargs": null,
  "pass_at_k": {},
  "resumed_at": null
}