STET

run_metadata

runs/2026-02-28__13-38-10__gpt-5-4/run_metadata.json

1623 bytes

Back to adjudication
{
  "run_id": "2026-02-28__13-38-10__gpt-5-4",
  "uuid": "dbfcfc5c-be40-40c9-9aa8-9ec3a20e172d",
  "dataset_path": "/Users/ben/dev/flux/.tmp/graphql-go-tools-dataset",
  "dataset_name": null,
  "dataset_version": null,
  "output_path": "/Users/ben/dev/flux/.tmp/h2h-graphql-w2/runs",
  "agent_name": "codex",
  "no_rebuild": false,
  "cleanup": true,
  "log_level": 20,
  "task_ids": [
    "flux-pr-1232",
    "flux-pr-1309",
    "flux-pr-1240",
    "flux-pr-1155",
    "flux-pr-1380",
    "flux-pr-1184",
    "flux-pr-859",
    "flux-pr-828",
    "flux-pr-1308",
    "flux-pr-1034",
    "flux-pr-1209",
    "flux-pr-891",
    "flux-pr-1297",
    "flux-pr-1169",
    "flux-pr-1087",
    "flux-pr-1128",
    "flux-pr-1293",
    "flux-pr-1076",
    "flux-pr-1351",
    "flux-pr-1099",
    "flux-pr-817",
    "flux-pr-870",
    "flux-pr-1230",
    "flux-pr-1241",
    "flux-pr-1260",
    "flux-pr-1268",
    "flux-pr-1262",
    "flux-pr-1001",
    "flux-pr-1338"
  ],
  "exclude_task_ids": null,
  "n_tasks": null,
  "n_concurrent_trials": 2,
  "n_attempts": 1,
  "dataset_size": 29,
  "accuracy": 0.7586206896551724,
  "model_name": "gpt-5.4",
  "start_time": "2026-03-06T11:06:36.354605+00:00",
  "end_time": "2026-03-06T13:16:38.693205+00:00",
  "commit_hash": "8f1bc9fe2c3cb8a14c254de41e9577a2ce352ac2",
  "username": "benredmond",
  "s3_bucket": null,
  "reasoning_effort": "medium",
  "agent_kwargs": null,
  "pass_at_k": {},
  "resumed_at": null
}