STET

run_metadata

runs/2026-02-28__18-27-22__gpt-5-1-codex-mini/run_metadata.json

1673 bytes

Back to adjudication
{
  "run_id": "2026-02-28__18-27-22__gpt-5-1-codex-mini",
  "uuid": "6040e3d3-eaa6-4cc9-abf1-06ad114ad5e1",
  "dataset_path": "/Users/ben/dev/flux/.tmp/sqlparser-rs-dataset",
  "dataset_name": null,
  "dataset_version": null,
  "output_path": "/Users/ben/dev/flux/.tmp/h2h-sqlparser-w2/runs",
  "agent_name": "codex",
  "no_rebuild": false,
  "cleanup": true,
  "log_level": 20,
  "task_ids": [
    "flux-pr-1501",
    "flux-pr-1604",
    "flux-pr-2096",
    "flux-pr-1891",
    "flux-pr-1500",
    "flux-pr-2151",
    "flux-pr-1765",
    "flux-pr-1900",
    "flux-pr-2185",
    "flux-pr-1649",
    "flux-pr-2148",
    "flux-pr-1984",
    "flux-pr-1628",
    "flux-pr-1747",
    "flux-pr-1791",
    "flux-pr-2170",
    "flux-pr-1759",
    "flux-pr-1839",
    "flux-pr-1435",
    "flux-pr-1441",
    "flux-pr-1526",
    "flux-pr-1965",
    "flux-pr-1495",
    "flux-pr-1908",
    "flux-pr-1534",
    "flux-pr-2172",
    "flux-pr-1918",
    "flux-pr-2011",
    "flux-pr-1576",
    "flux-pr-1414"
  ],
  "exclude_task_ids": null,
  "n_tasks": null,
  "n_concurrent_trials": 4,
  "n_attempts": 1,
  "dataset_size": 30,
  "accuracy": 0.26666666666666666,
  "model_name": "gpt-5.1-codex-mini",
  "start_time": "2026-02-28T20:00:12.722616+00:00",
  "end_time": "2026-02-28T21:48:26.352711+00:00",
  "commit_hash": "32dbe1950f8d0a4ed6db86745bf0a62da2ee0921",
  "username": "benredmond",
  "s3_bucket": null,
  "reasoning_effort": "medium",
  "agent_kwargs": null,
  "pass_at_k": {},
  "resumed_at": null
}