STET

validation

validation/gpt-5-1-codex-mini/flux-pr-1076/validation.json

5059 bytes

Back to adjudication
{
  "task_id": "flux-pr-1076",
  "primary_signal": "tests",
  "status": "fail",
  "matrix_status": "fail_no_patch",
  "equiv_rescue_policy": "on",
  "lane_report": {
    "lane": "lane_unknown",
    "source": "lane_unknown",
    "reasons": [
      "lane_unknown"
    ]
  },
  "flags": {},
  "reason_codes": [
    "fail_no_patch"
  ],
  "rescue": {
    "candidate": false,
    "eligible": false,
    "decision": "not_candidate",
    "required_artifacts_ok": false
  },
  "artifacts_manifest": {
    "required_for_rescue_ok": false,
    "missing_for_rescue": [
      "agent.patch"
    ],
    "gold_patch": {
      "path": ".tmp/graphql-go-tools-dataset/flux-pr-1076/gold.patch",
      "exists": true,
      "non_empty": true,
      "size_bytes": 28154,
      "sha256": "630de79193967ed7e442edb80ba1ba1e3ac8b27f43da1f24237307df46ec39fa",
      "required_for_rescue": true
    },
    "repo_tarball": {
      "path": ".tmp/graphql-go-tools-dataset/flux-pr-1076/repo.tar.gz",
      "exists": true,
      "non_empty": true,
      "size_bytes": 4171519,
      "sha256": "12b8370532022eedd8bc584c8636a5388510622c9af50f3585870d32a23c7966",
      "required_for_rescue": true
    },
    "agent_patch": {
      "path": ".tmp/h2h-graphql-flu77-mini-backfill-20260303/runs/2026-02-21__03-36-16__gpt-5-1-codex-mini/flux-pr-1076/flux-pr-1076.1-of-1.2026-02-21__03-36-16__gpt-5-1-codex-mini/agent-logs/agent.patch",
      "exists": true,
      "non_empty": false,
      "required_for_rescue": true
    }
  },
  "publish": {
    "include_in_leaderboard": false,
    "exclusion_reasons": [
      "agent_patch_empty"
    ]
  },
  "timing": {
    "started_at": "2026-03-03T17:07:48.379847-05:00",
    "finished_at": "2026-03-03T17:07:48.38843-05:00",
    "duration": 8611334,
    "time_budget_sec": 300
  },
  "guardrails": {
    "status": "",
    "has_binary": false,
    "has_submodule": false,
    "diff_files": 0,
    "diff_lines": 0,
    "max_diff_files": 0,
    "max_diff_lines": 0,
    "preflight_ok": false
  },
  "footprint_risk": {
    "status": "unavailable",
    "reason": "agent_patch_missing",
    "version": "v1",
    "score": 0,
    "flag": false,
    "severe_flag": false,
    "weights": {
      "delta": 0.65,
      "absolute": 0.35
    },
    "agent": {
      "files_touched": 0,
      "non_test_files_touched": 0,
      "lines_added": 0,
      "lines_deleted": 0,
      "churn": 0,
      "net_loc": 0,
      "net_abs": 0,
      "new_files_added": 0,
      "deleted_files": 0,
      "special_file_touches": 0
    },
    "gold": {
      "files_touched": 0,
      "non_test_files_touched": 0,
      "lines_added": 0,
      "lines_deleted": 0,
      "churn": 0,
      "net_loc": 0,
      "net_abs": 0,
      "new_files_added": 0,
      "deleted_files": 0,
      "special_file_touches": 0
    },
    "divergence": {
      "extra_files_touched": 0,
      "missing_files_touched": 0,
      "extra_non_test_files_touched": 0,
      "file_overlap_ratio": 0
    },
    "normalized": {
      "over_files": 0,
      "over_churn": 0,
      "over_net_abs": 0,
      "over_non_test": 0,
      "extra_files_ratio": 0,
      "extra_non_test_ratio": 0,
      "overlap_gap": 0,
      "absolute_files": 0,
      "absolute_churn": 0,
      "absolute_non_test": 0,
      "absolute_special": 0
    },
    "components": {
      "delta_risk": 0,
      "absolute_risk": 0
    },
    "thresholds": {
      "level_low_lt": 0.33,
      "level_medium_lt": 0.66,
      "flag_default_gte": 0.66,
      "flag_severe_gte": 0.9
    }
  },
  "tests": {
    "outcome": "unknown",
    "strength": "unknown",
    "partial_score_numerator": 0,
    "partial_score_denominator": 0,
    "partial_score_unknown_count": 0
  },
  "equivalence": {
    "outcome": "",
    "status": "skipped"
  },
  "code_review": {
    "status": "skipped",
    "dimension_scores": {
      "correctness": {
        "score_0_4": 0
      },
      "introduced_bug_risk": {
        "score_0_4": 0
      },
      "edge_case_handling": {
        "score_0_4": 0
      },
      "maintainability_idioms": {
        "score_0_4": 0
      }
    },
    "overall": {}
  },
  "behavioral_robustness": {
    "status": "skipped",
    "probe_differential": {
      "status": "skipped"
    },
    "coverage_delta": {
      "status": "unavailable",
      "reason": "coverage_adapter_unavailable"
    },
    "mutation_lite": {
      "status": "unavailable",
      "reason": "mutation_runner_unavailable"
    },
    "notes": "agent patch is empty; behavioral robustness skipped"
  },
  "test_gen": {
    "status": "skipped"
  },
  "prompt_truncation": {
    "gold_patch": {
      "original_bytes": 0,
      "kept_bytes": 0,
      "truncated": false
    },
    "agent_patch": {
      "original_bytes": 0,
      "kept_bytes": 0,
      "truncated": false
    },
    "repo_tree": {
      "original_bytes": 0,
      "kept_bytes": 0,
      "truncated": false
    },
    "repo_excerpts": {
      "original_bytes": 0,
      "kept_bytes": 0,
      "truncated": false
    }
  },
  "notes": [
    "agent patch is empty (0 bytes); skipping all validation"
  ]
}