From 6d118d9c99127d959c12cf17e8a509081ad442ca Mon Sep 17 00:00:00 2001 From: Ashley Mensah Date: Wed, 29 Apr 2026 15:49:09 +0200 Subject: [PATCH] feat(ci): trust LLM decisions and feed it PR merge status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove pre_score override from enforcePolicy — policy now only gates AUTO_CLOSE, otherwise trusts the model's decision - Pass pre_score evidence (hard signals, contradictions) to LLM as context instead of using it as a decision override - Fetch linked PR merge status (MERGED/OPEN/CLOSED) in fetch step and include in LLM prompt so it can distinguish merged fixes from open proposals --- .../scripts/classify-candidates.mjs | 49 ++++++++++++------- .../scripts/fetch-candidates.mjs | 36 +++++++++++++- 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/.github/issue-resolution/scripts/classify-candidates.mjs b/.github/issue-resolution/scripts/classify-candidates.mjs index 45c3ede11..aa4bcc293 100644 --- a/.github/issue-resolution/scripts/classify-candidates.mjs +++ b/.github/issue-resolution/scripts/classify-candidates.mjs @@ -65,7 +65,7 @@ function truncate(text, maxChars) { return text.slice(0, maxChars) + "\n\n[... truncated due to length]"; } -function buildUserMessage(candidate) { +function buildUserMessage(candidate, pre) { const { issue, comments, timeline } = candidate; const commentBlock = comments @@ -82,7 +82,7 @@ function buildUserMessage(candidate) { }) .join("\n"); - const msg = [ + const sections = [ `## Issue #${issue.number}: ${issue.title}`, `URL: ${issue.html_url}`, `Created: ${issue.created_at} | Updated: ${issue.updated_at}`, @@ -96,9 +96,29 @@ function buildUserMessage(candidate) { "", "### Timeline events", timelineBlock || "(none)", - ].join("\n"); + ]; - return truncate(msg, MAX_USER_MESSAGE_CHARS); + if (candidate.linked_prs?.length) { + sections.push(""); + sections.push("### Linked PRs (verified state)"); + for (const pr of candidate.linked_prs) { + const status = pr.merged ? `MERGED (${pr.merged_at})` : pr.state.toUpperCase(); + sections.push(`- PR #${pr.number}: ${pr.title} — ${status} — ${pr.url}`); + } + } + + if (pre.hardSignals.length || pre.contradictions.length) { + sections.push(""); + sections.push("### Automated evidence scan"); + for (const s of pre.hardSignals) { + sections.push(`- SIGNAL: ${s.type} — ${s.url}`); + } + for (const c of pre.contradictions) { + sections.push(`- CONTRADICTION: ${c.type} — ${c.url}`); + } + } + + return truncate(sections.join("\n"), MAX_USER_MESSAGE_CHARS); } const MODEL = "gpt-4o-mini"; @@ -108,12 +128,12 @@ function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } -async function callGitHubModel(candidate) { +async function callGitHubModel(candidate, pre) { const body = JSON.stringify({ model: MODEL, messages: [ { role: "system", content: systemPrompt }, - { role: "user", content: buildUserMessage(candidate) }, + { role: "user", content: buildUserMessage(candidate, pre) }, ], response_format: { type: "json_schema", @@ -175,6 +195,7 @@ function enforcePolicy(modelOut, pre) { const hasContradiction = (modelOut.contradictions || []).length > 0 || pre.contradictions.length > 0; + // Only auto-close with very strict criteria if ( modelOut.decision === "AUTO_CLOSE" && modelOut.confidence >= 0.97 && @@ -185,19 +206,13 @@ function enforcePolicy(modelOut, pre) { return "AUTO_CLOSE"; } - if (modelOut.decision === "KEEP_OPEN" && pre.score < 25) { - return "KEEP_OPEN"; - } - - if ( - modelOut.decision === "MANUAL_REVIEW" || - modelOut.decision === "AUTO_CLOSE" || - pre.score >= 25 - ) { + // Downgrade AUTO_CLOSE that didn't pass the gate + if (modelOut.decision === "AUTO_CLOSE") { return "MANUAL_REVIEW"; } - return "KEEP_OPEN"; + // Otherwise trust the model + return modelOut.decision; } console.log(`Classifying ${candidates.length} candidates with ${MODEL}...\n`); @@ -216,7 +231,7 @@ async function paced(fn) { const decisions = []; for (const candidate of candidates) { const pre = preScore(candidate); - const modelOut = await paced(() => callGitHubModel(candidate)); + const modelOut = await paced(() => callGitHubModel(candidate, pre)); if (modelOut === null) { console.warn(`\nQuota exhausted after ${decisions.length} issues. Writing partial results.`); diff --git a/.github/issue-resolution/scripts/fetch-candidates.mjs b/.github/issue-resolution/scripts/fetch-candidates.mjs index 0336b0c8d..395e6f49a 100644 --- a/.github/issue-resolution/scripts/fetch-candidates.mjs +++ b/.github/issue-resolution/scripts/fetch-candidates.mjs @@ -16,6 +16,12 @@ async function rest(url) { return res.json(); } +async function restSafe(url) { + const res = await fetch(url, { headers }); + if (!res.ok) return null; + return res.json(); +} + async function paginate(url, max) { const items = []; let page = 1; @@ -80,9 +86,37 @@ for (const issue of realIssues) { } : undefined, })), + linked_prs: [], }); - console.log(` #${issue.number} — ${comments.length} comments, ${timeline.length} timeline events`); + // Fetch merge status for cross-referenced PRs + const prUrls = new Set(); + for (const t of timeline) { + const prHtml = t.source?.issue?.pull_request?.html_url; + if (t.event === "cross-referenced" && prHtml) { + prUrls.add(prHtml); + } + } + + const candidate = candidates[candidates.length - 1]; + for (const prHtml of prUrls) { + // Extract owner/repo and PR number from URL like https://github.com/owner/repo/pull/123 + const match = prHtml.match(/github\.com\/([^/]+\/[^/]+)\/pull\/(\d+)/); + if (!match) continue; + const [, prRepo, prNum] = match; + const pr = await restSafe(`https://api.github.com/repos/${prRepo}/pulls/${prNum}`); + if (!pr) continue; + candidate.linked_prs.push({ + number: pr.number, + title: pr.title, + url: prHtml, + state: pr.state, + merged: pr.merged || false, + merged_at: pr.merged_at, + }); + } + + console.log(` #${issue.number} — ${comments.length} comments, ${timeline.length} timeline events, ${candidate.linked_prs.length} linked PRs`); } await fs.writeFile("candidates.json", JSON.stringify(candidates, null, 2));