Spaces:
Running on T4
Running on T4
| """Auto-file GitHub issues for validator-internal bugs. | |
| Mirror of `tools/hf_watch/validate.py::_ensure_validator_internal_issues` | |
| moved to the HF Space side so the Space can self-report tooling | |
| failures without depending on the GH Actions wrapper to do it. | |
| Policy reminder (from CLAUDE.md / project conventions): | |
| - GitHub Issues track NVIDIA-internal *tooling* problems only. | |
| - Customer-asset findings (real spec violations) stay on the dashboard; | |
| they do NOT become issues. | |
| - Distinguishing the two is `is_validator_internal_issue`'s job. | |
| Token: GH_VALIDATOR_TOKEN (or GITHUB_TOKEN) — set as a Space secret. | |
| A fine-grained PAT with `issues: read+write` on | |
| `NVIDIA-dev/simready-oem-library-pm` is enough; no other scope needed. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import urllib.parse | |
| import urllib.request | |
| from typing import Any | |
# "owner/name" slug of the GitHub repository that receives the auto-filed
# validator-internal issues. It is used both as the REST URL prefix
# (/repos/<slug>/...) and as the `repo:` qualifier in issue searches.
GH_REPO = "NVIDIA-dev/simready-oem-library-pm"
def _gh_token() -> str | None:
    """Look up the GitHub API token from the environment.

    ``GH_VALIDATOR_TOKEN`` wins when it is set to a non-empty value;
    otherwise whatever ``GITHUB_TOKEN`` holds (possibly nothing) is returned.
    """
    preferred = os.environ.get("GH_VALIDATOR_TOKEN")
    if preferred:
        return preferred
    return os.environ.get("GITHUB_TOKEN")
def is_validator_internal_issue(iss: dict) -> bool:
    """Tell validator-package failures apart from genuine asset findings.

    An issue counts as validator-internal when its ``code`` is ``UNKNOWN``
    or begins with ``SDK.``, or when its ``msg`` contains one of the known
    crash / mis-registration phrases. Missing or empty fields are treated
    as empty strings.
    """
    code = iss.get("code") or ""
    msg = iss.get("msg") or ""
    internal_phrases = ("Uncaught error", "is not registered to requirement")
    # Code checks come first and short-circuit, so the message is only
    # inspected when the code alone is inconclusive.
    return (
        code == "UNKNOWN"
        or code.startswith("SDK.")
        or any(phrase in msg for phrase in internal_phrases)
    )
# Root of the GitHub REST API. Repo-scoped endpoints live under
# /repos/<GH_REPO>; global ones (e.g. /search/issues) hang directly off it.
_GH_API_ROOT = "https://api.github.com"


def _gh_request(method: str, path: str, payload: dict | None = None,
                *, repo_scoped: bool = True) -> dict | list | None:
    """Send one authenticated GitHub REST request and decode the JSON reply.

    Args:
        method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
        path: Endpoint path beginning with ``/``; may include a query string.
        payload: Optional JSON-serialisable body, sent as ``application/json``.
        repo_scoped: When True (the default) ``path`` is resolved under
            ``/repos/<GH_REPO>``. Pass False for endpoints that live at the
            API root, such as ``/search/issues``.

    Returns:
        The decoded JSON response, or None when the response body is empty.

    Raises:
        RuntimeError: if no token is available in the environment.
        urllib.error.URLError: (including ``HTTPError``) propagated unchanged
            from ``urlopen`` so callers can inspect the status code.
    """
    token = _gh_token()
    if not token:
        raise RuntimeError("no GitHub token in env (GH_VALIDATOR_TOKEN or GITHUB_TOKEN)")
    base = f"{_GH_API_ROOT}/repos/{GH_REPO}" if repo_scoped else _GH_API_ROOT
    url = f"{base}{path}"
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "X-GitHub-Api-Version": "2022-11-28",
        "User-Agent": "simready-validator-space/0.1",
    }
    body = None
    if payload is not None:
        body = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, data=body, headers=headers, method=method)
    with urllib.request.urlopen(req, timeout=30) as r:
        # An empty body (e.g. 204 No Content) decodes to None rather than raising.
        return json.loads(r.read() or "null")


def _find_issue(title: str) -> dict | None:
    """Return the issue in ``GH_REPO`` whose title equals *title*, or None.

    The lookup goes through the issue-search endpoint, which lives at the
    API root (``/search/issues``) rather than under ``/repos/<owner>/<repo>``;
    a relative ``/../../`` hop from the repo prefix would land on
    ``/repos/search/issues`` and fail, hence ``repo_scoped=False``.
    Search matching is fuzzy, so hits are filtered down to an exact title
    match before one is returned.
    """
    query = urllib.parse.quote(f'repo:{GH_REPO} in:title "{title}" is:issue', safe="")
    result = _gh_request("GET", f"/search/issues?q={query}&per_page=100",
                         repo_scoped=False)
    # Be defensive about the response shape: anything but a dict has no items.
    items = result.get("items") if isinstance(result, dict) else None
    return next((it for it in items or [] if it.get("title") == title), None)
def _create_issue(title: str, body: str, labels: list[str]) -> int:
    """Open a new issue in the tracked repository.

    Returns the number GitHub assigned to the issue, or 0 when the response
    carries no ``number`` field.
    """
    new_issue = {"title": title, "body": body, "labels": labels}
    response = _gh_request("POST", "/issues", new_issue)
    return response.get("number", 0)
def _add_comment(issue_num: int, body: str) -> None:
    """Post *body* as a new comment on issue number *issue_num*."""
    comments_path = f"/issues/{issue_num}/comments"
    _gh_request("POST", comments_path, {"body": body})
def _build_internal_issue_body(rule: str, code: str, g: dict[str, Any],
                               dataset: str, profile: str) -> str:
    """Render the Markdown body used when a tracking issue is first opened.

    *g* is the aggregated group for one (rule, code) pair and must provide
    ``count`` and ``sample_msg``; ``severity`` is optional and shown as ``?``
    when missing or empty.
    """
    # (label, rendered value) pairs for the summary table, in display order.
    table_rows = [
        ("Rule", f"`{rule}`"),
        ("Code (as reported)", f"`{code}`"),
        ("Severity", g.get("severity") or "?"),
        ("Occurrence count (first run)", g["count"]),
        ("Dataset", f"`{dataset}`"),
        ("Profile", f"`{profile}`"),
    ]
    intro = (
        "**Validator-internal bug** — surfaced during automatic SimReady validation.\n\n"
        "This is NOT a customer-asset finding. The validator's own rule "
        "registration / spec loading is misbehaving and emitting errors that "
        "don't map to any real spec violation. Track + fix here.\n\n"
    )
    table = "| Field | Value |\n|---|---|\n" + "".join(
        f"| {label} | {value} |\n" for label, value in table_rows
    )
    sample = (
        "**Sample message** (truncated to 200 chars):\n\n"
        f"```\n{g['sample_msg']}\n```\n\n"
    )
    footer = (
        "---\n"
        "_Filed automatically by the HF Space (`tools/hf_space/github_issues.py`). "
        "Subsequent runs that re-hit the same (rule, code) pair will comment "
        "here rather than open new issues._"
    )
    # A blank line separates the table from the sample-message section.
    return intro + table + "\n" + sample + footer
def _build_recurrence_comment(g: dict[str, Any], dataset: str, profile: str) -> str:
    """Render the short comment added when an already-tracked bug is hit again.

    *g* must provide ``count``; ``severity`` is optional and shown as ``?``
    when missing or empty.
    """
    where = f"Re-hit during validation of `{dataset}` (profile `{profile}`)."
    stats = f"Occurrences this run: {g['count']}, severity: {g.get('severity') or '?'}."
    return "\n".join((where, stats))
def ensure_internal_issues(results_json: dict, dataset: str, profile: str,
                           log_fn=None) -> dict:
    """Ensure a tracking GitHub issue exists for every validator-internal bug.

    Scans ``results_json["results"][*]["issues"]`` for entries that
    ``is_validator_internal_issue`` flags, groups them by ``(rule, code)``,
    and for each group either comments on the existing issue or opens a new
    one. GitHub failures are logged and swallowed so the validator's verdict
    is never blocked on GitHub being flaky.

    Args:
        results_json: Parsed results document; a missing or null ``results``
            list (or per-asset ``issues`` list) is treated as empty.
        dataset: Dataset identifier, included in issue bodies and comments.
        profile: Validation profile name, included likewise.
        log_fn: Optional ``callable(str)`` for progress lines; defaults to
            ``print`` with ``flush=True``.

    Returns:
        ``{"skipped": True, "reason": "no_token"}`` when no token is set;
        ``{"created": 0, "updated": 0}`` when nothing internal was found;
        otherwise ``{"created", "updated", "groups", "aborted_404"}``.
    """
    # Function-scope import keeps this block self-contained; needed only to
    # classify HTTP failures by status code.
    from urllib.error import HTTPError

    out = log_fn or (lambda s: print(s, flush=True))
    if not _gh_token():
        out(" (skipping internal-issue tracking: no GH token)")
        return {"skipped": True, "reason": "no_token"}

    # Aggregate occurrences per (rule, code); the first message seen for a
    # pair becomes its sample (truncated to 200 chars).
    groups: dict[tuple[str, str], dict[str, Any]] = {}
    for asset in results_json.get("results") or []:
        for iss in asset.get("issues") or []:
            if not is_validator_internal_issue(iss):
                continue
            key = (iss.get("rule") or "?", iss.get("code") or "UNKNOWN")
            g = groups.setdefault(key, {
                "count": 0,
                "sample_msg": (iss.get("msg") or "")[:200],
                "severity": (iss.get("severity") or "").lower(),
            })
            g["count"] += 1
    if not groups:
        return {"created": 0, "updated": 0}

    created = updated = 0
    aborted = False
    for done, ((rule, code), g) in enumerate(groups.items()):
        title = f"[validator-internal] {rule} → {code}"
        try:
            existing = _find_issue(title)
            if existing:
                _add_comment(existing["number"],
                             _build_recurrence_comment(g, dataset, profile))
                updated += 1
                out(f" internal-issue #{existing['number']}: comment added for {rule}/{code}")
            else:
                num = _create_issue(title,
                                    _build_internal_issue_body(rule, code, g, dataset, profile),
                                    ["validator-internal", "process"])
                created += 1
                out(f" internal-issue #{num}: opened for {rule}/{code}")
        except Exception as e:  # best-effort boundary: never fail the validation run
            # Check the real HTTP status rather than searching the message
            # text for "404", which could match unrelated errors.
            if isinstance(e, HTTPError) and e.code == 404:
                # A 404 means the token cannot see issues on this repo. Every
                # remaining group would fail the same way, so stop now instead
                # of logging dozens of identical errors. The count covers this
                # group and all not yet attempted.
                out(f" ! internal-issue tracking aborted (404 — token lacks issues:write "
                    f"on {GH_REPO}); skipping {len(groups) - done} remaining group(s)")
                aborted = True
                break
            out(f" ! internal-issue {rule}/{code} tracking failed: {type(e).__name__}: {e}")
    return {"created": created, "updated": updated, "groups": len(groups),
            "aborted_404": aborted}