Spaces:
Sleeping
Sleeping
File size: 6,842 Bytes
"""Visualize how FlakeForge tool evidence is produced and fed to the agent.

This script does not require Docker. It uses a lightweight demo runner that
alternates pass/fail outcomes and emits traceback-like stderr so the same
pipeline pieces are exercised:

- preflight classification
- deep flakiness signals
- causal frontier extraction
- tools-based file targeting hints
- final prompt section shown to the agent

Usage:
    c:/CodingNest/FlakeForge/venv/Scripts/python.exe tests/visualize_agent_tool_usage.py
    c:/CodingNest/FlakeForge/venv/Scripts/python.exe tests/visualize_agent_tool_usage.py --repo-path test_repos/moderate_load_jitter_flaky --test-id tests/test_flaky.py::test_request_processing_should_succeed
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from pathlib import Path
from typing import Optional
# Ensure project root imports resolve when running as a script.
# parents[1] is the directory above the one containing this file; per the
# usage examples this script is run as tests/<name>.py, so that is the
# project root. It is prepended only if absent so sys.path gets no duplicate.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from agent.unified_agent import build_unified_prompt
from models import RunRecord
from server.FlakeForge_environment import FlakeForgeEnvironment
from server.tools import build_agent_targeting_hints
class DemoRunner:
    """Deterministic alternating pass/fail runner for visualization only.

    No test is actually executed. Every call to :meth:`run_test` bumps an
    internal counter: odd-numbered calls report a failure carrying a synthetic
    traceback, even-numbered calls report a pass.
    """

    def __init__(self, repo_path: Path, test_identifier: str) -> None:
        """Store the repo location and test id; start the call counter at 0."""
        self.repo_path = repo_path
        self.test_identifier = test_identifier
        self.counter = 0

    def run_test(self, test_identifier: str) -> RunRecord:
        """Return a fabricated run result for ``test_identifier``.

        Args:
            test_identifier: Test id whose part before the first ``::`` is
                used as the test file path in the synthetic traceback.

        Returns:
            A passing ``RunRecord`` on even-numbered calls, otherwise a failing
            one with an ``AssertionError`` and a two-frame traceback excerpt.
        """
        self.counter += 1

        # Alternate outcomes so preflight can classify as flaky.
        if self.counter % 2 == 0:
            return RunRecord(
                passed=True,
                duration_ms=40 + (self.counter % 7),
                error_type=None,
                error_message=None,
                stderr_excerpt=None,
            )

        # The source file is only needed for the failing trace, so the
        # filesystem lookup is skipped entirely on passing runs.
        test_file = test_identifier.split("::", 1)[0]
        source_file = self._best_source_candidate(test_file)
        trace = (
            "Traceback (most recent call last):\n"
            f' File "{(self.repo_path / test_file).as_posix()}", line 42, in test_case\n'
            f' File "{(self.repo_path / source_file).as_posix()}", line 21, in process_request\n'
            "AssertionError: transient mismatch\n"
        )
        return RunRecord(
            passed=False,
            duration_ms=95 + (self.counter % 13),
            error_type="AssertionError",
            error_message="transient mismatch",
            stderr_excerpt=trace,
        )

    def _best_source_candidate(self, test_file: str) -> str:
        """Pick a repo-relative source file to name in the synthetic traceback.

        Preference order: ``source.py`` at the repo root, then the
        alphabetically first ``.py`` file that does not look like a test file,
        and finally ``test_file`` itself when nothing else qualifies.
        """
        if (self.repo_path / "source.py").exists():
            return "source.py"

        # rglob yields paths in filesystem order, which varies across
        # platforms; sorting keeps the chosen file stable between runs.
        candidates = sorted(
            path.relative_to(self.repo_path).as_posix()
            for path in self.repo_path.rglob("*.py")
        )
        return next(
            (rel for rel in candidates if not self._looks_like_test_file(rel)),
            test_file,
        )

    @staticmethod
    def _looks_like_test_file(rel_path: str) -> bool:
        """Return True when a posix-style relative path names test code.

        Covers files under any ``tests`` directory as well as the usual
        ``test_*.py``, ``*_test.py`` and ``conftest.py`` file names.
        """
        *directories, filename = rel_path.split("/")
        return (
            "tests" in directories
            or filename == "conftest.py"
            or filename.startswith("test_")
            or filename.endswith("_test.py")
        )
def _extract_targeting_section(prompt: str) -> str:
header = "=== TARGETING HINTS ==="
if header not in prompt:
return "(TARGETING HINTS section not found in prompt)"
lines = prompt.splitlines()
start = None
for i, line in enumerate(lines):
if line.strip() == header:
start = i
break
if start is None:
return "(TARGETING HINTS section not found in prompt)"
collected = []
for line in lines[start:]:
if line.startswith("=== ") and line.strip() != header and collected:
break
collected.append(line)
return "\n".join(collected)
def main() -> None:
    """Drive the demo pipeline and print each stage of tool evidence.

    Parses the command line, resets a ``FlakeForgeEnvironment`` backed by a
    ``DemoRunner``, then prints in order: the preflight result, counts of the
    deep signals, the first ten causal hints from the observation, the hints
    returned by a direct ``build_agent_targeting_hints`` call, and the
    targeting section extracted from the unified prompt.
    """
    cli = argparse.ArgumentParser(description="Visualize FlakeForge tool-driven file targeting")
    cli.add_argument(
        "--repo-path",
        default=os.environ.get("FF_REPO_PATH", "test_repos/moderate_load_jitter_flaky"),
        help="Repo path used for environment reset",
    )
    cli.add_argument(
        "--test-id",
        default=os.environ.get("FF_TEST_ID", "tests/test_flaky.py::test_request_processing_should_succeed"),
        help="Test identifier",
    )
    cli.add_argument("--quick-runs", type=int, default=6, help="Preflight quick runs")
    cli.add_argument("--confirm-runs", type=int, default=6, help="Preflight confirm runs")
    options = cli.parse_args()

    def show_numbered(hints) -> None:
        # Shared "01. <hint>" listing used by both hint sections below.
        for position, hint in enumerate(hints, start=1):
            print(f"{position:02d}. {hint}")

    repo_root = Path(options.repo_path)
    demo_runner = DemoRunner(repo_path=repo_root, test_identifier=options.test_id)
    env = FlakeForgeEnvironment(
        repo_path=str(repo_root),
        test_identifier=options.test_id,
        runner=demo_runner,
        max_steps=3,
        num_runs=8,
    )
    obs = env.reset(
        preflight_quick_runs=options.quick_runs,
        preflight_confirm_runs=options.confirm_runs,
        drop_deterministic_bugs=False,
    )

    print("\n=== PREFLIGHT SUMMARY ===")
    print(json.dumps(obs.preflight_result, indent=2))

    # Observation attributes that are reported by size and also forwarded
    # unchanged to the direct targeting-hints call.
    signal_fields = (
        "module_cache_violations",
        "fixture_scope_risks",
        "mock_residue_sites",
        "import_side_effect_files",
    )

    print("\n=== DEEP SIGNAL SNAPSHOT ===")
    signal_summary = {field: len(getattr(obs, field)) for field in signal_fields}
    signal_summary["async_contamination_alive"] = bool(obs.async_contamination_alive)
    print(json.dumps(signal_summary, indent=2))

    print("\n=== MERGED TARGETING HINTS IN OBSERVATION ===")
    show_numbered(obs.causal_hints[:10])

    direct_hints = build_agent_targeting_hints(
        repo_path=str(repo_root),
        test_identifier=obs.test_identifier,
        failing_stack_trace=obs.failing_stack_trace,
        source_under_test=obs.source_under_test,
        causal_frontier=obs.failure_frontier,
        deep_signals={field: getattr(obs, field) for field in signal_fields},
        max_hints=8,
    )
    print("\n=== TOOLS-ONLY TARGETING HINTS (DIRECT CALL) ===")
    show_numbered(direct_hints)

    agent_prompt = build_unified_prompt(obs)
    print("\n=== PROMPT TARGETING SECTION SEEN BY AGENT ===")
    print(_extract_targeting_section(agent_prompt))
# Run the visualization only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|