File size: 6,842 Bytes
ee933ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
"""Visualize how FlakeForge tool evidence is produced and fed to the agent.



This script does not require Docker. It uses a lightweight demo runner that

alternates pass/fail outcomes and emits traceback-like stderr so the same

pipeline pieces are exercised:

- preflight classification

- deep flakiness signals

- causal frontier extraction

- tools-based file targeting hints

- final prompt section shown to the agent



Usage:

  c:/CodingNest/FlakeForge/venv/Scripts/python.exe tests/visualize_agent_tool_usage.py

  c:/CodingNest/FlakeForge/venv/Scripts/python.exe tests/visualize_agent_tool_usage.py --repo-path test_repos/moderate_load_jitter_flaky --test-id tests/test_flaky.py::test_request_processing_should_succeed

"""

from __future__ import annotations

import argparse
import json
import os
import sys
from pathlib import Path
from typing import Optional

# Ensure project root imports resolve when running as a script.
# PROJECT_ROOT is the directory one level above this file's own directory.
# It is placed at the front of sys.path (only if not already present) so that
# the project-local imports that follow can be resolved.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from agent.unified_agent import build_unified_prompt
from models import RunRecord
from server.FlakeForge_environment import FlakeForgeEnvironment
from server.tools import build_agent_targeting_hints


class DemoRunner:
    """Deterministic alternating pass/fail runner for visualization only.

    No test is actually executed. Odd-numbered calls to :meth:`run_test`
    report a failure carrying a synthetic traceback; even-numbered calls
    report a pass. The traceback names the test file and one "source" file
    chosen by :meth:`_best_source_candidate`.
    """

    # Directory names that are treated as test-only when looking for a
    # plausible source file to mention in the synthetic traceback.
    _TEST_DIR_NAMES = frozenset({"tests", "test"})

    def __init__(self, repo_path: Path, test_identifier: str) -> None:
        """Remember the repository root and test id; start the call counter at 0."""
        self.repo_path = repo_path
        self.test_identifier = test_identifier
        self.counter = 0

    def run_test(self, test_identifier: str) -> RunRecord:
        """Return a synthetic run record, alternating fail / pass per call.

        Args:
            test_identifier: Test id; the part before the first ``::`` is
                used as the test file path in the fake traceback.

        Returns:
            A passing ``RunRecord`` on even-numbered calls, otherwise a
            failing one whose ``stderr_excerpt`` is a traceback-like string.
        """
        self.counter += 1

        # Alternate outcomes so preflight can classify as flaky.
        if self.counter % 2 == 0:
            return RunRecord(
                passed=True,
                duration_ms=40 + (self.counter % 7),
                error_type=None,
                error_message=None,
                stderr_excerpt=None,
            )

        # Only failing runs need file paths, so the repository scan is
        # skipped entirely for passing runs.
        test_file = test_identifier.split("::", 1)[0]
        source_file = self._best_source_candidate(test_file)

        trace = (
            "Traceback (most recent call last):\n"
            f"  File \"{(self.repo_path / test_file).as_posix()}\", line 42, in test_case\n"
            f"  File \"{(self.repo_path / source_file).as_posix()}\", line 21, in process_request\n"
            "AssertionError: transient mismatch\n"
        )
        return RunRecord(
            passed=False,
            duration_ms=95 + (self.counter % 13),
            error_type="AssertionError",
            error_message="transient mismatch",
            stderr_excerpt=trace,
        )

    def _best_source_candidate(self, test_file: str) -> str:
        """Pick a repo-relative source file to cite in the fake traceback.

        Preference order:
        1. ``source.py`` directly under the repository root, if it exists.
        2. The lexicographically smallest non-test ``*.py`` file in the
           repository (sorted so the choice does not depend on the order in
           which the filesystem happens to list entries).
        3. ``test_file`` itself when nothing else qualifies.

        Args:
            test_file: Repo-relative test file path used as the last resort.

        Returns:
            A POSIX-style path relative to ``self.repo_path``.
        """
        if (self.repo_path / "source.py").exists():
            return "source.py"

        candidates = []
        for path in self.repo_path.rglob("*.py"):
            rel = path.relative_to(self.repo_path)
            if self._is_excluded(rel) or not path.is_file():
                continue
            candidates.append(rel.as_posix())

        return min(candidates, default=test_file)

    @classmethod
    def _is_excluded(cls, rel: Path) -> bool:
        """Return True when a repo-relative path is test code or in a hidden directory."""
        name = rel.name
        if name == "conftest.py" or name.startswith("test_") or name.endswith("_test.py"):
            return True
        # Check every directory component, not just the first, so nested
        # test packages (e.g. pkg/tests/) are skipped as well.
        return any(
            part in cls._TEST_DIR_NAMES or part.startswith(".")
            for part in rel.parts[:-1]
        )


def _extract_targeting_section(prompt: str) -> str:
    header = "=== TARGETING HINTS ==="
    if header not in prompt:
        return "(TARGETING HINTS section not found in prompt)"

    lines = prompt.splitlines()
    start = None
    for i, line in enumerate(lines):
        if line.strip() == header:
            start = i
            break
    if start is None:
        return "(TARGETING HINTS section not found in prompt)"

    collected = []
    for line in lines[start:]:
        if line.startswith("=== ") and line.strip() != header and collected:
            break
        collected.append(line)
    return "\n".join(collected)


def main() -> None:
    """Run the demo pipeline and print each stage of targeting evidence.

    Parses command-line options (repo path and test id default to the
    ``FF_REPO_PATH`` / ``FF_TEST_ID`` environment variables when set),
    resets a ``FlakeForgeEnvironment`` backed by ``DemoRunner``, and prints
    in order: the preflight result, deep-signal counts, the observation's
    first ten causal hints, hints from a direct
    ``build_agent_targeting_hints`` call, and the targeting section of the
    prompt built from the observation.
    """
    cli = argparse.ArgumentParser(description="Visualize FlakeForge tool-driven file targeting")
    cli.add_argument(
        "--repo-path",
        help="Repo path used for environment reset",
        default=os.environ.get("FF_REPO_PATH", "test_repos/moderate_load_jitter_flaky"),
    )
    cli.add_argument(
        "--test-id",
        help="Test identifier",
        default=os.environ.get("FF_TEST_ID", "tests/test_flaky.py::test_request_processing_should_succeed"),
    )
    # Both preflight run-count options share the same type and default.
    for flag, description in (
        ("--quick-runs", "Preflight quick runs"),
        ("--confirm-runs", "Preflight confirm runs"),
    ):
        cli.add_argument(flag, type=int, default=6, help=description)
    options = cli.parse_args()

    repo_root = Path(options.repo_path)
    demo_runner = DemoRunner(repo_path=repo_root, test_identifier=options.test_id)
    environment = FlakeForgeEnvironment(
        repo_path=str(repo_root),
        test_identifier=options.test_id,
        runner=demo_runner,
        max_steps=3,
        num_runs=8,
    )
    obs = environment.reset(
        preflight_quick_runs=options.quick_runs,
        preflight_confirm_runs=options.confirm_runs,
        drop_deterministic_bugs=False,
    )

    def print_numbered(hints) -> None:
        # Two-digit, one-based numbering: "01. <hint>".
        for position, hint in enumerate(hints, start=1):
            print(f"{position:02d}. {hint}")

    # Observation attributes that are reported by length and also forwarded
    # as deep signals to the direct hint builder call.
    list_signal_names = (
        "module_cache_violations",
        "fixture_scope_risks",
        "mock_residue_sites",
        "import_side_effect_files",
    )

    print("\n=== PREFLIGHT SUMMARY ===")
    print(json.dumps(obs.preflight_result, indent=2))

    print("\n=== DEEP SIGNAL SNAPSHOT ===")
    snapshot = {name: len(getattr(obs, name)) for name in list_signal_names}
    snapshot["async_contamination_alive"] = bool(obs.async_contamination_alive)
    print(json.dumps(snapshot, indent=2))

    print("\n=== MERGED TARGETING HINTS IN OBSERVATION ===")
    print_numbered(obs.causal_hints[:10])

    direct_hints = build_agent_targeting_hints(
        repo_path=str(repo_root),
        test_identifier=obs.test_identifier,
        failing_stack_trace=obs.failing_stack_trace,
        source_under_test=obs.source_under_test,
        causal_frontier=obs.failure_frontier,
        deep_signals={name: getattr(obs, name) for name in list_signal_names},
        max_hints=8,
    )

    print("\n=== TOOLS-ONLY TARGETING HINTS (DIRECT CALL) ===")
    print_numbered(direct_hints)

    agent_prompt = build_unified_prompt(obs)
    print("\n=== PROMPT TARGETING SECTION SEEN BY AGENT ===")
    print(_extract_targeting_section(agent_prompt))


# Run the visualization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()