technophyle committed on
Commit
e84b903
·
verified ·
1 Parent(s): 538d769

Sync from GitHub via hub-sync

Browse files
Files changed (1) hide show
  1. evals/run_eval.py +35 -39
evals/run_eval.py CHANGED
@@ -423,18 +423,14 @@ def maybe_write_report(report):
423
  return str(target)
424
 
425
 
426
- def build_vertex_ragas_llm(run_config):
427
- from google import genai
428
  from langchain_core.outputs import Generation, LLMResult
429
  from ragas.llms.base import BaseRagasLLM
430
 
431
- class VertexRagasLLM(BaseRagasLLM):
432
- def __init__(self, model: str, project: str, location: str, run_config):
433
- self.client = genai.Client(
434
- vertexai=True,
435
- project=project,
436
- location=location,
437
- )
438
  self.model = model
439
  self.set_run_config(run_config)
440
 
@@ -449,35 +445,35 @@ def build_vertex_ragas_llm(run_config):
449
 
450
  def _generate_once(self, prompt, n=1, temperature=1e-8, stop=None, callbacks=None):
451
  prompt_text = self._prompt_to_text(prompt)
452
- config = {
453
  "temperature": 0.0,
454
- "candidate_count": max(1, n),
455
- "max_output_tokens": int(os.getenv("EVAL_MAX_OUTPUT_TOKENS", "2048")),
456
- "response_mime_type": "application/json",
457
  }
458
  if stop:
459
- config["stop_sequences"] = stop
460
-
461
- response = self.client.models.generate_content(
462
- model=self.model,
463
- contents=prompt_text,
464
- config=config,
 
 
 
 
 
465
  )
466
 
467
- candidates = getattr(response, "candidates", None) or []
468
  generations = []
469
- if candidates:
470
- for candidate in candidates[: max(1, n)]:
471
- text = getattr(candidate, "text", None)
472
- if text is None and hasattr(candidate, "content"):
473
- parts = getattr(candidate.content, "parts", None) or []
474
- text = "".join(getattr(part, "text", "") for part in parts if getattr(part, "text", ""))
475
- generations.append(Generation(text=(text or "").strip()))
476
- elif getattr(response, "text", None):
477
- generations.append(Generation(text=response.text.strip()))
478
 
479
  if not generations:
480
- raise RuntimeError("Vertex AI judge returned an empty response.")
481
 
482
  return LLMResult(generations=[generations])
483
 
@@ -500,12 +496,12 @@ def build_vertex_ragas_llm(run_config):
500
  callbacks,
501
  )
502
 
503
- project = os.getenv("GOOGLE_CLOUD_PROJECT")
504
- location = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1")
505
- model = os.getenv("EVAL_MODEL", os.getenv("VERTEX_LLM_MODEL", "gemini-2.5-pro"))
506
- if not project:
507
- raise RuntimeError("GOOGLE_CLOUD_PROJECT must be set for Vertex AI RAGAS evaluation.")
508
- return VertexRagasLLM(model=model, project=project, location=location, run_config=run_config)
509
 
510
 
511
  def build_ragas_embeddings(run_config):
@@ -572,14 +568,14 @@ def run_ragas(rows, outputs):
572
  max_wait=int(os.getenv("EVAL_MAX_WAIT_SECONDS", "60")),
573
  )
574
  log(
575
- "Using Vertex AI for RAGAS judge model "
576
- f"({os.getenv('EVAL_MODEL', os.getenv('VERTEX_LLM_MODEL', 'gemini-2.5-pro'))})"
577
  )
578
  log(
579
  f"RAGAS runtime: async={RAGAS_ASYNC}, raise_exceptions={RAGAS_RAISE_EXCEPTIONS}, "
580
  f"timeout={timeout_seconds}s, thread_timeout={thread_timeout_seconds}s, max_workers={max_workers}"
581
  )
582
- llm = build_vertex_ragas_llm(run_config)
583
  embeddings = build_ragas_embeddings(run_config)
584
  ragas_report = evaluate(
585
  build_ragas_dataset(),
 
423
  return str(target)
424
 
425
 
426
+ def build_bedrock_ragas_llm(run_config):
427
+ import boto3
428
  from langchain_core.outputs import Generation, LLMResult
429
  from ragas.llms.base import BaseRagasLLM
430
 
431
+ class BedrockRagasLLM(BaseRagasLLM):
432
+ def __init__(self, model: str, region: str, run_config):
433
+ self.client = boto3.client("bedrock-runtime", region_name=region)
 
 
 
 
434
  self.model = model
435
  self.set_run_config(run_config)
436
 
 
445
 
446
  def _generate_once(self, prompt, n=1, temperature=1e-8, stop=None, callbacks=None):
447
  prompt_text = self._prompt_to_text(prompt)
448
+ inference_config = {
449
  "temperature": 0.0,
450
+ "maxTokens": int(os.getenv("EVAL_MAX_OUTPUT_TOKENS", "2048")),
 
 
451
  }
452
  if stop:
453
+ inference_config["stopSequences"] = stop
454
+
455
+ response = self.client.converse(
456
+ modelId=self.model,
457
+ messages=[
458
+ {
459
+ "role": "user",
460
+ "content": [{"text": prompt_text}],
461
+ }
462
+ ],
463
+ inferenceConfig=inference_config,
464
  )
465
 
 
466
  generations = []
467
+ output_message = (response.get("output") or {}).get("message") or {}
468
+ content_blocks = output_message.get("content") or []
469
+ text = "".join(
470
+ block.get("text", "") for block in content_blocks if isinstance(block, dict)
471
+ ).strip()
472
+ if text:
473
+ generations.append(Generation(text=text))
 
 
474
 
475
  if not generations:
476
+ raise RuntimeError("AWS Bedrock judge returned an empty response.")
477
 
478
  return LLMResult(generations=[generations])
479
 
 
496
  callbacks,
497
  )
498
 
499
+ region = os.getenv("AWS_REGION", os.getenv("AWS_DEFAULT_REGION", "us-east-1"))
500
+ model = os.getenv(
501
+ "EVAL_MODEL",
502
+ os.getenv("BEDROCK_EVAL_MODEL", "us.anthropic.claude-haiku-4-5-20251001"),
503
+ )
504
+ return BedrockRagasLLM(model=model, region=region, run_config=run_config)
505
 
506
 
507
  def build_ragas_embeddings(run_config):
 
568
  max_wait=int(os.getenv("EVAL_MAX_WAIT_SECONDS", "60")),
569
  )
570
  log(
571
+ "Using AWS Bedrock for RAGAS judge model "
572
+ f"({os.getenv('EVAL_MODEL', os.getenv('BEDROCK_EVAL_MODEL', 'us.anthropic.claude-haiku-4-5-20251001'))})"
573
  )
574
  log(
575
  f"RAGAS runtime: async={RAGAS_ASYNC}, raise_exceptions={RAGAS_RAISE_EXCEPTIONS}, "
576
  f"timeout={timeout_seconds}s, thread_timeout={thread_timeout_seconds}s, max_workers={max_workers}"
577
  )
578
+ llm = build_bedrock_ragas_llm(run_config)
579
  embeddings = build_ragas_embeddings(run_config)
580
  ragas_report = evaluate(
581
  build_ragas_dataset(),