e2hln committed on
Commit 6165ba9 · verified · 1 Parent(s): 9da734b

Upload 44 files

.dockerignore ADDED
@@ -0,0 +1,17 @@
+ .git
+ .github
+ .pytest_cache
+ .ruff_cache
+ .venv
+ __pycache__
+ *.pyc
+ *.pyo
+ *.pyd
+ *.log
+ *.tmp
+ *.swp
+ .DS_Store
+ tests
+ docs
+ sboms
+ output
Dockerfile ADDED
@@ -0,0 +1,21 @@
+ FROM python:3.11-slim
+
+ WORKDIR /app
+
+ ENV PYTHONUNBUFFERED=1
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PIP_NO_CACHE_DIR=1
+ ENV PYTHONPATH=/app
+
+ COPY README.md /app/README.md
+ COPY pyproject.toml /app/pyproject.toml
+ COPY LICENSE /app/LICENSE
+ COPY PROJECT_README.md /app/PROJECT_README.md
+ COPY src /app/src
+ COPY entrypoint.sh /app/entrypoint.sh
+
+ RUN pip install --upgrade pip \
+     && pip install -e . \
+     && chmod +x /app/entrypoint.sh
+
+ ENTRYPOINT ["/app/entrypoint.sh"]
LICENSE ADDED
@@ -0,0 +1,19 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ Copyright 2026 OWASP Foundation - AI SBOM Generator and contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
PROJECT_README.md ADDED
@@ -0,0 +1,101 @@
+ # 🤖 OWASP GenAI Security Project - AIBOM Generator
+
+ This is the official GitHub repository for the **OWASP AIBOM Generator** — an open-source tool for generating **AI Bills of Materials (AIBOMs)** in [CycloneDX](https://cyclonedx.org) format.
+ The tool is also listed in the official **[CycloneDX Tool Center](https://cyclonedx.org/tool-center/)**.
+
+ 🚀 **Try the tool live:**
+ 👉 https://owasp-genai-aibom.org
+ 🔖 Bookmark and share: https://owasp-genai-aibom.org
+
+ 🌐 OWASP AIBOM Initiative: [genai.owasp.org](https://genai.owasp.org/)
+
+ > This initiative is about making AI transparency practical. The OWASP AIBOM Generator, running under the OWASP GenAI Security Project, is focused on helping organizations understand what's actually inside AI models and systems, starting with open models on Hugging Face.
+ > Join the OWASP GenAI Security Project - AIBOM Initiative to contribute.
+
+ ---
+
+ ## 📦 What It Does
+
+ - Extracts metadata from models hosted on Hugging Face 🤗
+ - Generates an **AIBOM** (AI Bill of Materials) in CycloneDX 1.6 JSON format
+ - Calculates an **AIBOM completeness score** with recommendations
+ - Supports metadata extraction from model cards, configurations, and repository files
+
+ ---
+
+ ## 🛠 Features
+
+ - Human-readable AIBOM viewer
+ - JSON download
+ - Completeness scoring & improvement tips
+ - API endpoints for automation
+ - Standards-aligned generation (CycloneDX 1.6, compatible with the SPDX AI Profile)
+
+ ---
+
+ ## 🚀 Installation & Usage
+
+ ### 1. Install Dependencies
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ Or, if you prefer [uv](https://docs.astral.sh/uv/) for faster dependency management:
+ ```bash
+ uv sync
+ ```
+
+ ### 2. Run the Web Application
+ Start the local server at `http://localhost:8000`:
+ ```bash
+ python3 -m src.main
+ ```
+
+ ### 3. Run via CLI
+ Generate an AIBOM for a Hugging Face model directly from your terminal:
+
+ **Basic Usage:**
+ ```bash
+ python3 -m src.cli google-bert/bert-base-uncased
+ ```
+
+ **Advanced Usage:**
+ You can specify additional metadata such as the component name, version, and supplier.
+ ```bash
+ python3 -m src.cli google-bert/bert-base-uncased \
+     --name "My Custom BERT" \
+     --version "1.0.0" \
+     --manufacturer "Acme Corp" \
+     --output "my_sbom.json"
+ ```
+
+ **Command Line Options:**
+
+ | Option | Shorthand | Description |
+ |--------|-----------|-------------|
+ | `model_id` | | Hugging Face Model ID (e.g., `owner/model`) |
+ | `--test` | `-t` | Run test mode for multiple predefined models |
+ | `--output` | `-o` | Custom output file path |
+ | `--name` | `-n` | Override component name in metadata |
+ | `--version` | `-v` | Override component version in metadata |
+ | `--manufacturer` | `-m` | Override component manufacturer/supplier |
+ | `--inference` | `-i` | Use AI inference for enhanced metadata (requires API key) |
+ | `--summarize` | `-s` | Enable intelligent description summarization |
+ | `--verbose` | | Enable verbose logging |
+
+ * Metrics and generated SBOMs are saved to the `sboms/` directory by default.
+
+ ---
+
+ ## 🐞 Found a Bug or Have an Improvement Request?
+
+ We welcome contributions and feedback.
+
+ ➡ **Log an issue:**
+ https://github.com/GenAI-Security-Project/aibom-generator/issues
+
+ ---
+
+ ## 📄 License
+
+ This project is open source and available under the [Apache 2.0 License](LICENSE).
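The generated AIBOM files are plain CycloneDX JSON, so they can be post-processed with the standard library alone. A minimal sketch follows; the sample document is illustrative (field names follow the CycloneDX spec, not verified against this tool's exact output):

```python
import json

# Illustrative CycloneDX-style AIBOM fragment (NOT real generator output)
sample = """
{
  "bomFormat": "CycloneDX",
  "specVersion": "1.6",
  "components": [
    {"type": "machine-learning-model",
     "name": "bert-base-uncased",
     "description": "BERT base model (uncased)"}
  ]
}
"""

aibom = json.loads(sample)
# List every component as "type: name"
for comp in aibom.get("components", []):
    print(f"{comp['type']}: {comp['name']}")
```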
README.md ADDED
@@ -0,0 +1,38 @@
+ ---
+ title: OWASP AIBOM Generator
+ emoji: 🚀
+ colorFrom: indigo
+ colorTo: green
+ sdk: docker
+ app_port: 7860
+ pinned: true
+ license: apache-2.0
+ short_description: OWASP GenAI Security Project - AI Bill of Materials
+ thumbnail: >-
+   https://cdn-uploads.huggingface.co/production/uploads/666afcef4fcfc38e18cba142/G7x702vfcrrarm6utDQoM.png
+ ---
+
+ # OWASP AIBOM Generator
+
+ This Space runs the existing OWASP AIBOM Generator web application as a Docker
+ Space. It generates AI Bills of Materials for Hugging Face-hosted models using
+ the same service and business logic as the main project.
+
+ ## Usage
+
+ 1. Enter a Hugging Face model ID or model URL.
+ 2. Submit the form.
+ 3. Review the generated AIBOM and download the JSON output.
+
+ ## Runtime notes
+
+ - Default web startup binds to `0.0.0.0:${PORT:-7860}`.
+ - Generated output and Hugging Face caches prefer `/data` when persistent
+   storage is available.
+ - If `/data` is not available, the container falls back to `/tmp`.
+ - `HF_TOKEN` is optional. Without it, analytics logging and private-model access
+   may be limited, but the public-model web app still works.
+
+ ## Project
+
+ The main project documentation is bundled in `PROJECT_README.md`.
entrypoint.sh ADDED
@@ -0,0 +1,25 @@
+ #!/bin/sh
+ set -eu
+
+ if [ -d "/data" ] && [ -w "/data" ]; then
+     CACHE_ROOT="/data/.cache/huggingface"
+     OUTPUT_ROOT="/data/aibom_output"
+ else
+     CACHE_ROOT="/tmp/.cache/huggingface"
+     OUTPUT_ROOT="/tmp/aibom_output"
+ fi
+
+ mkdir -p "${CACHE_ROOT}" "${OUTPUT_ROOT}"
+
+ export HF_HOME="${HF_HOME:-${CACHE_ROOT}}"
+ export TRANSFORMERS_CACHE="${TRANSFORMERS_CACHE:-${CACHE_ROOT}/transformers}"
+ export AIBOM_OUTPUT_DIR="${AIBOM_OUTPUT_DIR:-${OUTPUT_ROOT}}"
+ export PORT="${PORT:-7860}"
+
+ mkdir -p "${TRANSFORMERS_CACHE}" "${AIBOM_OUTPUT_DIR}"
+
+ if [ "$#" -gt 0 ]; then
+     exec python -m src.cli "$@"
+ fi
+
+ exec uvicorn src.main:app --host 0.0.0.0 --port "${PORT}"
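The storage selection in this entrypoint (prefer a writable `/data` volume, else fall back to `/tmp`) can be mirrored in Python for testing outside the container. `pick_roots` below is a hypothetical helper, not part of the project:

```python
import os

def pick_roots(data_dir="/data"):
    """Mirror the entrypoint's storage choice: prefer the persistent
    data volume when it exists and is writable, else fall back to /tmp."""
    if os.path.isdir(data_dir) and os.access(data_dir, os.W_OK):
        base = data_dir
    else:
        base = "/tmp"
    return (os.path.join(base, ".cache", "huggingface"),
            os.path.join(base, "aibom_output"))

# Without a writable persistent volume, /tmp is used:
print(pick_roots("/no-such-volume"))
```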
pyproject.toml ADDED
@@ -0,0 +1,75 @@
+
+ [project]
+ name = "owasp-aibom-generator"
+ version = "1.0.2"
+ description = "A comprehensive AI Bill of Materials (AIBOM) generation tool for Hugging Face models."
+ authors = [
+     { name = "OWASP GenAI Security Project", email = "genai-security@owasp.org" }
+ ]
+ readme = "README.md"
+ requires-python = ">=3.11"
+ license = { text = "Apache-2.0" }
+ classifiers = [
+     "Programming Language :: Python :: 3",
+     "License :: OSI Approved :: Apache Software License",
+     "Operating System :: OS Independent",
+     "Topic :: Security",
+     "Topic :: Scientific/Engineering :: Artificial Intelligence"
+ ]
+ dependencies = [
+     "beautifulsoup4>=4.11.0",
+     "datasets>=2.0.0",
+     "fastapi>=0.104.0",
+     "flask>=2.3.0",
+     "gunicorn>=21.2.0",
+     "httpx>=0.25.0",
+     "huggingface_hub>=0.19.0",
+     "jinja2>=3.0.0",
+     "jsonschema>=4.17.0",
+     "license-expression>=30.4.4",
+     "packageurl-python>=0.11.1",
+     "pydantic>=2.4.0",
+     "python-multipart",
+     "PyYAML>=6.0.1",
+     "requests>=2.31.0",
+     "sentencepiece>=0.1.99",
+     "torch>=2.0.0",
+     "transformers>=4.36.0",
+     "uvicorn>=0.24.0",
+ ]
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=7.0.0",
+     "pytest-cov>=4.0.0",
+     "pytest-mock>=3.10.0",
+     "ruff",
+     "gguf>=0.6.0"
+ ]
+
+ [project.scripts]
+ aibom = "src.cli:main"
+
+ [build-system]
+ requires = ["setuptools>=61.0", "wheel"]
+ build-backend = "setuptools.build_meta"
+
+ [tool.setuptools.packages.find]
+ where = ["."]
+ include = ["src*"]
+ namespaces = false
+
+ [tool.pytest.ini_options]
+ minversion = "6.0"
+ addopts = "-ra -q --cov=src"
+ testpaths = [
+     "tests",
+ ]
+ pythonpath = [
+     "."
+ ]
+
+ [dependency-groups]
+ dev = [
+     "gguf>=0.6.0",
+ ]
src/__init__.py ADDED
File without changes
src/cli.py ADDED
@@ -0,0 +1,72 @@
+ import argparse
+ import sys
+ from .controllers.cli_controller import CLIController
+
+ def main():
+     parser = argparse.ArgumentParser(description="OWASP AIBOM Generator CLI")
+     parser.add_argument("model_id", nargs="?", help="Hugging Face Model ID (e.g. 'owner/model')")
+     parser.add_argument("--test", "-t", action="store_true", help="Run test mode for multiple predefined models to verify description generation")
+     parser.add_argument("--output", "-o", help="Output file path")
+     parser.add_argument("--inference", "-i", action="store_true", help="Use AI inference for enhanced metadata (requires configured valid endpoint)")
+     parser.add_argument("--summarize", "-s", action="store_true", help="Enable intelligent description summarization (requires model download)")
+     parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+     parser.add_argument("--name", "-n", help="Component name in metadata")
+     parser.add_argument("--version", "-v", help="Component version in metadata")
+     parser.add_argument("--manufacturer", "-m", help="Component manufacturer/supplier in metadata")
+
+     args = parser.parse_args()
+
+     controller = CLIController()
+
+     if args.test:
+         test_models = [
+             "Qwen/Qwen3.5-397B-A17B",
+             "nvidia/personaplex-7b-v1",
+             "meta-llama/Llama-2-7b-chat-hf",
+             "unsloth/Qwen3.5-35B-A3B-GGUF",
+             "LocoreMind/LocoOperator-4B",
+             "Nanbeige/Nanbeige4.1-3B",
+             "zai-org/GLM-5",
+             "MiniMaxAI/MiniMax-M2.5",
+             "unsloth/Qwen3.5-397B-A17B-GGUF",
+             "FireRedTeam/FireRed-Image-Edit-1.0",
+             "nvidia/NVIDIA-Nemotron-Nano-9B-v2-Japanese",
+             "mistralai/Voxtral-Mini-4B-Realtime-2602",
+             "TeichAI/GLM-4.7-Flash-Claude-Opus-4.5-High-Reasoning-Distill-GGUF",
+             "CIRCL/vulnerability-severity-classification-roberta-base"
+         ]
+
+         print(f"Running test mode against {len(test_models)} models...")
+         for model in test_models:
+             print(f"\n{'='*50}\nTesting model: {model}\n{'='*50}")
+             try:
+                 controller.generate(
+                     model_id=model,
+                     output_file=args.output,
+                     include_inference=args.inference,
+                     enable_summarization=True,  # Ensure summarization is on for testing descriptions
+                     verbose=args.verbose,
+                     name=args.name,
+                     version=args.version,
+                     manufacturer=args.manufacturer
+                 )
+             except Exception as e:
+                 print(f"Error testing {model}: {e}")
+         sys.exit(0)
+
+     if not args.model_id:
+         parser.error("model_id is required unless --test is specified")
+
+     controller.generate(
+         model_id=args.model_id,
+         output_file=args.output,
+         include_inference=args.inference,
+         enable_summarization=args.summarize,
+         verbose=args.verbose,
+         name=args.name,
+         version=args.version,
+         manufacturer=args.manufacturer
+     )
+
+ if __name__ == "__main__":
+     main()
src/config.py ADDED
@@ -0,0 +1,34 @@
+ import os
+ from pathlib import Path
+ import tomllib
+
+
+ # Base directory setup
+ BASE_DIR = Path(__file__).resolve().parent
+ OUTPUT_DIR = os.getenv("AIBOM_OUTPUT_DIR") or "/tmp/aibom_output"
+ # Ensure an absolute path for security
+ if not os.path.isabs(OUTPUT_DIR):
+     OUTPUT_DIR = os.path.abspath(OUTPUT_DIR)
+
+ def get_project_metadata() -> tuple[str, str]:
+     try:
+         pyproject_path = BASE_DIR.parent / "pyproject.toml"
+         with open(pyproject_path, "rb") as f:
+             data = tomllib.load(f)
+         return data["project"]["name"], data["project"]["version"]
+     except Exception:
+         return "owasp-aibom-generator", "1.0.2"
+
+ AIBOM_GEN_NAME, AIBOM_GEN_VERSION = get_project_metadata()
+
+ TEMPLATES_DIR = BASE_DIR / "templates"
+
+ # Cleanup configuration
+ MAX_AGE_DAYS = 7
+ MAX_FILES = 1000
+ CLEANUP_INTERVAL = 100
+
+ # Hugging Face setup
+ HF_REPO = "owasp-genai-security-project/aisbom-usage-log"
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ RECAPTCHA_SITE_KEY = os.getenv("RECAPTCHA_SITE_KEY")
src/controllers/__init__.py ADDED
@@ -0,0 +1 @@
+ # Controllers package
src/controllers/cli_controller.py ADDED
@@ -0,0 +1,214 @@
+ import json
+ import logging
+ from typing import Optional
+ from ..models.service import AIBOMService
+ from ..models.scoring import calculate_completeness_score
+ from ..config import OUTPUT_DIR, TEMPLATES_DIR
+ from ..utils.formatter import export_aibom
+ import os
+ import shutil
+
+ logger = logging.getLogger(__name__)
+
+ class CLIController:
+     def __init__(self):
+         self.service = AIBOMService()
+
+     def _validate_spdx_schema_version(self, aibom_data: dict, spec_version: str):
+         """
+         TODO: Implement SPDX schema validation.
+         """
+         pass
+
+     def generate(self, model_id: str, output_file: Optional[str] = None, include_inference: bool = False,
+                  enable_summarization: bool = False, verbose: bool = False,
+                  name: Optional[str] = None, version: Optional[str] = None, manufacturer: Optional[str] = None):
+         if verbose:
+             logging.getLogger().setLevel(logging.INFO)
+
+         print(f"Generating AIBOM for {model_id}...")
+
+         versions_to_generate = ["1.6", "1.7"]
+         reports = []
+         generated_aiboms = {}
+
+         print("  - Generating AIBOM model data...")
+         try:
+             primary_aibom = self.service.generate_aibom(
+                 model_id,
+                 include_inference=include_inference,
+                 enable_summarization=enable_summarization,
+                 metadata_overrides={
+                     "name": name,
+                     "version": version,
+                     "manufacturer": manufacturer
+                 }
+             )
+             primary_report = self.service.get_enhancement_report()
+
+             # Formatted AIBOM strings
+             json_1_6 = export_aibom(primary_aibom, bom_type="cyclonedx", spec_version="1.6")
+             json_1_7 = export_aibom(primary_aibom, bom_type="cyclonedx", spec_version="1.7")
+
+             # Determine output filenames
+             normalized_id = self.service._normalise_model_id(model_id)
+             os.makedirs("sboms", exist_ok=True)
+
+             output_file_1_6 = output_file
+             if not output_file_1_6:
+                 output_file_1_6 = os.path.join("sboms", f"{normalized_id.replace('/', '_')}_ai_sbom_1_6.json")
+
+             base, ext = os.path.splitext(output_file_1_6)
+             output_file_1_7 = f"{base.replace('_1_6', '')}_1_7{ext}" if '_1_6' in base else f"{base}_1_7{ext}"
+
+             with open(output_file_1_6, 'w') as f:
+                 f.write(json_1_6)
+             with open(output_file_1_7, 'w') as f:
+                 f.write(json_1_7)
+
+             # Check for validation results
+             validation_data = primary_report.get("final_score", {}).get("validation", {})
+             is_valid = validation_data.get("valid", True)
+             validation_errors = [i["message"] for i in validation_data.get("issues", [])]
+
+             if "schema_validation" not in primary_report:
+                 primary_report["schema_validation"] = {}
+             primary_report["schema_validation"]["valid"] = is_valid
+             primary_report["schema_validation"]["errors"] = validation_errors
+             primary_report["schema_validation"]["error_count"] = len(validation_errors)
+
+             reports = [
+                 {"spec_version": "1.6", "output_file": output_file_1_6, "schema_validation": primary_report["schema_validation"]},
+                 {"spec_version": "1.7", "output_file": output_file_1_7, "schema_validation": primary_report["schema_validation"]}
+             ]
+             output_file_primary = output_file_1_6
+
+         except Exception as e:
+             logger.error(f"Failed to generate SBOM: {e}", exc_info=True)
+             print(f" ❌ Failed to generate SBOM: {e}")
+             reports = []
+
+         if reports:
+             if output_file_primary:
+                 try:
+                     from jinja2 import Environment, FileSystemLoader, select_autoescape
+
+                     env = Environment(
+                         loader=FileSystemLoader(TEMPLATES_DIR),
+                         autoescape=select_autoescape(['html', 'xml'])
+                     )
+                     template = env.get_template("result.html")
+
+                     completeness_score = primary_report.get("final_score")
+                     if not completeness_score:
+                         completeness_score = calculate_completeness_score(primary_aibom)
+
+                     # Pre-serialize to preserve order
+                     components_json = json.dumps(primary_aibom.get("components", []), indent=2)
+
+                     context = {
+                         "request": None,
+                         "filename": os.path.basename(output_file_primary),
+                         "download_url": "#",
+                         "aibom": primary_aibom,
+                         "components_json": components_json,
+                         "aibom_cdx_json_1_6": json_1_6,
+                         "aibom_cdx_json_1_7": json_1_7,
+                         "raw_aibom": primary_aibom,
+                         "model_id": self.service._normalise_model_id(model_id),
+                         "sbom_count": 0,
+                         "completeness_score": completeness_score,
+                         "enhancement_report": primary_report or {},
+                         "result_file": "#",
+                         "static_root": "static"
+                     }
+
+                     html_content = template.render(context)
+                     html_output_file = output_file_primary.replace("_1_6.json", ".html").replace(".json", ".html")
+                     with open(html_output_file, "w") as f:
+                         f.write(html_content)
+
+                     print(f"\n📄 HTML Report:\n  {html_output_file}")
+
+                     # Copy static assets next to the HTML report; resolve src/static
+                     # relative to this file so the CLI works from any working directory
+                     try:
+                         output_dir = os.path.dirname(html_output_file)
+                         current_dir = os.path.dirname(os.path.abspath(__file__))  # src/controllers
+                         src_dir = os.path.dirname(current_dir)  # src
+                         static_src = os.path.join(src_dir, "static")
+                         static_dst = os.path.join(output_dir, "static")
+
+                         if os.path.exists(static_src):
+                             if os.path.exists(static_dst):
+                                 shutil.rmtree(static_dst)
+                             shutil.copytree(static_src, static_dst)
+                         else:
+                             logger.warning(f"Static source directory not found: {static_src}")
+
+                     except Exception as e:
+                         logger.warning(f"Failed to copy static assets: {e}")
+
+                     # Model description
+                     if "components" in primary_aibom and primary_aibom["components"]:
+                         description = primary_aibom["components"][0].get("description", "No description available")
+                         if len(description) > 256:
+                             description = description[:253] + "..."
+                         print(f"\n📝 Model Description:\n  {description}")
+
+                     # License
+                     if "components" in primary_aibom and primary_aibom["components"]:
+                         comp = primary_aibom["components"][0]
+                         if "licenses" in comp:
+                             license_list = []
+                             for l in comp["licenses"]:
+                                 lic = l.get("license", {})
+                                 val = lic.get("id") or lic.get("name")
+                                 if val:
+                                     license_list.append(val)
+                             if license_list:
+                                 print(f"\n⚖️ License:\n  {', '.join(license_list)}")
+
+                 except Exception as e:
+                     logger.warning(f"Failed to generate HTML report: {e}")
+
+             # Print a summary for ALL versions
+             for r in reports:
+                 spec = r.get("spec_version", "1.6")
+                 print(f"\n✅ Successfully generated CycloneDX {spec} SBOM:")
+                 print(f"  {r.get('output_file')}")
+
+                 if not r["schema_validation"]["valid"]:
+                     print(f"⚠️ Schema Validation Errors ({spec}):")
+                     for err in r["schema_validation"]["errors"]:
+                         print(f"  - {err}")
+                 else:
+                     print(f"  - Schema Validation ({spec}): ✅ Valid")
+
+             # Display the detailed score summary (from the primary report)
+             if primary_report and "final_score" in primary_report:
+                 score = primary_report["final_score"]
+                 t_score = score.get('total_score', 0)
+                 formatted_t_score = int(t_score) if isinstance(t_score, (int, float)) and t_score == int(t_score) else t_score
+                 print(f"\n📊 Completeness Score: {formatted_t_score}/100")
+
+                 if "completeness_profile" in score:
+                     profile = score["completeness_profile"]
+                     print(f"  Profile: {profile.get('name')} - {profile.get('description')}")
+
+                 if "section_scores" in score:
+                     print("\n📋 Section Breakdown:")
+
+                     for section, s_score in score["section_scores"].items():
+                         max_s = score.get("max_scores", {}).get(section, "?")
+                         formatted_s_score = int(s_score) if isinstance(s_score, (int, float)) and s_score == int(s_score) else s_score
+                         print(f"  - {section.replace('_', ' ').title()}: {formatted_s_score}/{max_s}")
+
+         else:
+             print("\n❌ Failed to generate any SBOMs.")
src/controllers/web_controller.py ADDED
@@ -0,0 +1,167 @@
+ import os
+ import re
+ import json
+ import logging
+ import html
+ from urllib.parse import urlparse
+ from typing import Optional
+
+ from fastapi import APIRouter, Request, Form, HTTPException, Depends
+ from fastapi.responses import HTMLResponse, JSONResponse
+ from fastapi.templating import Jinja2Templates
+ from huggingface_hub import HfApi
+ from huggingface_hub.utils import RepositoryNotFoundError
+
+ from ..models.service import AIBOMService
+ from ..models.scoring import calculate_completeness_score
+ from ..utils.analytics import log_sbom_generation, get_sbom_count
+ from ..utils.formatter import export_aibom
+ from ..config import TEMPLATES_DIR, OUTPUT_DIR
+
+ logger = logging.getLogger(__name__)
+
+ router = APIRouter()
+ templates = Jinja2Templates(directory=TEMPLATES_DIR)
+
+ # --- Helpers ---
+ HF_ID_REGEX = re.compile(r"^[a-zA-Z0-9\.\-\_]+/[a-zA-Z0-9\.\-\_]+$")
+
+ def is_valid_hf_input(input_str: str) -> bool:
+     if not input_str or len(input_str) > 200:
+         return False
+     if input_str.startswith(("http://", "https://")):
+         try:
+             parsed = urlparse(input_str)
+             if parsed.netloc == "huggingface.co":
+                 parts = parsed.path.strip("/").split("/")
+                 if len(parts) >= 2 and parts[0] and parts[1]:
+                     if re.match(r"^[a-zA-Z0-9\.\-\_]+$", parts[0]) and \
+                        re.match(r"^[a-zA-Z0-9\.\-\_]+$", parts[1]):
+                         return True
+             return False
+         except Exception:
+             return False
+     else:
+         return bool(HF_ID_REGEX.match(input_str))
+
+ # --- Routes ---
+
+ @router.get("/", response_class=HTMLResponse)
+ async def root(request: Request):
+     return templates.TemplateResponse("index.html", {
+         "request": request,
+         "sbom_count": get_sbom_count()
+     })
+
+ @router.get("/status")
+ async def get_status():
+     return {"status": "operational", "version": "1.0.0", "generator_version": "2.0.0"}
+
+ @router.post("/generate", response_class=HTMLResponse)
+ async def generate_form(
+     request: Request,
+     model_id: str = Form(...),
+     include_inference: bool = Form(False),
+     use_best_practices: bool = Form(True)
+ ):
+     # Security: validate BEFORE sanitizing to prevent bypass attacks
+     # (e.g., <script>org/model</script> → &lt;script&gt;org/model&lt;/script&gt; could slip through)
+     if not is_valid_hf_input(model_id):
+         return templates.TemplateResponse("error.html", {
+             "request": request,
+             "error": "Invalid model ID format.",
+             "sbom_count": get_sbom_count(),
+             "model_id": html.escape(model_id)
+         })
+
+     # Sanitize after validation for safe display/storage
+     sanitized_model_id = html.escape(model_id)
+
+     # Use the service helper to normalize
+     normalized_id = AIBOMService._normalise_model_id(sanitized_model_id)
+
+     # Check existence (non-blocking)
+     import asyncio
+     try:
+         loop = asyncio.get_running_loop()
+         await loop.run_in_executor(None, lambda: HfApi().model_info(normalized_id))
+     except RepositoryNotFoundError:
+         return templates.TemplateResponse("error.html", {
+             "request": request,
+             "error": f"Model {normalized_id} not found on Hugging Face.",
+             "sbom_count": get_sbom_count(),
+             "model_id": normalized_id
+         })
+     except Exception as e:
+         return templates.TemplateResponse("error.html", {
+             "request": request,
+             "error": f"Error verifying model: {e}",
+             "sbom_count": get_sbom_count(),
+             "model_id": normalized_id
+         })
+
+     # Generate (non-blocking)
+     try:
+         def _generate_task():
+             service = AIBOMService(use_best_practices=use_best_practices)
+             aibom = service.generate_aibom(sanitized_model_id, include_inference=include_inference)
+             report = service.get_enhancement_report()
+             return service, aibom, report
+
+         service, aibom, report = await loop.run_in_executor(None, _generate_task)
+
+         # Save files (non-blocking I/O)
+         filename = f"{normalized_id.replace('/', '_')}_ai_sbom_1_6.json"
+         filepath = os.path.join(OUTPUT_DIR, filename)
+         filepath_1_7 = os.path.join(OUTPUT_DIR, f"{normalized_id.replace('/', '_')}_ai_sbom_1_7.json")
+
+         def _save_task():
+             # Generate formatted JSON strings
+             json_1_6 = export_aibom(aibom, bom_type="cyclonedx", spec_version="1.6")
+             json_1_7 = export_aibom(aibom, bom_type="cyclonedx", spec_version="1.7")
+
+             with open(filepath, "w") as f:
+                 f.write(json_1_6)
+             with open(filepath_1_7, "w") as f:
+                 f.write(json_1_7)
+             log_sbom_generation(sanitized_model_id)
+             return json_1_6, json_1_7
+
+         json_1_6, json_1_7 = await loop.run_in_executor(None, _save_task)
+
+         # Extract score
+         completeness_score = None
+         if report and "final_score" in report:
+             completeness_score = report["final_score"]
+
+         # Fallback score if needed
+         if not completeness_score:
+             completeness_score = calculate_completeness_score(aibom)
+
+         # Prepare context for the template
+         context = {
+             "request": request,
+             "filename": filename,
+             "download_url": f"/output/{filename}",
+             "aibom": aibom,
+             "aibom_cdx_json_1_6": json_1_6,
+             "aibom_cdx_json_1_7": json_1_7,
+             "components_json": json.dumps(aibom.get("components", []), indent=2),
+             "model_id": normalized_id,
+             "sbom_count": get_sbom_count(),
+             "completeness_score": completeness_score,
+             "enhancement_report": report or {},
+             # Legacy variable kept for template compatibility
+             "result_file": f"/output/{filename}"
+         }
+
+         return templates.TemplateResponse("result.html", context)
+
+     except Exception as e:
+         logger.error(f"Generation error: {e}", exc_info=True)
+         return templates.TemplateResponse("error.html", {
+             "request": request,
+             "error": f"Internal generation error: {e}",
+             "sbom_count": get_sbom_count(),
+             "model_id": normalized_id
+         })
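The model-ID validator above has no FastAPI dependencies, so it can be exercised standalone; this copy reproduces the controller's regex and URL rules to show the accept/reject behavior:

```python
import re
from urllib.parse import urlparse

# Same ID pattern as the web controller: "owner/model" with a limited charset
HF_ID_REGEX = re.compile(r"^[a-zA-Z0-9\.\-\_]+/[a-zA-Z0-9\.\-\_]+$")

def is_valid_hf_input(input_str: str) -> bool:
    """Accept either a bare 'owner/model' ID or a huggingface.co model URL."""
    if not input_str or len(input_str) > 200:
        return False
    if input_str.startswith(("http://", "https://")):
        parsed = urlparse(input_str)
        if parsed.netloc != "huggingface.co":
            return False
        parts = parsed.path.strip("/").split("/")
        return (len(parts) >= 2
                and all(re.match(r"^[a-zA-Z0-9\.\-\_]+$", p) for p in parts[:2]))
    return bool(HF_ID_REGEX.match(input_str))

print(is_valid_hf_input("google-bert/bert-base-uncased"))                    # True
print(is_valid_hf_input("https://huggingface.co/google-bert/bert-base-uncased"))  # True
print(is_valid_hf_input("<script>org/model</script>"))                       # False
```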
src/main.py ADDED
@@ -0,0 +1,90 @@
+ import logging
+ import os
+ import sys
+ from contextlib import asynccontextmanager
+
+ from fastapi import FastAPI, Request
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.responses import JSONResponse
+
+ from .config import OUTPUT_DIR, MAX_AGE_DAYS, MAX_FILES, CLEANUP_INTERVAL
+ from .controllers.web_controller import router as web_router
+ from .utils import RateLimitMiddleware, ConcurrencyLimitMiddleware, RequestSizeLimitMiddleware, perform_cleanup
+
+ # Ensure the registry is initialized
+ from .models import get_field_registry_manager
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("aibom_generator")
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     # Startup
+     logger.info("Starting AI SBOM Generator WebApp")
+     try:
+         get_field_registry_manager()  # Ensure the registry is loaded
+         logger.info("Registry loaded successfully")
+     except Exception as e:
+         logger.error(f"Failed to load registry: {e}")
+
+     # Initial cleanup
+     try:
+         perform_cleanup(OUTPUT_DIR, MAX_AGE_DAYS, MAX_FILES)
+     except Exception as e:
+         logger.warning(f"Initial cleanup failed: {e}")
+
+     yield
+     # Shutdown (if needed)
+
+ app = FastAPI(title="AI SBOM Generator", lifespan=lifespan)
+
+ # --- Middleware ---
+ app.add_middleware(
+     RateLimitMiddleware,
+     rate_limit_per_minute=10,
+     rate_limit_window=60,
+     protected_routes=["/generate"]
+ )
+ app.add_middleware(
+     ConcurrencyLimitMiddleware,
+     max_concurrent_requests=5,
+     timeout=5.0,
+     protected_routes=["/generate"]
+ )
+ app.add_middleware(
+     RequestSizeLimitMiddleware,
+     max_content_length=1024*1024  # 1 MB
+ )
+
+ # --- Cleanup middleware ---
+ request_counter = 0
+
+ @app.middleware("http")
+ async def cleanup_middleware(request: Request, call_next):
+     global request_counter
+     request_counter += 1
+     if request_counter % CLEANUP_INTERVAL == 0:
+         try:
+             removed = perform_cleanup(OUTPUT_DIR, MAX_AGE_DAYS, MAX_FILES)
+             logger.info(f"Scheduled cleanup removed {removed} files")
+         except Exception as e:
+             logger.error(f"Error during scheduled cleanup: {e}")
+
+     response = await call_next(request)
+     return response
+
+ # --- Static files ---
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
+ app.mount("/output", StaticFiles(directory=OUTPUT_DIR), name="output")
+ # Mount static files (CSS/JS)
+ os.makedirs("src/static", exist_ok=True)
+ app.mount("/static", StaticFiles(directory="src/static"), name="static")
+
+ # --- Routes ---
+ app.include_router(web_router)
+
+ if __name__ == "__main__":
+     import uvicorn
+     # Print a clear access URL to avoid 0.0.0.0 confusion
+     print("🚀 Application ready! Access it at: http://localhost:8000")
+     uvicorn.run("src.main:app", host="0.0.0.0", port=8000, reload=True)
src/models/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from .schemas import (
+     DataSource,
+     ConfidenceLevel,
+     ExtractionResult,
+     GenerateRequest,
+     BatchRequest,
+     AIBOMResponse,
+     EnhancementReport
+ )
+ from .registry import get_field_registry_manager
+ from .extractor import EnhancedExtractor
+ from .scoring import calculate_completeness_score, validate_aibom
+ from .service import AIBOMService
src/models/extractor.py ADDED
@@ -0,0 +1,833 @@
+
+
+ import logging
+ import re
+ import yaml
+ import json
+ from typing import Dict, Any, Optional, List, Union
+ from enum import Enum
+ from urllib.parse import urlparse, urljoin
+
+ from huggingface_hub import HfApi, ModelCard, hf_hub_download
+ from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError
+
+ from .schemas import DataSource, ConfidenceLevel, ExtractionResult
+ from .registry import get_field_registry_manager
+ from .model_file_extractors import ModelFileExtractor, default_extractors
+
+ logger = logging.getLogger(__name__)
+
+ class EnhancedExtractor:
+     """
+     Registry-integrated enhanced extractor that automatically picks up new fields
+     from the JSON registry (field_registry.json) without requiring code changes.
+     """
+
+     # SPDX mappings for common licences
+     LICENSE_MAPPINGS = {
+         "mit": "MIT",
+         "mit license": "MIT",
+         "apache license version 2.0": "Apache-2.0",
+         "apache license 2.0": "Apache-2.0",
+         "apache 2.0": "Apache-2.0",
+         "apache license, version 2.0": "Apache-2.0",
+         "bsd 3-clause": "BSD-3-Clause",
+         "bsd-3-clause": "BSD-3-Clause",
+         "bsd 2-clause": "BSD-2-Clause",
+         "bsd-2-clause": "BSD-2-Clause",
+         "gnu general public license v3": "GPL-3.0-only",
+         "gplv3": "GPL-3.0-only",
+         "gnu general public license v2": "GPL-2.0-only",
+         "gplv2": "GPL-2.0-only",
+     }
+
+     # Compiled regex patterns for text extraction.
+     # Defined at class level to avoid recompilation on every request.
+     PATTERNS = {
+         'license': [
+             re.compile(r'license[:\s]+([a-zA-Z0-9\-\.\s\n]+)', re.IGNORECASE | re.DOTALL),
+             re.compile(r'licensed under[:\s]+([a-zA-Z0-9\-\.\s\n]+)', re.IGNORECASE | re.DOTALL),
+             # Robust capture for markdown links [License Name](...)
+             re.compile(r'governed by[:\s]+(?:the\s+)?\[([^\]]+)\]', re.IGNORECASE | re.DOTALL),
+             re.compile(r'governed by[:\s]+(?:the\s+)?([a-zA-Z0-9\-\.\s\n]+)', re.IGNORECASE | re.DOTALL),
+             re.compile(r'governed by the[:\s]+\[([^\]]+)\]', re.IGNORECASE | re.DOTALL),
+         ],
+         'datasets': [
+             re.compile(r'trained on[:\s]+([a-zA-Z0-9\-\_\/]+)', re.IGNORECASE),
+             re.compile(r'dataset[:\s]+([a-zA-Z0-9\-\_\/]+)', re.IGNORECASE),
+             re.compile(r'using[:\s]+([a-zA-Z0-9\-\_\/]+)\s+dataset', re.IGNORECASE),
+         ],
+         'metrics': [
+             re.compile(r'([a-zA-Z]+)[:\s]+([0-9\.]+)', re.IGNORECASE),
+             re.compile(r'achieves[:\s]+([0-9\.]+)[:\s]+([a-zA-Z]+)', re.IGNORECASE),
+         ],
+         'model_type': [
+             re.compile(r'model type[:\s]+([a-zA-Z0-9\-]+)', re.IGNORECASE),
+             re.compile(r'architecture[:\s]+([a-zA-Z0-9\-]+)', re.IGNORECASE),
+         ],
+         'energy': [
+             re.compile(r'energy[:\s]+([0-9\.]+)\s*([a-zA-Z]+)', re.IGNORECASE),
+             re.compile(r'power[:\s]+([0-9\.]+)\s*([a-zA-Z]+)', re.IGNORECASE),
+             re.compile(r'consumption[:\s]+([0-9\.]+)\s*([a-zA-Z]+)', re.IGNORECASE),
+         ],
+         'limitations': [
+             re.compile(r'limitation[s]?[:\s]+([^\.]+)', re.IGNORECASE),
+             re.compile(r'known issue[s]?[:\s]+([^\.]+)', re.IGNORECASE),
+             re.compile(r'constraint[s]?[:\s]+([^\.]+)', re.IGNORECASE),
+         ],
+         'safety': [
+             re.compile(r'safety[:\s]+([^\.]+)', re.IGNORECASE),
+             re.compile(r'risk[s]?[:\s]+([^\.]+)', re.IGNORECASE),
+             re.compile(r'bias[:\s]+([^\.]+)', re.IGNORECASE),
+         ]
+     }
+
+     def __init__(
+         self,
+         hf_api: Optional[HfApi] = None,
+         model_file_extractors: Optional[List[ModelFileExtractor]] = None,
+     ):
+         """
+         Initialize the enhanced extractor with registry integration.
+
+         Args:
+             hf_api: Optional HuggingFace API instance (will create if not provided)
+             model_file_extractors: Optional list of model file extractors
+                 (defaults to default_extractors())
+         """
+         self.hf_api = hf_api or HfApi()
+         self.extraction_results = {}
+         self.model_file_extractors = (
+             model_file_extractors if model_file_extractors is not None
+             else default_extractors()
+         )
+
+         # Initialize registry manager
+         try:
+             self.registry_manager = get_field_registry_manager()
+             logger.info("✅ Registry manager initialized successfully")
+         except Exception as e:
+             logger.warning(f"⚠️ Could not initialize registry manager: {e}")
+             self.registry_manager = None
+
+         # Load registry fields
+         self.registry_fields = {}
+         if self.registry_manager:
+             try:
+                 self.registry_fields = self.registry_manager.get_field_definitions()
+                 logger.info(f"✅ Loaded {len(self.registry_fields)} fields from registry")
+             except Exception as e:
+                 logger.error(f"❌ Error loading registry fields: {e}")
+                 self.registry_fields = {}
+
+         logger.info(f"Enhanced extractor initialized (registry-driven: {bool(self.registry_fields)})")
+
+     def _detect_license_from_file(self, model_id: str) -> Optional[str]:
+         """
+         Attempt to detect a licence by looking at repository files.
+         Downloads common licence filenames (e.g. LICENSE, LICENSE.md),
+         reads a small snippet, and returns the matching SPDX identifier,
+         or None if none match.
+         """
+         license_filenames = ["LICENSE", "LICENSE.txt", "LICENSE.md", "LICENSE.rst", "COPYING"]
+         for filename in license_filenames:
+             try:
+                 file_path = hf_hub_download(repo_id=model_id, filename=filename)
+                 with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+                     snippet = f.read(4096).lower()
+                 for header, spdx_id in self.LICENSE_MAPPINGS.items():
+                     if header in snippet:
+                         return spdx_id
+             except (RepositoryNotFoundError, EntryNotFoundError):
+                 # File doesn't exist; try the next candidate
+                 continue
+             except Exception as e:
+                 logger.debug(f"Licence detection error reading {filename}: {e}")
+                 continue
+         return None
+
+     def extract_metadata(self, model_id: str, model_info: Dict[str, Any], model_card: Optional[ModelCard], enable_summarization: bool = False) -> Dict[str, Any]:
+         """
+         Main extraction method with full registry integration.
+         """
+         logger.info(f"🚀 Starting registry-driven extraction for model: {model_id}")
+
+         # Initialize extraction results tracking
+         self.extraction_results = {}
+         metadata = {}
+
+         if self.registry_fields:
+             # Registry-driven extraction
+             logger.info(f"📋 Registry-driven mode: Attempting extraction for {len(self.registry_fields)} fields")
+             metadata = self._registry_driven_extraction(model_id, model_info, model_card, enable_summarization)
+         else:
+             # Fallback to legacy extraction
+             logger.warning("⚠️ Registry not available, falling back to legacy extraction")
+             metadata = self._legacy_extraction(model_id, model_info, model_card)
+
+         # Return metadata in the same format as original method
+         return {k: v for k, v in metadata.items() if v is not None}
+
+     def _registry_driven_extraction(self, model_id: str, model_info: Dict[str, Any], model_card: Optional[ModelCard], enable_summarization: bool = False) -> Dict[str, Any]:
+         """
+         Registry-driven extraction that automatically processes all registry fields.
+         """
+         metadata = {}
+
+         # Prepare extraction context
+         extraction_context = {
+             'model_id': model_id,
+             'model_info': model_info,
+             'model_card': model_card,
+             'readme_content': self._get_readme_content(model_card, model_id),
+             'config_data': self._download_and_parse_config(model_id, "config.json"),
+             'tokenizer_config': self._download_and_parse_config(model_id, "tokenizer_config.json"),
+             'enable_summarization': enable_summarization
+         }
+
+         # Process each field from the registry
+         successful_extractions = 0
+         failed_extractions = 0
+
+         for field_name, field_config in self.registry_fields.items():
+             try:
+                 logger.info(f"🔍 Attempting extraction for field: {field_name}")
+
+                 # Extract field using registry configuration
+                 extracted_value = self._extract_registry_field(field_name, field_config, extraction_context)
+
+                 if extracted_value is not None:
+                     metadata[field_name] = extracted_value
+                     successful_extractions += 1
+                 else:
+                     failed_extractions += 1
+
+             except Exception as e:
+                 failed_extractions += 1
+                 logger.error(f"❌ Error extracting {field_name}: {e}")
+                 continue
+
+         logger.info(f"📊 Registry extraction complete: {successful_extractions} successful, {failed_extractions} failed")
+
+         model_file_metadata = self._extract_model_file_metadata(model_id)
+         if model_file_metadata:
+             for key, value in model_file_metadata.items():
+                 if value is not None:
+                     metadata[key] = value
+                     self.extraction_results[key] = ExtractionResult(
+                         value=value,
+                         source=DataSource.REPOSITORY_FILES,
+                         confidence=ConfidenceLevel.HIGH,
+                         extraction_method="model_file_header",
+                     )
+
+         # Always extract commit SHA if available (vital for BOM versioning)
+         if 'commit' not in metadata:
+             commit_sha = getattr(model_info, 'sha', None)
+             if commit_sha:
+                 metadata['commit'] = commit_sha
+
+         # Add external references (always needed)
+         metadata.update(self._generate_external_references(model_id, metadata))
+
+         return metadata
+
+     def _extract_model_file_metadata(self, model_id: str) -> Dict[str, Any]:
+         for extractor in self.model_file_extractors:
+             try:
+                 if extractor.can_extract(model_id):
+                     metadata = extractor.extract_metadata(model_id)
+                     if metadata:
+                         logger.info(
+                             f"{type(extractor).__name__} returned {len(metadata)} fields"
+                         )
+                         return metadata
+             except Exception as e:
+                 logger.warning(
+                     f"Model file extraction failed ({type(extractor).__name__}): {e}"
+                 )
+                 continue
+         return {}
+
+     def _extract_registry_field(self, field_name: str, field_config: Dict[str, Any], context: Dict[str, Any]) -> Any:
+         """
+         Extract a single field based on its registry configuration.
+         """
+         if field_name == 'license':
+             logger.debug("Extracting license...")
+
+         extraction_methods = []
+
+         # Strategy 1: Direct API extraction
+         api_value = self._try_api_extraction(field_name, context)
+         if api_value is not None:
+             self.extraction_results[field_name] = ExtractionResult(
+                 value=api_value,
+                 source=DataSource.HF_API,
+                 confidence=ConfidenceLevel.HIGH,
+                 extraction_method="api_direct"
+             )
+             return api_value
+
+         # Strategy 2: Model card YAML extraction
+         yaml_value = self._try_model_card_extraction(field_name, context)
+         if yaml_value is not None:
+             self.extraction_results[field_name] = ExtractionResult(
+                 value=yaml_value,
+                 source=DataSource.MODEL_CARD,
+                 confidence=ConfidenceLevel.HIGH,
+                 extraction_method="model_card_yaml"
+             )
+             return yaml_value
+
+         # Strategy 3: Configuration file extraction
+         config_value = self._try_config_extraction(field_name, context)
+         if config_value is not None:
+             self.extraction_results[field_name] = ExtractionResult(
+                 value=config_value,
+                 source=DataSource.CONFIG_FILE,
+                 confidence=ConfidenceLevel.HIGH,
+                 extraction_method="config_file"
+             )
+             return config_value
+
+         # Strategy 4: Text pattern extraction
+         text_value = self._try_text_pattern_extraction(field_name, context)
+         if text_value is not None:
+             self.extraction_results[field_name] = ExtractionResult(
+                 value=text_value,
+                 source=DataSource.README_TEXT,
+                 confidence=ConfidenceLevel.MEDIUM,
+                 extraction_method="text_pattern"
+             )
+             return text_value
+
+         # Strategy 5: Intelligent inference
+         inferred_value = self._try_intelligent_inference(field_name, context)
+         if inferred_value is not None:
+             self.extraction_results[field_name] = ExtractionResult(
+                 value=inferred_value,
+                 source=DataSource.INTELLIGENT_DEFAULT,
+                 confidence=ConfidenceLevel.MEDIUM,
+                 extraction_method="intelligent_inference"
+             )
+             return inferred_value
+
+         # Detect a licence from repository files if the field is license/licenses
+         if field_name in {"license", "licenses"}:
+             detected = self._detect_license_from_file(context["model_id"])
+             if detected:
+                 self.extraction_results[field_name] = ExtractionResult(
+                     value=detected,
+                     source=DataSource.REPOSITORY_FILES,
+                     confidence=ConfidenceLevel.MEDIUM,
+                     extraction_method="license_file",
+                     fallback_chain=extraction_methods,
+                 )
+                 return detected
+
+         if field_name == "description":
+             # Try intelligent summarization if description is missing AND enabled
+             if context.get('enable_summarization', False):
+                 try:
+                     from ..utils.summarizer import LocalSummarizer
+                     readme = context.get('readme_content')
+                     if readme:
+                         summary = LocalSummarizer.summarize(readme, model_id=context.get('model_id', ''))
+                         if summary:
+                             self.extraction_results[field_name] = ExtractionResult(
+                                 value=summary,
+                                 source=DataSource.INTELLIGENT_DEFAULT,
+                                 confidence=ConfidenceLevel.MEDIUM,
+                                 extraction_method="llm_summarization",
+                                 fallback_chain=extraction_methods
+                             )
+                             return summary
+                 except ImportError:
+                     pass
+                 except Exception as e:
+                     logger.debug(f"Summarization processing failed: {e}")
+
+         # Strategy 6: Fallback value (if configured)
+         fallback_value = self._try_fallback_value(field_name, field_config)
+         if fallback_value is not None:
+             self.extraction_results[field_name] = ExtractionResult(
+                 value=fallback_value,
+                 source=DataSource.PLACEHOLDER,
+                 confidence=ConfidenceLevel.NONE,
+                 extraction_method="fallback_placeholder",
+                 fallback_chain=extraction_methods
+             )
+             return fallback_value
+
+         # No extraction successful
+         self.extraction_results[field_name] = ExtractionResult(
+             value=None,
+             source=DataSource.PLACEHOLDER,
+             confidence=ConfidenceLevel.NONE,
+             extraction_method="extraction_failed",
+             fallback_chain=extraction_methods
+         )
+         return None
+
+     def _extract_paper_link(self, info: Any) -> Union[str, List[str], None]:
+         # 1. Check card_data for explicit paper field
+         if hasattr(info, 'card_data') and info.card_data:
+             paper = getattr(info.card_data, 'paper', None)
+             if paper:
+                 return paper
+
+         # 2. Check tags for arxiv: IDs
+         papers = []
+         if hasattr(info, 'tags') and info.tags:
+             for tag in info.tags:
+                 if isinstance(tag, str) and tag.startswith('arxiv:'):
+                     papers.append(f"https://arxiv.org/abs/{tag.split(':', 1)[1]}")
+
+         return papers if papers else None
+
+     def _try_api_extraction(self, field_name: str, context: Dict[str, Any]) -> Any:
+         """Try to extract field from HuggingFace API data"""
+         model_info = context.get('model_info')
+         if not model_info:
+             return None
+
+         # Field mapping for API extraction
+         api_mappings = {
+             'author': lambda info: getattr(info, 'author', None) or context['model_id'].split('/')[0],
+             'name': lambda info: getattr(info, 'modelId', context['model_id']).split('/')[-1],
+             'tags': lambda info: getattr(info, 'tags', []),
+             'pipeline_tag': lambda info: getattr(info, 'pipeline_tag', None),
+             'downloads': lambda info: getattr(info, 'downloads', 0),
+             'commit': lambda info: getattr(info, 'sha', '') if getattr(info, 'sha', None) else None,
+             'suppliedBy': lambda info: getattr(info, 'author', None) or context['model_id'].split('/')[0],
+             'primaryPurpose': lambda info: getattr(info, 'pipeline_tag', 'text-generation'),
+             'downloadLocation': lambda info: f"https://huggingface.co/{context['model_id']}/tree/main",
+             'license': lambda info: getattr(info.card_data, 'license', None) if hasattr(info, 'card_data') and info.card_data else None,
+             'licenses': lambda info: getattr(info.card_data, 'license', None) if hasattr(info, 'card_data') and info.card_data else None,
+             'datasets': lambda info: getattr(info.card_data, 'datasets', []) if hasattr(info, 'card_data') and info.card_data else [],
+             'paper': self._extract_paper_link
+         }
+
+         if field_name in api_mappings:
+             try:
+                 val = api_mappings[field_name](model_info)
+                 # If valid value found, return it (filtering out "other")
+                 if val:
+                     # Special handling for lists (datasets, tags, paper) - don't lowercase/string convert immediately
+                     if field_name in ["datasets", "tags", "external_references", "paper"]:
+                         return val
+
+                     str_val = str(val).lower()
+                     if isinstance(val, list) and len(val) > 0:
+                         str_val = str(val[0]).lower()
+
+                     # Enhanced filtering for "other" variants
+                     ignored_values = {"other", "['other']", "other license", "other-license", "unknown"}
+                     if str_val not in ignored_values:
+                         return val
+                 return None
+             except Exception as e:
+                 logger.debug(f"API extraction failed for {field_name}: {e}")
+                 return None
+
+         return None
+
+     def _try_model_card_extraction(self, field_name: str, context: Dict[str, Any]) -> Any:
+         """Try to extract field from model card YAML frontmatter"""
+         model_card = context.get('model_card')
+         if not model_card or not hasattr(model_card, 'data') or not model_card.data:
+             return None
+
+         try:
+             card_data = model_card.data.to_dict() if hasattr(model_card.data, 'to_dict') else {}
+
+             # Field mapping for model card extraction
+             card_mappings = {
+                 'license': 'license',
+                 'language': 'language',
+                 'library_name': 'library_name',
+                 'base_model': 'base_model',
+                 'datasets': 'datasets',
+                 'description': ['model_summary', 'description'],
+                 'typeOfModel': 'model_type',
+                 'licenses': 'license'  # Alternative mapping
+             }
+
+             if field_name in card_mappings:
+                 mapping = card_mappings[field_name]
+                 if isinstance(mapping, list):
+                     # Try multiple keys
+                     for key in mapping:
+                         value = card_data.get(key)
+                         if value:
+                             return value
+                 else:
+                     val = card_data.get(mapping)
+                     if val:
+                         str_val = str(val).lower()
+                         if isinstance(val, list) and len(val) > 0:
+                             str_val = str(val[0]).lower()
+
+                         ignored_values = {"other", "['other']", "other license", "other-license", "unknown"}
+                         return val if str_val not in ignored_values else None
+                 return None
+
+             # Direct field name lookup
+             val = card_data.get(field_name)
+             if val:
+                 str_val = str(val).lower()
+                 if isinstance(val, list) and len(val) > 0:
+                     str_val = str(val[0]).lower()
+                 return val if str_val != "other" else None
+             return None
+
+         except Exception as e:
+             logger.debug(f"Model card extraction failed for {field_name}: {e}")
+             return None
+
+     def _try_config_extraction(self, field_name: str, context: Dict[str, Any]) -> Any:
+         """Try to extract field from configuration files"""
+         # Config file mappings
+         config_mappings = {
+             'model_type': ('config_data', 'model_type'),
+             'architectures': ('config_data', 'architectures'),
+             'vocab_size': ('config_data', 'vocab_size'),
+             'tokenizer_class': ('tokenizer_config', 'tokenizer_class'),
+             'typeOfModel': ('config_data', 'model_type')
+         }
+
+         if field_name in config_mappings:
+             config_type, config_key = config_mappings[field_name]
+             config_source = context.get(config_type)
+             if config_source:
+                 return config_source.get(config_key)
+
+         return None
+
+     def _try_text_pattern_extraction(self, field_name: str, context: Dict[str, Any]) -> Any:
+         """Try to extract field using text pattern matching"""
+         readme_content = context.get('readme_content')
+         if not readme_content:
+             return None
+
+         # Pattern mappings for different fields
+         pattern_mappings = {
+             'license': 'license',
+             'licenses': 'license',  # Fix: handle the plural key
+             'datasets': 'datasets',
+             'energyConsumption': 'energy',
+             'technicalLimitations': 'limitations',
+             'safetyRiskAssessment': 'safety',
+             'model_type': 'model_type'
+         }
+
+         if field_name in pattern_mappings:
+             pattern_key = pattern_mappings[field_name]
+             if pattern_key in self.PATTERNS:
+                 matches = self._find_pattern_matches(readme_content, self.PATTERNS[pattern_key])
+                 if matches:
+                     # Prefer longest match for critical fields where "the" or short noise might appear
+                     if field_name in ['license', 'licenses']:
+                         return max(matches, key=len)
+                     # Prefer a single string for critical fields
+                     if field_name in ['model_type']:
+                         return matches[0]
+                     return matches[0] if len(matches) == 1 else matches
+
+         return None
+
+     def _find_pattern_matches(self, content: str, patterns: List[re.Pattern]) -> List[str]:
+         """Find matches for a list of patterns in content"""
+         matches = []
+         for pattern in patterns:
+             match = pattern.search(content)
+             if match:
+                 # Replace newlines/tabs with single space
+                 val = re.sub(r'\s+', ' ', match.group(1)).strip()
+                 # Filtering: 'the' is never a license, and generic "other" values
+                 ignored_values = {
+                     "the", "other", "other license", "other-license", "unknown",
+                     "vision", "text", "audio", "image", "video", "data", "dataset", "datasets",
+                     "training", "eval", "evaluation"
+                 }
+                 if val.lower() in ignored_values:
+                     continue
+                 matches.append(val)
+         return list(set(matches))  # Return unique matches
+
+     def _try_intelligent_inference(self, field_name: str, context: Dict[str, Any]) -> Any:
+         """Try to infer field value from other available data"""
+         model_id = context['model_id']
+
+         # Intelligent inference rules
+         inference_rules = {
+             'author': lambda: model_id.split('/')[0] if '/' in model_id else 'unknown',
+             'suppliedBy': lambda: model_id.split('/')[0] if '/' in model_id else 'unknown',
+             'name': lambda: model_id.split('/')[-1],
+             'primaryPurpose': lambda: 'text-generation',  # Default for most HF models
+             'typeOfModel': lambda: 'transformer',  # Default for most HF models
+             'downloadLocation': lambda: f"https://huggingface.co/{model_id}/tree/main",
+             'bomFormat': lambda: 'CycloneDX',
+             'specVersion': lambda: '1.6',
+             'serialNumber': lambda: f"urn:uuid:{model_id.replace('/', '-')}",
+             'version': lambda: '1.0.0'
+         }
+
+         if field_name in inference_rules:
+             try:
+                 return inference_rules[field_name]()
+             except Exception as e:
+                 logger.debug(f"Intelligent inference failed for {field_name}: {e}")
+                 return None
+
+         return None
+
+     def _try_fallback_value(self, field_name: str, field_config: Dict[str, Any]) -> Any:
+         """Try to get fallback value from field configuration"""
+         # Check if field config has fallback value
+         if isinstance(field_config, dict):
+             fallback = field_config.get('fallback_value')
+             if fallback:
+                 return fallback
+
+         # Standard fallback values for common fields
+         standard_fallbacks = {
+             'license': 'NOASSERTION',
+             'description': 'No description available',
+             'version': '1.0.0',
+             'bomFormat': 'CycloneDX',
+             'specVersion': '1.6'
+         }
+
+         return standard_fallbacks.get(field_name)
+
+     def _legacy_extraction(self, model_id: str, model_info: Dict[str, Any], model_card: Optional[ModelCard]) -> Dict[str, Any]:
+         """
+         Fallback to legacy extraction when registry is not available.
+         This maintains backward compatibility.
+         """
+         logger.info("🔄 Executing legacy extraction mode")
+         metadata = {}
+
+         # Execute legacy extraction layers
+         metadata.update(self._layer1_structured_api(model_id, model_info, model_card))
+         metadata.update(self._layer2_repository_files(model_id))
+         metadata.update(self._layer3_stp_extraction(model_card, model_id))
+         metadata.update(self._layer4_external_references(model_id, metadata))
+         metadata.update(self._layer5_intelligent_defaults(model_id, metadata))
+
+         return metadata
+
+     def _generate_external_references(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
+         """Generate external references for the model"""
+         external_refs = []
+
+         # Model repository
+         repo_url = f"https://huggingface.co/{model_id}"
+         external_refs.append({
+             "type": "website",
+             "url": repo_url,
+             "comment": "Model repository"
+         })
+
+         # Model files
+         files_url = f"https://huggingface.co/{model_id}/tree/main"
+         external_refs.append({
+             "type": "distribution",
+             "url": files_url,
+             "comment": "Model files"
+         })
+
+         # Commit URL if available
+         if 'commit' in metadata:
+             commit_url = f"https://huggingface.co/{model_id}/commit/{metadata['commit']}"
+             external_refs.append({
+                 "type": "vcs",
+                 "url": commit_url,
+                 "comment": "Specific commit"
+             })
+
+         # Dataset references
+         if 'datasets' in metadata:
+             datasets = metadata['datasets']
+             if isinstance(datasets, list):
+                 for dataset in datasets:
+                     if isinstance(dataset, str):
+                         dataset_url = f"https://huggingface.co/datasets/{dataset}"
+                         external_refs.append({
+                             "type": "distribution",
+                             "url": dataset_url,
+                             "comment": f"Training dataset: {dataset}"
+                         })
+
+         # Note: this method deliberately avoids writing to self.extraction_results
+         # as a side effect; provenance tracing for references can be added later.
+
+         return {'external_references': external_refs}
+
+     # Legacy methods for backward compatibility
+     def _layer1_structured_api(self, model_id: str, model_info: Dict[str, Any], model_card: Optional[ModelCard]) -> Dict[str, Any]:
+         """Legacy Layer 1: Enhanced structured data extraction from HF API and model card."""
+         metadata = {}
+         # Enhanced model info extraction
+         if model_info:
+             try:
+                 author = getattr(model_info, "author", None)
+                 if not author or author.strip() == "":
+                     parts = model_id.split("/")
+                     author = parts[0] if len(parts) > 1 else "unknown"
+
+                 metadata['author'] = author
+                 metadata['name'] = getattr(model_info, "modelId", model_id).split("/")[-1]
+                 metadata['tags'] = getattr(model_info, "tags", [])
+                 metadata['pipeline_tag'] = getattr(model_info, "pipeline_tag", None)
+                 metadata['downloads'] = getattr(model_info, "downloads", 0)
+
+                 commit_sha = getattr(model_info, "sha", None)
+                 if commit_sha:
+                     metadata['commit'] = commit_sha
+             except Exception:
+                 pass
+
+         if model_card and hasattr(model_card, "data") and model_card.data:
+             try:
+                 card_data = model_card.data.to_dict() if hasattr(model_card.data, "to_dict") else {}
+                 metadata['license'] = card_data.get("license")
+                 metadata['language'] = card_data.get("language")
+                 metadata['library_name'] = card_data.get("library_name")
+                 metadata['base_model'] = card_data.get("base_model")
+                 metadata['datasets'] = card_data.get("datasets")
+                 metadata['description'] = card_data.get("model_summary") or card_data.get("description")
+             except Exception:
+                 pass
+
+         metadata["primaryPurpose"] = metadata.get("pipeline_tag", "text-generation")
+         metadata["suppliedBy"] = metadata.get("author", "unknown")
+         metadata["typeOfModel"] = "transformer"
+         return metadata
+
+     def _layer2_repository_files(self, model_id: str) -> Dict[str, Any]:
+         """Legacy Layer 2: Repository file analysis"""
+         metadata = {}
+         try:
+             config_data = self._download_and_parse_config(model_id, "config.json")
+             if config_data:
+                 metadata['model_type'] = config_data.get("model_type")
+                 metadata['architectures'] = config_data.get("architectures", [])
+                 metadata['vocab_size'] = config_data.get("vocab_size")
+
+             tokenizer_config = self._download_and_parse_config(model_id, "tokenizer_config.json")
+             if tokenizer_config:
+                 metadata['tokenizer_class'] = tokenizer_config.get("tokenizer_class")
+
+             if "license" not in metadata or not metadata["license"]:
+                 detected_license = self._detect_license_from_file(model_id)
+                 if detected_license:
+                     metadata["license"] = detected_license
+         except Exception:
+             pass
+         return metadata
+
+     def _layer3_stp_extraction(self, model_card: Optional[ModelCard], model_id: str) -> Dict[str, Any]:
+         """Legacy Layer 3: Smart Text Parsing"""
+         metadata = {}
+         try:
+             readme_content = self._get_readme_content(model_card, model_id)
+             if readme_content:
+                 extracted_info = self._extract_from_text(readme_content)
+                 metadata.update(extracted_info)
+
+                 license_from_text = extracted_info.get("license_from_text")
+                 if license_from_text and not metadata.get("license"):
+                     if isinstance(license_from_text, list):
+                         metadata["license"] = license_from_text[0]
+                     else:
+                         metadata["license"] = license_from_text
+         except Exception:
+             pass
+         return metadata
+
+     def _layer4_external_references(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
+         """Legacy Layer 4: External reference generation"""
+         return self._generate_external_references(model_id, metadata)
+
+     def _layer5_intelligent_defaults(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
+         """Legacy Layer 5: Intelligent default generation"""
+         if 'author' not in metadata or not metadata['author']:
+             parts = model_id.split("/")
+             metadata['author'] = parts[0] if len(parts) > 1 else "unknown"
+         if 'license' not in metadata or not metadata['license']:
+             metadata['license'] = "NOASSERTION"
+         return metadata
+
+     def _fetch_with_backoff(self, fetch_func, *args, max_retries=3, initial_backoff=1.0, **kwargs):
790
+ import time
791
+ for attempt in range(max_retries):
792
+ try:
793
+ return fetch_func(*args, **kwargs)
794
+ except Exception as e:
795
+ error_msg = str(e)
796
+ if "401" in error_msg or "404" in error_msg: # Auth or not found don't retry
797
+ raise e
798
+ if attempt == max_retries - 1:
799
+ raise e
800
+ time.sleep(initial_backoff * (2 ** attempt))
801
+
802
+ def _download_and_parse_config(self, model_id: str, filename: str) -> Optional[Dict[str, Any]]:
803
+ """Download and parse a JSON config file from the model repository"""
804
+ import json
805
+ try:
806
+ file_path = self._fetch_with_backoff(hf_hub_download, repo_id=model_id, filename=filename)
807
+ with open(file_path, 'r') as f:
808
+ return json.load(f)
809
+ except (RepositoryNotFoundError, EntryNotFoundError, json.JSONDecodeError):
810
+ return None
811
+ except Exception:
812
+ return None
813
+
814
+ def _get_readme_content(self, model_card: Optional[ModelCard], model_id: str) -> Optional[str]:
815
+ """Get README content from model card or by downloading"""
816
+ try:
817
+ if model_card and hasattr(model_card, 'content'):
818
+ return model_card.content
819
+ readme_path = self._fetch_with_backoff(hf_hub_download, repo_id=model_id, filename="README.md")
820
+ with open(readme_path, 'r', encoding='utf-8') as f:
821
+ return f.read()
822
+ except Exception:
823
+ return None
824
+
825
+ def _extract_from_text(self, text: str) -> Dict[str, Any]:
826
+ """Extract structured information from unstructured text (Legacy Helper)"""
827
+ # Minimal implementation for legacy support, utilizing the patterns we already have
828
+ metadata = {}
829
+ for category, patterns in self.PATTERNS.items():
830
+ matches = self._find_pattern_matches(text, patterns)
831
+ if matches:
832
+ metadata[category] = matches[0] if len(matches) == 1 else matches
833
+ return metadata
src/models/field_registry.json ADDED
@@ -0,0 +1,1714 @@
1
+ {
2
+ "registry_metadata": {
3
+ "description": "Field registry for configurable AI SBOM generation and scoring"
4
+ },
5
+ "scoring_config": {
6
+ "tier_weights": {
7
+ "critical": 3,
8
+ "important": 2,
9
+ "supplementary": 1
10
+ },
11
+ "category_weights": {
12
+ "required_fields": 20,
13
+ "metadata": 20,
14
+ "component_basic": 20,
15
+ "component_model_card": 30,
16
+ "external_references": 10
17
+ },
18
+ "scoring_profiles": {
19
+ "basic": {
20
+ "description": "Minimal fields required for identification",
21
+ "required_categories": [
22
+ "required_fields",
23
+ "component_basic"
24
+ ],
25
+ "required_fields": [
26
+ "bomFormat",
27
+ "specVersion",
28
+ "serialNumber",
29
+ "version",
30
+ "name"
31
+ ],
32
+ "minimum_score": 40,
33
+ "weight_multiplier": 1.0
34
+ },
35
+ "standard": {
36
+ "description": "Comprehensive fields for proper documentation",
37
+ "required_categories": [
38
+ "required_fields",
39
+ "metadata",
40
+ "component_basic"
41
+ ],
42
+ "required_fields": [
43
+ "bomFormat",
44
+ "specVersion",
45
+ "serialNumber",
46
+ "version",
47
+ "name",
48
+ "downloadLocation",
49
+ "primaryPurpose",
50
+ "suppliedBy"
51
+ ],
52
+ "minimum_score": 70,
53
+ "weight_multiplier": 1.0
54
+ },
55
+ "advanced": {
56
+ "description": "Extensive documentation for maximum transparency",
57
+ "required_categories": [
58
+ "required_fields",
59
+ "metadata",
60
+ "component_basic",
61
+ "component_model_card",
62
+ "external_references"
63
+ ],
64
+ "required_fields": [
65
+ "bomFormat",
66
+ "specVersion",
67
+ "serialNumber",
68
+ "version",
69
+ "name",
70
+ "downloadLocation",
71
+ "primaryPurpose",
72
+ "suppliedBy",
73
+ "type",
74
+ "purl",
75
+ "description",
76
+ "licenses",
77
+ "hyperparameter",
78
+ "technicalLimitations",
79
+ "energyConsumption",
80
+ "safetyRiskAssessment",
81
+ "typeOfModel"
82
+ ],
83
+ "minimum_score": 85,
84
+ "weight_multiplier": 1.0
85
+ }
86
+ },
87
+ "algorithm_config": {
88
+ "type": "weighted_sum",
89
+ "max_score": 100,
90
+ "normalization": "category_based",
91
+ "penalty_for_missing_critical": 0.5,
92
+ "bonus_for_complete_categories": 0.1
93
+ }
94
+ },
95
+ "aibom_config": {
96
+ "structure_template": "cyclonedx_1.6",
97
+ "generator_info": {
98
+ "name": "owasp-aibom-generator",
99
+ "version": "1.0.0",
100
+ "manufacturer": "OWASP GenAI Security Project"
101
+ },
102
+ "generation_rules": {
103
+ "include_metadata_properties": true,
104
+ "include_model_card": true,
105
+ "include_external_references": true,
106
+ "include_dependencies": true
107
+ },
108
+ "validation_rules": {
109
+ "require_critical_fields": true,
110
+ "validate_jsonpath_expressions": true,
111
+ "enforce_cyclonedx_schema": true
112
+ }
113
+ },
114
+ "fields": {
115
+ "bomFormat": {
116
+ "tier": "critical",
117
+ "weight": 4.0,
118
+ "category": "required_fields",
119
+ "description": "Format identifier for the SBOM",
120
+ "jsonpath": "$.bomFormat",
121
+ "aibom_generation": {
122
+ "location": "$.bomFormat",
123
+ "rule": "always_include",
124
+ "source_fields": [
125
+ "bomFormat"
126
+ ],
127
+ "validation": "required",
128
+ "data_type": "string"
129
+ },
130
+ "scoring": {
131
+ "points": 4.0,
132
+ "required_for_profiles": [
133
+ "basic",
134
+ "standard",
135
+ "advanced"
136
+ ],
137
+ "category_contribution": 0.2
138
+ },
139
+ "validation_message": {
140
+ "missing": "Missing critical field: bomFormat - essential for SBOM identification",
141
+ "recommendation": "Ensure bomFormat is set to 'CycloneDX'"
142
+ },
143
+ "reference_urls": {
144
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#bomFormat",
145
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#bomFormat",
146
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/core/"
147
+ }
148
+ },
149
+ "datasets": {
150
+ "tier": "important",
151
+ "weight": 3.0,
152
+ "category": "component_model_card",
153
+ "description": "Datasets used for training",
154
+ "jsonpath": "$.component.modelCard.modelParameters.datasets",
155
+ "aibom_generation": {
156
+ "location": "$.component.modelCard.modelParameters.datasets",
157
+ "rule": "include_if_available",
158
+ "source_fields": [
159
+ "datasets",
160
+ "dataset",
161
+ "data"
162
+ ],
163
+ "validation": "recommended",
164
+ "data_type": "array"
165
+ },
166
+ "scoring": {
167
+ "points": 3.0,
168
+ "required_for_profiles": [
169
+ "standard",
170
+ "advanced"
171
+ ],
172
+ "category_contribution": 0.1
173
+ },
174
+ "validation_message": {
175
+ "missing": "Missing field: datasets - training data information important for transparency",
176
+ "recommendation": "Add information about the datasets used to train the model"
177
+ },
178
+ "reference_urls": {
179
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_modelParameters_datasets",
180
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_modelParameters_datasets",
181
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/dataset/"
182
+ }
183
+ },
184
+ "paper": {
185
+ "tier": "supplementary",
186
+ "weight": 2.0,
187
+ "category": "external_references",
188
+ "description": "Research paper associated with the model",
189
+ "jsonpath": "$.metadata.component.externalReferences[?(@.type=='documentation')]",
190
+ "aibom_generation": {
191
+ "location": "none",
192
+ "rule": "include_if_present",
193
+ "source_fields": [
194
+ "paper"
195
+ ],
196
+ "validation": "optional",
197
+ "data_type": "string"
198
+ },
199
+ "extraction": {
200
+ "methods": [
201
+ "api"
202
+ ],
203
+ "source_priority": [
204
+ "api"
205
+ ]
206
+ },
207
+ "scoring": {
208
+ "points": 2.0,
209
+ "required_for_profiles": [
210
+ "advanced"
211
+ ],
212
+ "category_contribution": 0.2
213
+ },
214
+ "validation_message": {
215
+ "missing": "No research paper link found",
216
+ "recommendation": "Add ArXiv tag or paper link to model card"
217
+ },
218
+ "reference_urls": {
219
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_externalReferences",
220
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_externalReferences",
221
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
222
+ }
223
+ },
224
+ "vcs": {
225
+ "tier": "supplementary",
226
+ "weight": 4.0,
227
+ "category": "external_references",
228
+ "description": "Version control system URL",
229
+ "jsonpath": "$.components[0].externalReferences[?(@.type=='vcs')].url",
230
+ "aibom_generation": {
231
+ "location": "none",
232
+ "rule": "include_if_present",
233
+ "source_fields": [
234
+ "vcs",
235
+ "repository"
236
+ ],
237
+ "validation": "optional",
238
+ "data_type": "string"
239
+ },
240
+ "extraction": {
241
+ "methods": [
242
+ "api"
243
+ ],
244
+ "source_priority": [
245
+ "api"
246
+ ]
247
+ },
248
+ "scoring": {
249
+ "points": 4.0,
250
+ "required_for_profiles": [
251
+ "advanced"
252
+ ],
253
+ "category_contribution": 0.4
254
+ },
255
+ "validation_message": {
256
+ "missing": "No VCS link found",
257
+ "recommendation": "Add repository link to model card"
258
+ },
259
+ "reference_urls": {
260
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_externalReferences",
261
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_externalReferences",
262
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
263
+ }
264
+ },
265
+ "website": {
266
+ "tier": "supplementary",
267
+ "weight": 4.0,
268
+ "category": "external_references",
269
+ "description": "Model website or documentation URL",
270
+ "jsonpath": "$.components[0].externalReferences[?(@.type=='website')].url",
271
+ "aibom_generation": {
272
+ "location": "none",
273
+ "rule": "include_if_present",
274
+ "source_fields": [
275
+ "website",
276
+ "url"
277
+ ],
278
+ "validation": "optional",
279
+ "data_type": "string"
280
+ },
281
+ "extraction": {
282
+ "methods": [
283
+ "api"
284
+ ],
285
+ "source_priority": [
286
+ "api"
287
+ ]
288
+ },
289
+ "scoring": {
290
+ "points": 4.0,
291
+ "required_for_profiles": [
292
+ "advanced"
293
+ ],
294
+ "category_contribution": 0.4
295
+ },
296
+ "validation_message": {
297
+ "missing": "No website link found",
298
+ "recommendation": "Add website link to model card"
299
+ },
300
+ "reference_urls": {
301
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_externalReferences",
302
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_externalReferences",
303
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
304
+ }
305
+ },
306
+ "specVersion": {
307
+ "tier": "critical",
308
+ "weight": 4.0,
309
+ "category": "required_fields",
310
+ "description": "CycloneDX specification version",
311
+ "jsonpath": "$.specVersion",
312
+ "aibom_generation": {
313
+ "location": "$.specVersion",
314
+ "rule": "always_include",
315
+ "source_fields": [
316
+ "specVersion"
317
+ ],
318
+ "validation": "required",
319
+ "data_type": "string"
320
+ },
321
+ "scoring": {
322
+ "points": 4.0,
323
+ "required_for_profiles": [
324
+ "basic",
325
+ "standard",
326
+ "advanced"
327
+ ],
328
+ "category_contribution": 0.2
329
+ },
330
+ "validation_message": {
331
+ "missing": "Missing critical field: specVersion - required for CycloneDX compliance",
332
+ "recommendation": "Set specVersion to '1.6' for CycloneDX 1.6 compliance"
333
+ },
334
+ "reference_urls": {
335
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#specVersion",
336
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#specVersion",
337
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/core/"
338
+ }
339
+ },
340
+ "serialNumber": {
341
+ "tier": "critical",
342
+ "weight": 4.0,
343
+ "category": "required_fields",
344
+ "description": "Unique identifier for this SBOM instance",
345
+ "jsonpath": "$.serialNumber",
346
+ "aibom_generation": {
347
+ "location": "$.serialNumber",
348
+ "rule": "always_include",
349
+ "source_fields": [
350
+ "serialNumber"
351
+ ],
352
+ "validation": "required",
353
+ "data_type": "string"
354
+ },
355
+ "scoring": {
356
+ "points": 4.0,
357
+ "required_for_profiles": [
358
+ "basic",
359
+ "standard",
360
+ "advanced"
361
+ ],
362
+ "category_contribution": 0.2
363
+ },
364
+ "validation_message": {
365
+ "missing": "Missing critical field: serialNumber - unique identifier required",
366
+ "recommendation": "Generate a UUID for the SBOM instance"
367
+ },
368
+ "reference_urls": {
369
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#serialNumber",
370
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#serialNumber",
371
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/core/"
372
+ }
373
+ },
374
+ "version": {
375
+ "tier": "critical",
376
+ "weight": 4.0,
377
+ "category": "required_fields",
378
+ "description": "Version of this SBOM document",
379
+ "jsonpath": "$.version",
380
+ "aibom_generation": {
381
+ "location": "$.version",
382
+ "rule": "always_include",
383
+ "source_fields": [
384
+ "version"
385
+ ],
386
+ "validation": "required",
387
+ "data_type": "integer"
388
+ },
389
+ "scoring": {
390
+ "points": 4.0,
391
+ "required_for_profiles": [
392
+ "basic",
393
+ "standard",
394
+ "advanced"
395
+ ],
396
+ "category_contribution": 0.2
397
+ },
398
+ "validation_message": {
399
+ "missing": "Missing critical field: version - document version required",
400
+ "recommendation": "Set version to 1 for initial SBOM generation"
401
+ },
402
+ "reference_urls": {
403
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#version",
404
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#version",
405
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/core/"
406
+ }
407
+ },
408
+ "primaryPurpose": {
409
+ "tier": "critical",
410
+ "weight": 4.0,
411
+ "category": "metadata",
412
+ "description": "Primary purpose or task of the AI model",
413
+ "jsonpath": "$.component.modelCard.modelParameters.task",
414
+ "aibom_generation": {
415
+ "location": "$.component.modelCard.modelParameters.task",
416
+ "rule": "include_if_available",
417
+ "source_fields": [
418
+ "primaryPurpose",
419
+ "pipeline_tag",
420
+ "ai:task"
421
+ ],
422
+ "validation": "recommended",
423
+ "data_type": "string"
424
+ },
425
+ "scoring": {
426
+ "points": 4.0,
427
+ "required_for_profiles": [
428
+ "standard",
429
+ "advanced"
430
+ ],
431
+ "category_contribution": 0.2
432
+ },
433
+ "validation_message": {
434
+ "missing": "Missing critical field: primaryPurpose - essential for understanding model intent",
435
+ "recommendation": "Add the primary task or purpose of the AI model"
436
+ },
437
+ "reference_urls": {
438
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_modelParameters_approach",
439
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_modelParameters_approach",
440
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
441
+ }
442
+ },
443
+ "suppliedBy": {
444
+ "tier": "critical",
445
+ "weight": 4.0,
446
+ "category": "metadata",
447
+ "description": "Organization or individual that supplied the model",
448
+ "jsonpath": "$.component.supplier.name",
449
+ "aibom_generation": {
450
+ "location": "$.component.supplier",
451
+ "rule": "include_if_available",
452
+ "source_fields": [
453
+ "suppliedBy",
454
+ "author",
455
+ "publisher"
456
+ ],
457
+ "validation": "recommended",
458
+ "data_type": "string"
459
+ },
460
+ "scoring": {
461
+ "points": 4.0,
462
+ "required_for_profiles": [
463
+ "standard",
464
+ "advanced"
465
+ ],
466
+ "category_contribution": 0.2
467
+ },
468
+ "validation_message": {
469
+ "missing": "Missing critical field: suppliedBy - supplier identification required",
470
+ "recommendation": "Add the organization or individual who provided the model"
471
+ },
472
+ "reference_urls": {
473
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_supplier",
474
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_supplier",
475
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/software/"
476
+ }
477
+ },
478
+ "standardCompliance": {
479
+ "tier": "supplementary",
480
+ "weight": 1.0,
481
+ "category": "metadata",
482
+ "description": "Standards or regulations the model complies with",
483
+ "jsonpath": "$.metadata.properties[?(@.name=='standardCompliance')].value",
484
+ "aibom_generation": {
485
+ "location": "$.metadata.properties",
486
+ "rule": "include_if_available",
487
+ "source_fields": [
488
+ "standardCompliance",
489
+ "compliance"
490
+ ],
491
+ "validation": "optional",
492
+ "data_type": "string"
493
+ },
494
+ "scoring": {
495
+ "points": 1.0,
496
+ "required_for_profiles": [
497
+ "advanced"
498
+ ],
499
+ "category_contribution": 0.05
500
+ },
501
+ "validation_message": {
502
+ "missing": "Missing supplementary field: standardCompliance - compliance information helpful",
503
+ "recommendation": "Add any relevant standards or regulations the model complies with"
504
+ },
505
+ "reference_urls": {
506
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-standardCompliance",
507
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
508
+ }
509
+ },
510
+ "external_references": {
511
+ "tier": "supplementary",
512
+ "weight": 1.0,
513
+ "category": "component_basic",
514
+ "description": "Additional external references",
515
+ "jsonpath": "$.component.externalReferences",
516
+ "aibom_generation": {
517
+ "location": "$.component.externalReferences",
518
+ "rule": "include_if_available",
519
+ "source_fields": [
520
+ "external_references",
521
+ "references",
522
+ "citations"
523
+ ],
524
+ "validation": "optional",
525
+ "data_type": "array"
526
+ },
527
+ "scoring": {
528
+ "points": 1.0,
529
+ "required_for_profiles": [
530
+ "advanced"
531
+ ],
532
+ "category_contribution": 0.05
533
+ },
534
+ "validation_message": {
535
+ "missing": "Missing supplementary field: external_references - additional references helpful",
536
+ "recommendation": "Add links to papers, documentation, or other resources"
537
+ },
538
+ "reference_urls": {
539
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_externalReferences",
540
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_externalReferences",
541
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
542
+ }
543
+ },
544
+ "domain": {
545
+ "tier": "supplementary",
546
+ "weight": 1.0,
547
+ "category": "metadata",
548
+ "description": "Domain or field of application",
549
+ "jsonpath": "$.metadata.properties[?(@.name=='domain')].value",
550
+ "aibom_generation": {
551
+ "location": "$.metadata.properties",
552
+ "rule": "include_if_available",
553
+ "source_fields": [
554
+ "domain",
555
+ "field",
556
+ "application_area"
557
+ ],
558
+ "validation": "optional",
559
+ "data_type": "string"
560
+ },
561
+ "scoring": {
562
+ "points": 1.0,
563
+ "required_for_profiles": [
564
+ "advanced"
565
+ ],
566
+ "category_contribution": 0.05
567
+ },
568
+ "validation_message": {
569
+ "missing": "Missing supplementary field: domain - application domain helpful for context",
570
+ "recommendation": "Add the domain or field where this model is typically applied"
571
+ },
572
+ "reference_urls": {
573
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-domain",
574
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
575
+ }
576
+ },
577
+ "autonomyType": {
578
+ "tier": "supplementary",
579
+ "weight": 1.0,
580
+ "category": "metadata",
581
+ "description": "Level of autonomy or human involvement required",
582
+ "jsonpath": "$.metadata.properties[?(@.name=='autonomyType')].value",
583
+ "aibom_generation": {
584
+ "location": "$.metadata.properties",
585
+ "rule": "include_if_available",
586
+ "source_fields": [
587
+ "autonomyType",
588
+ "autonomy_level"
589
+ ],
590
+ "validation": "optional",
591
+ "data_type": "string"
592
+ },
593
+ "scoring": {
594
+ "points": 1.0,
595
+ "required_for_profiles": [
596
+ "advanced"
597
+ ],
598
+ "category_contribution": 0.05
599
+ },
600
+ "validation_message": {
601
+ "missing": "Missing supplementary field: autonomyType - autonomy level information helpful",
602
+ "recommendation": "Add information about the level of human oversight required"
603
+ },
604
+ "reference_urls": {
605
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-autonomyType",
606
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
607
+ }
608
+ },
609
+ "name": {
610
+ "tier": "critical",
611
+ "weight": 4.0,
612
+ "category": "component_basic",
613
+ "description": "Name of the AI model component",
614
+ "jsonpath": "$.components[0].name",
615
+ "aibom_generation": {
616
+ "location": "$.components[0].name",
617
+ "rule": "always_include",
618
+ "source_fields": [
619
+ "name",
620
+ "model_name"
621
+ ],
622
+ "validation": "required",
623
+ "data_type": "string"
624
+ },
625
+ "scoring": {
626
+ "points": 4.0,
627
+ "required_for_profiles": [
628
+ "basic",
629
+ "standard",
630
+ "advanced"
631
+ ],
632
+ "category_contribution": 0.2
633
+ },
634
+ "validation_message": {
635
+ "missing": "Missing critical field: name - essential for model identification",
636
+ "recommendation": "Add a descriptive name for the model"
637
+ },
638
+ "reference_urls": {
639
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_name",
640
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_name",
641
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/software/"
642
+ }
643
+ },
644
+ "type": {
645
+ "tier": "critical",
646
+ "weight": 4.0,
647
+ "category": "component_basic",
648
+ "description": "Type of component (machine-learning-model)",
649
+ "jsonpath": "$.components[0].type",
650
+ "aibom_generation": {
651
+ "location": "$.components[0].type",
652
+ "rule": "always_include",
653
+ "source_fields": [
654
+ "type"
655
+ ],
656
+ "validation": "required",
657
+ "data_type": "string"
658
+ },
659
+ "scoring": {
660
+ "points": 4.0,
661
+ "required_for_profiles": [
662
+ "basic",
663
+ "standard",
664
+ "advanced"
665
+ ],
666
+ "category_contribution": 0.2
667
+ },
668
+ "validation_message": {
669
+ "missing": "Missing field: type - component type classification needed",
670
+ "recommendation": "Set type to 'machine-learning-model' for AI models"
671
+ },
672
+ "reference_urls": {
673
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_type",
674
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_type",
675
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/software/"
676
+ }
677
+ },
678
+ "component_version": {
679
+ "tier": "critical",
680
+ "weight": 4.0,
681
+ "category": "component_basic",
682
+ "description": "Version of the component",
683
+ "jsonpath": "$.components[0].version",
684
+ "aibom_generation": {
685
+ "location": "$.components[0].version",
686
+ "rule": "always_include",
687
+ "source_fields": [
688
+ "version"
689
+ ],
690
+ "validation": "required",
691
+ "data_type": "string"
692
+ },
693
+ "scoring": {
694
+ "points": 4.0,
695
+ "required_for_profiles": [
696
+ "basic",
697
+ "standard",
698
+ "advanced"
699
+ ],
700
+ "category_contribution": 0.2
701
+ },
702
+ "validation_message": {
703
+ "missing": "Missing field: version - component version needed",
704
+ "recommendation": "Set an appropriate version for the component"
705
+ },
706
+ "reference_urls": {
707
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_version",
708
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_version",
709
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/software/"
710
+ }
711
+ },
712
+ "purl": {
713
+ "tier": "important",
714
+ "weight": 3.0,
715
+ "category": "component_basic",
716
+ "description": "Package URL identifier",
717
+ "jsonpath": "$.components[0].purl",
718
+ "aibom_generation": {
719
+ "location": "$.components[0].purl",
720
+ "rule": "include_if_available",
721
+ "source_fields": [
722
+ "purl",
723
+ "package_url"
724
+ ],
725
+ "validation": "recommended",
726
+ "data_type": "string"
727
+ },
728
+ "scoring": {
729
+ "points": 3.0,
730
+ "required_for_profiles": [
731
+ "standard",
732
+ "advanced"
733
+ ],
734
+ "category_contribution": 0.15
735
+ },
736
+ "validation_message": {
737
+ "missing": "Missing field: purl - package URL for identification",
+ "recommendation": "Add a Package URL (PURL) for the model"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_purl",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_purl",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/software/Package/"
+ }
+ },
+ "description": {
+ "tier": "important",
+ "weight": 3.0,
+ "category": "component_basic",
+ "description": "Description of the AI model",
+ "jsonpath": "$.components[0].description",
+ "aibom_generation": {
+ "location": "$.components[0].description",
+ "rule": "include_if_available",
+ "source_fields": [
+ "description",
+ "summary"
+ ],
+ "validation": "recommended",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 3.0,
+ "required_for_profiles": [
+ "standard",
+ "advanced"
+ ],
+ "category_contribution": 0.15
+ },
+ "validation_message": {
+ "missing": "Missing field: description - model description helpful for understanding",
+ "recommendation": "Add a clear description of what the model does"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_description",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_description",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/software/"
+ }
+ },
+ "licenses": {
+ "tier": "important",
+ "weight": 3.0,
+ "category": "component_basic",
+ "description": "License information for the model",
+ "jsonpath": "$.components[0].licenses",
+ "aibom_generation": {
+ "location": "$.components[0].licenses",
+ "rule": "include_if_available",
+ "source_fields": [
+ "licenses",
+ "license"
+ ],
+ "validation": "recommended",
+ "data_type": "array"
+ },
+ "scoring": {
+ "points": 3.0,
+ "required_for_profiles": [
+ "standard",
+ "advanced"
+ ],
+ "category_contribution": 0.15
+ },
+ "validation_message": {
+ "missing": "Missing field: licenses - license information important for compliance",
+ "recommendation": "Add license information for the model"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_licenses",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_licenses",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/simple_licensing/"
+ }
+ },
814
+ "ethicalConsiderations": {
+ "tier": "important",
+ "weight": 2.0,
+ "category": "component_model_card",
+ "description": "Ethical considerations and fairness assessments",
+ "jsonpath": "$.component.modelCard.considerations.ethicalConsiderations[0].description",
+ "aibom_generation": {
+ "location": "$.component.modelCard.considerations.ethicalConsiderations",
+ "rule": "include_if_available",
+ "source_fields": [
+ "ethicalConsiderations",
+ "ethics",
+ "fairness"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 2.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.067
+ },
+ "validation_message": {
+ "missing": "Missing field: ethicalConsiderations - ethical information important for responsible AI",
+ "recommendation": "Add ethical considerations or fairness assessments"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_considerations_ethicalConsiderations",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_considerations_ethicalConsiderations",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
848
+ "energyConsumption": {
+ "tier": "important",
+ "weight": 2.0,
+ "category": "component_model_card",
+ "description": "Energy consumption information",
+ "jsonpath": "$.metadata.properties[?(@.name=='energyConsumption')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "energyConsumption",
+ "energy_usage"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 2.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.067
+ },
+ "validation_message": {
+ "missing": "Missing field: energyConsumption - energy usage information helpful for sustainability",
+ "recommendation": "Add information about the model's energy consumption"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_considerations_environmentalConsiderations_energyConsumptions",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_considerations_environmentalConsiderations_energyConsumptions",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
+ "hyperparameter": {
+ "tier": "important",
+ "weight": 2.0,
+ "category": "component_model_card",
+ "description": "Key hyperparameters of the model architecture",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:hyperparameter')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "hyperparameter",
+ "hyperparameters",
+ "training_params"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 2.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.067
+ },
+ "validation_message": {
+ "missing": "Missing field: hyperparameter - training configuration helpful for reproducibility",
+ "recommendation": "Add key hyperparameters used during model training"
+ },
+ "reference_urls": {
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-hyperparameter",
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "technicalLimitations": {
+ "tier": "important",
+ "weight": 2.0,
+ "category": "component_model_card",
+ "description": "Known limitations of the model",
+ "jsonpath": "$.component.modelCard.considerations.technicalLimitations[0]",
+ "aibom_generation": {
+ "location": "$.component.modelCard.considerations.technicalLimitations",
+ "rule": "include_if_available",
+ "source_fields": [
+ "technicalLimitations",
+ "limitations",
+ "known_issues"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 2.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.067
+ },
+ "validation_message": {
+ "missing": "Missing field: technicalLimitations - limitations information helpful for safety",
+ "recommendation": "Add known technical limitations of the model"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_considerations_technicalLimitations",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_considerations_technicalLimitations",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
+ "safetyRiskAssessment": {
+ "tier": "important",
+ "weight": 2.0,
+ "category": "component_model_card",
+ "description": "Safety and risk assessment information",
+ "jsonpath": "$.metadata.properties[?(@.name=='safetyRiskAssessment')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "safetyRiskAssessment",
+ "safety_assessment",
+ "risk_analysis"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 2.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.067
+ },
+ "validation_message": {
+ "missing": "Missing field: safetyRiskAssessment - safety assessment important for responsible deployment",
+ "recommendation": "Add safety and risk assessment information"
+ },
+ "reference_urls": {
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-safetyRiskAssessment",
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "intendedUse": {
+ "tier": "important",
+ "weight": 2.0,
+ "category": "component_model_card",
+ "description": "Intended use cases for the model",
+ "jsonpath": "$.component.modelCard.considerations.useCases[0]",
+ "aibom_generation": {
+ "location": "$.component.modelCard.considerations.useCases",
+ "rule": "include_if_available",
+ "source_fields": [
+ "intendedUse",
+ "use_cases",
+ "applications"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 2.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.067
+ },
+ "validation_message": {
+ "missing": "Missing field: intendedUse - intended use information helpful for context",
+ "recommendation": "Add intended use cases for the model"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_considerations_useCases",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_considerations_useCases",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
+ "typeOfModel": {
+ "tier": "important",
+ "weight": 2.0,
+ "category": "component_model_card",
+ "description": "Type or architecture of the model",
+ "jsonpath": "$.components[0].modelCard.modelParameters.modelArchitecture",
+ "aibom_generation": {
+ "location": "$.components[0].modelCard.modelParameters.modelArchitecture",
+ "rule": "include_if_available",
+ "source_fields": [
+ "typeOfModel",
+ "model_type",
+ "architecture"
+ ],
+ "validation": "recommended",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 2.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.067
+ },
+ "validation_message": {
+ "missing": "Missing field: typeOfModel - model architecture information helpful",
+ "recommendation": "Add the type or architecture of the model (e.g., Transformer, CNN)"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_modelParameters_approach",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_modelParameters_approach",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
1049
+ "modelExplainability": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Information about model explainability",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:modelCardExplainability')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "modelExplainability",
+ "explainability",
+ "interpretability"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: modelExplainability - explainability information helpful for transparency",
+ "recommendation": "Add information about model explainability or interpretability features"
+ },
+ "reference_urls": {
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-modelExplainability",
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "energyQuantity": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Quantitative energy consumption data",
+ "jsonpath": "$.metadata.properties[?(@.name=='energyQuantity')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "energyQuantity",
+ "energy_amount"
+ ],
+ "validation": "optional",
+ "data_type": "number"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: energyQuantity - quantitative energy data helpful for sustainability metrics",
+ "recommendation": "Add specific energy consumption quantities"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_considerations_environmentalConsiderations_energyConsumptions_items_activityEnergyCost_value",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_considerations_environmentalConsiderations_energyConsumptions_items_activityEnergyCost_value",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
+ "energyUnit": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Unit of measurement for energy consumption",
+ "jsonpath": "$.metadata.properties[?(@.name=='energyUnit')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "energyUnit",
+ "energy_unit"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: energyUnit - energy measurement unit helpful for standardization",
+ "recommendation": "Add the unit of measurement for energy consumption (e.g., kWh, Joules)"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_considerations_environmentalConsiderations_energyConsumptions_items_activityEnergyCost_unit",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_considerations_environmentalConsiderations_energyConsumptions_items_activityEnergyCost_unit",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
+ "informationAboutTraining": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Information about the training process",
+ "jsonpath": "$.metadata.properties[?(@.name=='informationAboutTraining')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "informationAboutTraining",
+ "training_info",
+ "training_details"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: informationAboutTraining - training details helpful for understanding model development",
+ "recommendation": "Add information about the training process and methodology"
+ },
+ "reference_urls": {
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-informationAboutTraining",
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "informationAboutApplication": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Information about intended applications",
+ "jsonpath": "$.metadata.properties[?(@.name=='informationAboutApplication')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "informationAboutApplication",
+ "application_info",
+ "intended_use"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: informationAboutApplication - application guidance helpful for proper usage",
+ "recommendation": "Add information about intended applications and use cases"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_considerations_useCases",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_considerations_useCases",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
+ "metric": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Performance metrics and evaluation results",
+ "jsonpath": "$.metadata.properties[?(@.name=='metric')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "metric",
+ "metrics",
+ "performance"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: metric - performance metrics helpful for evaluation",
+ "recommendation": "Add performance metrics and evaluation results"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_modelCard_quantitativeAnalysis_performanceMetrics",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_modelCard_quantitativeAnalysis_performanceMetrics",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
+ "metricDecisionThreshold": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Decision thresholds for metrics",
+ "jsonpath": "$.metadata.properties[?(@.name=='metricDecisionThreshold')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "metricDecisionThreshold",
+ "decision_threshold",
+ "threshold"
+ ],
+ "validation": "optional",
+ "data_type": "number"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: metricDecisionThreshold - decision thresholds helpful for operational guidance",
+ "recommendation": "Add decision thresholds for performance metrics"
+ },
+ "reference_urls": {
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-metricDecisionThreshold",
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "modelDataPreprocessing": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Data preprocessing information",
+ "jsonpath": "$.metadata.properties[?(@.name=='modelDataPreprocessing')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "modelDataPreprocessing",
+ "data_preprocessing",
+ "preprocessing"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: modelDataPreprocessing - preprocessing details helpful for reproducibility",
+ "recommendation": "Add information about data preprocessing steps"
+ },
+ "reference_urls": {
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-modelDataPreprocessing",
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "useSensitivePersonalInformation": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Information about use of sensitive personal data",
+ "jsonpath": "$.metadata.properties[?(@.name=='useSensitivePersonalInformation')].value",
+ "aibom_generation": {
+ "location": "$.metadata.properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "useSensitivePersonalInformation",
+ "sensitive_data",
+ "personal_data"
+ ],
+ "validation": "optional",
+ "data_type": "boolean"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: useSensitivePersonalInformation - privacy information important for compliance",
+ "recommendation": "Add information about use of sensitive or personal data"
+ },
+ "reference_urls": {
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/model/AI/Classes/AIPackage/#AI-useSensitivePersonalInformation",
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
1348
+ "downloadLocation": {
+ "tier": "important",
+ "weight": 3.0,
+ "category": "external_references",
+ "description": "URL to download the model",
+ "jsonpath": "$.components[0].externalReferences[?(@.type=='distribution' || @.type=='website')].url",
+ "aibom_generation": {
+ "location": "$.component.externalReferences",
+ "rule": "include_if_available",
+ "source_fields": [
+ "downloadLocation",
+ "download_url",
+ "model_url"
+ ],
+ "validation": "recommended",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 3.0,
+ "required_for_profiles": [
+ "standard",
+ "advanced"
+ ],
+ "category_contribution": 0.15
+ },
+ "validation_message": {
+ "missing": "Missing field: downloadLocation - model download URL recommended",
+ "recommendation": "Add a URL where the model can be downloaded"
+ },
+ "reference_urls": {
+ "cyclonedx_1.6": "https://cyclonedx.org/docs/1.6/json/#components_items_externalReferences",
+ "cyclonedx_1.7": "https://cyclonedx.org/docs/1.7/json/#components_items_externalReferences",
+ "spdx_3.1": "https://spdx.github.io/spdx-spec/v3.1-RC1/ai/"
+ }
+ },
1383
+ "vocab_size": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Expected size of the model's vocabulary",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:vocabSize')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "vocab_size"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: vocab_size - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add Vocabulary Size"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "tokenizer_class": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "The specific tokenizer class or method used",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:tokenizerClass')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "tokenizer_class"
+ ],
+ "validation": "optional",
+ "data_type": "string"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: tokenizer_class - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add Tokenizer Class"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "context_length": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Maximum context length or sequence length supported",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:contextLength')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "context_length"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: context_length - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add Context Length"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "embedding_length": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Vector length of the token embeddings",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:embeddingLength')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "embedding_length"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: embedding_length - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add Embedding Length"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "block_count": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Number of transformer blocks or layers",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:blockCount')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "block_count"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: block_count - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add Block Count"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "attention_head_count": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Number of attention heads in the model",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:attentionHeadCount')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "attention_head_count"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: attention_head_count - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add Attention Head Count"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "attention_head_count_kv": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Number of Key-Value attention heads",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:attentionHeadCountKV')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "attention_head_count_kv"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: attention_head_count_kv - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add Attention Head Count KV"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "feed_forward_length": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Dimensionality of the feed-forward network",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:feedForwardLength')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "feed_forward_length"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: feed_forward_length - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add Feed Forward Length"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "rope_dimension_count": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Number of dimensions for Rotary Position Embedding (RoPE)",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:ropeDimensionCount')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "rope_dimension_count"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: rope_dimension_count - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add RoPE Dimension Count"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "quantization_version": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Version or specification identifier of the quantization format",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:quantizationVersion')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "quantization_version"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
+ "advanced"
+ ],
+ "category_contribution": 0.033
+ },
+ "validation_message": {
+ "missing": "Missing supplementary field: quantization_version - GGUF model properties helpful for reproducibility",
+ "recommendation": "Add Quantization Version"
+ },
+ "reference_urls": {
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
+ }
+ },
+ "quantization_file_type": {
+ "tier": "supplementary",
+ "weight": 1.0,
+ "category": "component_model_card",
+ "description": "Enum or integer identifier for the quantization bit-precision (e.g. Q4_K_M)",
+ "jsonpath": "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:quantizationFileType')].value",
+ "aibom_generation": {
+ "location": "$.components[0].properties",
+ "rule": "include_if_available",
+ "source_fields": [
+ "quantization_file_type"
+ ],
+ "validation": "optional",
+ "data_type": "integer"
+ },
+ "scoring": {
+ "points": 1.0,
+ "required_for_profiles": [
1701
+ "advanced"
1702
+ ],
1703
+ "category_contribution": 0.033
1704
+ },
1705
+ "validation_message": {
1706
+ "missing": "Missing supplementary field: quantization_file_type - GGUF model properties helpful for reproducibility",
1707
+ "recommendation": "Add Quantization File Type"
1708
+ },
1709
+ "reference_urls": {
1710
+ "genai_aibom_taxonomy": "https://github.com/GenAI-Security-Project/cyclonedx-property-taxonomy"
1711
+ }
1712
+ }
1713
+ }
1714
+ }
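
Each registry entry above addresses a model-card property through a JSONPath-style filter such as `properties[?(@.name=='genai:aibom:modelcard:feedForwardLength')].value`. As an illustrative sketch (not part of this upload), such a filter can be resolved against a generated AIBOM without a full JSONPath library; the helper name `resolve_property` and the sample document are hypothetical:

```python
# Sketch: resolve a registry jsonpath filter of the form
# $.components[0].modelCard.properties[?(@.name=='...')].value
# against an AIBOM dict. Names here are illustrative, not from the commit.
import re
from typing import Any, Optional

FILTER_RE = re.compile(r"\[\?\(@\.name=='(?P<name>[^']+)'\)\]")

def resolve_property(aibom: dict, prop_name: str) -> Optional[Any]:
    """Return the value of the named entry in components[0].modelCard.properties."""
    props = (
        aibom.get("components", [{}])[0]
        .get("modelCard", {})
        .get("properties", [])
    )
    for prop in props:
        if prop.get("name") == prop_name:
            return prop.get("value")
    return None

sample = {
    "components": [{
        "modelCard": {
            "properties": [
                {"name": "genai:aibom:modelcard:feedForwardLength", "value": 11008}
            ]
        }
    }]
}

path = "$.components[0].modelCard.properties[?(@.name=='genai:aibom:modelcard:feedForwardLength')].value"
name = FILTER_RE.search(path).group("name")
print(resolve_property(sample, name))  # -> 11008
```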
src/models/gguf_metadata.py ADDED
@@ -0,0 +1,528 @@
+ """
+ GGUF Metadata Extraction for AIBOM Generator
+
+ This module extracts metadata from GGUF files without downloading the full file.
+ It uses HTTP range requests to fetch only the header portion (typically 2-8MB)
+ of potentially multi-GB model files.
+ """
+
+ import struct
+ import logging
+ from typing import Dict, Any, Optional, List
+ from collections import OrderedDict
+ from urllib.parse import quote
+
+ logger = logging.getLogger(__name__)
+
+ GGUF_MAGIC = 0x46554747  # b"GGUF" read as a little-endian uint32
+
+ _STRUCT_UINT8 = struct.Struct("<B")
+ _STRUCT_INT8 = struct.Struct("<b")
+ _STRUCT_UINT16 = struct.Struct("<H")
+ _STRUCT_INT16 = struct.Struct("<h")
+ _STRUCT_UINT32 = struct.Struct("<I")
+ _STRUCT_INT32 = struct.Struct("<i")
+ _STRUCT_UINT64 = struct.Struct("<Q")
+ _STRUCT_INT64 = struct.Struct("<q")
+ _STRUCT_FLOAT32 = struct.Struct("<f")
+ _STRUCT_FLOAT64 = struct.Struct("<d")
+
+
+ class GGUFParseError(Exception):
+     """Base exception for GGUF parsing errors."""
+     pass
+
+
+ class BufferUnderrunError(GGUFParseError):
+     """Raised when the buffer doesn't contain enough data to parse."""
+     def __init__(self, message: str = "buffer underrun", *, required_bytes: Optional[int] = None):
+         super().__init__(message)
+         self.required_bytes = required_bytes
+
+
+ class InvalidMagicError(GGUFParseError):
+     """Raised when a file doesn't have a valid GGUF magic number."""
+     pass
+
+
+ class GGUFValueType:
+     UINT8 = 0
+     INT8 = 1
+     UINT16 = 2
+     INT16 = 3
+     UINT32 = 4
+     INT32 = 5
+     FLOAT32 = 6
+     BOOL = 7
+     STRING = 8
+     ARRAY = 9
+     UINT64 = 10
+     INT64 = 11
+     FLOAT64 = 12
+
+
+ class GGUFMetadata:
+     """Parsed GGUF file metadata."""
+
+     def __init__(
+         self,
+         version: int,
+         tensor_count: int,
+         kv_count: int,
+         metadata: Dict[str, Any],
+         header_length: int,
+         filename: str = "",
+     ):
+         self.version = version
+         self.tensor_count = tensor_count
+         self.kv_count = kv_count
+         self.metadata = metadata
+         self.header_length = header_length
+         self.filename = filename
+
+
+ class GGUFModelInfo:
+     """Model information extracted from GGUF metadata for AIBOM."""
+
+     def __init__(
+         self,
+         filename: str,
+         architecture: Optional[str] = None,
+         name: Optional[str] = None,
+         quantization_version: Optional[int] = None,
+         file_type: Optional[int] = None,
+         tokenizer_model: Optional[str] = None,
+         vocab_size: Optional[int] = None,
+         context_length: Optional[int] = None,
+         embedding_length: Optional[int] = None,
+         block_count: Optional[int] = None,
+         attention_head_count: Optional[int] = None,
+         attention_head_count_kv: Optional[int] = None,
+         feed_forward_length: Optional[int] = None,
+         rope_dimension_count: Optional[int] = None,
+         description: Optional[str] = None,
+         license: Optional[str] = None,
+         author: Optional[str] = None,
+         raw_metadata: Optional[Dict[str, Any]] = None,
+     ):
+         self.filename = filename
+         self.architecture = architecture
+         self.name = name
+         self.quantization_version = quantization_version
+         self.file_type = file_type
+         self.tokenizer_model = tokenizer_model
+         self.vocab_size = vocab_size
+         self.context_length = context_length
+         self.embedding_length = embedding_length
+         self.block_count = block_count
+         self.attention_head_count = attention_head_count
+         self.attention_head_count_kv = attention_head_count_kv
+         self.feed_forward_length = feed_forward_length
+         self.rope_dimension_count = rope_dimension_count
+         self.description = description
+         self.license = license
+         self.author = author
+         self.raw_metadata = raw_metadata or {}
+
+
+ class _ByteReader:
+     """Helper for reading structured binary data from a buffer."""
+
+     __slots__ = ("_view", "_offset")
+
+     def __init__(self, buffer: bytes) -> None:
+         self._view = memoryview(buffer)
+         self._offset = 0
+
+     @property
+     def offset(self) -> int:
+         return self._offset
+
+     def _require(self, size: int) -> None:
+         if self._offset + size > len(self._view):
+             raise BufferUnderrunError(
+                 f"need {size} bytes at offset {self._offset}, but only {len(self._view) - self._offset} available",
+                 required_bytes=self._offset + size
+             )
+
+     def read(self, size: int) -> memoryview:
+         self._require(size)
+         start = self._offset
+         self._offset += size
+         return self._view[start:self._offset]
+
+     def read_uint8(self) -> int:
+         return _STRUCT_UINT8.unpack_from(self.read(_STRUCT_UINT8.size))[0]
+
+     def read_int8(self) -> int:
+         return _STRUCT_INT8.unpack_from(self.read(_STRUCT_INT8.size))[0]
+
+     def read_uint16(self) -> int:
+         return _STRUCT_UINT16.unpack_from(self.read(_STRUCT_UINT16.size))[0]
+
+     def read_int16(self) -> int:
+         return _STRUCT_INT16.unpack_from(self.read(_STRUCT_INT16.size))[0]
+
+     def read_uint32(self) -> int:
+         return _STRUCT_UINT32.unpack_from(self.read(_STRUCT_UINT32.size))[0]
+
+     def read_int32(self) -> int:
+         return _STRUCT_INT32.unpack_from(self.read(_STRUCT_INT32.size))[0]
+
+     def read_uint64(self) -> int:
+         return _STRUCT_UINT64.unpack_from(self.read(_STRUCT_UINT64.size))[0]
+
+     def read_int64(self) -> int:
+         return _STRUCT_INT64.unpack_from(self.read(_STRUCT_INT64.size))[0]
+
+     def read_float32(self) -> float:
+         return _STRUCT_FLOAT32.unpack_from(self.read(_STRUCT_FLOAT32.size))[0]
+
+     def read_float64(self) -> float:
+         return _STRUCT_FLOAT64.unpack_from(self.read(_STRUCT_FLOAT64.size))[0]
+
+     def read_bool(self) -> bool:
+         return self.read_uint8() != 0
+
+     def read_string(self) -> str:
+         length = self.read_uint64()
+         if length > 10_000_000:
+             raise GGUFParseError(f"string length {length} exceeds sanity limit")
+         raw = self.read(length)
+         return raw.tobytes().decode("utf-8")
+
+
+ def _read_value(reader: _ByteReader, value_type: int) -> Any:
+     """Parse a GGUF metadata value based on its type."""
+     if value_type == GGUFValueType.UINT8:
+         return reader.read_uint8()
+     elif value_type == GGUFValueType.INT8:
+         return reader.read_int8()
+     elif value_type == GGUFValueType.UINT16:
+         return reader.read_uint16()
+     elif value_type == GGUFValueType.INT16:
+         return reader.read_int16()
+     elif value_type == GGUFValueType.UINT32:
+         return reader.read_uint32()
+     elif value_type == GGUFValueType.INT32:
+         return reader.read_int32()
+     elif value_type == GGUFValueType.UINT64:
+         return reader.read_uint64()
+     elif value_type == GGUFValueType.INT64:
+         return reader.read_int64()
+     elif value_type == GGUFValueType.FLOAT32:
+         return reader.read_float32()
+     elif value_type == GGUFValueType.FLOAT64:
+         return reader.read_float64()
+     elif value_type == GGUFValueType.BOOL:
+         return reader.read_bool()
+     elif value_type == GGUFValueType.STRING:
+         return reader.read_string()
+     elif value_type == GGUFValueType.ARRAY:
+         element_type = reader.read_uint32()
+         count = reader.read_uint64()
+         if count > 1_000_000:
+             raise GGUFParseError(f"array count {count} exceeds sanity limit")
+         return [_read_value(reader, element_type) for _ in range(count)]
+     else:
+         raise GGUFParseError(f"unknown GGUF value type: {value_type}")
+
+
+ def parse_gguf_metadata(buffer: bytes, filename: str = "") -> GGUFMetadata:
+     """Parse GGUF metadata from a byte buffer."""
+     reader = _ByteReader(buffer)
+
+     magic = reader.read_uint32()
+     if magic != GGUF_MAGIC:
+         raise InvalidMagicError(f"invalid magic: 0x{magic:08x}, expected 0x{GGUF_MAGIC:08x}")
+
+     version = reader.read_uint32()
+     tensor_count = reader.read_uint64()
+     kv_count = reader.read_uint64()
+
+     if kv_count > 100_000:
+         raise GGUFParseError(f"kv_count {kv_count} exceeds sanity limit")
+
+     metadata: OrderedDict[str, Any] = OrderedDict()
+
+     for _ in range(kv_count):
+         key = reader.read_string()
+         value_type = reader.read_uint32()
+         value = _read_value(reader, value_type)
+         metadata[key] = value
+
+     return GGUFMetadata(
+         version=version,
+         tensor_count=tensor_count,
+         kv_count=kv_count,
+         metadata=metadata,
+         header_length=reader.offset,
+         filename=filename
+     )
+
+
+ def extract_model_info(gguf_metadata: GGUFMetadata) -> GGUFModelInfo:
+     """Extract AIBOM-relevant model information from GGUF metadata."""
+     meta = gguf_metadata.metadata
+     arch = meta.get("general.architecture", "")
+
+     def get_arch_key(suffix: str) -> Optional[Any]:
+         if arch:
+             val = meta.get(f"{arch}.{suffix}")
+             if val is not None:
+                 return val
+         return None
+
+     return GGUFModelInfo(
+         filename=gguf_metadata.filename,
+         architecture=arch or None,
+         name=meta.get("general.name"),
+         quantization_version=meta.get("general.quantization_version"),
+         file_type=meta.get("general.file_type"),
+         tokenizer_model=meta.get("tokenizer.ggml.model"),
+         vocab_size=len(meta.get("tokenizer.ggml.tokens", [])) or None,
+         context_length=get_arch_key("context_length"),
+         embedding_length=get_arch_key("embedding_length"),
+         block_count=get_arch_key("block_count"),
+         attention_head_count=get_arch_key("attention.head_count"),
+         attention_head_count_kv=get_arch_key("attention.head_count_kv"),
+         feed_forward_length=get_arch_key("feed_forward_length"),
+         rope_dimension_count=get_arch_key("rope.dimension_count"),
+         description=meta.get("general.description"),
+         license=meta.get("general.license"),
+         author=meta.get("general.author"),
+         raw_metadata=dict(meta)
+     )
+
+
+ def build_huggingface_url(repo_id: str, filename: str, revision: str = "main") -> str:
+     """Build a HuggingFace download URL for a file."""
+     if not repo_id or "/" not in repo_id:
+         raise ValueError("repo_id must be in the format 'owner/repo'")
+
+     owner, repo = repo_id.split("/", 1)
+     owner_quoted = quote(owner, safe="-_.~")
+     repo_quoted = quote(repo, safe="-_.~")
+     revision_quoted = quote(revision, safe="-_.~")
+     filename_quoted = "/".join(quote(part, safe="-_.~/") for part in filename.split("/"))
+
+     return f"https://huggingface.co/{owner_quoted}/{repo_quoted}/resolve/{revision_quoted}/{filename_quoted}"
+
+
+ def fetch_gguf_metadata_from_url(
+     url: str,
+     filename: str = "",
+     *,
+     hf_token: Optional[str] = None,
+     initial_request_size: int = 8 * 1024 * 1024,
+     max_request_size: int = 64 * 1024 * 1024,
+     timeout: float = 60.0,
+ ) -> GGUFMetadata:
+     """Fetch and parse GGUF metadata from a URL using HTTP range requests."""
+     try:
+         import httpx
+     except ImportError:
+         raise ImportError("httpx is required for remote GGUF fetching. Install with: pip install httpx")
+
+     headers = {
+         "User-Agent": "OWASP-AIBOM-Generator/1.0",
+         "Accept": "application/octet-stream",
+     }
+     if hf_token:
+         headers["Authorization"] = f"Bearer {hf_token}"
+
+     with httpx.Client(timeout=timeout, follow_redirects=False) as client:
+         current_url = url
+         for _ in range(5):
+             response = client.head(current_url, headers=headers)
+             if response.status_code in (301, 302, 303, 307, 308):
+                 current_url = response.headers.get("location", current_url)
+                 logger.debug(f"Redirecting to: {current_url}")
+             else:
+                 break
+     actual_url = current_url
+
+     buffer = bytearray()
+     request_size = initial_request_size
+
+     with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+         range_header = f"bytes=0-{request_size - 1}"
+         request_headers = {**headers, "Range": range_header}
+
+         logger.info(f"Fetching first {request_size // (1024 * 1024)}MB of GGUF metadata...")
+         response = client.get(actual_url, headers=request_headers)
+         response.raise_for_status()
+         buffer.extend(response.content)
+
+         max_retries = 5
+         for retry in range(max_retries):
+             try:
+                 return parse_gguf_metadata(bytes(buffer), filename)
+             except BufferUnderrunError as exc:
+                 if retry >= max_retries - 1:
+                     raise
+
+                 if exc.required_bytes:
+                     needed = max(exc.required_bytes + 2 * 1024 * 1024, len(buffer) * 2)
+                 else:
+                     needed = len(buffer) * 2
+
+                 additional_size = min(needed - len(buffer), max_request_size - len(buffer))
+
+                 if additional_size <= 0 or len(buffer) >= max_request_size:
+                     raise GGUFParseError(f"unable to parse metadata within {max_request_size} bytes")
+
+                 logger.info(f"Need more data (retry {retry + 1}), fetching additional {additional_size // 1024}KB...")
+
+                 range_header = f"bytes={len(buffer)}-{len(buffer) + additional_size - 1}"
+                 request_headers = {**headers, "Range": range_header}
+                 response = client.get(actual_url, headers=request_headers)
+                 response.raise_for_status()
+                 buffer.extend(response.content)
+                 logger.info(f"Buffer now {len(buffer) // 1024}KB")
+
+
+ def fetch_gguf_metadata_from_repo(
+     repo_id: str,
+     filename: str,
+     *,
+     revision: str = "main",
+     hf_token: Optional[str] = None,
+     **kwargs
+ ) -> GGUFModelInfo:
+     """Fetch and extract AIBOM-relevant metadata from a GGUF file in a HuggingFace repo."""
+     url = build_huggingface_url(repo_id, filename, revision)
+     logger.info(f"Fetching GGUF metadata from {repo_id}/{filename}")
+
+     gguf_metadata = fetch_gguf_metadata_from_url(
+         url,
+         filename=filename,
+         hf_token=hf_token,
+         **kwargs
+     )
+
+     return extract_model_info(gguf_metadata)
+
+
+ def list_gguf_files(repo_id: str, hf_token: Optional[str] = None) -> List[str]:
+     """List GGUF files in a HuggingFace repository."""
+     from huggingface_hub import list_repo_files
+
+     files = list_repo_files(repo_id, token=hf_token)
+     return [f for f in files if f.endswith('.gguf')]
+
+
+ def extract_all_gguf_metadata(
+     repo_id: str,
+     *,
+     hf_token: Optional[str] = None,
+     **kwargs
+ ) -> List[GGUFModelInfo]:
+     """Extract metadata from all GGUF files in a repository."""
+     gguf_files = list_gguf_files(repo_id, hf_token)
+
+     if not gguf_files:
+         logger.debug(f"No GGUF files found in {repo_id}")
+         return []
+
+     logger.info(f"Found {len(gguf_files)} GGUF files in {repo_id}")
+
+     results = []
+     for filename in gguf_files:
+         try:
+             info = fetch_gguf_metadata_from_repo(
+                 repo_id,
+                 filename,
+                 hf_token=hf_token,
+                 **kwargs
+             )
+             results.append(info)
+             logger.info(f"  {filename}: architecture={info.architecture}")
+         except Exception as e:
+             logger.warning(f"  {filename}: failed to extract metadata: {e}")
+
+     return results
+
+
+ def _map_core_fields(gguf_info: GGUFModelInfo) -> Dict[str, Any]:
+     """Map basic model identity and tokenizer fields."""
+     metadata = {}
+
+     if gguf_info.architecture:
+         metadata["model_type"] = gguf_info.architecture
+         metadata["typeOfModel"] = gguf_info.architecture
+
+     if gguf_info.name:
+         metadata["name"] = gguf_info.name
+
+     if gguf_info.tokenizer_model:
+         metadata["tokenizer_class"] = gguf_info.tokenizer_model
+
+     if gguf_info.vocab_size:
+         metadata["vocab_size"] = gguf_info.vocab_size
+
+     if gguf_info.context_length:
+         metadata["context_length"] = gguf_info.context_length
+
+     metadata["gguf_filename"] = gguf_info.filename
+
+     return metadata
+
+
+ def _map_supplementary_fields(gguf_info: GGUFModelInfo) -> Dict[str, Any]:
+     """Map optional descriptive fields from GGUF."""
+     metadata = {}
+
+     if gguf_info.description:
+         metadata["description"] = gguf_info.description
+
+     if gguf_info.author:
+         metadata["suppliedBy"] = gguf_info.author
+
+     if gguf_info.license:
+         metadata["gguf_license"] = gguf_info.license
+
+     return metadata
+
+
+ def _map_quantization(gguf_info: GGUFModelInfo) -> Dict[str, Any]:
+     """Map quantization metadata."""
+     quantization = {}
+
+     if gguf_info.quantization_version:
+         quantization["version"] = gguf_info.quantization_version
+     if gguf_info.file_type:
+         quantization["file_type"] = gguf_info.file_type
+
+     return {"quantization": quantization} if quantization else {}
+
+
+ def _map_hyperparameters(gguf_info: GGUFModelInfo) -> Dict[str, Any]:
+     """Map inference-shape hyperparameters."""
+     hyperparams = {}
+
+     if gguf_info.context_length:
+         hyperparams["context_length"] = gguf_info.context_length
+     if gguf_info.embedding_length:
+         hyperparams["embedding_length"] = gguf_info.embedding_length
+     if gguf_info.block_count:
+         hyperparams["block_count"] = gguf_info.block_count
+     if gguf_info.attention_head_count:
+         hyperparams["attention_head_count"] = gguf_info.attention_head_count
+     if gguf_info.attention_head_count_kv:
+         hyperparams["attention_head_count_kv"] = gguf_info.attention_head_count_kv
+     if gguf_info.feed_forward_length:
+         hyperparams["feed_forward_length"] = gguf_info.feed_forward_length
+     if gguf_info.rope_dimension_count:
+         hyperparams["rope_dimension_count"] = gguf_info.rope_dimension_count
+
+     return {"hyperparameter": hyperparams} if hyperparams else {}
+
+
+ def map_to_metadata(gguf_info: GGUFModelInfo) -> Dict[str, Any]:
+     metadata = _map_core_fields(gguf_info)
+     metadata |= _map_supplementary_fields(gguf_info)
+     metadata |= _map_quantization(gguf_info)
+     metadata |= _map_hyperparameters(gguf_info)
+     # TODO: add chat template field mapping
+     return metadata
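
As an editor's sketch (not part of this commit), the fixed-width header layout that `parse_gguf_metadata()` above expects can be demonstrated by hand-building a minimal GGUF header and reading it back with the same `struct` formats; the byte layout shown (magic uint32, version uint32, tensor count uint64, KV count uint64, then length-prefixed strings) follows the GGUF format description:

```python
# Sketch: build a tiny GGUF header and decode its fixed-width prefix.
# Illustrative only; not shipped in src/models/gguf_metadata.py.
import struct

def gguf_string(s: str) -> bytes:
    """Encode a GGUF string: uint64 length followed by UTF-8 bytes."""
    data = s.encode("utf-8")
    return struct.pack("<Q", len(data)) + data

# magic "GGUF", version 3, 0 tensors, 1 key/value pair
header = struct.pack("<IIQQ", 0x46554747, 3, 0, 1)
# one KV pair: key "general.architecture", value type 8 (STRING), value "llama"
header += gguf_string("general.architecture")
header += struct.pack("<I", 8)
header += gguf_string("llama")

magic, version, tensor_count, kv_count = struct.unpack_from("<IIQQ", header, 0)
print(hex(magic), version, tensor_count, kv_count)  # 0x46554747 3 0 1
```

Note the magic constant decodes to the ASCII bytes `b"GGUF"` when packed little-endian, which is why the parser compares the first uint32 rather than the raw bytes.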
src/models/model_file_extractors.py ADDED
@@ -0,0 +1,44 @@
+ import logging
+ from typing import Protocol, Dict, Any, List, runtime_checkable
+
+ from .gguf_metadata import fetch_gguf_metadata_from_repo, map_to_metadata
+
+ logger = logging.getLogger(__name__)
+
+
+ @runtime_checkable
+ class ModelFileExtractor(Protocol):
+     def can_extract(self, model_id: str) -> bool: ...
+     def extract_metadata(self, model_id: str) -> Dict[str, Any]: ...
+
+
+ class GGUFFileExtractor:
+
+     def can_extract(self, model_id: str) -> bool:
+         try:
+             from huggingface_hub import list_repo_files
+             return any(f.endswith(".gguf") for f in list_repo_files(model_id))
+         except Exception:
+             return False
+
+     def extract_metadata(self, model_id: str) -> Dict[str, Any]:
+         from huggingface_hub import list_repo_files
+
+         try:
+             files = list_repo_files(model_id)
+             gguf_files = [f for f in files if f.endswith(".gguf")]
+             if not gguf_files:
+                 return {}
+
+             model_info = fetch_gguf_metadata_from_repo(model_id, gguf_files[0])
+             if model_info is None:
+                 return {}
+
+             return map_to_metadata(model_info)
+         except Exception as e:
+             logger.warning(f"GGUF extraction failed for {model_id}: {e}")
+             return {}
+
+
+ def default_extractors() -> List[ModelFileExtractor]:
+     return [GGUFFileExtractor()]
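
A minimal sketch of how a caller might drive the `ModelFileExtractor` protocol above: try each extractor in order and take metadata from the first that reports it can handle the repo. The `FakeGGUFExtractor` and `run_extractors` names are hypothetical, introduced only for illustration:

```python
# Sketch: dispatching over ModelFileExtractor-style objects.
# FakeGGUFExtractor stands in for GGUFFileExtractor to avoid network calls.
from typing import Any, Dict, List

class FakeGGUFExtractor:
    def can_extract(self, model_id: str) -> bool:
        # Pretend only repos whose name ends in "-GGUF" contain GGUF files.
        return model_id.endswith("-GGUF")

    def extract_metadata(self, model_id: str) -> Dict[str, Any]:
        return {"model_type": "llama", "gguf_filename": "model.Q4_K_M.gguf"}

def run_extractors(model_id: str, extractors: List[Any]) -> Dict[str, Any]:
    """Return metadata from the first extractor that can handle the repo."""
    for extractor in extractors:
        if extractor.can_extract(model_id):
            return extractor.extract_metadata(model_id)
    return {}

meta = run_extractors("TheBloke/Llama-2-7B-GGUF", [FakeGGUFExtractor()])
print(meta.get("model_type"))  # llama
```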
src/models/registry.py ADDED
@@ -0,0 +1,535 @@
+ """
+ Field Registry Manager for AI SBOM Generator
+ Combines registry loading, configuration generation, and field detection functionality
+ """
+
+ import json
+ import os
+ import re
+ import logging
+ from typing import Dict, Any, Optional, List, Tuple
+ from functools import lru_cache
+
+ logger = logging.getLogger(__name__)
+
+ class FieldRegistryManager:
+     """
+     Field registry manager that handles:
+     1. Registry loading and validation
+     2. Configuration generation for utils.py compatibility
+     3. Field detection and JSONPath parsing
+     4. AIBOM completeness analysis
+     5. Scoring calculations
+     """
+
+     def __init__(self, registry_path: Optional[str] = None):
+         """
+         Initialize the field registry manager
+
+         Args:
+             registry_path: Path to the field registry JSON file. If None, auto-detects.
+         """
+         if registry_path is None:
+             # Auto-detect registry path relative to this file
+             current_dir = os.path.dirname(os.path.abspath(__file__))
+             registry_path = os.path.join(current_dir, "field_registry.json")
+
+         self.registry_path = registry_path
+         self.registry = self._load_registry()
+
+         # Cache for performance
+         self._field_classification = None
+         self._completeness_profiles = None
+         self._validation_messages = None
+         self._scoring_weights = None
+
+     def _load_registry(self) -> Dict[str, Any]:
+         """Load the complete field registry from JSON file"""
+         try:
+             with open(self.registry_path, 'r', encoding='utf-8') as f:
+                 registry = json.load(f)
+
+             # Validate basic structure
+             required_sections = ["fields"]
+             missing_sections = [section for section in required_sections if section not in registry]
+
+             if missing_sections:
+                 raise ValueError(f"Registry missing required sections: {missing_sections}")
+
+             # Validate fields structure
+             fields = registry.get('fields', {})
+             if not fields:
+                 raise ValueError("Registry 'fields' section is empty")
+
+             logger.info(f"✅ Field registry loaded: {len(fields)} fields from {self.registry_path}")
+             return registry
+
+         except FileNotFoundError:
+             raise FileNotFoundError(f"Field registry not found at: {self.registry_path}")
+         except json.JSONDecodeError as e:
+             raise ValueError(f"Invalid JSON in field registry: {e}")
+         except Exception as e:
+             raise Exception(f"Failed to load field registry: {e}")
+
+     # =============================================================================
+     # CONFIGURATION GENERATION
+     # =============================================================================
+
+     @lru_cache(maxsize=1)
+     def get_scoring_config(self) -> Dict[str, Any]:
+         """Get scoring configuration from registry"""
+         return self.registry.get('scoring_config', {})
+
+     @lru_cache(maxsize=1)
+     def get_aibom_config(self) -> Dict[str, Any]:
+         """Get AIBOM generation configuration from registry"""
+         return self.registry.get('aibom_config', {})
+
+     @lru_cache(maxsize=1)
+     def get_field_definitions(self) -> Dict[str, Any]:
+         """Get all field definitions from registry"""
+         return self.registry.get('fields', {})
+
+     def generate_field_classification(self) -> Dict[str, Any]:
+         """
+         Generate FIELD_CLASSIFICATION dictionary from registry
+         """
+         if self._field_classification is not None:
+             return self._field_classification
+
+         fields = self.get_field_definitions()
+         classification = {}
+
+         for field_name, field_config in fields.items():
+             jsonpath = field_config.get("jsonpath", "")
+             param_type = "AITX" if "properties[" in jsonpath else "CDX"
+             missing_msg = field_config.get("validation_message", {}).get("missing", "")
+             is_gguf = "GGUF" in missing_msg
+
+             classification[field_name] = {
+                 "tier": field_config.get("tier", "supplementary"),
+                 "weight": field_config.get("weight", 1),
+                 "category": field_config.get("category", "unknown"),
+                 "parameter_type": param_type,
+                 "reference_urls": field_config.get("reference_urls", {}),
+                 "jsonpath": jsonpath,
+                 "is_gguf": is_gguf
+             }
+
+         self._field_classification = classification
+         return classification
+
+     def generate_completeness_profiles(self) -> Dict[str, Any]:
+         """
+         Generate COMPLETENESS_PROFILES dictionary from registry
+         """
+         if self._completeness_profiles is not None:
+             return self._completeness_profiles
+
+         scoring_config = self.get_scoring_config()
+         profiles = scoring_config.get("scoring_profiles", {})
+
+         # Convert to utils.py format
+         completeness_profiles = {}
+         for profile_name, profile_config in profiles.items():
+             completeness_profiles[profile_name] = {
+                 "description": profile_config.get("description", f"{profile_name.title()} completeness profile"),
+                 "required_fields": profile_config.get("required_fields", []),
+                 "minimum_score": profile_config.get("minimum_score", 50)
+             }
+
+         # Fallback profiles if none defined in registry
+         if not completeness_profiles:
+             completeness_profiles = {
+                 "basic": {
+                     "description": "Minimal fields required for identification",
+                     "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name"],
+                     "minimum_score": 40
+                 },
+                 "standard": {
+                     "description": "Comprehensive fields for proper documentation",
+                     "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name",
+                                         "downloadLocation", "primaryPurpose", "suppliedBy"],
+                     "minimum_score": 70
+                 },
+                 "advanced": {
+                     "description": "Extensive documentation for maximum transparency",
+                     "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name",
+                                         "downloadLocation", "primaryPurpose", "suppliedBy",
+                                         "type", "purl", "description", "licenses", "hyperparameter", "technicalLimitations",
+                                         "energyConsumption", "safetyRiskAssessment", "typeOfModel"],
+                     "minimum_score": 85
+                 }
+             }
+
+         self._completeness_profiles = completeness_profiles
+         return completeness_profiles
+
+     def generate_validation_messages(self) -> Dict[str, Any]:
+         """
+         Generate VALIDATION_MESSAGES dictionary from registry
+         """
+         if self._validation_messages is not None:
+             return self._validation_messages
+
+         fields = self.get_field_definitions()
+         validation_messages = {}
+
+         for field_name, field_config in fields.items():
+             validation_msg = field_config.get("validation_message", {})
+             if validation_msg:
+                 validation_messages[field_name] = {
+                     "missing": validation_msg.get("missing", f"Missing field: {field_name}"),
+                     "recommendation": validation_msg.get("recommendation", f"Consider adding {field_name} field")
+                 }
+
+         self._validation_messages = validation_messages
+         return validation_messages
+
+     def get_configurable_scoring_weights(self) -> Dict[str, Any]:
+         """Get configurable scoring weights from registry"""
+         if self._scoring_weights is not None:
+             return self._scoring_weights
+
+         scoring_config = self.get_scoring_config()
+
+         weights = {
+             "tier_weights": scoring_config.get("tier_weights", {
+                 "critical": 3,
+                 "important": 2,
+                 "supplementary": 1
+             }),
+             "category_weights": scoring_config.get("category_weights", {
+                 "required_fields": 20,
+                 "metadata": 20,
+                 "component_basic": 20,
+                 "component_model_card": 30,
+                 "external_references": 10
+             }),
+             "algorithm_config": scoring_config.get("algorithm_config", {
+                 "type": "weighted_sum",
+                 "max_score": 100,
+                 "normalization": "category_based"
+             })
+         }
+
+         self._scoring_weights = weights
+         return weights
+
+     # =============================================================================
+     # FIELD DETECTION
+     # =============================================================================
+
+     def _get_nested_value(self, data: dict, path: str) -> Tuple[bool, Any]:
+         """
+         Get value from nested dictionary using dot notation and array filters
+         Supports paths like: $.components[0].name, $.metadata.properties[?(@.name=='primaryPurpose')].value
+         """
+         try:
+             # Remove leading $. if present
+             if path.startswith('$.'):
+                 path = path[2:]
+
+             # Handle special JSONPath-like syntax for property/array filtering
+             # Supports [?(@.field=='value')]
+             if '[?(@.' in path:
+                 return self._handle_property_array_path(data, path)
+
+             # Split path and traverse
+             parts = self._split_path(path)
+             current = data
+
+             for part in parts:
+                 if '[' in part and ']' in part:
+                     # Handle array access like components[0]
+                     key, index_str = part.split('[')
+                     index = int(index_str.rstrip(']'))
+
+                     if key and key in current:
+                         current = current[key]
+
+                     if isinstance(current, list) and 0 <= index < len(current):
+                         current = current[index]
+                     else:
+                         return False, None
+                 else:
+                     # Regular key access
+                     if isinstance(current, dict) and part in current:
+                         current = current[part]
+                     else:
+                         return False, None
+
+             # Check if value is meaningful
+             if current is not None and current != "" and current != []:
+                 return True, current
+
+             return False, None
267
+
268
+ except Exception as e:
269
+ logger.error(f"Error getting value at path {path}: {e}")
270
+ return False, None
271
+
272
+ def _handle_property_array_path(self, data: dict, path: str) -> Tuple[bool, Any]:
273
+ """
274
+ Handle generic JSONPath-like syntax for array filtering
275
+ Supports: base_path[?(@.key=='value')].optional_suffix
276
+ Example: metadata.component.externalReferences[?(@.type=='documentation')]
277
+ Example: metadata.properties[?(@.name=='primaryPurpose')].value
278
+ """
279
+ try:
280
+ # Regex to capture: Base Path, Filter Key, Filter Value, Optional Suffix
281
+ # matches: something[?(@.key=='val')] or something[?(@.key=='val')].sub
282
+ pattern = r'(.+)\[\?\(@\.(\w+)==\'([^\']+)\'\)\](.*)'
283
+ match = re.search(pattern, path)
284
+
285
+ if not match:
286
+ return False, None
287
+
288
+ base_path, filter_key, filter_val, suffix = match.groups()
289
+
290
+ # Get the list at base_path
291
+ base_found, base_list = self._get_nested_value(data, base_path)
292
+ if not base_found or not isinstance(base_list, list):
293
+ return False, None
294
+
295
+ # Find matching item
296
+ found_item = None
297
+ for item in base_list:
298
+ if isinstance(item, dict) and str(item.get(filter_key)) == filter_val:
299
+ found_item = item
300
+ break
301
+
302
+ if found_item is None:
303
+ return False, None
304
+
305
+ # If there's a suffix (e.g., .value), traverse it
306
+ if suffix:
307
+ if suffix.startswith('.'):
308
+ suffix = suffix[1:]
309
+ return self._get_nested_value(found_item, suffix)
310
+
311
+ # No suffix, return the item itself
312
+ return True, found_item
313
+
314
+ except Exception as e:
+ logger.error(f"Error handling property array path {path}: {e}")
+ return False, None
321
+
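The filter regex used above can be sanity-checked on a sample path in isolation; its four capture groups are the base path, the filter key, the filter value, and the optional suffix:

```python
import re

# Pattern as used by _handle_property_array_path: base[?(@.key=='value')].suffix
pattern = r"(.+)\[\?\(@\.(\w+)=='([^']+)'\)\](.*)"
m = re.search(pattern, "metadata.properties[?(@.name=='primaryPurpose')].value")
print(m.groups())
# ('metadata.properties', 'name', 'primaryPurpose', '.value')
```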
322
+ def _split_path(self, path: str) -> List[str]:
323
+ """Split path into parts, handling array notation"""
324
+ parts = []
325
+ current_part = ""
326
+ in_brackets = False
327
+
328
+ for char in path:
329
+ if char == '[':
330
+ in_brackets = True
331
+ current_part += char
332
+ elif char == ']':
333
+ in_brackets = False
334
+ current_part += char
335
+ elif char == '.' and not in_brackets:
336
+ if current_part:
337
+ parts.append(current_part)
338
+ current_part = ""
339
+ else:
340
+ current_part += char
341
+
342
+ if current_part:
343
+ parts.append(current_part)
344
+
345
+ return parts
346
+
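The bracket-aware splitting above can be verified standalone; a minimal sketch of the same logic (the `in_brackets` flag matters for filter expressions like `[?(@.name=='x')]`, which contain dots that must not split the path):

```python
def split_path(path: str) -> list:
    """Split a dot-separated path, keeping [index] tokens attached to their key."""
    parts, current, in_brackets = [], "", False
    for char in path:
        if char == '[':
            in_brackets = True
            current += char
        elif char == ']':
            in_brackets = False
            current += char
        elif char == '.' and not in_brackets:
            if current:
                parts.append(current)
            current = ""
        else:
            current += char
    if current:
        parts.append(current)
    return parts

print(split_path("metadata.properties[0].value"))
# ['metadata', 'properties[0]', 'value']
```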
347
+ def detect_field_presence(self, aibom: dict, field_path: str) -> Tuple[bool, Any]:
348
+ """
349
+ Detect if a field exists at the given path in the AIBOM
350
+ Returns: (field_exists, field_value)
351
+ """
352
+ return self._get_nested_value(aibom, field_path)
353
+
354
+ def analyze_aibom_completeness(self, aibom: dict) -> Dict[str, Any]:
355
+ """
356
+ Analyze AIBOM completeness against the enhanced field registry
357
+ Compatible with enhanced registry structure: registry['fields'][field_name]
358
+ """
359
+ results = {
360
+ 'category_scores': {},
361
+ 'total_score': 0,
362
+ 'field_details': {},
363
+ 'summary': {}
364
+ }
365
+
366
+ # Get fields from enhanced registry structure
367
+ fields = self.get_field_definitions()
368
+ if not fields:
369
+ logger.warning("❌ No fields found in registry")
370
+ return results
371
+
372
+ # Get scoring configuration
373
+ scoring_weights = self.get_configurable_scoring_weights()
374
+ category_weights = scoring_weights.get('category_weights', {})
375
+
376
+ # Group fields by category
377
+ categories = {}
378
+ for field_name, field_config in fields.items():
379
+ category = field_config.get('category', 'unknown')
380
+ if category not in categories:
381
+ categories[category] = []
382
+ categories[category].append((field_name, field_config))
383
+
384
+ logger.info(f"🔍 Analyzing {len(fields)} fields across {len(categories)} categories")
385
+
386
+ total_weighted_score = 0
387
+
388
+ for category_name, category_fields in categories.items():
389
+ category_weight = category_weights.get(category_name, 20)
390
+
391
+ present_fields = 0
392
+ total_fields = len(category_fields)
393
+ field_details = {}
394
+
395
+ for field_name, field_config in category_fields:
396
+ field_path = field_config.get('jsonpath', '')
397
+ tier = field_config.get('tier', 'supplementary')
398
+ weight = field_config.get('weight', 1)
399
+
400
+ if not field_path:
401
+ field_details[field_name] = {
402
+ 'present': False,
403
+ 'value': None,
404
+ 'path': field_path,
405
+ 'tier': tier,
406
+ 'weight': weight,
407
+ 'error': 'No jsonpath defined'
408
+ }
409
+ continue
410
+
411
+ is_present, value = self.detect_field_presence(aibom, field_path)
412
+
413
+ field_details[field_name] = {
414
+ 'present': is_present,
415
+ 'value': value,
416
+ 'path': field_path,
417
+ 'tier': tier,
418
+ 'weight': weight
419
+ }
420
+
421
+ if is_present:
422
+ present_fields += 1
423
+
424
+ # Calculate category score
425
+ category_percentage = (present_fields / total_fields) * 100 if total_fields > 0 else 0
426
+ category_score = (category_percentage / 100) * category_weight
427
+
428
+ results['category_scores'][category_name] = category_score
429
+ results['field_details'][category_name] = field_details
430
+ results['summary'][category_name] = {
431
+ 'present': present_fields,
432
+ 'total': total_fields,
433
+ 'percentage': category_percentage,
434
+ 'weight': category_weight
435
+ }
436
+
437
+ total_weighted_score += category_score
438
+
439
+ results['total_score'] = total_weighted_score
440
+
441
+ return results
442
+
443
+ # =============================================================================
444
+ # UTILITY METHODS
445
+ # =============================================================================
446
+
447
+ def get_field_info(self, field_name: str) -> Optional[Dict[str, Any]]:
448
+ """Get complete information for a specific field"""
449
+ fields = self.get_field_definitions()
450
+ return fields.get(field_name)
451
+
452
+ def get_field_jsonpath(self, field_name: str) -> Optional[str]:
453
+ """Get JSONPath expression for a specific field"""
454
+ field_info = self.get_field_info(field_name)
455
+ return field_info.get("jsonpath") if field_info else None
456
+
457
+ def get_fields_by_category(self, category: str) -> List[str]:
458
+ """Get all field names in a specific category"""
459
+ fields = self.get_field_definitions()
460
+ return [
461
+ field_name for field_name, field_config in fields.items()
462
+ if field_config.get("category") == category
463
+ ]
464
+
465
+ def get_fields_by_tier(self, tier: str) -> List[str]:
466
+ """Get all field names in a specific tier"""
467
+ fields = self.get_field_definitions()
468
+ return [
469
+ field_name for field_name, field_config in fields.items()
470
+ if field_config.get("tier") == tier
471
+ ]
472
+
473
+ def validate_registry_integrity(self) -> Dict[str, Any]:
474
+ """Validate the integrity of the loaded registry"""
475
+ validation_results = {
476
+ "valid": True,
477
+ "errors": [],
478
+ "warnings": [],
479
+ "field_count": 0,
480
+ "category_distribution": {},
481
+ "tier_distribution": {}
482
+ }
483
+
484
+ try:
485
+ fields = self.get_field_definitions()
486
+ validation_results["field_count"] = len(fields)
487
+
488
+ # Check category and tier distribution
489
+ categories = {}
490
+ tiers = {}
491
+
492
+ for field_name, field_config in fields.items():
493
+ # Check required field properties
494
+ required_props = ["tier", "weight", "category", "jsonpath"]
495
+ missing_props = [prop for prop in required_props if prop not in field_config]
496
+
497
+ if missing_props:
498
+ validation_results["errors"].append(
499
+ f"Field '{field_name}' missing properties: {missing_props}"
500
+ )
501
+ validation_results["valid"] = False
502
+
503
+ # Count categories and tiers
504
+ category = field_config.get("category", "unknown")
505
+ tier = field_config.get("tier", "unknown")
506
+
507
+ categories[category] = categories.get(category, 0) + 1
508
+ tiers[tier] = tiers.get(tier, 0) + 1
509
+
510
+ validation_results["category_distribution"] = categories
511
+ validation_results["tier_distribution"] = tiers
512
+
513
+ # Check scoring configuration
514
+ scoring_config = self.get_scoring_config()
515
+ if not scoring_config.get("tier_weights"):
516
+ validation_results["warnings"].append("Missing tier_weights in scoring_config")
517
+
518
+ if not scoring_config.get("category_weights"):
519
+ validation_results["warnings"].append("Missing category_weights in scoring_config")
520
+
521
+ except Exception as e:
522
+ validation_results["valid"] = False
523
+ validation_results["errors"].append(f"Registry validation error: {e}")
524
+
525
+ return validation_results
526
+
527
+ # Global Instance
528
+ _registry_manager = None
529
+
530
+ def get_field_registry_manager() -> FieldRegistryManager:
531
+ """Get the global field registry manager instance (singleton pattern)"""
532
+ global _registry_manager
533
+ if _registry_manager is None:
534
+ _registry_manager = FieldRegistryManager()
535
+ return _registry_manager
src/models/schemas.py ADDED
@@ -0,0 +1,66 @@
1
+ from enum import Enum
2
+ from typing import Any, Dict, List, Optional
3
+ from datetime import datetime
4
+ from pydantic import BaseModel, Field
5
+
6
+ # --- Enums (from enhanced_extractor.py) ---
7
+ class DataSource(str, Enum):
8
+ """Enumeration of data sources for provenance tracking"""
9
+ HF_API = "huggingface_api"
10
+ MODEL_CARD = "model_card_yaml"
11
+ README_TEXT = "readme_text"
12
+ CONFIG_FILE = "config_file"
13
+ REPOSITORY_FILES = "repository_files"
14
+ EXTERNAL_REFERENCE = "external_reference"
15
+ INTELLIGENT_DEFAULT = "intelligent_default"
16
+ PLACEHOLDER = "placeholder"
17
+ REGISTRY_DRIVEN = "registry_driven"
18
+
19
+ class ConfidenceLevel(str, Enum):
20
+ """Confidence levels for extracted data"""
21
+ HIGH = "high" # Direct API data, official sources
22
+ MEDIUM = "medium" # Inferred from reliable patterns
23
+ LOW = "low" # Weak inference or pattern matching
24
+ NONE = "none" # Placeholder values
25
+
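Because these enums mix in `str`, members compare equal to their raw string values, which is what the confidence checks in `scoring.py` rely on. A quick standalone check:

```python
from enum import Enum

class ConfidenceLevel(str, Enum):
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    NONE = "none"

print(ConfidenceLevel.HIGH == "high")  # True
print(ConfidenceLevel.NONE.value)      # none
```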
26
+ # --- Internal Models ---
27
+ class ExtractionResult(BaseModel):
28
+ """Container for extraction results with full provenance"""
29
+ value: Any
30
+ source: DataSource
31
+ confidence: ConfidenceLevel
32
+ extraction_method: str
33
+ timestamp: str = Field(default_factory=lambda: datetime.utcnow().isoformat())
34
+ fallback_chain: List[str] = Field(default_factory=list)
35
+
36
+ def __str__(self):
37
+ return f"{self.value} (source: {self.source.value}, confidence: {self.confidence.value})"
38
+
39
+ # --- API Request Models ---
40
+ class GenerateRequest(BaseModel):
41
+ model_id: str
42
+ include_inference: bool = True
43
+ use_best_practices: bool = True
44
+ hf_token: Optional[str] = None
45
+
46
+ class BatchRequest(BaseModel):
47
+ model_ids: List[str]
48
+ include_inference: bool = True
49
+ use_best_practices: bool = True
50
+ hf_token: Optional[str] = None
51
+
52
+ # --- API Response Models ---
53
+ class AIBOMResponse(BaseModel):
54
+ aibom: Dict[str, Any]
55
+ model_id: str
56
+ generated_at: str
57
+ request_id: str
58
+ download_url: str
59
+ completeness_score: Optional[Dict[str, Any]] = None
60
+
61
+ class EnhancementReport(BaseModel):
62
+ ai_enhanced: bool = False
63
+ ai_model: Optional[str] = None
64
+ original_score: Dict[str, Any]
65
+ final_score: Dict[str, Any]
66
+ improvement: float = 0
src/models/scoring.py ADDED
@@ -0,0 +1,454 @@
1
+
2
+ import logging
3
+ import re
4
+ import os
5
+ import json
6
+ from typing import Dict, List, Optional, Any, Union
7
+ from enum import Enum
8
+ from .registry import get_field_registry_manager
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ # Validation severity levels
13
+ class ValidationSeverity(Enum):
14
+ ERROR = "error"
15
+ WARNING = "warning"
16
+ INFO = "info"
17
+
18
+ # Initialize registry manager
19
+ try:
20
+ REGISTRY_MANAGER = get_field_registry_manager()
21
+ FIELD_CLASSIFICATION = REGISTRY_MANAGER.generate_field_classification()
22
+ COMPLETENESS_PROFILES = REGISTRY_MANAGER.generate_completeness_profiles()
23
+ VALIDATION_MESSAGES = REGISTRY_MANAGER.generate_validation_messages()
24
+ SCORING_WEIGHTS = REGISTRY_MANAGER.get_configurable_scoring_weights()
25
+ logger.info(f"✅ Registry-driven configuration loaded: {len(FIELD_CLASSIFICATION)} fields")
26
+ except Exception as e:
27
+ logger.error(f"❌ Failed to load registry configuration: {e}")
28
+ # Fallback to empty defaults or handle gracefully
29
+ FIELD_CLASSIFICATION = {}
30
+ COMPLETENESS_PROFILES = {}
31
+ VALIDATION_MESSAGES = {}
32
+ SCORING_WEIGHTS = {}
33
+
34
+ # Load SPDX licenses
35
+ try:
36
+ schema_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "schemas", "spdx.schema.json")
37
+ with open(schema_path, "r", encoding="utf-8") as f:
38
+ _spdx_schema = json.load(f)
39
+ SPDX_LICENSES = set(_spdx_schema.get("enum", []))
40
+ logger.info(f"✅ SPDX licenses schema loaded: {len(SPDX_LICENSES)} licenses")
41
+ except Exception as e:
42
+ logger.error(f"❌ Failed to load SPDX schema: {e}")
43
+ SPDX_LICENSES = {"MIT", "Apache-2.0", "GPL-3.0-only", "GPL-2.0-only", "LGPL-3.0-only",
44
+ "BSD-3-Clause", "BSD-2-Clause", "CC-BY-4.0", "CC-BY-SA-4.0", "CC0-1.0",
45
+ "Unlicense", "NONE"}
46
+
47
+ # Build JSON Schema Registry
48
+ JSON_SCHEMA_REGISTRY = None
49
+ try:
50
+ from referencing import Registry, Resource
51
+ registry = Registry()
52
+ schemas_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "schemas")
53
+ if os.path.exists(schemas_dir):
54
+ for filename in os.listdir(schemas_dir):
55
+ if filename.endswith(".json"):
56
+ with open(os.path.join(schemas_dir, filename), "r", encoding="utf-8") as schema_file:
57
+ schema_data = json.load(schema_file)
58
+ resource = Resource.from_contents(schema_data)
59
+ schema_id = schema_data.get("$id", "")
60
+ if schema_id:
61
+ registry = registry.with_resource(uri=schema_id, resource=resource)
62
+ registry = registry.with_resource(uri=filename, resource=resource)
63
+ JSON_SCHEMA_REGISTRY = registry
64
+ logger.info("✅ JSON Schema Registry loaded for local ref resolution")
65
+ except Exception as e:
66
+ logger.error(f"❌ Failed to build JSON Schema Registry: {e}")
67
+
68
+ def validate_spdx(license_entry):
69
+ if isinstance(license_entry, list):
70
+ return all(lic in SPDX_LICENSES for lic in license_entry)
71
+ return license_entry in SPDX_LICENSES
72
+
73
+ def check_field_in_aibom(aibom: Dict[str, Any], field: str) -> bool:
74
+ """
75
+ Check if a field is present in the AIBOM (Legacy/Standard Layout check).
76
+ Optimized to use a flattened set if possible, but for individual check this is fine.
77
+ """
78
+ # Quick top-level check
79
+ if field in aibom:
80
+ return True
81
+
82
+ # Metadata Check
83
+ metadata = aibom.get("metadata", {})
84
+ if field in metadata:
85
+ return True
86
+
87
+ # Metadata Properties
88
+ if "properties" in metadata:
89
+ for prop in metadata["properties"]:
90
+ if prop.get("name") in {field, f"spdx:{field}"}:
91
+ return True
92
+
93
+ # Component Check (only first component as per original logic)
94
+ components = aibom.get("components", [])
95
+ if components:
96
+ component = components[0]
97
+ if field in component:
98
+ return True
99
+
100
+ # Component Properties
101
+ if "properties" in component:
102
+ for prop in component["properties"]:
103
+ if prop.get("name") in {field, f"spdx:{field}"}:
104
+ return True
105
+
106
+ # Model Card
107
+ model_card = component.get("modelCard", {})
108
+ if field in model_card:
109
+ return True
110
+
111
+ if "modelParameters" in model_card and field in model_card["modelParameters"]:
112
+ return True
113
+
114
+ # Considerations Mapping
115
+ if "considerations" in model_card:
116
+ considerations = model_card["considerations"]
117
+ field_mappings = {
118
+ "technicalLimitations": ["technicalLimitations", "limitations"],
119
+ "safetyRiskAssessment": ["ethicalConsiderations", "safetyRiskAssessment"],
120
+ "energyConsumption": ["environmentalConsiderations", "energyConsumption"]
121
+ }
122
+ if field in field_mappings:
123
+ if any(sec in considerations and considerations[sec] for sec in field_mappings[field]):
124
+ return True
125
+ if field in considerations:
126
+ return True
127
+
128
+ # External References Check
129
+ components = aibom.get("components", [])
130
+ if components:
131
+ ext_refs = components[0].get("externalReferences", [])
132
+ if field == "downloadLocation":
133
+ return any(ref.get("type") in ["distribution", "website"] and ref.get("url") for ref in ext_refs)
134
+ if field == "vcs":
135
+ return any(ref.get("type") == "vcs" and ref.get("url") for ref in ext_refs)
136
+ if field == "website":
137
+ return any(ref.get("type") == "website" and ref.get("url") for ref in ext_refs)
138
+ if field == "paper":
139
+ return any(ref.get("type") == "documentation" and ref.get("url") for ref in ext_refs)
140
+
141
+ return False
142
+
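A quick way to see the metadata-properties branch in action (a standalone mirror of just that branch, not the full function):

```python
def property_present(aibom: dict, field: str) -> bool:
    """Mirror of the metadata-properties check in check_field_in_aibom."""
    for prop in aibom.get("metadata", {}).get("properties", []):
        if prop.get("name") in {field, f"spdx:{field}"}:
            return True
    return False

aibom = {"metadata": {"properties": [{"name": "primaryPurpose", "value": "text-generation"}]}}
print(property_present(aibom, "primaryPurpose"))  # True
print(property_present(aibom, "license"))         # False
```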
143
+ def check_field_with_enhanced_results(aibom: Dict[str, Any], field: str, extraction_results: Optional[Dict[str, Any]] = None) -> bool:
144
+ """
145
+ Enhanced field detection using registry manager and extraction results.
146
+ """
147
+ try:
148
+ manager = get_field_registry_manager()
149
+
150
+ # 1. Registry-based dynamic detection
151
+ fields = manager.get_field_definitions()
152
+ if field in fields:
153
+ field_config = fields[field]
154
+ field_path = field_config.get('jsonpath', '')
155
+ if field_path:
156
+ is_present, value = manager.detect_field_presence(aibom, field_path)
157
+ if is_present:
158
+ return True
159
+
160
+ # 2. Extraction results check
161
+ if extraction_results and field in extraction_results:
162
+ extraction_result = extraction_results[field]
163
+ # Handle Pydantic model vs Dict vs Object
164
+ if hasattr(extraction_result, 'confidence'):
165
+ # Object/Model access
166
+ conf = extraction_result.confidence
167
+ # conf could be an Enum or string
168
+ val = conf.value if hasattr(conf, 'value') else conf
169
+ if val == 'none':
170
+ return False
171
+ return val in ['medium', 'high']
172
+ elif hasattr(extraction_result, 'value'):
173
+ val = extraction_result.value
174
+ return val not in ['NOASSERTION', 'NOT_FOUND', None, '']
175
+ else:
176
+ # Unstructured extraction entry: treat its presence in the dict as detection
177
+ return True
178
+
179
+ # 3. Fallback
180
+ return check_field_in_aibom(aibom, field)
181
+
182
+ except Exception as e:
183
+ logger.error(f"Error in enhanced field detection for {field}: {e}")
184
+ return check_field_in_aibom(aibom, field)
185
+
186
+ def determine_completeness_profile(aibom: Dict[str, Any], score: float) -> Dict[str, Any]:
187
+ satisfied_profiles = []
188
+
189
+ for profile_name, profile in COMPLETENESS_PROFILES.items():
190
+ all_required_present = all(check_field_in_aibom(aibom, field) for field in profile["required_fields"])
191
+ score_sufficient = score >= profile["minimum_score"]
192
+
193
+ if all_required_present and score_sufficient:
194
+ satisfied_profiles.append(profile_name)
195
+
196
+ if "advanced" in satisfied_profiles:
197
+ profile = COMPLETENESS_PROFILES.get("advanced", {})
198
+ return {"name": "Advanced", "description": profile.get("description", ""), "satisfied": True}
199
+ elif "standard" in satisfied_profiles:
200
+ profile = COMPLETENESS_PROFILES.get("standard", {})
201
+ return {"name": "Standard", "description": profile.get("description", ""), "satisfied": True}
202
+ elif "basic" in satisfied_profiles:
203
+ profile = COMPLETENESS_PROFILES.get("basic", {})
204
+ return {"name": "Basic", "description": profile.get("description", ""), "satisfied": True}
205
+ else:
206
+ return {"name": "incomplete", "description": "Does not satisfy any completeness profile", "satisfied": False}
207
+
208
+ def generate_field_recommendations(missing_fields: Dict[str, List[str]]) -> List[Dict[str, Any]]:
209
+ recommendations = []
210
+
211
+ for field in missing_fields.get("critical", []):
212
+ if field in VALIDATION_MESSAGES:
213
+ recommendations.append({
214
+ "priority": "high",
215
+ "field": field,
216
+ "message": VALIDATION_MESSAGES[field]["missing"],
217
+ "recommendation": VALIDATION_MESSAGES[field]["recommendation"]
218
+ })
219
+ else:
220
+ recommendations.append({
221
+ "priority": "high",
222
+ "field": field,
223
+ "message": f"Missing critical field: {field}",
224
+ "recommendation": f"Add {field} to improve documentation completeness"
225
+ })
226
+
227
+ for field in missing_fields.get("important", []):
228
+ if field in VALIDATION_MESSAGES:
229
+ recommendations.append({
230
+ "priority": "medium",
231
+ "field": field,
232
+ "message": VALIDATION_MESSAGES[field]["missing"],
233
+ "recommendation": VALIDATION_MESSAGES[field]["recommendation"]
234
+ })
235
+ else:
236
+ recommendations.append({
237
+ "priority": "medium",
238
+ "field": field,
239
+ "message": f"Missing field: {field}",
240
+ "recommendation": f"Consider adding {field}"
241
+ })
242
+
243
+ supplementary_count = 0
244
+ for field in missing_fields.get("supplementary", []):
245
+ if supplementary_count >= 5: break
246
+ recommendations.append({
247
+ "priority": "low",
248
+ "field": field,
249
+ "message": f"Missing supplementary field: {field}",
250
+ "recommendation": f"Consider adding {field}"
251
+ })
252
+ supplementary_count += 1
253
+
254
+ return recommendations
255
+
256
+
257
+ def calculate_completeness_score(aibom: Dict[str, Any], validate: bool = True, extraction_results: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
258
+ """
259
+ Calculate completeness score using registry-defined weights and rules.
260
+ """
261
+ # Max points (weights)
262
+ category_weights = SCORING_WEIGHTS.get("category_weights", {})
263
+ max_scores = {
264
+ "required_fields": category_weights.get("required_fields", 20),
265
+ "metadata": category_weights.get("metadata", 20),
266
+ "component_basic": category_weights.get("component_basic", 20),
267
+ "component_model_card": category_weights.get("component_model_card", 30),
268
+ "external_references": category_weights.get("external_references", 10)
269
+ }
270
+
271
+ missing_fields = {"critical": [], "important": [], "supplementary": []}
272
+ fields_by_category = {category: {"total": 0, "present": 0} for category in max_scores.keys()}
273
+ field_checklist = {}
274
+ field_types = {}
275
+ field_reference_urls = {}
276
+ category_fields_list = {category: [] for category in max_scores.keys()}
277
+
278
+ # Evaluate fields
279
+ for field, classification in FIELD_CLASSIFICATION.items():
280
+ tier = classification["tier"]
281
+ category = classification["category"]
282
+ is_gguf = classification.get("is_gguf", False)
283
+ jsonpath = classification.get("jsonpath", "")
284
+
285
+ # Ensure category exists in tracking, else fallback or skip?
286
+ # Ideally FIELD_CLASSIFICATION only contains known categories.
287
+ if category not in fields_by_category:
288
+ fields_by_category[category] = {"total": 0, "present": 0}
289
+ category_fields_list[category] = []
290
+
291
+ is_present = check_field_with_enhanced_results(aibom, field, extraction_results)
292
+
293
+ if not is_gguf or is_present:
294
+ fields_by_category[category]["total"] += 1
295
+
296
+ display_path = jsonpath.replace("$.components[0].", "")
297
+ if display_path.startswith("$."): display_path = display_path[2:]
298
+
299
+ tier_display = {"critical": "Critical", "important": "Important", "supplementary": "Supplementary"}.get(tier, "Unknown")
300
+
301
+ category_fields_list[category].append({
302
+ "name": field,
303
+ "tier": tier_display,
304
+ "path": display_path
305
+ })
306
+
307
+ if is_present:
308
+ fields_by_category[category]["present"] += 1
309
+ else:
310
+ if not is_gguf:
311
+ if tier in missing_fields:
312
+ missing_fields[tier].append(field)
313
+
314
+ importance_indicator = "★★★" if tier == "critical" else "★★" if tier == "important" else "★"
315
+ field_checklist[field] = f"{'✔' if is_present else '✘'} {importance_indicator}"
316
+ field_types[field] = classification.get("parameter_type", "CDX")
317
+ ref_urls = classification.get("reference_urls", {})
318
+ selected_url = ""
319
+ if isinstance(ref_urls, dict):
320
+ spec_version = aibom.get("specVersion", "1.6")
321
+ if spec_version == "1.7" and "cyclonedx_1.7" in ref_urls:
322
+ selected_url = ref_urls["cyclonedx_1.7"]
323
+ elif "cyclonedx_1.6" in ref_urls:
324
+ selected_url = ref_urls["cyclonedx_1.6"]
325
+ if spec_version == "1.7" and "cyclonedx.org/docs/1.6" in selected_url:
326
+ selected_url = selected_url.replace("1.6", "1.7")
327
+ elif "genai_aibom_taxonomy" in ref_urls:
328
+ selected_url = ref_urls["genai_aibom_taxonomy"]
329
+ elif "spdx_3.1" in ref_urls:
330
+ selected_url = ref_urls["spdx_3.1"]
331
+ elif isinstance(ref_urls, str):
332
+ selected_url = ref_urls
333
+
334
+ field_reference_urls[field] = selected_url
335
+ # Calculate category scores
336
+ category_details = {}
337
+ category_scores = {}
338
+ for category, counts in fields_by_category.items():
339
+ weight = max_scores.get(category, 0)
340
+ percentage = 0
341
+ if counts["total"] > 0:
342
+ percentage = (counts["present"] / counts["total"]) * 100
343
+ raw_score = (percentage / 100) * weight
344
+ category_scores[category] = round(raw_score, 1)
345
+ else:
346
+ category_scores[category] = 0.0
347
+
348
+ category_details[category] = {
349
+ "present_fields": counts["present"],
350
+ "total_fields": counts["total"],
351
+ "max_points": weight,
352
+ "percentage": round(percentage, 1)
353
+ }
354
+
355
+
356
+ subtotal_score = sum(category_scores.values())
357
+
358
+ # Penalties
359
+ missing_critical = len(missing_fields["critical"])
360
+ missing_important = len(missing_fields["important"])
361
+
362
+ penalty_factor = 1.0
363
+ penalty_reasons = []
364
+
365
+ if missing_critical > 3:
366
+ penalty_factor *= 0.8
367
+ penalty_reasons.append("Multiple critical fields missing")
368
+ elif missing_critical >= 2:
369
+ penalty_factor *= 0.9
370
+ penalty_reasons.append("Some critical fields missing")
371
+
372
+ if missing_important >= 5:
373
+ penalty_factor *= 0.95
374
+ penalty_reasons.append("Several important fields missing")
375
+
376
+ final_score = round(subtotal_score * penalty_factor, 1)
377
+ final_score = max(0.0, min(final_score, 100.0))
378
+
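The penalty math above is easiest to sanity-check in isolation; for example, two missing critical fields (factor 0.9) combined with five missing important ones (factor 0.95) turn a subtotal of 82.0 into 70.1:

```python
def apply_penalties(subtotal: float, missing_critical: int, missing_important: int) -> float:
    """Multiplicative penalties, mirroring calculate_completeness_score."""
    factor = 1.0
    if missing_critical > 3:
        factor *= 0.8
    elif missing_critical >= 2:
        factor *= 0.9
    if missing_important >= 5:
        factor *= 0.95
    return max(0.0, min(round(subtotal * factor, 1), 100.0))

print(apply_penalties(82.0, 2, 5))  # 70.1
```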
379
+ # Prepare result
380
+ result = {
381
+ "total_score": final_score,
382
+ "subtotal_score": subtotal_score,
383
+ "section_scores": category_scores,
384
+ "category_details": category_details,
385
+ "max_scores": max_scores,
386
+ "field_checklist": field_checklist,
387
+ "field_types": field_types,
388
+ "reference_urls": field_reference_urls,
389
+ "missing_fields": missing_fields,
390
+ "category_fields_list": category_fields_list,
391
+ "completeness_profile": determine_completeness_profile(aibom, final_score),
392
+ "penalty_applied": penalty_factor < 1.0,
393
+ "penalty_reason": " and ".join(penalty_reasons) if penalty_reasons else None,
394
+ "recommendations": generate_field_recommendations(missing_fields)
395
+ }
396
+
397
+ if validate:
398
+ validation_report = validate_aibom(aibom)
399
+ result["validation"] = validation_report
400
+
401
+ return result
402
+
403
+ def _validate_ai_requirements(aibom: Dict[str, Any]) -> List[Dict[str, Any]]:
404
+ # Concise subset of the AI-specific checks; extend with further rules as needed.
407
+ issues = []
408
+ if "bomFormat" in aibom and aibom["bomFormat"] != "CycloneDX":
409
+ issues.append({"severity": "error", "code": "INVALID_BOM_FORMAT", "message": "Must be CycloneDX", "path": "$.bomFormat"})
410
+ # ... (Add more crucial checks here as needed)
411
+ return issues
412
+
413
+ def validate_aibom(aibom: Dict[str, Any]) -> Dict[str, Any]:
414
+ """
415
+ Validate the AIBOM against the appropriate CycloneDX schema.
416
+ """
417
+ issues = []
418
+
419
+ # 1. Schema Validation (using local schemas)
420
+ try:
421
+ import jsonschema
424
+
425
+ spec_version = aibom.get("specVersion", "1.6")
426
+ schema_file = f"bom-{spec_version}.schema.json"
427
+ # Relative path from src/models/scoring.py -> src/schemas/
428
+ schema_path = os.path.join(os.path.dirname(__file__), '..', 'schemas', schema_file)
429
+
430
+ if os.path.exists(schema_path):
431
+ with open(schema_path, 'r', encoding="utf-8") as f:
432
+ schema = json.load(f)
433
+ if JSON_SCHEMA_REGISTRY is not None:
434
+ jsonschema.validate(instance=aibom, schema=schema, registry=JSON_SCHEMA_REGISTRY)
435
+ else:
436
+ jsonschema.validate(instance=aibom, schema=schema)
437
+ else:
438
+ # If schema missing, warn but don't fail hard
439
+ issues.append({"severity": "warning", "message": f"Schema file not found: {schema_file}, skipping strict validation."})
440
+
441
+ except jsonschema.ValidationError as e:
442
+ issues.append({"severity": "error", "message": e.message, "path": getattr(e, "json_path", "unknown")})
443
+ except Exception as e:
444
+ issues.append({"severity": "error", "message": f"Validation error: {str(e)}"})
445
+
446
+ # 2. Custom Business Logic Checks (AI Requirements)
447
+ custom_issues = _validate_ai_requirements(aibom)
448
+ issues.extend(custom_issues)
449
+
450
+ return {
451
+ "valid": not any(i["severity"] == "error" for i in issues),
452
+ "issues": issues,
453
+ "error_count": sum(1 for i in issues if i["severity"] == "error")
454
+ }
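The summary dict returned by `validate_aibom` can be exercised in isolation. `summarize_issues` below is a hypothetical stand-in for the return expression at the end of the function, a minimal sketch rather than the project's API: `valid` is True only when no issue carries severity `"error"`, and warnings never flip it.

```python
def summarize_issues(issues):
    # Mirror of validate_aibom's return shape: warnings are reported
    # but only "error" severity makes the report invalid.
    return {
        "valid": not any(i["severity"] == "error" for i in issues),
        "issues": issues,
        "error_count": sum(1 for i in issues if i["severity"] == "error"),
    }

report = summarize_issues([
    {"severity": "warning", "message": "Schema file not found"},
    {"severity": "error", "message": "Must be CycloneDX"},
])
```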
src/models/service.py ADDED
@@ -0,0 +1,721 @@
1
+
2
+ import json
3
+ import uuid
4
+ import datetime
5
+ import logging
6
+ import re
7
+ from typing import Dict, Optional, Any, List, Union
8
+ from urllib.parse import urlparse
9
+ from packageurl import PackageURL
10
+
11
+ from huggingface_hub import HfApi, ModelCard
12
+ from huggingface_hub.repocard_data import EvalResult
13
+
14
+ from .extractor import EnhancedExtractor
15
+ from .model_file_extractors import ModelFileExtractor, default_extractors
16
+ from .scoring import calculate_completeness_score
17
+ from .registry import get_field_registry_manager
18
+ from .schemas import AIBOMResponse, EnhancementReport
19
+ from ..utils.validation import validate_aibom, get_validation_summary
20
+ from ..utils.license_utils import normalize_license_id, get_license_url, is_valid_spdx_license_id
21
+ from ..config import AIBOM_GEN_VERSION, AIBOM_GEN_NAME
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ class AIBOMService:
26
+ """
27
+ Service layer for AI SBOM generation.
28
+ Orchestrates metadata extraction, AI SBOM structure creation, and scoring.
29
+ """
30
+
31
+ def __init__(
32
+ self,
33
+ hf_token: Optional[str] = None,
34
+ inference_model_url: Optional[str] = None,
35
+ use_inference: bool = True,
36
+ use_best_practices: bool = True,
37
+ model_file_extractors: Optional[List[ModelFileExtractor]] = None,
38
+ ):
39
+ self.hf_api = HfApi(token=hf_token)
40
+ self.inference_model_url = inference_model_url
41
+ self.use_inference = use_inference
42
+ self.use_best_practices = use_best_practices
43
+ self.enhancement_report = None
44
+ self.extraction_results = {}
45
+ self.model_file_extractors = (
46
+ model_file_extractors if model_file_extractors is not None
47
+ else default_extractors()
48
+ )
49
+
50
+ # Initialize registry manager
51
+ try:
52
+ self.registry_manager = get_field_registry_manager()
53
+ logger.info("✅ Registry manager initialized in service")
54
+ except Exception as e:
55
+ logger.warning(f"⚠️ Could not initialize registry manager: {e}")
56
+ self.registry_manager = None
57
+
58
+ def get_extraction_results(self):
59
+ """Return the enhanced extraction results from the last extraction"""
60
+ return self.extraction_results
61
+
62
+ def get_enhancement_report(self):
63
+ """Return the enhancement report from the last generation"""
64
+ return self.enhancement_report
65
+
66
+ def generate_aibom(
67
+ self,
68
+ model_id: str,
69
+ include_inference: Optional[bool] = None,
+ use_best_practices: Optional[bool] = None,
+ enable_summarization: bool = False,
+ spec_version: str = "1.6",
+ metadata_overrides: Optional[Dict[str, str]] = None,
+ ) -> Dict[str, Any]:
+ """
+ Generate an AIBOM for the specified Hugging Face model.
+ `include_inference` defaults to None so the service-level `use_inference`
+ setting applies unless the caller explicitly overrides it.
+ """
+ try:
+ model_id = self._normalise_model_id(model_id)
+ use_inference = include_inference if include_inference is not None else self.use_inference
81
+ use_best_practices = use_best_practices if use_best_practices is not None else self.use_best_practices
82
+
83
+ logger.info(f"Generating AIBOM for {model_id}")
84
+
85
+ # Fetch generic info
86
+ model_info = self._fetch_model_info(model_id)
87
+ model_card = self._fetch_model_card(model_id)
88
+
89
+ # 1. Extract Metadata
90
+ original_metadata = self._extract_metadata(model_id, model_info, model_card, enable_summarization)
91
+
92
+ # 2. Create Initial AIBOM
93
+ original_aibom = self._create_aibom_structure(model_id, original_metadata, spec_version)
94
+
95
+ # 3. Initial Score
96
+ original_score = calculate_completeness_score(
97
+ original_aibom,
98
+ validate=True,
99
+ extraction_results=self.extraction_results # Using results from _extract_metadata
100
+ )
101
+
102
+ # 4. AI Enhancement (Placeholder for now as in original)
103
+ final_metadata = original_metadata.copy()
104
+ ai_enhanced = False
105
+ ai_model_name = None
106
+
107
+ if use_inference and self.inference_model_url:
108
+ # Placeholder for AI enhancement logic
109
+ pass
110
+
111
+ # 5. Create Final AIBOM
112
+ aibom = self._create_aibom_structure(model_id, final_metadata, spec_version=spec_version, metadata_overrides=metadata_overrides)
113
+
114
+ # Validate Schema
115
+ is_valid, validation_errors = validate_aibom(aibom)
116
+ if not is_valid:
117
+ logger.warning(f"AIBOM schema validation failed with {len(validation_errors)} errors")
118
+
119
+ # 6. Final Score
120
+ final_score = calculate_completeness_score(
121
+ aibom,
122
+ validate=True,
123
+ extraction_results=self.extraction_results
124
+ )
125
+
126
+ # 7. Store Report
127
+ self.enhancement_report = {
128
+ "ai_enhanced": ai_enhanced,
129
+ "ai_model": ai_model_name,
130
+ "original_score": original_score,
131
+ "final_score": final_score,
132
+ "improvement": round(final_score["total_score"] - original_score["total_score"], 2) if ai_enhanced else 0,
133
+ "schema_validation": {
134
+ "valid": is_valid,
135
+ "error_count": len(validation_errors),
136
+ "errors": validation_errors[:10] if not is_valid else []
137
+ }
138
+ }
139
+
140
+ return aibom
141
+
142
+ except Exception as e:
143
+ logger.error(f"Error generating AIBOM: {e}", exc_info=True)
144
+ return self._create_minimal_aibom(model_id, spec_version)
145
+
146
+ def _extract_metadata(self, model_id: str, model_info: Dict[str, Any], model_card: Optional[ModelCard], enable_summarization: bool = False) -> Dict[str, Any]:
147
+ """Wrapper around EnhancedExtractor"""
148
+ extractor = EnhancedExtractor(self.hf_api, model_file_extractors=self.model_file_extractors)
149
+ # Ideally we reuse the registry manager
150
+ if self.registry_manager:
151
+ extractor.registry_manager = self.registry_manager
152
+ extractor.registry_fields = self.registry_manager.get_field_definitions()
153
+
154
+ metadata = extractor.extract_metadata(model_id, model_info, model_card, enable_summarization=enable_summarization)
155
+ self.extraction_results = extractor.extraction_results
156
+ return metadata
157
+
158
+ def _generate_purl(self, model_id: str, version: str, purl_type: str = "huggingface") -> str:
159
+ """Generate PURL using packageurl-python library
160
+
161
+ Args:
162
+ model_id: Model identifier (e.g., "owner/model" or "model")
163
+ version: Version string
164
+ purl_type: PURL type (default: "huggingface", also supports "generic")
165
+
166
+ Returns:
167
+ PURL string in format pkg:type/namespace/name@version
168
+ """
169
+ parts = model_id.split("/", 1)
170
+ namespace = parts[0] if len(parts) == 2 else None
171
+ name = parts[1] if len(parts) == 2 else parts[0]
172
+ purl = PackageURL(type=purl_type, namespace=namespace, name=name, version=version)
173
+ return purl.to_string()
174
+
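The PURL string `_generate_purl` builds through packageurl-python can be sketched without the dependency. `sketch_purl` below is a hypothetical illustration of the `pkg:type/namespace/name@version` shape only; the real `PackageURL` class also performs type-specific normalization and percent-encoding that this sketch omits.

```python
def sketch_purl(model_id, version, purl_type="huggingface"):
    # Split "owner/model" into namespace + name; a bare "model" has no namespace.
    parts = model_id.split("/", 1)
    namespace = parts[0] if len(parts) == 2 else None
    name = parts[1] if len(parts) == 2 else parts[0]
    if namespace:
        return f"pkg:{purl_type}/{namespace}/{name}@{version}"
    return f"pkg:{purl_type}/{name}@{version}"
```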
175
+ def _get_tool_purl(self) -> str:
176
+ """Get PURL for OWASP AIBOM Generator tool"""
177
+ purl = PackageURL(type="generic", namespace="owasp-genai", name=AIBOM_GEN_NAME, version=AIBOM_GEN_VERSION)
178
+ return purl.to_string()
179
+
180
+ def _get_tool_metadata(self) -> Dict[str, Any]:
181
+ """Generate the standardized tool metadata for the AIBOM Generator"""
182
+ return {
183
+ "components": [{
184
+ "bom-ref": self._get_tool_purl(),
185
+ "type": "application",
186
+ "name": AIBOM_GEN_NAME,
187
+ "version": AIBOM_GEN_VERSION,
188
+ "manufacturer": {"name": "OWASP GenAI Security Project"}
189
+ }]
190
+ }
191
+
192
+ def _create_minimal_aibom(self, model_id: str, spec_version: str = "1.6") -> Dict[str, Any]:
193
+ """Create a minimal valid AIBOM structure in case of errors"""
194
+ hf_purl = self._generate_purl(model_id, "1.0")
195
+ metadata_purl = self._generate_purl(model_id, "1.0", purl_type="generic")
196
+
197
+ return {
198
+ "bomFormat": "CycloneDX",
199
+ "specVersion": spec_version,
200
+ "serialNumber": f"urn:uuid:{str(uuid.uuid4())}",
201
+ "version": 1,
202
+ "metadata": {
203
+ "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec='seconds'),
204
+ "tools": self._get_tool_metadata(),
205
+ "component": {
206
+ "bom-ref": metadata_purl,
207
+ "type": "application",
208
+ "name": model_id.split("/")[-1],
209
+ "version": "1.0"
210
+ }
211
+ },
212
+ "components": [{
213
+ "bom-ref": hf_purl,
214
+ "type": "machine-learning-model",
215
+ "name": model_id.split("/")[-1],
216
+ "version": "1.0",
217
+ "purl": hf_purl
218
+ }]
219
+ }
220
+
221
+ def _fetch_with_backoff(self, fetch_func, *args, max_retries=3, initial_backoff=1.0, **kwargs):
222
+ import time
223
+ for attempt in range(max_retries):
224
+ try:
225
+ return fetch_func(*args, **kwargs)
226
+ except Exception as e:
227
+ # e.g., huggingface_hub.utils.HfHubHTTPError
228
+ error_msg = str(e)
229
+ if "401" in error_msg or "404" in error_msg: # Auth or not found don't retry
230
+ raise e
231
+ if attempt == max_retries - 1:
232
+ logger.warning(f"Final attempt failed for API call: {e}")
233
+ raise e
234
+
235
+ sleep_time = initial_backoff * (2 ** attempt)
236
+ logger.warning(f"API call failed: {e}. Retrying in {sleep_time} seconds...")
237
+ time.sleep(sleep_time)
238
+
239
+ def _fetch_model_info(self, model_id: str) -> Dict[str, Any]:
240
+ try:
241
+ return self._fetch_with_backoff(self.hf_api.model_info, model_id)
242
+ except Exception as e:
243
+ logger.warning(f"Error fetching model info for {model_id}: {e}")
244
+ return {}
245
+
246
+ def _fetch_model_card(self, model_id: str) -> Optional[ModelCard]:
247
+ try:
248
+ return self._fetch_with_backoff(ModelCard.load, model_id)
249
+ except Exception as e:
250
+ logger.warning(f"Error fetching model card for {model_id}: {e}")
251
+ return None
252
+
253
+ @staticmethod
254
+ def _normalise_model_id(raw_id: str) -> str:
255
+ if raw_id.startswith(("http://", "https://")):
256
+ path = urlparse(raw_id).path.lstrip("/")
257
+ parts = path.split("/")
258
+ if len(parts) >= 2:
259
+ return "/".join(parts[:2])
260
+ return path
261
+ return raw_id
262
+
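For illustration, `_normalise_model_id` lifted out as a standalone function: full Hub URLs are reduced to the `owner/model` form that `HfApi` expects, and plain ids pass through unchanged.

```python
from urllib.parse import urlparse

def normalise_model_id(raw_id: str) -> str:
    # URLs like https://huggingface.co/owner/model/tree/main -> "owner/model"
    if raw_id.startswith(("http://", "https://")):
        path = urlparse(raw_id).path.lstrip("/")
        parts = path.split("/")
        if len(parts) >= 2:
            return "/".join(parts[:2])
        return path
    return raw_id
```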
263
+ def _create_aibom_structure(self, model_id: str, metadata: Dict[str, Any], spec_version: str = "1.6",
264
+ metadata_overrides: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
265
+ full_commit = metadata.get("commit")
266
+ version = full_commit[:8] if full_commit else "1.0"
267
+
268
+ aibom = {
269
+ "bomFormat": "CycloneDX",
270
+ "specVersion": spec_version,
271
+ "serialNumber": f"urn:uuid:{str(uuid.uuid4())}",
272
+ "version": 1,
273
+ "metadata": self._create_metadata_section(model_id, metadata, overrides=metadata_overrides),
274
+ "components": [self._create_component_section(model_id, metadata)],
275
+ "dependencies": [
276
+ {
277
+ "ref": self._generate_purl(model_id, version, purl_type="generic"),
278
+ "dependsOn": [self._generate_purl(model_id, version)]
279
+ }
280
+ ]
281
+ }
282
+
283
+
284
+
285
+ return aibom
286
+
287
+ def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any], overrides: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
288
+ timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec='seconds')
289
+
290
+ # Defaults
291
+ default_timestamp = datetime.datetime.now().strftime("job-%Y-%m-%d-%H:%M:%S")
292
+ default_version = str(int(datetime.datetime.now().timestamp()))
293
+ default_mfr = "OWASP AIBOM Generator"
294
+
295
+ # Apply overrides or defaults
296
+ overrides = overrides or {}
297
+ comp_name = overrides.get("name") or default_timestamp
298
+ comp_version = overrides.get("version") or default_version
299
+ comp_mfr = overrides.get("manufacturer") or default_mfr
300
+
301
+ # Normalize for PURL (replace spaces with - or similar if needed, but minimal change is best)
302
+ purl_ns = comp_mfr.replace(" ", "-") # simplistic sanitization
303
+ purl_name = comp_name.replace(" ", "-")
304
+ purl = PackageURL(type="generic", namespace=purl_ns, name=purl_name, version=comp_version).to_string()
305
+
306
+ tools = {"tools": self._get_tool_metadata()}
307
+
308
+ authors = []
309
+ if "author" in metadata and metadata["author"]:
310
+ authors.append({"name": metadata["author"]})
311
+
312
+ component = {
313
+ "bom-ref": purl,
314
+ "type": "application",
315
+ "name": comp_name,
316
+ "description": f"Generating SBOM for {model_id}",
317
+ "version": comp_version,
318
+ "purl": purl,
319
+ "manufacturer": {"name": comp_mfr},
320
+ "supplier": {"name": comp_mfr}
321
+ }
322
+ if authors:
323
+ component["authors"] = authors
324
+
325
+ return {
326
+ "timestamp": timestamp,
327
+ **tools,
328
+ "component": component
329
+ }
330
+
331
+ def _create_component_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
332
+ parts = model_id.split("/")
333
+ group = parts[0] if len(parts) > 1 else ""
334
+ name = parts[1] if len(parts) > 1 else parts[0]
335
+ full_commit = metadata.get("commit")
336
+ version = full_commit[:8] if full_commit else "1.0"
337
+ purl = self._generate_purl(model_id, version)
338
+
339
+ component = {
340
+ "bom-ref": purl,
341
+ "type": "machine-learning-model",
342
+ "group": group,
343
+ "name": name,
344
+ "version": version,
345
+ "purl": purl,
346
+ "description": metadata.get("description", f"AI model {model_id}")
347
+ }
348
+
349
+ # 1. Licenses
350
+ licenses = self._process_licenses(metadata)
351
+ if licenses:
352
+ component["licenses"] = licenses
353
+
354
+ # 2. Authors, Manufacturer, Supplier
355
+ # Note: logic inferred from group and metadata
356
+ authors, manufacturer, supplier = self._process_authors_and_suppliers(metadata, group)
357
+ if authors:
358
+ component["authors"] = authors
359
+ if manufacturer:
360
+ component["manufacturer"] = manufacturer
361
+ if supplier:
362
+ component["supplier"] = supplier
363
+
364
+ # 3. Technical Properties
365
+ tech_props = self._process_technical_properties(metadata)
366
+ if tech_props:
367
+ component["properties"] = tech_props
368
+
369
+ # 4. External References
370
+ external_refs = self._process_external_references(model_id, metadata)
371
+ if external_refs:
372
+ component["externalReferences"] = external_refs
373
+
374
+ # 5. Model Card
375
+ component["modelCard"] = self._create_model_card_section(metadata)
376
+
377
+ # Defined order for better readability: bom-ref, type, group, name, version, purl, description, modelCard, manufacturer, supplier, authors
378
+ # We also need to preserve: licenses, properties, externalReferences (placing them logically)
379
+ ordered_keys = [
380
+ "bom-ref", "type", "group", "name", "version", "purl",
381
+ "description", "licenses", "modelCard",
382
+ "manufacturer", "supplier", "authors",
383
+ "properties", "externalReferences"
384
+ ]
385
+
386
+ ordered_component = {}
387
+ for key in ordered_keys:
388
+ if key in component:
389
+ ordered_component[key] = component[key]
390
+
391
+ # Ensure we didn't miss anything (though we shouldn't have extra keys usually)
392
+ for k, v in component.items():
393
+ if k not in ordered_component:
394
+ ordered_component[k] = v
395
+
396
+ return ordered_component
397
+
398
+ def _process_licenses(self, metadata: Dict[str, Any]) -> List[Dict[str, Any]]:
399
+ """Process and normalize license information."""
400
+ raw_license = metadata.get("licenses") or metadata.get("license")
401
+
402
+ # 1. No license provided -> Return empty list (no license in SBOM)
403
+ if not raw_license:
404
+ return []
405
+
406
+ # Handle list input
407
+ if isinstance(raw_license, list):
408
+ if len(raw_license) > 0:
409
+ raw_license = raw_license[0]
410
+ else:
411
+ return []
412
+
413
+ if not isinstance(raw_license, str) or not raw_license.strip():
414
+ return []
415
+
416
+ norm_license = normalize_license_id(raw_license)
417
+
418
+ # Skip NOASSERTION or 'other' explicitly
419
+ if norm_license == "NOASSERTION" or (norm_license and norm_license.lower() == "other"):
420
+ return []
421
+
422
+ if norm_license:
423
+ # 1. Strict SPDX validation
424
+ if not is_valid_spdx_license_id(norm_license):
425
+ lic_data = {"name": norm_license}
426
+ # Try to find a known URL (e.g. for Nvidia license)
427
+ known_url = get_license_url(norm_license, fallback=False)
428
+ if known_url:
429
+ lic_data["url"] = known_url
430
+ return [{"license": lic_data}]
431
+
432
+ # 2. Valid SPDX ID
433
+ return [{"license": {"id": norm_license}}]
434
+
435
+ # Fallback if normalization fails, use name unless generic
436
+ if raw_license.lower() not in ["other", "unknown", "noassertion"]:
437
+ return [{"license": {"name": raw_license}}]
438
+
439
+ return []
440
+
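The decision tree in `_process_licenses` can be sketched on its own. `license_entry` and the `is_valid_spdx` predicate below are hypothetical stand-ins for the real helpers imported from `license_utils`: placeholder values are omitted, a valid SPDX id goes in the `id` field, and anything else falls back to the free-text `name` field.

```python
def license_entry(norm_license, is_valid_spdx):
    # Placeholders produce no license entry rather than junk in the SBOM.
    if norm_license == "NOASSERTION" or norm_license.lower() == "other":
        return []
    if is_valid_spdx(norm_license):
        return [{"license": {"id": norm_license}}]   # SPDX "id" field
    return [{"license": {"name": norm_license}}]     # free-text "name" field

spdx_ids = {"Apache-2.0", "MIT"}
```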
441
+ def _process_authors_and_suppliers(self, metadata: Dict[str, Any], group: str) -> tuple:
442
+ """
443
+ Process authors, manufacturer, and supplier information.
444
+ Returns: (authors, manufacturer, supplier)
445
+ """
446
+ authors = []
447
+ raw_author = metadata.get("author", group)
448
+ if raw_author and raw_author != "unknown":
449
+ if isinstance(raw_author, str):
450
+ authors.append({"name": raw_author})
451
+ elif isinstance(raw_author, list):
452
+ for a in raw_author:
453
+ authors.append({"name": a})
454
+
455
+ manufacturer = None
456
+ supplier = None
457
+
458
+ # Manufacturer and Supplier
459
+ # Use the group (org name) as the manufacturer and supplier if available
460
+ # If 'suppliedBy' extracted from README, overwrite supplier
461
+ supplier_entity = None
462
+ if group:
463
+ supplier_entity = {
464
+ "name": group,
465
+ "url": [f"https://huggingface.co/{group}"]
466
+ }
467
+
468
+ if "suppliedBy" in metadata and metadata["suppliedBy"]:
469
+ # If we have explicit suppliedBy, use it for supplier
470
+ supplier_entity = {"name": metadata["suppliedBy"]}
471
+
472
+ if supplier_entity:
473
+ supplier = supplier_entity
474
+ # Manufacturer often implies the creator/fine-tuner.
475
+ # If we have a group, we assume they manufactured it too unless specified.
476
+ if group:
477
+ manufacturer = {
478
+ "name": group,
479
+ "url": [f"https://huggingface.co/{group}"]
480
+ }
481
+
482
+ return authors, manufacturer, supplier
483
+
484
+ def _process_technical_properties(self, metadata: Dict[str, Any]) -> List[Dict[str, Any]]:
485
+ tech_props = []
486
+ for field in ["model_type", "architectures", "library_name"]:
487
+ if field in metadata:
488
+ val = metadata[field]
489
+ if isinstance(val, list):
490
+ val = ", ".join(val)
491
+ tech_props.append({"name": field, "value": str(val)})
492
+ return tech_props
493
+
494
+ def _process_external_references(self, model_id: str, metadata: Dict[str, Any]) -> List[Dict[str, Any]]:
495
+ """Process external references including Hugging Face URLs and papers."""
496
+ # Start with generic website reference
497
+ generic_ref = {"type": "website", "url": f"https://huggingface.co/{model_id}"}
498
+ external_refs = [generic_ref]
499
+
500
+ if "external_references" in metadata and isinstance(metadata["external_references"], list):
501
+ for ref in metadata["external_references"]:
502
+ if isinstance(ref, dict) and "url" in ref:
503
+ rtype = ref.get("type", "website")
504
+ # Check if URL already exists in our list
505
+ existing_idx = next((i for i, r in enumerate(external_refs) if r["url"] == ref["url"]), -1)
506
+
507
+ new_ref = {"type": rtype, "url": ref["url"]}
+ if ref.get("comment"):
+ new_ref["comment"] = ref["comment"]
508
+
509
+ if existing_idx != -1:
510
+ # If existing is generic (no comment) and new one has comment, replace it
511
+ if not external_refs[existing_idx].get("comment") and new_ref.get("comment"):
512
+ external_refs[existing_idx] = new_ref
513
+ else:
514
+ external_refs.append(new_ref)
515
+
516
+ # Paper (ArXiv or other documentation)
517
+ if "paper" in metadata and metadata["paper"]:
518
+ papers = metadata["paper"]
519
+ if isinstance(papers, str):
520
+ papers = [papers]
521
+
522
+ for p in papers:
523
+ # Check for duplicates
524
+ if not any(r["url"] == p for r in external_refs):
525
+ # Try to infer if it's arxiv for comment
526
+ comment = "Research Paper"
527
+ if "arxiv.org" in p:
528
+ comment = "ArXiv Paper"
529
+
530
+ external_refs.append({
531
+ "type": "documentation",
532
+ "url": p,
533
+ "comment": comment
534
+ })
535
+
536
+ return external_refs
537
+
538
+ def _create_model_card_section(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
539
+ section = {}
540
+
541
+ # 1. Model Parameters
542
+ params = {}
543
+ # primaryPurpose -> task
544
+ if "primaryPurpose" in metadata:
545
+ params["task"] = metadata["primaryPurpose"]
546
+ elif "pipeline_tag" in metadata:
547
+ params["task"] = metadata["pipeline_tag"]
548
+
549
+ # typeOfModel -> modelArchitecture
550
+ if "typeOfModel" in metadata:
551
+ params["modelArchitecture"] = metadata["typeOfModel"]
552
+ else:
553
+ params["modelArchitecture"] = f"{metadata.get('name', 'Unknown')}Model"
554
+
555
+ # Datasets
556
+ if "datasets" in metadata:
557
+ ds_val = metadata["datasets"]
558
+ datasets = []
559
+ if isinstance(ds_val, list):
560
+ for d in ds_val:
561
+ if isinstance(d, str):
562
+ # CycloneDX 1.7 compliant componentData
563
+ datasets.append({
564
+ "type": "dataset",
565
+ "name": d,
566
+ "contents": {
567
+ "url": f"https://huggingface.co/datasets/{d}"
568
+ }
569
+ })
570
+ elif isinstance(d, dict) and "name" in d:
571
+ datasets.append({"type": "dataset", "name": d.get("name"), "url": d.get("url")})
572
+ elif isinstance(ds_val, str):
573
+ datasets.append({
574
+ "type": "dataset",
575
+ "name": ds_val,
576
+ "contents": {
577
+ "url": f"https://huggingface.co/datasets/{ds_val}"
578
+ }
579
+ })
580
+
581
+ if datasets:
582
+ params["datasets"] = datasets
583
+
584
+ # Inputs / Outputs (Inferred from task)
585
+ task = params.get("task")
586
+ if task:
587
+ inputs, outputs = self._infer_io_formats(task)
588
+ if inputs:
589
+ params["inputs"] = [{"format": i} for i in inputs]
590
+ if outputs:
591
+ params["outputs"] = [{"format": o} for o in outputs]
592
+
593
+ if params:
594
+ section["modelParameters"] = params
595
+
596
+ # 2. Quantitative Analysis
597
+ if "eval_results" in metadata:
598
+ metrics = []
599
+ raw_results = metadata["eval_results"]
600
+ if isinstance(raw_results, list):
601
+ for res in raw_results:
602
+ # Handle object or dict
603
+ if hasattr(res, "metric_type") and hasattr(res, "metric_value"):
604
+ metrics.append({"type": str(res.metric_type), "value": str(res.metric_value)})
605
+ elif isinstance(res, dict) and "metric_type" in res and "metric_value" in res:
606
+ metrics.append({"type": str(res["metric_type"]), "value": str(res["metric_value"])})
607
+
608
+ if metrics:
609
+ section["quantitativeAnalysis"] = {"performanceMetrics": metrics}
610
+
611
+ # 3. Considerations
612
+ considerations = {}
613
+ # intendedUse -> useCases
614
+ if "intendedUse" in metadata:
615
+ considerations["useCases"] = [metadata["intendedUse"]]
616
+ # technicalLimitations
617
+ if "technicalLimitations" in metadata:
618
+ considerations["technicalLimitations"] = [metadata["technicalLimitations"]]
619
+ # ethicalConsiderations
620
+ if "ethicalConsiderations" in metadata:
621
+ considerations["ethicalConsiderations"] = [{"name": "Ethical Considerations", "description": metadata["ethicalConsiderations"]}]
622
+
623
+ if considerations:
624
+ section["considerations"] = considerations
625
+
626
+ # 4. Properties (GGUF & Taxonomy + Leftovers)
627
+ props = []
628
+
629
+ taxonomy_modelcard_mapping = {
630
+ "hyperparameter": "hyperparameter",
631
+ "vocab_size": "vocabSize",
632
+ "tokenizer_class": "tokenizerClass",
633
+ "context_length": "contextLength",
634
+ "embedding_length": "embeddingLength",
635
+ "block_count": "blockCount",
636
+ "attention_head_count": "attentionHeadCount",
637
+ "attention_head_count_kv": "attentionHeadCountKV",
638
+ "feed_forward_length": "feedForwardLength",
639
+ "rope_dimension_count": "ropeDimensionCount",
640
+ "quantization_version": "quantizationVersion",
641
+ "quantization_file_type": "quantizationFileType",
642
+ "modelExplainability": "modelCardExplainability"
643
+ }
644
+
645
+ taxonomy_mapped_keys = list(taxonomy_modelcard_mapping.keys())
646
+
647
+ for p_key, p_name in taxonomy_modelcard_mapping.items():
648
+ if p_key in metadata:
649
+ val = metadata[p_key]
650
+ if p_key == "hyperparameter" and isinstance(val, dict):
651
+ props.append({"name": f"genai:aibom:modelcard:{p_name}", "value": json.dumps(val)})
652
+ elif val is not None:
653
+ props.append({"name": f"genai:aibom:modelcard:{p_name}", "value": str(val)})
654
+
655
+ # Quantization dict handling
656
+ if "quantization" in metadata and isinstance(metadata["quantization"], dict):
657
+ q_dict = metadata["quantization"]
658
+ if "version" in q_dict:
659
+ props.append({"name": "genai:aibom:modelcard:quantizationVersion", "value": str(q_dict["version"])})
660
+ if "file_type" in q_dict:
661
+ props.append({"name": "genai:aibom:modelcard:quantizationFileType", "value": str(q_dict["file_type"])})
662
+ taxonomy_mapped_keys.append("quantization")
663
+
664
+ # Basic Fields we've already mapped to structured homes
665
+ mapped_fields = [
666
+ "primaryPurpose", "typeOfModel", "suppliedBy", "intendedUse",
667
+ "technicalLimitations", "ethicalConsiderations", "datasets", "eval_results",
668
+ "pipeline_tag", "name", "author", "license", "description",
669
+ "commit", "bomFormat", "specVersion", "version", "licenses",
670
+ "external_references", "tags", "library_name", "paper", "downloadLocation",
671
+ "gguf_filename", "gguf_license", "model_type", "architectures"
672
+ ] + taxonomy_mapped_keys
673
+
674
+ for k, v in metadata.items():
675
+ if k not in mapped_fields and v is not None:
676
+ # Basic types only for properties
677
+ if isinstance(v, (str, int, float, bool)):
678
+ props.append({"name": k, "value": str(v)})
679
+ elif isinstance(v, list) and all(isinstance(x, (str, int, float, bool)) for x in v):
680
+ props.append({"name": k, "value": ", ".join(map(str, v))})
681
+
682
+ if props:
683
+ section["properties"] = props
684
+
685
+ return section
686
+
687
+ def _infer_io_formats(self, task: str) -> tuple:
688
+ """
689
+ Infer input and output formats based on the pipeline task.
690
+ Returns (inputs: list, outputs: list)
691
+ """
692
+ task = task.lower().strip()
693
+
694
+ # Text to Text
695
+ if task in ["text-generation", "text2text-generation", "summarization", "translation",
696
+ "conversational", "question-answering", "text-classification", "token-classification"]:
697
+ return (["string"], ["string"])
698
+
699
+ # Image to Text/Label
700
+ if task in ["image-classification", "object-detection", "image-segmentation"]:
701
+ return (["image"], ["string", "json"])
702
+
703
+ # Text to Image
704
+ if task in ["text-to-image"]:
705
+ return (["string"], ["image"])
706
+
707
+ # Audio
708
+ if task in ["automatic-speech-recognition", "audio-classification"]:
709
+ return (["audio"], ["string"])
710
+ if task in ["text-to-speech"]:
711
+ return (["string"], ["audio"])
712
+
713
+ # Multimodal
714
+ if task in ["visual-question-answering"]:
715
+ return (["image", "string"], ["string"])
716
+
717
+ # Tabular
718
+ if task in ["tabular-classification", "tabular-regression"]:
719
+ return (["csv", "json"], ["string", "number"])
720
+
721
+ return ([], [])
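The task-to-formats mapping in `_infer_io_formats` reduces to a lookup over normalized pipeline tags. Below is a standalone sketch covering a few representative tags from the method above (not the full table), with the same lowercase/strip normalization:

```python
def infer_io_formats(task: str) -> tuple:
    # Normalize the pipeline tag before matching.
    task = task.lower().strip()
    if task in ("text-generation", "summarization", "translation"):
        return (["string"], ["string"])
    if task == "text-to-image":
        return (["string"], ["image"])
    if task == "automatic-speech-recognition":
        return (["audio"], ["string"])
    # Unknown tasks yield empty input/output lists.
    return ([], [])
```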
src/schemas/bom-1.6.schema.json ADDED
The diff for this file is too large to render. See raw diff
 
src/schemas/bom-1.7.schema.json ADDED
The diff for this file is too large to render. See raw diff
 
src/schemas/spdx.schema.json ADDED
@@ -0,0 +1,786 @@
+ {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "$id": "http://cyclonedx.org/schema/spdx.schema.json",
+ "$comment": "v1.0-3.27.0",
+ "type": "string",
+ "enum": [
+ "0BSD",
+ "3D-Slicer-1.0",
+ "AAL",
+ "Abstyles",
+ "AdaCore-doc",
+ "Adobe-2006",
+ "Adobe-Display-PostScript",
+ "Adobe-Glyph",
+ "Adobe-Utopia",
+ "ADSL",
+ "AFL-1.1",
+ "AFL-1.2",
+ "AFL-2.0",
+ "AFL-2.1",
+ "AFL-3.0",
+ "Afmparse",
+ "AGPL-1.0",
+ "AGPL-1.0-only",
+ "AGPL-1.0-or-later",
+ "AGPL-3.0",
+ "AGPL-3.0-only",
+ "AGPL-3.0-or-later",
+ "Aladdin",
+ "AMD-newlib",
+ "AMDPLPA",
+ "AML",
+ "AML-glslang",
+ "AMPAS",
+ "ANTLR-PD",
+ "ANTLR-PD-fallback",
+ "any-OSI",
+ "any-OSI-perl-modules",
+ "Apache-1.0",
+ "Apache-1.1",
+ "Apache-2.0",
+ "APAFML",
+ "APL-1.0",
+ "App-s2p",
+ "APSL-1.0",
+ "APSL-1.1",
+ "APSL-1.2",
+ "APSL-2.0",
+ "Arphic-1999",
+ "Artistic-1.0",
+ "Artistic-1.0-cl8",
+ "Artistic-1.0-Perl",
+ "Artistic-2.0",
+ "Artistic-dist",
+ "Aspell-RU",
+ "ASWF-Digital-Assets-1.0",
+ "ASWF-Digital-Assets-1.1",
+ "Baekmuk",
+ "Bahyph",
+ "Barr",
+ "bcrypt-Solar-Designer",
+ "Beerware",
+ "Bitstream-Charter",
+ "Bitstream-Vera",
+ "BitTorrent-1.0",
+ "BitTorrent-1.1",
+ "blessing",
+ "BlueOak-1.0.0",
+ "Boehm-GC",
+ "Boehm-GC-without-fee",
+ "Borceux",
+ "Brian-Gladman-2-Clause",
+ "Brian-Gladman-3-Clause",
+ "BSD-1-Clause",
+ "BSD-2-Clause",
+ "BSD-2-Clause-Darwin",
+ "BSD-2-Clause-first-lines",
+ "BSD-2-Clause-FreeBSD",
+ "BSD-2-Clause-NetBSD",
+ "BSD-2-Clause-Patent",
+ "BSD-2-Clause-pkgconf-disclaimer",
+ "BSD-2-Clause-Views",
+ "BSD-3-Clause",
+ "BSD-3-Clause-acpica",
+ "BSD-3-Clause-Attribution",
+ "BSD-3-Clause-Clear",
+ "BSD-3-Clause-flex",
+ "BSD-3-Clause-HP",
+ "BSD-3-Clause-LBNL",
+ "BSD-3-Clause-Modification",
+ "BSD-3-Clause-No-Military-License",
+ "BSD-3-Clause-No-Nuclear-License",
+ "BSD-3-Clause-No-Nuclear-License-2014",
+ "BSD-3-Clause-No-Nuclear-Warranty",
+ "BSD-3-Clause-Open-MPI",
+ "BSD-3-Clause-Sun",
+ "BSD-4-Clause",
+ "BSD-4-Clause-Shortened",
+ "BSD-4-Clause-UC",
+ "BSD-4.3RENO",
+ "BSD-4.3TAHOE",
+ "BSD-Advertising-Acknowledgement",
+ "BSD-Attribution-HPND-disclaimer",
+ "BSD-Inferno-Nettverk",
+ "BSD-Protection",
+ "BSD-Source-beginning-file",
+ "BSD-Source-Code",
+ "BSD-Systemics",
+ "BSD-Systemics-W3Works",
+ "BSL-1.0",
+ "BUSL-1.1",
+ "bzip2-1.0.5",
+ "bzip2-1.0.6",
+ "C-UDA-1.0",
+ "CAL-1.0",
+ "CAL-1.0-Combined-Work-Exception",
+ "Caldera",
+ "Caldera-no-preamble",
+ "Catharon",
+ "CATOSL-1.1",
+ "CC-BY-1.0",
+ "CC-BY-2.0",
+ "CC-BY-2.5",
+ "CC-BY-2.5-AU",
+ "CC-BY-3.0",
+ "CC-BY-3.0-AT",
+ "CC-BY-3.0-AU",
+ "CC-BY-3.0-DE",
+ "CC-BY-3.0-IGO",
+ "CC-BY-3.0-NL",
+ "CC-BY-3.0-US",
+ "CC-BY-4.0",
+ "CC-BY-NC-1.0",
+ "CC-BY-NC-2.0",
+ "CC-BY-NC-2.5",
+ "CC-BY-NC-3.0",
+ "CC-BY-NC-3.0-DE",
+ "CC-BY-NC-4.0",
+ "CC-BY-NC-ND-1.0",
+ "CC-BY-NC-ND-2.0",
+ "CC-BY-NC-ND-2.5",
+ "CC-BY-NC-ND-3.0",
+ "CC-BY-NC-ND-3.0-DE",
+ "CC-BY-NC-ND-3.0-IGO",
+ "CC-BY-NC-ND-4.0",
+ "CC-BY-NC-SA-1.0",
+ "CC-BY-NC-SA-2.0",
+ "CC-BY-NC-SA-2.0-DE",
+ "CC-BY-NC-SA-2.0-FR",
+ "CC-BY-NC-SA-2.0-UK",
+ "CC-BY-NC-SA-2.5",
+ "CC-BY-NC-SA-3.0",
+ "CC-BY-NC-SA-3.0-DE",
+ "CC-BY-NC-SA-3.0-IGO",
+ "CC-BY-NC-SA-4.0",
+ "CC-BY-ND-1.0",
+ "CC-BY-ND-2.0",
+ "CC-BY-ND-2.5",
+ "CC-BY-ND-3.0",
+ "CC-BY-ND-3.0-DE",
+ "CC-BY-ND-4.0",
+ "CC-BY-SA-1.0",
+ "CC-BY-SA-2.0",
+ "CC-BY-SA-2.0-UK",
+ "CC-BY-SA-2.1-JP",
+ "CC-BY-SA-2.5",
+ "CC-BY-SA-3.0",
+ "CC-BY-SA-3.0-AT",
+ "CC-BY-SA-3.0-DE",
+ "CC-BY-SA-3.0-IGO",
+ "CC-BY-SA-4.0",
+ "CC-PDDC",
+ "CC-PDM-1.0",
+ "CC-SA-1.0",
+ "CC0-1.0",
+ "CDDL-1.0",
+ "CDDL-1.1",
+ "CDL-1.0",
+ "CDLA-Permissive-1.0",
+ "CDLA-Permissive-2.0",
+ "CDLA-Sharing-1.0",
+ "CECILL-1.0",
+ "CECILL-1.1",
+ "CECILL-2.0",
+ "CECILL-2.1",
+ "CECILL-B",
+ "CECILL-C",
+ "CERN-OHL-1.1",
+ "CERN-OHL-1.2",
+ "CERN-OHL-P-2.0",
+ "CERN-OHL-S-2.0",
+ "CERN-OHL-W-2.0",
+ "CFITSIO",
+ "check-cvs",
+ "checkmk",
+ "ClArtistic",
+ "Clips",
+ "CMU-Mach",
+ "CMU-Mach-nodoc",
+ "CNRI-Jython",
+ "CNRI-Python",
+ "CNRI-Python-GPL-Compatible",
+ "COIL-1.0",
+ "Community-Spec-1.0",
+ "Condor-1.1",
+ "copyleft-next-0.3.0",
+ "copyleft-next-0.3.1",
+ "Cornell-Lossless-JPEG",
+ "CPAL-1.0",
+ "CPL-1.0",
+ "CPOL-1.02",
+ "Cronyx",
+ "Crossword",
+ "CryptoSwift",
+ "CrystalStacker",
+ "CUA-OPL-1.0",
+ "Cube",
+ "curl",
+ "cve-tou",
+ "D-FSL-1.0",
+ "DEC-3-Clause",
+ "diffmark",
+ "DL-DE-BY-2.0",
+ "DL-DE-ZERO-2.0",
+ "DOC",
+ "DocBook-DTD",
+ "DocBook-Schema",
+ "DocBook-Stylesheet",
+ "DocBook-XML",
+ "Dotseqn",
+ "DRL-1.0",
+ "DRL-1.1",
+ "DSDP",
+ "dtoa",
+ "dvipdfm",
+ "ECL-1.0",
+ "ECL-2.0",
+ "eCos-2.0",
+ "EFL-1.0",
+ "EFL-2.0",
+ "eGenix",
+ "Elastic-2.0",
+ "Entessa",
+ "EPICS",
+ "EPL-1.0",
+ "EPL-2.0",
+ "ErlPL-1.1",
+ "etalab-2.0",
+ "EUDatagrid",
+ "EUPL-1.0",
+ "EUPL-1.1",
+ "EUPL-1.2",
+ "Eurosym",
+ "Fair",
+ "FBM",
+ "FDK-AAC",
+ "Ferguson-Twofish",
+ "Frameworx-1.0",
+ "FreeBSD-DOC",
+ "FreeImage",
+ "FSFAP",
+ "FSFAP-no-warranty-disclaimer",
+ "FSFUL",
+ "FSFULLR",
+ "FSFULLRSD",
+ "FSFULLRWD",
+ "FSL-1.1-ALv2",
+ "FSL-1.1-MIT",
+ "FTL",
+ "Furuseth",
+ "fwlw",
+ "Game-Programming-Gems",
+ "GCR-docs",
+ "GD",
+ "generic-xts",
+ "GFDL-1.1",
+ "GFDL-1.1-invariants-only",
+ "GFDL-1.1-invariants-or-later",
+ "GFDL-1.1-no-invariants-only",
+ "GFDL-1.1-no-invariants-or-later",
+ "GFDL-1.1-only",
+ "GFDL-1.1-or-later",
+ "GFDL-1.2",
+ "GFDL-1.2-invariants-only",
+ "GFDL-1.2-invariants-or-later",
+ "GFDL-1.2-no-invariants-only",
+ "GFDL-1.2-no-invariants-or-later",
+ "GFDL-1.2-only",
+ "GFDL-1.2-or-later",
+ "GFDL-1.3",
+ "GFDL-1.3-invariants-only",
+ "GFDL-1.3-invariants-or-later",
+ "GFDL-1.3-no-invariants-only",
+ "GFDL-1.3-no-invariants-or-later",
+ "GFDL-1.3-only",
+ "GFDL-1.3-or-later",
+ "Giftware",
+ "GL2PS",
+ "Glide",
+ "Glulxe",
+ "GLWTPL",
+ "gnuplot",
+ "GPL-1.0",
+ "GPL-1.0+",
+ "GPL-1.0-only",
+ "GPL-1.0-or-later",
+ "GPL-2.0",
+ "GPL-2.0+",
+ "GPL-2.0-only",
+ "GPL-2.0-or-later",
+ "GPL-2.0-with-autoconf-exception",
+ "GPL-2.0-with-bison-exception",
+ "GPL-2.0-with-classpath-exception",
+ "GPL-2.0-with-font-exception",
+ "GPL-2.0-with-GCC-exception",
+ "GPL-3.0",
+ "GPL-3.0+",
+ "GPL-3.0-only",
+ "GPL-3.0-or-later",
+ "GPL-3.0-with-autoconf-exception",
+ "GPL-3.0-with-GCC-exception",
+ "Graphics-Gems",
+ "gSOAP-1.3b",
+ "gtkbook",
+ "Gutmann",
+ "HaskellReport",
+ "HDF5",
+ "hdparm",
+ "HIDAPI",
+ "Hippocratic-2.1",
+ "HP-1986",
+ "HP-1989",
+ "HPND",
+ "HPND-DEC",
+ "HPND-doc",
+ "HPND-doc-sell",
+ "HPND-export-US",
+ "HPND-export-US-acknowledgement",
+ "HPND-export-US-modify",
+ "HPND-export2-US",
+ "HPND-Fenneberg-Livingston",
+ "HPND-INRIA-IMAG",
+ "HPND-Intel",
+ "HPND-Kevlin-Henney",
+ "HPND-Markus-Kuhn",
+ "HPND-merchantability-variant",
+ "HPND-MIT-disclaimer",
+ "HPND-Netrek",
+ "HPND-Pbmplus",
+ "HPND-sell-MIT-disclaimer-xserver",
+ "HPND-sell-regexpr",
+ "HPND-sell-variant",
+ "HPND-sell-variant-MIT-disclaimer",
+ "HPND-sell-variant-MIT-disclaimer-rev",
+ "HPND-UC",
+ "HPND-UC-export-US",
+ "HTMLTIDY",
+ "IBM-pibs",
+ "ICU",
+ "IEC-Code-Components-EULA",
+ "IJG",
+ "IJG-short",
+ "ImageMagick",
+ "iMatix",
+ "Imlib2",
+ "Info-ZIP",
+ "Inner-Net-2.0",
+ "InnoSetup",
+ "Intel",
+ "Intel-ACPI",
+ "Interbase-1.0",
+ "IPA",
+ "IPL-1.0",
+ "ISC",
+ "ISC-Veillard",
+ "Jam",
+ "JasPer-2.0",
+ "jove",
+ "JPL-image",
+ "JPNIC",
+ "JSON",
+ "Kastrup",
+ "Kazlib",
+ "Knuth-CTAN",
+ "LAL-1.2",
+ "LAL-1.3",
+ "Latex2e",
+ "Latex2e-translated-notice",
+ "Leptonica",
+ "LGPL-2.0",
+ "LGPL-2.0+",
+ "LGPL-2.0-only",
+ "LGPL-2.0-or-later",
+ "LGPL-2.1",
+ "LGPL-2.1+",
+ "LGPL-2.1-only",
+ "LGPL-2.1-or-later",
+ "LGPL-3.0",
+ "LGPL-3.0+",
+ "LGPL-3.0-only",
+ "LGPL-3.0-or-later",
+ "LGPLLR",
+ "Libpng",
+ "libpng-1.6.35",
+ "libpng-2.0",
+ "libselinux-1.0",
+ "libtiff",
+ "libutil-David-Nugent",
+ "LiLiQ-P-1.1",
+ "LiLiQ-R-1.1",
+ "LiLiQ-Rplus-1.1",
+ "Linux-man-pages-1-para",
+ "Linux-man-pages-copyleft",
+ "Linux-man-pages-copyleft-2-para",
+ "Linux-man-pages-copyleft-var",
+ "Linux-OpenIB",
+ "LOOP",
+ "LPD-document",
+ "LPL-1.0",
+ "LPL-1.02",
+ "LPPL-1.0",
+ "LPPL-1.1",
+ "LPPL-1.2",
+ "LPPL-1.3a",
+ "LPPL-1.3c",
+ "lsof",
+ "Lucida-Bitmap-Fonts",
+ "LZMA-SDK-9.11-to-9.20",
+ "LZMA-SDK-9.22",
+ "Mackerras-3-Clause",
+ "Mackerras-3-Clause-acknowledgment",
+ "magaz",
+ "mailprio",
+ "MakeIndex",
+ "man2html",
+ "Martin-Birgmeier",
+ "McPhee-slideshow",
+ "metamail",
+ "Minpack",
+ "MIPS",
+ "MirOS",
+ "MIT",
+ "MIT-0",
+ "MIT-advertising",
+ "MIT-Click",
+ "MIT-CMU",
+ "MIT-enna",
+ "MIT-feh",
+ "MIT-Festival",
+ "MIT-Khronos-old",
+ "MIT-Modern-Variant",
+ "MIT-open-group",
+ "MIT-testregex",
+ "MIT-Wu",
+ "MITNFA",
+ "MMIXware",
+ "Motosoto",
+ "MPEG-SSG",
+ "mpi-permissive",
+ "mpich2",
+ "MPL-1.0",
+ "MPL-1.1",
+ "MPL-2.0",
+ "MPL-2.0-no-copyleft-exception",
+ "mplus",
+ "MS-LPL",
+ "MS-PL",
+ "MS-RL",
+ "MTLL",
+ "MulanPSL-1.0",
+ "MulanPSL-2.0",
+ "Multics",
+ "Mup",
+ "NAIST-2003",
+ "NASA-1.3",
+ "Naumen",
+ "NBPL-1.0",
+ "NCBI-PD",
+ "NCGL-UK-2.0",
+ "NCL",
+ "NCSA",
+ "Net-SNMP",
+ "NetCDF",
+ "Newsletr",
+ "NGPL",
+ "ngrep",
+ "NICTA-1.0",
+ "NIST-PD",
+ "NIST-PD-fallback",
+ "NIST-Software",
+ "NLOD-1.0",
+ "NLOD-2.0",
+ "NLPL",
+ "Nokia",
+ "NOSL",
+ "Noweb",
+ "NPL-1.0",
+ "NPL-1.1",
+ "NPOSL-3.0",
+ "NRL",
+ "NTIA-PD",
+ "NTP",
+ "NTP-0",
+ "Nunit",
+ "O-UDA-1.0",
+ "OAR",
+ "OCCT-PL",
+ "OCLC-2.0",
+ "ODbL-1.0",
+ "ODC-By-1.0",
+ "OFFIS",
+ "OFL-1.0",
+ "OFL-1.0-no-RFN",
+ "OFL-1.0-RFN",
+ "OFL-1.1",
+ "OFL-1.1-no-RFN",
+ "OFL-1.1-RFN",
+ "OGC-1.0",
+ "OGDL-Taiwan-1.0",
+ "OGL-Canada-2.0",
+ "OGL-UK-1.0",
+ "OGL-UK-2.0",
+ "OGL-UK-3.0",
+ "OGTSL",
+ "OLDAP-1.1",
+ "OLDAP-1.2",
+ "OLDAP-1.3",
+ "OLDAP-1.4",
+ "OLDAP-2.0",
+ "OLDAP-2.0.1",
+ "OLDAP-2.1",
+ "OLDAP-2.2",
+ "OLDAP-2.2.1",
+ "OLDAP-2.2.2",
+ "OLDAP-2.3",
+ "OLDAP-2.4",
+ "OLDAP-2.5",
+ "OLDAP-2.6",
+ "OLDAP-2.7",
+ "OLDAP-2.8",
+ "OLFL-1.3",
+ "OML",
+ "OpenPBS-2.3",
+ "OpenSSL",
+ "OpenSSL-standalone",
+ "OpenVision",
+ "OPL-1.0",
+ "OPL-UK-3.0",
+ "OPUBL-1.0",
+ "OSET-PL-2.1",
+ "OSL-1.0",
+ "OSL-1.1",
+ "OSL-2.0",
+ "OSL-2.1",
+ "OSL-3.0",
+ "PADL",
+ "Parity-6.0.0",
+ "Parity-7.0.0",
+ "PDDL-1.0",
+ "PHP-3.0",
+ "PHP-3.01",
+ "Pixar",
+ "pkgconf",
+ "Plexus",
+ "pnmstitch",
+ "PolyForm-Noncommercial-1.0.0",
+ "PolyForm-Small-Business-1.0.0",
+ "PostgreSQL",
+ "PPL",
+ "PSF-2.0",
+ "psfrag",
+ "psutils",
+ "Python-2.0",
+ "Python-2.0.1",
+ "python-ldap",
+ "Qhull",
+ "QPL-1.0",
+ "QPL-1.0-INRIA-2004",
+ "radvd",
+ "Rdisc",
+ "RHeCos-1.1",
+ "RPL-1.1",
+ "RPL-1.5",
+ "RPSL-1.0",
+ "RSA-MD",
+ "RSCPL",
+ "Ruby",
+ "Ruby-pty",
+ "SAX-PD",
+ "SAX-PD-2.0",
+ "Saxpath",
+ "SCEA",
+ "SchemeReport",
+ "Sendmail",
+ "Sendmail-8.23",
+ "Sendmail-Open-Source-1.1",
+ "SGI-B-1.0",
+ "SGI-B-1.1",
+ "SGI-B-2.0",
+ "SGI-OpenGL",
+ "SGP4",
+ "SHL-0.5",
+ "SHL-0.51",
+ "SimPL-2.0",
+ "SISSL",
+ "SISSL-1.2",
+ "SL",
+ "Sleepycat",
+ "SMAIL-GPL",
+ "SMLNJ",
+ "SMPPL",
+ "SNIA",
+ "snprintf",
+ "SOFA",
+ "softSurfer",
+ "Soundex",
+ "Spencer-86",
+ "Spencer-94",
+ "Spencer-99",
+ "SPL-1.0",
+ "ssh-keyscan",
+ "SSH-OpenSSH",
+ "SSH-short",
+ "SSLeay-standalone",
+ "SSPL-1.0",
+ "StandardML-NJ",
+ "SugarCRM-1.1.3",
+ "SUL-1.0",
+ "Sun-PPP",
+ "Sun-PPP-2000",
+ "SunPro",
+ "SWL",
+ "swrule",
+ "Symlinks",
+ "TAPR-OHL-1.0",
+ "TCL",
+ "TCP-wrappers",
+ "TermReadKey",
+ "TGPPL-1.0",
+ "ThirdEye",
+ "threeparttable",
+ "TMate",
+ "TORQUE-1.1",
+ "TOSL",
+ "TPDL",
+ "TPL-1.0",
+ "TrustedQSL",
+ "TTWL",
+ "TTYP0",
+ "TU-Berlin-1.0",
+ "TU-Berlin-2.0",
+ "Ubuntu-font-1.0",
+ "UCAR",
+ "UCL-1.0",
+ "ulem",
+ "UMich-Merit",
+ "Unicode-3.0",
+ "Unicode-DFS-2015",
+ "Unicode-DFS-2016",
+ "Unicode-TOU",
+ "UnixCrypt",
+ "Unlicense",
+ "Unlicense-libtelnet",
+ "Unlicense-libwhirlpool",
+ "UPL-1.0",
+ "URT-RLE",
+ "Vim",
+ "VOSTROM",
+ "VSL-1.0",
+ "W3C",
+ "W3C-19980720",
+ "W3C-20150513",
+ "w3m",
+ "Watcom-1.0",
+ "Widget-Workshop",
+ "Wsuipa",
+ "WTFPL",
+ "wwl",
+ "wxWindows",
+ "X11",
+ "X11-distribute-modifications-variant",
+ "X11-swapped",
+ "Xdebug-1.03",
+ "Xerox",
+ "Xfig",
+ "XFree86-1.1",
+ "xinetd",
+ "xkeyboard-config-Zinoviev",
+ "xlock",
+ "Xnet",
+ "xpp",
+ "XSkat",
+ "xzoom",
+ "YPL-1.0",
+ "YPL-1.1",
+ "Zed",
+ "Zeeff",
+ "Zend-2.0",
+ "Zimbra-1.3",
+ "Zimbra-1.4",
+ "Zlib",
+ "zlib-acknowledgement",
+ "ZPL-1.1",
+ "ZPL-2.0",
+ "ZPL-2.1",
+ "389-exception",
+ "Asterisk-exception",
+ "Asterisk-linking-protocols-exception",
+ "Autoconf-exception-2.0",
+ "Autoconf-exception-3.0",
+ "Autoconf-exception-generic",
+ "Autoconf-exception-generic-3.0",
+ "Autoconf-exception-macro",
+ "Bison-exception-1.24",
+ "Bison-exception-2.2",
+ "Bootloader-exception",
+ "CGAL-linking-exception",
+ "Classpath-exception-2.0",
+ "CLISP-exception-2.0",
+ "cryptsetup-OpenSSL-exception",
+ "Digia-Qt-LGPL-exception-1.1",
+ "DigiRule-FOSS-exception",
+ "eCos-exception-2.0",
+ "erlang-otp-linking-exception",
+ "Fawkes-Runtime-exception",
+ "FLTK-exception",
+ "fmt-exception",
+ "Font-exception-2.0",
+ "freertos-exception-2.0",
+ "GCC-exception-2.0",
+ "GCC-exception-2.0-note",
+ "GCC-exception-3.1",
+ "Gmsh-exception",
+ "GNAT-exception",
+ "GNOME-examples-exception",
+ "GNU-compiler-exception",
+ "gnu-javamail-exception",
+ "GPL-3.0-389-ds-base-exception",
+ "GPL-3.0-interface-exception",
+ "GPL-3.0-linking-exception",
+ "GPL-3.0-linking-source-exception",
+ "GPL-CC-1.0",
+ "GStreamer-exception-2005",
+ "GStreamer-exception-2008",
+ "harbour-exception",
+ "i2p-gpl-java-exception",
+ "Independent-modules-exception",
+ "KiCad-libraries-exception",
+ "LGPL-3.0-linking-exception",
+ "libpri-OpenH323-exception",
+ "Libtool-exception",
+ "Linux-syscall-note",
+ "LLGPL",
+ "LLVM-exception",
+ "LZMA-exception",
+ "mif-exception",
+ "mxml-exception",
+ "Nokia-Qt-exception-1.1",
+ "OCaml-LGPL-linking-exception",
+ "OCCT-exception-1.0",
+ "OpenJDK-assembly-exception-1.0",
+ "openvpn-openssl-exception",
+ "PCRE2-exception",
+ "polyparse-exception",
+ "PS-or-PDF-font-exception-20170817",
+ "QPL-1.0-INRIA-2004-exception",
+ "Qt-GPL-exception-1.0",
+ "Qt-LGPL-exception-1.1",
+ "Qwt-exception-1.0",
+ "romic-exception",
+ "RRDtool-FLOSS-exception-2.0",
+ "SANE-exception",
+ "SHL-2.0",
+ "SHL-2.1",
+ "stunnel-exception",
+ "SWI-exception",
+ "Swift-exception",
+ "Texinfo-exception",
+ "u-boot-exception-2.0",
+ "UBDL-exception",
+ "Universal-FOSS-exception-1.0",
+ "vsftpd-openssl-exception",
+ "WxWindows-exception-3.1",
+ "x11vnc-openssl-exception"
+ ]
+ }
src/static/css/style.css ADDED
@@ -0,0 +1,1288 @@
+ @import url('https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700&display=swap');
+
+ /* Base & Common */
+ body {
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+ margin: 0;
+ padding: 0;
+ line-height: 1.6;
+ color: #333;
+ background-color: #f9f9f9;
+ }
+
+ h1,
+ h2,
+ h3,
+ h4,
+ h5,
+ h6 {
+ font-family: 'Poppins', sans-serif;
+ }
+
+ h1 {
+ font-weight: 700;
+ }
+
+ h2 {
+ font-weight: 600;
+ }
+
+ h3 {
+ font-weight: 600;
+ }
+
+ h4 {
+ font-weight: 500;
+ }
+
+ h5 {
+ font-weight: 500;
+ }
+
+ h6 {
+ font-weight: 400;
+ }
+
+ .container {
+ max-width: 1000px;
+ margin: 0 auto;
+ padding: 0 20px;
+ }
+
+ code {
+ background-color: #f8f9fa;
+ padding: 2px 5px;
+ border-radius: 4px;
+ font-family: monospace;
+ font-size: 14px;
+ color: #e74c3c;
+ }
+
+ a {
+ color: #3498db;
+ text-decoration: none;
+ transition: color 0.3s;
+ }
+
+ a:hover {
+ color: #2980b9;
+ text-decoration: underline;
+ }
+
+ /* Header */
+ .header {
+ position: relative;
+ background-color: #ffffff;
+ padding: 15px 20px;
+ border-bottom: 1px solid #e9ecef;
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
+ margin-bottom: 30px;
+ }
+
+ .header-left {
+ display: flex;
+ align-items: center;
+ }
+
+ .header img {
+ height: 60px;
+ margin-right: 15px;
+ }
+
+ .header-content {
+ position: absolute;
+ left: 50%;
+ transform: translateX(-50%);
+ display: flex;
+ flex-direction: column;
+ }
+
+ .header h1 {
+ margin: 0;
+ font-family: 'Poppins', sans-serif;
+ font-size: 28px;
+ color: #2c3e50;
+ font-weight: 700;
+ margin-top: 5px;
+ /* Adjusting down to align with logo */
+ }
+
+ .header-right {
+ display: flex;
+ gap: 10px;
+ }
+
+ /* Buttons */
+ button {
+ padding: 12px 20px;
+ background-color: #3498db;
+ color: white;
+ border: none;
+ border-radius: 6px;
+ cursor: pointer;
+ font-size: 15px;
+ font-weight: 500;
+ transition: background-color 0.3s;
+ }
+
+ button:hover {
+ background-color: #2980b9;
+ }
+
+ button:disabled {
+ background-color: #bdc3c7;
+ cursor: not-allowed;
+ }
+
+ .button {
+ display: inline-block;
+ padding: 12px 20px;
+ background-color: #7f8c8d;
+ color: white;
+ border: none;
+ border-radius: 6px;
+ cursor: pointer;
+ font-size: 15px;
+ font-weight: 500;
+ text-decoration: none;
+ transition: background-color 0.3s;
+ margin-bottom: 20px;
+ }
+
+ .button:hover {
+ background-color: #95a5a6;
+ text-decoration: none;
+ }
+
+ .github-button {
+ display: inline-block;
+ padding: 12px 20px;
+ background-color: #3498db;
+ color: white;
+ text-decoration: none;
+ border-radius: 6px;
+ font-weight: 500;
+ font-size: 15px;
+ transition: background-color 0.3s;
+ }
+
+ .github-button:hover {
+ background-color: #2980b9;
+ color: white;
+ text-decoration: none;
+ }
+
+ .generate-another-btn {
+ display: inline-flex;
+ align-items: center;
+ justify-content: center;
+ padding: 0 16px;
+ height: 38px;
+ background: linear-gradient(135deg, rgb(66, 92, 187), rgb(116, 142, 237));
+ color: #ffffff !important;
+ font-weight: 600;
+ border-radius: 19px;
+ font-size: 14px;
+ transition: all 0.3s ease;
+ text-decoration: none !important;
+ gap: 8px;
+ font-family: inherit;
+ cursor: pointer;
+ border: none;
+ box-shadow: 0 2px 4px rgba(66, 92, 187, 0.3);
+ }
+
+ .generate-another-btn:hover {
+ background: linear-gradient(135deg, rgb(86, 112, 207), rgb(136, 162, 255));
+ color: #ffffff !important;
+ transform: translateY(-2px);
+ box-shadow: 0 4px 8px rgba(66, 92, 187, 0.4);
+ }
+
+ /* Content Sections */
+ .content-section {
+ background-color: #ffffff;
+ border-radius: 8px;
+ padding: 25px;
+ margin-bottom: 30px;
+ box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
+ }
+
+ .content-section:last-child {
+ margin-bottom: 0;
+ }
+
+ .content-section h2 {
+ color: #2c3e50;
+ margin-top: 0;
+ margin-bottom: 20px;
+ font-size: 22px;
+ border-bottom: 2px solid #f0f0f0;
+ padding-bottom: 10px;
+ }
+
+ .content-section h3 {
+ color: #2c3e50;
+ margin-top: 0;
+ margin-bottom: 15px;
+ font-size: 20px;
+ /* result.html has 20px, index 18px */
+ }
+
+ .content-section p {
+ margin-bottom: 20px;
+ font-size: 16px;
+ line-height: 1.7;
+ color: #555;
+ }
+
+ /* Forms (from index.html) */
+ .form-section {
+ background-color: #ffffff;
+ border-radius: 8px;
+ padding: 25px;
+ margin-bottom: 30px;
+ box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
+ }
+
+ .form-section p {
+ margin-bottom: 20px;
+ font-size: 16px;
+ color: #555;
+ }
+
+ form {
+ margin: 20px 0;
+ }
+
+ input[type="text"] {
+ padding: 12px;
+ border: 1px solid #ddd;
+ border-radius: 6px;
+ margin-right: 10px;
+ width: 350px;
+ font-size: 15px;
+ transition: border-color 0.3s;
+ }
+
+ input[type="text"]:focus {
+ border-color: #3498db;
+ outline: none;
+ box-shadow: 0 0 5px rgba(52, 152, 219, 0.3);
+ }
+
+ /* Result Specific Modules */
+ .success-message {
+ text-align: left;
+ padding: 15px;
+ background-color: #d4edda;
+ border: 1px solid #c3e6cb;
+ border-radius: 8px;
+ margin-bottom: 20px;
+ }
+
+ .success-message h2 {
+ margin: 0;
+ font-size: 18px;
+ color: #155724;
+ font-weight: 500;
+ }
+
+ .model-name {
+ font-weight: 600;
+ color: #2c3e50;
+ }
+
+ .aibom-viewer {
+ margin: 20px 0;
+ border: 1px solid #e9ecef;
+ border-radius: 8px;
+ padding: 20px;
+ background-color: #f9f9f9;
+ }
+
+ .aibom-section {
+ margin-bottom: 20px;
+ padding: 20px;
+ border-radius: 8px;
+ background-color: white;
+ box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
+ }
+
+ .aibom-section:last-child {
+ margin-bottom: 0;
+ }
+
+ .aibom-section h4 {
+ margin-top: 0;
+ color: #2c3e50;
+ border-bottom: 2px solid #f0f0f0;
+ padding-bottom: 10px;
+ margin-bottom: 15px;
+ font-size: 18px;
+ }
+
+ .aibom-property {
+ display: flex;
+ margin: 10px 0;
+ }
+
+ .aibom-property:last-child {
+ margin-bottom: 0;
+ }
+
+ .property-name {
+ font-weight: bold;
+ width: 200px;
+ color: #34495e;
+ }
+
+ .property-value {
+ flex: 1;
+ color: #555;
+ line-height: 1.6;
+ }
+
+ .aibom-tabs {
+ display: flex;
+ border-bottom: 1px solid #e9ecef;
+ margin-bottom: 20px;
+ }
+
+ .aibom-tab {
+ padding: 12px 20px;
+ cursor: pointer;
+ background-color: #f8f9fa;
+ margin-right: 5px;
+ border-radius: 8px 8px 0 0;
+ font-weight: 500;
+ transition: all 0.3s ease;
+ }
+
+ .aibom-tab.active {
+ background-color: #6c7a89;
+ color: white;
+ }
+
+ .aibom-tab:hover:not(.active) {
+ background-color: #e9ecef;
+ }
+
+ .tab-content {
+ display: none;
+ }
+
+ .tab-content.active {
+ display: block;
+ }
+
+ .json-view {
+ background-color: #f8f9fa;
+ border: 1px solid #e9ecef;
+ border-radius: 8px;
+ padding: 20px;
+ overflow: auto;
+ max-height: 500px;
+ font-family: monospace;
+ line-height: 1.5;
+ }
+
+ .collapsible {
+ cursor: pointer;
+ position: relative;
+ transition: all 0.3s ease;
+ }
+
+ .collapsible:after {
+ content: '+';
+ position: absolute;
+ right: 10px;
+ font-weight: bold;
+ }
+
+ .collapsible.active:after {
+ content: '-';
+ }
+
+ .collapsible-content {
+ max-height: 0;
+ overflow: hidden;
+ transition: max-height 0.3s ease-out;
+ }
+
+ .collapsible-content.active {
+ max-height: 500px;
+ }
+
+ .tag {
+ display: inline-block;
+ background-color: #e9ecef;
+ padding: 4px 10px;
+ border-radius: 16px;
+ margin: 3px;
+ font-size: 0.9em;
+ }
+
+ .key-info {
+ background-color: #e3f2fd;
+ border-left: 4px solid #2196F3;
+ padding: 20px;
+ margin-bottom: 20px;
+ border-radius: 8px;
+ }
+
+ .key-info h3,
+ .completeness-profile h3 {
+ margin-top: 0;
+ margin-bottom: 15px;
+ }
+
+ /* Tables & Scoring */
+ table {
+ border-collapse: collapse;
+ width: 100%;
+ margin-top: 15px;
+ margin-bottom: 20px;
+ }
+
+ th,
+ td {
+ border: 1px solid #e9ecef;
+ padding: 12px;
+ }
+
+ th {
+ background-color: #f8f9fa;
+ color: #2c3e50;
+ font-weight: 600;
+ }
+
+ .check-mark {
+ color: #27ae60;
+ }
+
+ .x-mark {
+ color: #e74c3c;
+ }
+
+ .field-name {
+ color: #000;
+ }
+
+ .field-stars {
+ color: #000;
+ }
+
+ .improvement {
+ color: #2c3e50;
+ background-color: #ecf0f1;
+ padding: 20px;
+ border-radius: 8px;
+ margin-bottom: 30px;
+ border-left: 4px solid #3498db;
+ }
+
+ .improvement-value {
+ color: #27ae60;
+ font-weight: bold;
+ }
+
+ .ai-badge {
+ background-color: #3498db;
+ color: white;
+ padding: 3px 8px;
+ border-radius: 3px;
+ font-size: 0.8em;
+ margin-left: 10px;
+ }
+
+ /* Progress Bars */
+ .progress-container {
+ width: 100%;
+ background-color: #f1f1f1;
+ border-radius: 8px;
+ margin: 8px 0;
+ overflow: hidden;
+ }
+
+ .progress-bar {
+ height: 24px;
+ border-radius: 8px;
+ text-align: center;
+ line-height: 24px;
+ color: white;
+ font-size: 14px;
+ font-weight: 500;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ transition: width 0.5s ease;
+ }
+
+ .progress-excellent {
+ background-color: #4CAF50;
+ }
+
+ .progress-good {
+ background-color: #2196F3;
+ }
+
+ .progress-fair {
+ background-color: #FF9800;
+ }
+
+ .progress-poor {
+ background-color: #f44336;
+ }
+
+ .progress-excellent-border {
+ border-left-color: #4CAF50 !important;
+ }
+
+ .progress-good-border {
+ border-left-color: #2196F3 !important;
+ }
+
+ .progress-fair-border {
+ border-left-color: #FF9800 !important;
+ }
+
+ .progress-poor-border {
+ border-left-color: #f44336 !important;
+ }
+
+ .score-table {
+ width: 100%;
+ margin-bottom: 20px;
+ }
+
+ .score-table th {
+ text-align: left;
+ padding: 12px;
+ background-color: #f8f9fa;
+ }
+
+ .score-table th:nth-child(1),
+ .score-table td:nth-child(1) {
+ width: 25%;
+ }
+
+ .score-table th:nth-child(2),
+ .score-table td:nth-child(2) {
+ width: 20%;
+ }
+
+ .score-table th:nth-child(3),
+ .score-table td:nth-child(3) {
+ width: 15%;
+ }
+
+ .score-table th:nth-child(4),
+ .score-table td:nth-child(4) {
+ width: 40%;
+ }
+
+ .score-weight {
+ font-size: 0.9em;
+ color: #666;
+ margin-left: 5px;
+ }
+
+ .score-label {
+ display: inline-block;
+ padding: 3px 8px;
+ border-radius: 4px;
+ color: white;
+ font-size: 0.9em;
+ margin-left: 5px;
+ background-color: transparent;
+ }
+
+ .total-score-container {
+ display: flex;
+ align-items: center;
+ margin-bottom: 25px;
+ background-color: white;
+ padding: 20px;
+ border-radius: 8px;
+ box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
+ }
+
+ .total-score {
+ font-size: 28px;
+ font-weight: bold;
+ margin-right: 20px;
+ color: #2c3e50;
+ }
+
+ .total-progress {
+ flex: 1;
+ }
+
+ .tooltip {
+ position: relative;
+ display: inline-block;
+ cursor: help;
+ }
+
+ .tooltip .tooltiptext {
+ visibility: hidden;
+ width: 300px;
+ background-color: #34495e;
+ color: #fff;
+ text-align: left;
+ border-radius: 6px;
+ padding: 12px;
+ position: absolute;
+ z-index: 1;
+ bottom: 125%;
+ left: 50%;
+ margin-left: -150px;
+ opacity: 0;
+ transition: opacity 0.3s;
+ font-size: 0.9em;
+ line-height: 1.5;
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
+ }
+
+ .tooltip:hover .tooltiptext {
+ visibility: visible;
+ opacity: 1;
+ }
+
+ .tooltip .tooltiptext::after {
+ content: "";
+ position: absolute;
+ top: 100%;
+ left: 50%;
+ margin-left: -5px;
+ border-width: 5px;
+ border-style: solid;
+ border-color: #34495e transparent transparent transparent;
+ }
+
+ .missing-fields {
+ background-color: #ffebee;
+ border-left: 4px solid #f44336;
+ padding: 20px;
+ margin: 20px 0;
+ border-radius: 8px;
+ }
+
+ .missing-fields h4 {
+ margin-top: 0;
+ color: #d32f2f;
+ margin-bottom: 15px;
+ }
+
+ .missing-fields ul {
+ margin-bottom: 0;
+ padding-left: 20px;
+ }
+
+ .recommendations {
+ background-color: #e8f5e9;
+ border-left: 4px solid #4caf50;
+ padding: 20px;
+ margin: 20px 0;
+ border-radius: 8px;
+ }
+
+ .recommendations h4 {
+ margin-top: 0;
+ color: #2e7d32;
+ margin-bottom: 15px;
+ }
+
+ .recommendations ul {
+ margin-bottom: 0;
+ padding-left: 20px;
+ }
+
+ .importance-indicator {
+ display: inline-block;
+ margin-left: 5px;
+ }
+
+ .high-importance {
+ color: #d32f2f;
+ }
+
+ .medium-importance {
+ color: #ff9800;
+ }
+
+ .low-importance {
+ color: #2196f3;
+ }
+
+ .scoring-rubric {
+ background-color: #e3f2fd;
+ border-left: 4px solid #2196f3;
+ padding: 20px;
+ margin: 20px 0;
+ border-radius: 8px;
+ }
+
+ /* Error Pages */
+ .error-message {
+ text-align: left;
+ padding: 15px;
+ background-color: #f8d7da;
+ border: 1px solid #f5c6cb;
+ border-radius: 8px;
+ margin-bottom: 20px;
+ }
+
+ .error-message h2 {
+ margin: 0;
+ font-size: 18px;
+ color: #721c24;
+ font-weight: 500;
+ }
+
+ .error-section {
+ background-color: #ffffff;
+ border-radius: 8px;
+ padding: 25px;
+ padding: 25px;
751
+ margin-bottom: 30px;
752
+ box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
753
+ }
754
+
755
+ .error-section h2 {
756
+ color: #e74c3c;
757
+ margin-top: 0;
758
+ margin-bottom: 20px;
759
+ font-size: 22px;
760
+ border-bottom: 2px solid #f0f0f0;
761
+ padding-bottom: 10px;
762
+ }
763
+
764
+ .error-details {
765
+ background-color: #ffebee;
766
+ border-left: 4px solid #e74c3c;
767
+ padding: 15px;
768
+ border-radius: 4px;
769
+ margin: 20px 0;
770
+ font-size: 16px;
771
+ line-height: 1.7;
772
+ color: #555;
773
+ }
774
+
775
+ /* Modern Footer Styles */
776
+ .footer-modern {
777
+ background-color: #1e293b;
778
+ color: #e2e8f0;
779
+ padding: 25px 30px 15px;
780
+ margin-top: 20px;
781
+ border-radius: 8px;
782
+ box-shadow: 0 -4px 6px -1px rgba(0, 0, 0, 0.1);
783
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
784
+ width: 100%;
785
+ box-sizing: border-box;
786
+ margin-bottom: 25px;
787
+ }
788
+
789
+ .footer-modern-container {
790
+ display: flex;
791
+ justify-content: space-between;
792
+ flex-wrap: wrap;
793
+ gap: 30px;
794
+ margin: 0 auto;
795
+ text-align: left;
796
+ }
797
+
798
+ .footer-modern-col {
799
+ flex: 1;
800
+ min-width: 200px;
801
+ }
802
+
803
+ .brand-col {
804
+ flex: 0 0 160px;
805
+ /* Takes up less space to bring Support closer */
806
+ }
807
+
808
+ .help-col,
809
+ .share-col {
810
+ padding-top: 8px;
811
+ /* Pushes content down slightly to align with GenAI logo */
812
+ display: flex;
813
+ flex-direction: column;
814
+ }
815
+
816
+ .footer-modern-col h4 {
817
+ color: #f8fafc;
818
+ font-family: 'Poppins', sans-serif;
819
+ font-size: 20px;
820
+ font-weight: 600;
821
+ margin-top: 0;
822
+ margin-bottom: 12px;
823
+ letter-spacing: 0.5px;
824
+ display: flex;
825
+ align-items: center;
826
+ gap: 8px;
827
+ }
828
+
829
+ .footer-modern-col p {
830
+ font-size: 14px;
831
+ line-height: 1.7;
832
+ color: #cbd5e1;
833
+ margin-bottom: 12px;
834
+ }
835
+
836
+ .footer-modern-col a {
837
+ color: #cbd5e1;
838
+ text-decoration: none;
839
+ transition: all 0.2s ease;
840
+ }
841
+
842
+ .footer-modern-col a:hover {
843
+ color: #38bdf8;
844
+ }
845
+
846
+ .footer-modern-col ul {
847
+ list-style: none;
848
+ padding: 0;
849
+ margin: 0;
850
+ display: flex;
851
+ flex-direction: column;
852
+ }
853
+
854
+ .footer-modern-col ul li {
855
+ margin-bottom: 12px;
856
+ }
857
+
858
+ .footer-modern-col ul a {
859
+ font-size: 14px;
860
+ font-weight: 500;
861
+ }
862
+
863
+ .footer-modern-col img {
864
+ filter: brightness(0) invert(1);
865
+ opacity: 0.9;
866
+ margin-bottom: 15px;
867
+ transition: opacity 0.2s;
868
+ }
869
+
870
+ .footer-modern-col img:hover {
871
+ opacity: 1;
872
+ }
873
+
874
+ .footer-social-icons {
875
+ display: flex;
876
+ gap: 15px;
877
+ margin-top: 5px;
878
+ }
879
+
880
+ .footer-social-icons a {
881
+ display: flex;
882
+ align-items: center;
883
+ justify-content: center;
884
+ width: 36px;
885
+ height: 36px;
886
+ border-radius: 50%;
887
+ background: linear-gradient(135deg, #334155, #475569);
888
+ color: #ffffff;
889
+ transition: all 0.3s ease;
890
+ box-shadow: 0 2px 4px rgba(51, 65, 85, 0.3);
891
+ }
892
+
893
+ .footer-social-icons a:hover {
894
+ background: linear-gradient(135deg, #475569, #64748b);
895
+ color: #ffffff;
896
+ transform: translateY(-2px);
897
+ box-shadow: 0 4px 8px rgba(51, 65, 85, 0.4);
898
+ }
899
+
900
+ .footer-btn-share {
901
+ display: inline-flex;
902
+ align-items: center;
903
+ justify-content: center;
904
+ padding: 0 16px;
905
+ height: 36px;
906
+ background: linear-gradient(135deg, #334155, #475569);
907
+ color: #ffffff !important;
908
+ font-weight: 500;
909
+ border-radius: 18px;
910
+ font-size: 14px;
911
+ margin-top: auto;
912
+ align-self: flex-end;
913
+ margin-right: 15px;
914
+ transition: all 0.3s ease;
915
+ text-decoration: none !important;
916
+ gap: 8px;
917
+ font-family: inherit;
918
+ cursor: pointer;
919
+ border: none;
920
+ box-shadow: 0 2px 4px rgba(51, 65, 85, 0.3);
921
+ }
922
+
923
+ .footer-btn-share:hover {
924
+ background: linear-gradient(135deg, #475569, #64748b);
925
+ color: #ffffff !important;
926
+ transform: translateY(-2px);
927
+ box-shadow: 0 4px 8px rgba(51, 65, 85, 0.4);
928
+ }
929
+
930
+ .footer-modern-bottom {
931
+ text-align: center;
932
+ padding-top: 10px;
933
+ margin-top: 15px;
934
+ border-top: 1px solid #334155;
935
+ color: #94a3b8;
936
+ font-size: 13px;
937
+ margin-left: auto;
938
+ margin-right: auto;
939
+ }
940
+
941
+ .footer-modern-bottom p {
942
+ margin: 5px 0 0 0;
943
+ }
944
+
945
+ /* Mobile Responsiveness */
946
+ @media (max-width: 768px) {
947
+ .container {
948
+ padding: 0 15px;
949
+ }
950
+
951
+ .header {
952
+ flex-direction: column;
953
+ text-align: center;
954
+ padding: 15px;
955
+ }
956
+
957
+ .header-left {
958
+ margin-bottom: 15px;
959
+ }
960
+
961
+ .header img {
962
+ margin-bottom: 10px;
963
+ margin-right: 0;
964
+ }
965
+
966
+ /* Index specific mobile */
967
+ form {
968
+ flex-direction: column !important;
969
+ align-items: stretch !important;
970
+ }
971
+
972
+ input[type="text"] {
973
+ width: 100% !important;
974
+ max-width: none !important;
975
+ margin-right: 0 !important;
976
+ margin-bottom: 15px;
977
+ }
978
+
979
+ button {
980
+ width: 100%;
981
+ }
982
+
983
+ /* Error specific mobile */
984
+ .button,
985
+ .generate-another-btn {
986
+ width: 100%;
987
+ text-align: center;
988
+ margin-bottom: 10px;
989
+ }
990
+ }
991
+
992
+ /* Missing Styles Restored */
993
+ .scoring-rubric h4 {
994
+ margin-top: 0;
995
+ color: #1565c0;
996
+ margin-bottom: 15px;
997
+ }
998
+
999
+ .scoring-rubric table {
1000
+ width: 100%;
1001
+ margin-top: 15px;
1002
+ }
1003
+
1004
+ .scoring-rubric th,
1005
+ .scoring-rubric td {
1006
+ padding: 10px;
1007
+ text-align: left;
1008
+ }
1009
+
1010
+ .note-box {
1011
+ background-color: #fffbea;
1012
+ border-left: 4px solid #ffc107;
1013
+ padding: 20px;
1014
+ margin: 20px 0;
1015
+ border-radius: 8px;
1016
+ }
1017
+
1018
+ .download-section {
1019
+ background-color: #e8f5e9;
1020
+ border-left: 4px solid #83af84;
1021
+ padding: 20px;
1022
+ margin-bottom: 20px;
1023
+ border-radius: 8px;
1024
+ display: flex;
1025
+ justify-content: space-between;
1026
+ align-items: center;
1027
+ flex-wrap: wrap;
1028
+ gap: 15px;
1029
+ }
1030
+
1031
+ .download-section h3 {
1032
+ margin: 0;
1033
+ }
1034
+
1035
+ .download-buttons {
1036
+ display: flex;
1037
+ gap: 15px;
1038
+ }
1039
+
1040
+ .download-buttons button {
1041
+ display: inline-flex;
1042
+ align-items: center;
1043
+ justify-content: center;
1044
+ padding: 0 14px;
1045
+ height: 32px;
1046
+ background-color: #64748b;
1047
+ color: #ffffff !important;
1048
+ font-weight: 600;
1049
+ border-radius: 16px;
1050
+ font-size: 13px;
1051
+ transition: all 0.3s ease;
1052
+ text-decoration: none !important;
1053
+ gap: 8px;
1054
+ font-family: inherit;
1055
+ cursor: pointer;
1056
+ border: none;
1057
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
1058
+ }
1059
+
1060
+ .download-buttons button:hover {
1061
+ background-color: #94a3b8;
1062
+ color: #ffffff !important;
1063
+ transform: translateY(-2px);
1064
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
1065
+ }
1066
+
1067
+ .completeness-profile {
1068
+ background-color: #f6f5f5;
1069
+ border-radius: 8px;
1070
+ padding: 20px;
1071
+ margin: 20px 0;
1072
+ border-left: 4px solid #7f7c7c;
1073
+ }
1074
+
1075
+ .profile-badge {
1076
+ display: inline-flex;
1077
+ align-items: center;
1078
+ justify-content: center;
1079
+ height: 32px;
1080
+ padding: 0 14px;
1081
+ border-radius: 16px;
1082
+ color: white;
1083
+ font-weight: 600;
1084
+ font-size: 14px;
1085
+ margin-right: 10px;
1086
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
1087
+ box-sizing: border-box;
1088
+ }
1089
+
1090
+ .profile-basic {
1091
+ background: linear-gradient(135deg, rgb(255, 140, 0), rgb(255, 180, 50));
1092
+ box-shadow: 0 2px 4px rgba(255, 140, 0, 0.3);
1093
+ }
1094
+
1095
+ .profile-standard {
1096
+ background-color: #2196f3;
1097
+ }
1098
+
1099
+ .profile-advanced {
1100
+ background-color: #4caf50;
1101
+ }
1102
+
1103
+ .profile-incomplete {
1104
+ background-color: #f44336;
1105
+ color: white;
1106
+ }
1107
+
1108
+ .field-tier {
1109
+ display: inline-block;
1110
+ width: 12px;
1111
+ height: 12px;
1112
+ border-radius: 50%;
1113
+ margin-right: 5px;
1114
+ }
1115
+
1116
+ .tier-critical {
1117
+ background-color: #d32f2f;
1118
+ }
1119
+
1120
+ .tier-important {
1121
+ background-color: #ff9800;
1122
+ }
1123
+
1124
+ .tier-supplementary {
1125
+ background-color: #2196f3;
1126
+ }
1127
+
1128
+ .tier-legend {
1129
+ display: flex;
1130
+ margin: 15px 0;
1131
+ font-size: 0.9em;
1132
+ }
1133
+
1134
+ .tier-legend-item {
1135
+ display: flex;
1136
+ align-items: center;
1137
+ margin-right: 20px;
1138
+ }
1139
+
1140
+ .validation-penalty-info {
1141
+ background-color: #fff3e0;
1142
+ border-left: 4px solid #ff9800;
1143
+ padding: 20px;
1144
+ margin: 20px 0;
1145
+ border-radius: 8px;
1146
+ font-size: 0.95em;
1147
+ }
1148
+
1149
+ .validation-penalty-info h4 {
1150
+ margin-top: 0;
1151
+ color: #e65100;
1152
+ margin-bottom: 15px;
1153
+ }
1154
+
1155
+ .validation-warning-box {
1156
+ background-color: #fff3e0;
1157
+ border: 1px solid #ff9800;
1158
+ border-left: 4px solid #ff9800;
1159
+ border-radius: 8px;
1160
+ padding: 20px;
1161
+ margin: 20px 0;
1162
+ box-shadow: 0 2px 10px rgba(255, 152, 0, 0.1);
1163
+ }
1164
+
1165
+ .validation-warning-box h4 {
1166
+ margin-top: 0;
1167
+ color: #e65100;
1168
+ margin-bottom: 15px;
1169
+ display: flex;
1170
+ align-items: center;
1171
+ }
1172
+
1173
+ .validation-warning-box .warning-icon {
1174
+ margin-right: 10px;
1175
+ font-size: 1.2em;
1176
+ }
1177
+
1178
+ .validation-warning-box .issue-summary {
1179
+ margin-bottom: 15px;
1180
+ line-height: 1.6;
1181
+ }
1182
+
1183
+ .validation-warning-box .issue-details {
1184
+ margin-bottom: 15px;
1185
+ }
1186
+
1187
+ .validation-warning-box .issue-list {
1188
+ margin: 10px 0;
1189
+ padding-left: 20px;
1190
+ }
1191
+
1192
+ .validation-warning-box .issue-list li {
1193
+ margin-bottom: 8px;
1194
+ line-height: 1.5;
1195
+ }
1196
+
1197
+ .validation-warning-box .call-to-action {
1198
+ margin-top: 15px;
1199
+ padding-top: 15px;
1200
+ border-top: 1px solid #ffcc80;
1201
+ }
1202
+
1203
+ .validation-warning-box .call-to-action p {
1204
+ margin-bottom: 10px;
1205
+ }
1206
+
1207
+ .issue-tracker-link {
1208
+ display: inline-block;
1209
+ padding: 8px 16px;
1210
+ background-color: #3498db;
1211
+ color: white;
1212
+ text-decoration: none;
1213
+ border-radius: 4px;
1214
+ font-weight: 500;
1215
+ transition: background-color 0.3s;
1216
+ }
1217
+
1218
+ .issue-tracker-link:hover {
1219
+ background-color: #2980b9;
1220
+ text-decoration: none;
1221
+ }
1222
+
1223
+ .category-table {
1224
+ margin-bottom: 30px;
1225
+ }
1226
+
1227
+ .category-table h4 {
1228
+ color: #2c3e50;
1229
+ margin-bottom: 10px;
1230
+ font-size: 18px;
1231
+ }
1232
+
1233
+ .category-table table th:first-child,
1234
+ .category-table table td:first-child {
1235
+ text-align: center;
1236
+ vertical-align: middle;
1237
+ width: 1%;
1238
+ white-space: nowrap;
1239
+ }
1240
+
1241
+ .category-table table th:nth-child(3),
1242
+ .category-table table td:nth-child(3) {
1243
+ word-break: break-all;
1244
+ overflow-wrap: break-word;
1245
+ }
1246
+
1247
+ .category-table table th:nth-child(4),
1248
+ .category-table table td:nth-child(4) {
1249
+ width: 1%;
1250
+ white-space: nowrap;
1251
+ }
1252
+
1253
+ .category-table table th:nth-child(5),
1254
+ .category-table table td:nth-child(5) {
1255
+ width: 1%;
1256
+ white-space: nowrap;
1257
+ text-align: center;
1258
+ vertical-align: middle;
1259
+ }
1260
+
1261
+ .category-result {
1262
+ background-color: #f8f9fa;
1263
+ padding: 10px;
1264
+ border-radius: 4px;
1265
+ margin-top: 10px;
1266
+ font-weight: bold;
1267
+ }
1268
+
1269
+ .field-type-legend {
1270
+ background-color: #e3f2fd;
1271
+ border-left: 4px solid #2196f3;
1272
+ padding: 15px;
1273
+ margin: 20px 0;
1274
+ border-radius: 8px;
1275
+ font-size: 0.9em;
1276
+ }
1277
+
1278
+ .field-type-legend h4 {
1279
+ margin-top: 0;
1280
+ color: #1565c0;
1281
+ margin-bottom: 10px;
1282
+ }
1283
+
1284
+ .legend-item {
1285
+ display: inline-block;
1286
+ margin-right: 20px;
1287
+ margin-bottom: 5px;
1288
+ }
src/static/images/cdx.webp ADDED
src/static/images/genai_security_project_logo.webp ADDED
src/static/js/script.js ADDED
@@ -0,0 +1,116 @@
+ // OWASP AIBOM Generator - Common Scripts
+
+ // Add Enter key support for form submission (Index Page)
+ document.addEventListener('DOMContentLoaded', function () {
+ var modelInput = document.querySelector('input[name="model_id"]');
+ if (modelInput) {
+ modelInput.addEventListener('keypress', function (e) {
+ if (e.key === 'Enter') {
+ e.preventDefault();
+ var btn = document.getElementById('generate-button');
+ if (btn) btn.click();
+ }
+ });
+ }
+ });
+
+ /* === Result Page Functions === */
+
+ function switchTab(tabId) {
+ // Hide all tab contents
+ var tabContents = document.getElementsByClassName('tab-content');
+ for (var i = 0; i < tabContents.length; i++) {
+ tabContents[i].classList.remove('active');
+ }
+
+ // Deactivate all tabs
+ var tabs = document.getElementsByClassName('aibom-tab');
+ for (var i = 0; i < tabs.length; i++) {
+ tabs[i].classList.remove('active');
+ }
+
+ // Activate the selected tab and content
+ var content = document.getElementById(tabId);
+ if (content) content.classList.add('active');
+
+ var selectedTab = document.querySelector('.aibom-tab[onclick="switchTab(\'' + tabId + '\')"]');
+ if (selectedTab) selectedTab.classList.add('active');
+ }
+
+ function toggleCollapsible(element) {
+ element.classList.toggle('active');
+ var content = element.nextElementSibling;
+ if (content) {
+ content.classList.toggle('active');
+
+ if (content.classList.contains('active')) {
+ content.style.maxHeight = content.scrollHeight + 'px';
+ } else {
+ content.style.maxHeight = '0';
+ }
+ }
+ }
+
+ /**
+ * Downloads a JSON object as a file.
+ * @param {Object|string} content - The JSON object or string to download.
+ * @param {string} filename - The name of the file to save as.
+ */
+ function downloadJSON(content, filename) {
+ var jsonString = (typeof content === 'string') ? content : JSON.stringify(content, null, 2);
+ var dataStr = "data:text/json;charset=utf-8," + encodeURIComponent(jsonString);
+
+ var downloadAnchorNode = document.createElement('a');
+ downloadAnchorNode.setAttribute("href", dataStr);
+ downloadAnchorNode.setAttribute("download", filename || "aibom.json");
+ document.body.appendChild(downloadAnchorNode); // required for Firefox
+ downloadAnchorNode.click();
+ downloadAnchorNode.remove();
+ }
+
+ // Initialize collapsible sections (Result Page)
+ document.addEventListener('DOMContentLoaded', function () {
+ // The HTML uses the inline onclick="toggleCollapsible(this)" pattern, so no
+ // listeners are attached here; attaching them as well would fire each handler
+ // twice. Sections start collapsed, so no further state initialization is needed.
+ });
+
+ // Validate Hugging Face URL or Model ID (Index Page)
+ document.addEventListener('DOMContentLoaded', function () {
+ var modelInput = document.getElementById('model-input');
+ var generateButton = document.getElementById('generate-button');
+
+ if (modelInput && generateButton) {
+ function validateInput() {
+ var value = modelInput.value.trim();
+ // Accept either a full HF URL (starts with https://huggingface.co/)
+ // or an org/repo identifier (e.g. openai/whisper-tiny)
+ var isUrl = value.startsWith('https://huggingface.co/');
+ // Basic regex for org/repo: alphanumeric, dots, dashes, underscores
+ var isModelId = /^[a-zA-Z0-9_\-\.]+\/[a-zA-Z0-9_\-\.]+$/.test(value);
+
+ if (isUrl || isModelId) {
+ generateButton.disabled = false;
+ generateButton.style.cursor = 'pointer';
+ generateButton.style.opacity = '1';
+ } else {
+ generateButton.disabled = true;
+ generateButton.style.cursor = 'not-allowed';
+ generateButton.style.opacity = '0.6';
+ }
+ }
+
+ modelInput.addEventListener('input', validateInput);
+ // Initial check
+ validateInput();
+ }
+ });
src/templates/error.html ADDED
@@ -0,0 +1,51 @@
+ <!DOCTYPE html>
+ <html lang="en">
+
+ <head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ <title>AIBOM Generator - Error</title>
+
+ <link rel="stylesheet" href="/static/css/style.css">
+ </head>
+
+ <body>
+ <div class="container">
+
+ <!-- Header -->
+ {% include 'includes/header.html' %}
+
+ <!-- Error message -->
+ <div class="error-message">
+ <h2>❌&nbsp;&nbsp;Error Generating AIBOM</h2>
+ </div>
+
+ <!-- Try Again Button -->
+ <div style="text-align: left; margin-bottom: 20px;">
+ <a href="/" class="button">🔄 Try Again</a>
+ </div>
+
+ <!-- Error Details -->
+ <div class="error-section">
+ <h2>What Happened?</h2>
+ <div class="error-details">
+ <p>{{ error }}</p>
+ </div>
+ </div>
+
+ <!-- Common Solutions -->
+ <div class="content-section">
+ <h2>💡&nbsp;&nbsp;Common Solutions</h2>
+ <p><strong>Model not found:</strong> Check that the model ID follows <code>owner/model-name</code> format
+ and exists on Hugging Face.</p>
+ <p><strong>Access issues:</strong> Some models require an access token or may be private.</p>
+ <p><strong>Temporary issues:</strong> Try again if there were connectivity or Hugging Face API hiccups.</p>
+ </div>
+
+ <!-- Modern Footer -->
+ {% include 'includes/footer.html' %}
+
+ </div>
+ </body>
+
+ </html>
src/templates/includes/footer.html ADDED
@@ -0,0 +1,85 @@
+ <footer class="footer-modern">
+ <div class="footer-modern-container">
+ <!-- Brand Column -->
+ <div class="footer-modern-col brand-col">
+ <a href="https://genai.owasp.org/" target="_blank">
+ <img src="{{ static_root|default('/static') }}/images/genai_security_project_logo.webp"
+ alt="OWASP GenAI Security Project" height="45">
+ </a>
+ <div class="footer-social-icons">
+ <a href="https://github.com/GenAI-Security-Project/aibom-generator" target="_blank"
+ rel="noopener noreferrer" aria-label="GitHub">
+ <svg viewBox="0 0 24 24" width="22" height="22" stroke="currentColor" stroke-width="2" fill="none"
+ stroke-linecap="round" stroke-linejoin="round">
+ <path
+ d="M9 19c-5 1.5-5-2.5-7-3m14 6v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22">
+ </path>
+ </svg>
+ </a>
+ <a href="https://www.linkedin.com/company/owasp-aibom/" target="_blank" rel="noopener noreferrer"
+ aria-label="LinkedIn">
+ <svg viewBox="0 0 24 24" width="22" height="22" stroke="currentColor" stroke-width="2" fill="none"
+ stroke-linecap="round" stroke-linejoin="round">
+ <path d="M16 8a6 6 0 0 1 6 6v7h-4v-7a2 2 0 0 0-2-2 2 2 0 0 0-2 2v7h-4v-7a6 6 0 0 1 6-6z"></path>
+ <rect x="2" y="9" width="4" height="12"></rect>
+ <circle cx="4" cy="4" r="2"></circle>
+ </svg>
+ </a>
+ </div>
+ </div>
+
+ <!-- Help Column -->
+ <div class="footer-modern-col help-col">
+ <h4>
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"
+ stroke-linecap="round" stroke-linejoin="round">
+ <path d="M12 22s8-4 8-10V5l-8-3-8 3v7c0 6 8 10 8 10z"></path>
+ </svg>
+ Support
+ </h4>
+ <p>If you encountered any problems, found a bug, or have suggestions for improvement, we'd love to hear from
+ you!</p>
+ <a href="https://github.com/GenAI-Security-Project/aibom-generator/issues" target="_blank"
+ title="Report an Issue" aria-label="Report an Issue" class="footer-btn-share">
+ <svg viewBox="0 0 24 24" width="18" height="18" stroke="currentColor" stroke-width="2" fill="none"
+ stroke-linecap="round" stroke-linejoin="round">
+ <path d="M10.29 3.86L1.82 18a2 2 0 0 0 1.71 3h16.94a2 2 0 0 0 1.71-3L13.71 3.86a2 2 0 0 0-3.42 0z">
+ </path>
+ <line x1="12" y1="9" x2="12" y2="13"></line>
+ <line x1="12" y1="17" x2="12.01" y2="17"></line>
+ </svg>
+ <span style="font-weight: 700;">Report Issue</span>
+ </a>
+ </div>
+
+ <!-- Share Column -->
+ <div class="footer-modern-col share-col">
+ <h4>
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"
+ stroke-linecap="round" stroke-linejoin="round">
+ <circle cx="18" cy="5" r="3"></circle>
+ <circle cx="6" cy="12" r="3"></circle>
+ <circle cx="18" cy="19" r="3"></circle>
+ <line x1="8.59" y1="13.51" x2="15.42" y2="17.49"></line>
+ <line x1="15.41" y1="6.51" x2="8.59" y2="10.49"></line>
+ </svg>
+ Spread the Word
+ </h4>
+ <p>If you find this tool useful, share it with your network!</p>
+ <a href="https://www.linkedin.com/sharing/share-offsite/?url=https://www.linkedin.com/company/owasp-aibom/"
+ target="_blank" rel="noopener noreferrer" title="Share" aria-label="Share" class="footer-btn-share">
+ <svg viewBox="0 0 24 24" width="18" height="18" stroke="currentColor" stroke-width="2" fill="none"
+ stroke-linecap="round" stroke-linejoin="round">
+ <path d="M16 8a6 6 0 0 1 6 6v7h-4v-7a2 2 0 0 0-2-2 2 2 0 0 0-2 2v7h-4v-7a6 6 0 0 1 6-6z"></path>
+ <rect x="2" y="9" width="4" height="12"></rect>
+ <circle cx="4" cy="4" r="2"></circle>
+ </svg>
+ <span style="font-weight: 700;">Share</span>
+ </a>
+ </div>
+ </div>
+
+ <div class="footer-modern-bottom">
+ <p>© 2026 OWASP GenAI Security Project - AIBOM Initiative</p>
+ </div>
+ </footer>
src/templates/includes/header.html ADDED
@@ -0,0 +1,28 @@
+ <div class="header">
+ <div class="header-left">
+ <a href="https://genai.owasp.org/" target="_blank">
+ <img src="{{ static_root|default('/static') }}/images/genai_security_project_logo.webp"
+ alt="OWASP GenAI Security Project logo">
+ </a>
+ </div>
+ <div class="header-content">
+ <h1>AIBOM Generator</h1>
+ </div>
+ {% if not hide_generate_another %}
+ <div class="header-right">
+ <a href="/" class="generate-another-btn">
+ <svg viewBox="0 0 24 24" width="16" height="16" stroke="currentColor" stroke-width="2" fill="none"
+ stroke-linecap="round" stroke-linejoin="round">
+ <path
+ d="m12 3-1.912 5.813a2 2 0 0 1-1.275 1.275L3 12l5.813 1.912a2 2 0 0 1 1.275 1.275L12 21l1.912-5.813a2 2 0 0 1 1.275-1.275L21 12l-5.813-1.912a2 2 0 0 1-1.275-1.275L12 3Z">
+ </path>
+ <path d="M5 3v4"></path>
+ <path d="M19 17v4"></path>
+ <path d="M3 5h4"></path>
+ <path d="M17 19h4"></path>
+ </svg>
+ Generate Another AIBOM
+ </a>
+ </div>
+ {% endif %}
+ </div>
src/templates/index.html ADDED
@@ -0,0 +1,76 @@
+ <!DOCTYPE html>
+ <html lang="en">
+
+ <head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ <title>AIBOM Generator</title>
+ <link rel="stylesheet" href="/static/css/style.css?v=2.0">
+ </head>
+
+ <body>
+ <div class="container">
+ <!-- Header -->
+ {% set hide_generate_another = true %}
+ {% include 'includes/header.html' %}
+ </div>
+
+ <div class="container">
+ <!-- Form Section (Moved to top) -->
+ <div class="form-section">
+ <h2>Generate AIBOM</h2>
+ <p>
+ Enter a Hugging Face model ID in the format
+ <code>&lt;organization-or-username&gt;/&lt;model-name&gt;</code> (Hugging Face provides a one-click
+ copy button for this), or the model's URL, to generate an AIBOM in CycloneDX format. You can browse
+ available models in the <a
+ href="https://huggingface.co/models" target="_blank" rel="noopener noreferrer">Hugging Face models
+ repository</a>.
+ </p>
+ <form id="sbom-form" action="/generate" method="post"
+ style="display: flex; flex-direction: row; align-items: center; width: 100%;">
+ <input type="text" name="model_id" id="model-input" placeholder="e.g., openai/whisper-tiny" required
+ style="flex: 1; max-width: 70%; margin-right: 10px;">
+ <button type="submit" id="generate-button" disabled
+ onclick="this.disabled=true; this.innerText='Generating...'; document.getElementById('sbom-form').submit();">Generate
+ AIBOM</button>
+ </form>
+
+ </div>
+
+ <!-- Tool Description Section -->
+ <div class="content-section">
+ <h2>About This Tool</h2>
+ <p>This open-source tool generates an AIBOM (AI Bill of Materials) for models hosted on Hugging Face. It
+ automatically extracts and formats key information about AI models into a standardized, machine-readable
+ SBOM (Software Bill of Materials) using the CycloneDX JSON format. Because metadata quality varies
+ across models and much of the information is unstructured, the tool analyzes what is available,
+ organizes it into a consistent structure, and provides an AIBOM completeness score that evaluates how
+ well the model is documented. This helps users quickly understand documentation gaps and supports
+ transparency, security, and compliance. The tool is also listed on the <a
+ href="https://cyclonedx.org/tool-center/" target="_blank" rel="noopener noreferrer">CycloneDX Tool
+ Center</a>.</p>
+ </div>
+
+ <!-- Introduction Section -->
+ <div class="content-section">
+ <h2>Understanding AIBOMs</h2>
+ <p>An AIBOM (Artificial Intelligence Bill of Materials, also known as AI/ML-BOM, AI SBOM, or SBOM for AI) is
+ a detailed, structured inventory that lists the components and dependencies involved in building and
+ operating an AI system—such as pre-trained models, datasets, libraries, and configuration parameters.
+ Much like a traditional SBOM for software, an AIBOM brings transparency to what goes into an AI system,
+ enabling organizations to assess security, compliance, and ethical risks. It is essential for managing
+ AI supply chain risks, supporting regulatory requirements, ensuring model provenance, and enabling
+ incident response and audits. As AI systems grow more complex and widely adopted, AIBOMs become critical
+ for maintaining trust, accountability, and control over how AI technologies are developed, integrated,
+ and deployed.</p>
+ </div>
+
+ <!-- Modern Footer -->
+ {% include 'includes/footer.html' %}
+ </div>
+
+ <!-- JavaScript for loading indicator and Captcha -->
+ <script src="/static/js/script.js"></script>
+ </body>
+
+ </html>
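As a reading aid for the template above: the CycloneDX JSON it describes can be sketched as a plain object. This is a minimal illustration only, not actual generator output; the model values and serial number are hypothetical placeholders, and the field names (`bomFormat`, `specVersion`, `serialNumber`, `metadata.timestamp`) mirror those rendered on the result page.

```javascript
// Minimal sketch of a CycloneDX-style AIBOM document (hypothetical values).
const aibom = {
  bomFormat: "CycloneDX",
  specVersion: "1.6",
  serialNumber: "urn:uuid:00000000-0000-0000-0000-000000000000", // placeholder
  version: 1,
  metadata: {
    timestamp: new Date().toISOString(),
  },
  components: [
    {
      type: "machine-learning-model", // CycloneDX component type for AI models
      group: "openai",                // organization part of the model ID
      name: "whisper-tiny",           // model-name part of the model ID
    },
  ],
};

// Serialize with the same pretty-printing downloadJSON() in script.js uses.
const serialized = JSON.stringify(aibom, null, 2);
console.log(serialized.includes("machine-learning-model")); // true
```

On the result page, an object of this shape is what `downloadJSON(aibom, 'aibom.json')` writes to disk.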
src/templates/result.html ADDED
@@ -0,0 +1,845 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>AIBOM Generated</title>
8
+ <link rel="stylesheet" href="{{ static_root|default('/static') }}/css/style.css?v=2.0">
9
+
10
+ </head>
11
+
12
+ <body>
13
+ <div class="container">
14
+ <!-- Header -->
15
+ {% include 'includes/header.html' %}
16
+
17
+ <!-- Success Message
18
+ <div class="success-message">
19
+ <h2>✅&nbsp;&nbsp;AIBOM is Generated Successfully for <span class="model-name">{{ model_id }}</span></h2>
20
+ </div> -->
21
+
22
+ <!-- Key Information -->
23
+ <div class="key-info">
24
+ <h3>📋&nbsp;&nbsp;AIBOM Summary</h3>
25
+ <div class="aibom-property">
26
+ <span class="property-name">Model:</span>
27
+ <span class="property-value"><a href="https://huggingface.co/{{ model_id }}" target="_blank">{{ model_id
28
+ }}</a></span>
29
+ </div>
30
+ <div class="aibom-property">
31
+ <span class="property-name">Generated:</span>
32
+ <span class="property-value">{{ aibom.metadata.timestamp }}</span>
33
+ </div>
34
+ <div class="aibom-property">
35
+ <span class="property-name">SBOM Format:</span>
36
+ <span class="property-value">
37
+ <a href="https://cyclonedx.org/docs/1.6/json/#components_items_modelCard" target="_blank">{{
38
+ aibom.bomFormat }} {{ aibom.specVersion }}</a>,
39
+ <a href="https://cyclonedx.org/docs/1.7/json/#components_items_modelCard" target="_blank">{{
40
+ aibom.bomFormat }} 1.7</a>
41
+ </span>
42
+ </div>
43
+ <div class="aibom-property">
44
+ <span class="property-name">Serial Number:</span>
45
+ <span class="property-value">{{ aibom.serialNumber }}</span>
46
+ </div>
47
+ </div>
48
+
49
+ <!-- Calculate Shared Score State -->
50
+ {% set score_percent = (completeness_score.total_score if completeness_score.total_score != 'Undefined' else 0)
51
+ | float %}
52
+ {% if score_percent >= 90 %}
53
+ {% set score_class = 'progress-excellent' %}
54
+ {% set score_label = 'Excellent' %}
55
+ {% elif score_percent >= 70 %}
56
+ {% set score_class = 'progress-good' %}
57
+ {% set score_label = 'Good' %}
58
+ {% elif score_percent >= 50 %}
59
+ {% set score_class = 'progress-fair' %}
60
+ {% set score_label = 'Fair' %}
61
+ {% else %}
62
+ {% set score_class = 'progress-poor' %}
63
+ {% set score_label = 'Poor' %}
64
+ {% endif %}
65
+
66
+ <!-- Completeness Profile & Download Section -->
67
+ <div class="completeness-profile {{ score_class }}-border"
68
+ style="display: flex; flex-wrap: wrap; gap: 0; align-items: center;">
69
+ <div class="completeness-left"
70
+ style="flex: 1 1 50%; min-width: 300px; padding-right: 20px; box-sizing: border-box;">
71
+ {% if completeness_score.completeness_profile %}
72
+ <h3 style="margin-top:0; margin-bottom:15px;">📊&nbsp;&nbsp;Completeness Assessment</h3>
73
+ <div style="display: flex; gap: 15px; align-items: center;">
74
+ <span class="profile-badge profile-{{ completeness_score.completeness_profile.name|lower }}">
75
+ {{ completeness_score.completeness_profile.name }}
76
+ </span>
77
+ <span>{{ completeness_score.completeness_profile.description }}</span>
78
+ </div>
79
+ {% endif %}
80
+ </div>
81
+
82
+ <div class="completeness-right" style="flex: 1 1 50%; min-width: 300px; box-sizing: border-box;">
83
+ <div class="download-section-inner">
84
+ <h3 style="margin-top:0; margin-bottom:15px;">💾&nbsp;&nbsp;Download your AIBOM</h3>
85
+ <div class="download-buttons" style="display: flex; gap: 10px;">
86
+ <button onclick="downloadJSON(AIBOM_CDX_JSON_1_6, FILENAME_BASE + '_aibom_1_6.json')">
87
+ <img src="{{ static_root|default('/static') }}/images/cdx.webp" alt="CycloneDX Logo"
88
+ width="16" height="16" style="filter: brightness(0) invert(1);">
89
+ CycloneDX 1.6
90
+ </button>
91
+ <button onclick="downloadJSON(AIBOM_CDX_JSON_1_7, FILENAME_BASE + '_aibom_1_7.json')">
92
+ <img src="{{ static_root|default('/static') }}/images/cdx.webp" alt="CycloneDX Logo"
93
+ width="16" height="16" style="filter: brightness(0) invert(1);">
94
+ CycloneDX 1.7
95
+ </button>
96
+ </div>
97
+ </div>
98
+ </div>
99
+ </div>
100
+
101
+ <!-- Tabbed Content -->
102
+ <div class="aibom-viewer">
103
+ <div class="aibom-tabs">
104
+ <div class="aibom-tab active" onclick="switchTab('human-view')">Human-Friendly View</div>
105
+ <div class="aibom-tab" onclick="switchTab('field-checklist')">Field Checklist</div>
106
+ <div class="aibom-tab" onclick="switchTab('score-view')">Score Report</div>
107
+ <div class="aibom-tab" onclick="switchTab('json-view')">JSON View</div>
108
+ </div>
109
+
110
+ <!-- Human-Friendly View Tab -->
111
+ <div id="human-view" class="tab-content active">
112
+ <div class="aibom-section">
113
+ <h4>🤖&nbsp;&nbsp;AI Model Information</h4>
114
+ <div class="aibom-property">
115
+ <span class="property-name">Name:</span>
116
+ <span class="property-value">
117
+ {{ aibom.components[0].name if aibom.components else 'Not specified' }}
118
+ </span>
119
+ </div>
120
+ <div class="aibom-property">
121
+ <span class="property-name">Type:</span>
122
+ <span class="property-value">
123
+ {{ aibom.components[0].type if aibom.components else 'Not specified' }}
124
+ </span>
125
+ </div>
126
+ <div class="aibom-property">
127
+ <span class="property-name">Version:</span>
128
+ <span class="property-value">{{ aibom.components[0].version if aibom.components else 'Not
129
+ specified' }}</span>
130
+ </div>
131
+ <div class="aibom-property">
132
+ <span class="property-name">Description:</span>
133
+ <span class="property-value">{{ aibom.components[0].description if aibom.components and
134
+ aibom.components[0].description else 'Not specified' }}</span>
135
+ </div>
136
+ <div class="aibom-property">
137
+ <span class="property-name">PURL:</span>
138
+ <span class="property-value">{{ aibom.components[0].purl if aibom.components and
139
+ aibom.components[0].purl else 'Not specified' }}</span>
140
+ </div>
141
+ {% if aibom.components and aibom.components[0].licenses %}
142
+ <div class="aibom-property">
143
+ <span class="property-name">Licenses:</span>
144
+ <span class="property-value">
145
+ {% for license in aibom.components[0].licenses %}
146
+ <span class="tag">
147
+ {% if license.license %}
148
+ {{ license.license.id if license.license.id else license.license.name }}
149
+ {% else %}
150
+ Unknown
151
+ {% endif %}
152
+ </span>
153
+ {% endfor %}
154
+ </span>
155
+ </div>
156
+ {% endif %}
157
+ </div>
158
+
159
+ {% if aibom.components and aibom.components[0].modelCard %}
160
+ <div class="aibom-section">
161
+ <h4>📊&nbsp;&nbsp;Model Card</h4>
162
+ {% if aibom.components[0].modelCard.modelParameters %}
163
+ <div class="aibom-property">
164
+ <span class="property-name">Architecture:</span>
165
+ <span class="property-value">
166
+ {{ aibom.components[0].modelCard.modelParameters.modelArchitecture if
167
+ aibom.components[0].modelCard.modelParameters.modelArchitecture else 'Not specified' }}
168
+ </span>
169
+ </div>
170
+ <div class="aibom-property">
171
+ <span class="property-name">Task:</span>
172
+ <span class="property-value">{{ aibom.components[0].modelCard.modelParameters.task if
173
+ aibom.components[0].modelCard.modelParameters.task else 'Not specified' }}</span>
174
+ </div>
175
+ {% endif %}
176
+ {% if aibom.components[0].modelCard.properties %}
177
+ <div class="aibom-property">
178
+ <span class="property-name">Additional Properties:</span>
179
+ <span class="property-value">
180
+ {% for prop in aibom.components[0].modelCard.properties %}
181
+ <span class="tag">{{ prop.name }}: {{ prop.value }}</span>
182
+ {% endfor %}
183
+ </span>
184
+ </div>
185
+ {% endif %}
186
+ {% set hp_props = [] %}
187
+ {% set quant_props = [] %}
188
+ {% if aibom.components[0].properties %}
189
+ {% for prop in aibom.components[0].properties %}
190
+ {% if prop.name.startswith('hyperparameter:') %}{% if hp_props.append(prop) %}{% endif %}{% endif %}
191
+ {% if prop.name.startswith('quantization:') %}{% if quant_props.append(prop) %}{% endif %}{% endif
192
+ %}
193
+ {% endfor %}
194
+ {% endif %}
195
+ {% if hp_props %}
196
+ <div class="aibom-property">
197
+ <span class="property-name">Hyperparameters:</span>
198
+ <span class="property-value">
199
+ {% for prop in hp_props %}
200
+ <span class="tag">{{ prop.name.split(':')[1] | replace('_', ' ') | title }}: {{ prop.value
201
+ }}</span>
202
+ {% endfor %}
203
+ </span>
204
+ </div>
205
+ {% endif %}
206
+ {% if quant_props %}
207
+ <div class="aibom-property">
208
+ <span class="property-name">Quantization:</span>
209
+ <span class="property-value">
210
+ {% for prop in quant_props %}
211
+ <span class="tag">{{ prop.name.split(':')[1] | replace('_', ' ') | title }}: {{ prop.value
212
+ }}</span>
213
+ {% endfor %}
214
+ </span>
215
+ </div>
216
+ {% endif %}
217
+ </div>
218
+ {% endif %}
219
+
220
+ {% if aibom.externalReferences %}
221
+ <div class="aibom-section">
222
+ <h4>🔗&nbsp;&nbsp;External References</h4>
223
+ {% for ref in aibom.externalReferences %}
224
+ <div class="aibom-property">
225
+ <span class="property-name">{{ ref.type|title }}:</span>
226
+ <span class="property-value"><a href="{{ ref.url }}" target="_blank">{{ ref.url }}</a></span>
227
+ </div>
228
+ {% endfor %}
229
+ </div>
230
+ {% endif %}
231
+
232
+ <div class="aibom-section">
233
+ <h4>🛠️&nbsp;&nbsp;Generation Metadata</h4>
234
+ <div class="aibom-property">
235
+ <span class="property-name">Generated by:</span>
236
+ <span class="property-value">{{ aibom.metadata.tools.components[0].name if aibom.metadata.tools
237
+ and aibom.metadata.tools.components else 'Unknown' }}</span>
238
+ </div>
239
+ <div class="aibom-property">
240
+ <span class="property-name">Timestamp:</span>
241
+ <span class="property-value">{{ aibom.metadata.timestamp }}</span>
242
+ </div>
243
+ {% if aibom.components and aibom.components[0].purl %}
244
+ <div class="aibom-property">
245
+ <span class="property-name">Component PURL:</span>
246
+ <span class="property-value"><a href="https://huggingface.co/{{ model_id }}" target="_blank">{{
247
+ aibom.components[0].purl }}</a></span>
248
+ </div>
249
+ {% elif aibom.metadata.component %}
250
+ <div class="aibom-property">
251
+ <span class="property-name">Component PURL:</span>
252
+ <span class="property-value">{{ aibom.metadata.component['bom-ref'] }}</span>
253
+ </div>
254
+ {% endif %}
255
+ </div>
256
+ </div>
257
+
258
+ <!-- Field Checklist Tab -->
259
+ <div id="field-checklist" class="tab-content">
260
+ <div class="content-section">
261
+ <h3>Field Checklist & Mapping</h3>
262
+
263
+ <!-- Field Type Legend -->
264
+ <div class="field-type-legend">
265
+ <h4>Legend</h4>
266
+ <div class="legend-item">
267
+ <span class="field-tier tier-critical"></span>
268
+ <span>Critical</span>
269
+ </div>
270
+ <div class="legend-item">
271
+ <span class="field-tier tier-important"></span>
272
+ <span>Important</span>
273
+ </div>
274
+ <div class="legend-item">
275
+ <span class="field-tier tier-supplementary"></span>
276
+ <span>Supplementary</span>
277
+ </div>
278
+ <div class="legend-item">
279
+ <strong>CDX</strong> = CycloneDX Standard
280
+ </div>
281
+ <div class="legend-item">
282
+ <strong>AI</strong> = AI-Specific Extension
283
+ </div>
284
+ </div>
285
+
286
+ <p>This breakdown outlines field categories and statuses in the AIBOM generated for model <strong><a
287
+ href="https://huggingface.co/{{ model_id }}" target="_blank">{{ model_id
288
+ }}</a></strong>, showing how each field impacts the completeness score.</p>
289
+
290
+ {% if completeness_score.field_checklist %}
291
+ <!-- Required Fields Category -->
292
+ <div class="category-table">
293
+ <h4>Required Fields Category</h4>
294
+ <table>
295
+ <thead>
296
+ <tr>
297
+ <th>Status</th>
298
+ <th>Field Name</th>
299
+ <th>Actual Location</th>
300
+ <th>Tier</th>
301
+ <th>Type</th>
302
+ </tr>
303
+ </thead>
304
+ <tbody>
305
+ {% set required_fields = ['bomFormat', 'specVersion', 'serialNumber', 'version'] %}
306
+ {% for field in required_fields %}
307
+ <tr>
308
+ <td>
309
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
310
+ <span class="check-mark">✔</span>
311
+ {% else %}
312
+ <span class="x-mark">✘</span>
313
+ {% endif %}
314
+ </td>
315
+ <td>{{ field }}</td>
316
+ <td>
317
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
318
+ $.{{ field }}
319
+ {% else %}
320
+ Not found
321
+ {% endif %}
322
+ </td>
323
+ <td><span class="field-tier tier-critical"></span> Critical</td>
324
+ <td>
325
+ {% set f_type = completeness_score.field_types.get(field, 'Unknown') %}
326
+ {% set f_url = completeness_score.reference_urls.get(field, '') if
327
+ completeness_score.reference_urls else '' %}
328
+ {% if f_url %}
329
+ <a href="{{ f_url }}" target="_blank">{{ f_type }}</a>
330
+ {% else %}
331
+ {{ f_type }}
332
+ {% endif %}
333
+ </td>
334
+ </tr>
335
+ {% endfor %}
336
+ </tbody>
337
+ </table>
338
+ <div class="category-result">
339
+ Result: {{ completeness_score.category_details.required_fields.present_fields if
340
+ completeness_score.category_details else 'N/A' }}/{{
341
+ completeness_score.category_details.required_fields.total_fields if
342
+ completeness_score.category_details else 'N/A' }} present
343
+ ({{ completeness_score.category_details.required_fields.percentage if
344
+ completeness_score.category_details else 'N/A' }}%) =
345
+ {{ completeness_score.section_scores.required_fields if completeness_score.section_scores
346
+ else 'N/A' }}/20 points
347
+ </div>
348
+ </div>
349
+
350
+ <!-- Metadata Category -->
351
+ <div class="category-table">
352
+ <h4>Metadata Category</h4>
353
+ <table>
354
+ <thead>
355
+ <tr>
356
+ <th>Status</th>
357
+ <th>Field Name</th>
358
+ <th>Actual Location</th>
359
+ <th>Tier</th>
360
+ <th>Type</th>
361
+ </tr>
362
+ </thead>
363
+ <tbody>
364
+ {% set metadata_fields = [
365
+ ('primaryPurpose', 'Critical'),
366
+ ('suppliedBy', 'Critical'),
367
+ ('standardCompliance', 'Supplementary'),
368
+ ('domain', 'Supplementary'),
369
+ ('autonomyType', 'Supplementary')
370
+ ] %}
371
+ {% for field, tier in metadata_fields %}
372
+ <tr>
373
+ <td>
374
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
375
+ <span class="check-mark">✔</span>
376
+ {% else %}
377
+ <span class="x-mark">✘</span>
378
+ {% endif %}
379
+ </td>
380
+ <td>{{ field }}</td>
381
+ <td>
382
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
383
+ {% if field == 'primaryPurpose' %}
384
+ $.components[0].modelCard.modelParameters.task
385
+ {% elif field == 'suppliedBy' %}
386
+ $.components[0].supplier.name
387
+ {% else %}
388
+ $.components[0].modelCard.properties[name="{{ field }}"]
389
+ {% endif %}
390
+ {% else %}
391
+ Not found
392
+ {% endif %}
393
+ </td>
394
+ <td><span class="field-tier tier-{{ tier|lower }}"></span> {{ tier }}</td>
395
+ <td>
396
+ {% set f_type = completeness_score.field_types.get(field, 'Unknown') %}
397
+ {% set f_url = completeness_score.reference_urls.get(field, '') if
398
+ completeness_score.reference_urls else '' %}
399
+ {% if f_url %}
400
+ <a href="{{ f_url }}" target="_blank">{{ f_type }}</a>
401
+ {% else %}
402
+ {{ f_type }}
403
+ {% endif %}
404
+ </td>
405
+ </tr>
406
+ {% endfor %}
407
+ </tbody>
408
+ </table>
409
+ <div class="category-result">
410
+ Result: {{ completeness_score.category_details.metadata.present_fields if
411
+ completeness_score.category_details else 'N/A' }}/{{
412
+ completeness_score.category_details.metadata.total_fields if
413
+ completeness_score.category_details else 'N/A' }} present
414
+ ({{ completeness_score.category_details.metadata.percentage if
415
+ completeness_score.category_details else 'N/A' }}%) =
416
+ {{ completeness_score.section_scores.metadata if completeness_score.section_scores else
417
+ 'N/A' }}/20 points
418
+ </div>
419
+ </div>
420
+
421
+ <!-- Component Basic Category -->
422
+ <div class="category-table">
423
+ <h4>Component Basic Category</h4>
424
+ <table>
425
+ <thead>
426
+ <tr>
427
+ <th>Status</th>
428
+ <th>Field Name</th>
429
+ <th>Actual Location</th>
430
+ <th>Tier</th>
431
+ <th>Type</th>
432
+ </tr>
433
+ </thead>
434
+ <tbody>
435
+ {% set component_basic_fields = [
436
+ ('name', 'Critical'),
437
+ ('type', 'Critical'),
438
+ ('component_version', 'Critical'),
439
+ ('purl', 'Important'),
440
+ ('description', 'Important'),
441
+ ('licenses', 'Important')
442
+ ] %}
443
+ {% for field, tier in component_basic_fields %}
444
+ <tr>
445
+ <td>
446
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
447
+ <span class="check-mark">✔</span>
448
+ {% else %}
449
+ <span class="x-mark">✘</span>
450
+ {% endif %}
451
+ </td>
452
+ <td>{% if field == 'component_version' %}version{% else %}{{ field }}{% endif %}
453
+ </td>
454
+ <td>
455
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
456
+ {% if field == 'component_version' %}
457
+ $.components[0].version
458
+ {% else %}
459
+ $.components[0].{{ field }}
460
+ {% endif %}
461
+ {% else %}
462
+ {% if field == 'description' %}
463
+ Not found in component level
464
+ {% else %}
465
+ Not found
466
+ {% endif %}
467
+ {% endif %}
468
+ </td>
469
+ <td><span class="field-tier tier-{{ tier|lower }}"></span> {{ tier }}</td>
470
+ <td>
471
+ {% set f_type = completeness_score.field_types.get(field, 'Unknown') %}
472
+ {% set f_url = completeness_score.reference_urls.get(field, '') if
473
+ completeness_score.reference_urls else '' %}
474
+ {% if f_url %}
475
+ <a href="{{ f_url }}" target="_blank">{{ f_type }}</a>
476
+ {% else %}
477
+ {{ f_type }}
478
+ {% endif %}
479
+ </td>
480
+ </tr>
481
+ {% endfor %}
482
+ </tbody>
483
+ </table>
484
+ <div class="category-result">
485
+ Result: {{ completeness_score.category_details.component_basic.present_fields if
486
+ completeness_score.category_details else 'N/A' }}/{{
487
+ completeness_score.category_details.component_basic.total_fields if
488
+ completeness_score.category_details else 'N/A' }} present
489
+ ({{ completeness_score.category_details.component_basic.percentage if
490
+ completeness_score.category_details else 'N/A' }}%) =
491
+ {{ completeness_score.section_scores.component_basic if completeness_score.section_scores
492
+ else 'N/A' }}/20 points
493
+ </div>
494
+ </div>
495
+
496
+ <!-- Component Model Card Category -->
497
+ <div class="category-table">
498
+ <h4>Component Model Card Category</h4>
499
+ <table>
500
+ <thead>
501
+ <tr>
502
+ <th>Status</th>
503
+ <th>Field Name</th>
504
+ <th>Actual Location</th>
505
+ <th>Tier</th>
506
+ <th>Type</th>
507
+ </tr>
508
+ </thead>
509
+ <tbody>
510
+ {% set model_card_fields = completeness_score.category_fields_list.component_model_card
511
+ if completeness_score and completeness_score.category_fields_list else [] %}
512
+ {% for field_item in model_card_fields %}
513
+ {% set field = field_item.name %}
514
+ {% set tier = field_item.tier %}
515
+ <tr>
516
+ <td>
517
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
518
+ <span class="check-mark">✔</span>
519
+ {% else %}
520
+ <span class="x-mark">✘</span>
521
+ {% endif %}
522
+ </td>
523
+ <td>{{ field }}</td>
524
+ <td>
525
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
526
+ {{ field_item.path }}
527
+ {% else %}
528
+ Not found
529
+ {% endif %}
530
+ </td>
531
+ <td><span class="field-tier tier-{{ tier|lower }}"></span> {{ tier }}</td>
532
+ <td>
533
+ {% set f_type = completeness_score.field_types.get(field, 'Unknown') %}
534
+ {% set f_url = completeness_score.reference_urls.get(field, '') if
535
+ completeness_score.reference_urls else '' %}
536
+ {% if f_url %}
537
+ <a href="{{ f_url }}" target="_blank">{{ f_type }}</a>
538
+ {% else %}
539
+ {{ f_type }}
540
+ {% endif %}
541
+ </td>
542
+ </tr>
543
+ {% endfor %}
544
+ </tbody>
545
+ </table>
546
+ <div class="category-result">
547
+ Result: {{ completeness_score.category_details.component_model_card.present_fields if
548
+ completeness_score.category_details else 'N/A' }}/{{
549
+ completeness_score.category_details.component_model_card.total_fields if
550
+ completeness_score.category_details else 'N/A' }} present
551
+ ({{ completeness_score.category_details.component_model_card.percentage if
552
+ completeness_score.category_details else 'N/A' }}%) =
553
+ {{ completeness_score.section_scores.component_model_card if
554
+ completeness_score.section_scores else 'N/A' }}/30 points
555
+ </div>
556
+ </div>
557
+
558
+ <!-- External References Category -->
559
+ <div class="category-table">
560
+ <h4>External References Category</h4>
561
+ <table>
562
+ <thead>
563
+ <tr>
564
+ <th>Status</th>
565
+ <th>Field Name</th>
566
+ <th>Actual Location</th>
567
+ <th>Tier</th>
568
+ <th>Type</th>
569
+ </tr>
570
+ </thead>
571
+ <tbody>
572
+ {% set external_ref_fields = completeness_score.category_fields_list.external_references
573
+ if completeness_score and completeness_score.category_fields_list else [] %}
574
+ {% for field_item in external_ref_fields %}
575
+ {% set field = field_item.name %}
576
+ {% set tier = field_item.tier %}
577
+ <tr>
578
+ <td>
579
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
580
+ <span class="check-mark">✔</span>
581
+ {% else %}
582
+ <span class="x-mark">✘</span>
583
+ {% endif %}
584
+ </td>
585
+ <td>{{ field }}</td>
586
+ <td>
587
+ {% if completeness_score.field_checklist.get(field, '').startswith('✔') %}
588
+ {{ field_item.path }}
589
+ {% else %}
590
+ Not found
591
+ {% endif %}
592
+ </td>
593
+ <td><span class="field-tier tier-{{ tier|lower }}"></span> {{ tier }}</td>
594
+ <td>
595
+ {% set f_type = completeness_score.field_types.get(field, 'Unknown') %}
596
+ {% set f_url = completeness_score.reference_urls.get(field, '') if
597
+ completeness_score.reference_urls else '' %}
598
+ {% if f_url %}
599
+ <a href="{{ f_url }}" target="_blank">{{ f_type }}</a>
600
+ {% else %}
601
+ {{ f_type }}
602
+ {% endif %}
603
+ </td>
604
+ </tr>
605
+ {% endfor %}
606
+ </tbody>
607
+ </table>
608
+ <div class="category-result">
609
+ Result: {{ completeness_score.category_details.external_references.present_fields if
610
+ completeness_score.category_details else 'N/A' }}/{{
611
+ completeness_score.category_details.external_references.total_fields if
612
+ completeness_score.category_details else 'N/A' }} present
613
+ ({{ completeness_score.category_details.external_references.percentage if
614
+ completeness_score.category_details else 'N/A' }}%) =
615
+ {{ completeness_score.section_scores.external_references if
616
+ completeness_score.section_scores else 'N/A' }}/10 points
617
+ </div>
618
+ </div>
619
+
620
+ {% else %}
621
+ <p>Field checklist data not available.</p>
622
+ {% endif %}
623
+ </div>
624
+ </div>
625
+
626
+ <!-- Score Report Tab -->
627
+ <div id="score-view" class="tab-content">
628
+ <div class="content-section">
629
+ <h3>📊&nbsp;&nbsp;Completeness Score Report</h3>
630
+
631
+ <!-- Total Score Display -->
632
+ <div class="total-score-container">
633
+ <div class="total-score">{{ (completeness_score.total_score if completeness_score.total_score !=
634
+ "Undefined" else 0)|round(1) }}/100</div>
635
+ <div class="total-progress">
636
+ <div class="progress-container">
637
+ <div class="progress-bar {{ score_class }}"
638
+ style="width: {{ score_percent|round|int }}%">
639
+ {{ score_percent|int }}% {{ score_label }}
640
+ </div>
641
+ </div>
642
+ </div>
643
+ </div>
644
+
645
+
646
+
647
+ <!-- Specific Breakdown for This SBOM -->
648
+ <div class="note-box">
649
+ <h4>Your AIBOM Breakdown</h4>
650
+ <p><strong>Model:</strong> <a href="https://huggingface.co/{{ model_id }}" target="_blank">{{
651
+ model_id }}</a></p>
652
+
653
+ <table class="score-table">
654
+ <thead>
655
+ <tr>
656
+ <th>Category</th>
657
+ <th>Fields Present</th>
658
+ <th>Score</th>
659
+ <th>Progress</th>
660
+ </tr>
661
+ </thead>
662
+ <tbody>
663
+ {% if completeness_score.category_details and completeness_score.section_scores %}
664
+ {% set categories = [
665
+ ('Required Fields', 'required_fields', 20),
666
+ ('Metadata', 'metadata', 20),
667
+ ('Component Basic', 'component_basic', 20),
668
+ ('Model Card', 'component_model_card', 30),
669
+ ('External References', 'external_references', 10)
670
+ ] %}
671
+ {% for display_name, key, max_score in categories %}
672
+ <tr>
673
+ <td>{{ display_name }}</td>
674
+ <td>{{ completeness_score.category_details[key].present_fields }}/{{
675
+ completeness_score.category_details[key].total_fields }}</td>
676
+ <td>{{ completeness_score.section_scores[key]|round(1) }}/{{ max_score }}</td>
677
+ <td>
678
+ <div class="progress-container">
679
+ {% set percentage = completeness_score.category_details[key].percentage %}
680
+ {% if percentage >= 80 %}
681
+ {% set progress_class = "progress-excellent" %}
682
+ {% elif percentage >= 60 %}
683
+ {% set progress_class = "progress-good" %}
684
+ {% elif percentage >= 40 %}
685
+ {% set progress_class = "progress-fair" %}
686
+ {% else %}
687
+ {% set progress_class = "progress-poor" %}
688
+ {% endif %}
689
+ <div class="progress-bar {{ progress_class }}"
690
+ style="width: {{ percentage|round|int }}%">{{ percentage|round|int }}%
691
+ </div>
692
+ </div>
693
+ </td>
694
+ </tr>
695
+ {% endfor %}
696
+ {% else %}
697
+ <tr>
698
+ <td colspan="4">Breakdown data not available</td>
699
+ </tr>
700
+ {% endif %}
701
+ </tbody>
702
+ </table>
703
+ </div>
704
+
705
+ <p><strong>Calculation:</strong></p>
706
+ <p>Subtotal:
707
+ {% if completeness_score.section_scores %}
708
+ {% for category, score in completeness_score.section_scores.items() %}
709
+ {{ score|round(1) }}{% if not loop.last %} + {% endif %}
710
+ {% endfor %}
711
+ = <strong>{{ completeness_score.subtotal_score|round(1) }}/100</strong>
712
+ {% else %}
713
+ <strong>{{ completeness_score.subtotal_score|round(1) }}/100</strong>
714
+ {% endif %}
715
+ </p>
716
+
717
+ {% if completeness_score.penalty_applied %}
718
+ <p>Penalty Applied: <strong>-{{ completeness_score.penalty_percentage }}%</strong> ({{
719
+ completeness_score.penalty_reason }})</p>
720
+ <p>Final Score: {{ completeness_score.subtotal_score|round(1) }} × {{
721
+ completeness_score.penalty_factor }} = <strong>{{ completeness_score.total_score|round(1)
722
+ }}/100</strong></p>
723
+ {% else %}
724
+ <p>No penalties applied</p>
725
+ <p>Final Score: <strong>{{ completeness_score.total_score|round(1) }}/100</strong></p>
726
+ {% endif %}
727
+ </div>
728
+
729
+ <!-- Missing Fields Analysis -->
730
+ {% if completeness_score.missing_counts %}
731
+ <div class="missing-fields">
732
+ <h4>Missing Fields Summary</h4>
733
+ <ul>
734
+ <li><strong>Critical:</strong> {{ completeness_score.missing_counts.critical }} missing</li>
735
+ <li><strong>Important:</strong> {{ completeness_score.missing_counts.important }} missing</li>
736
+ <li><strong>Supplementary:</strong> {{ completeness_score.missing_counts.supplementary }}
737
+ missing</li>
738
+ </ul>
739
+
740
+ {% if completeness_score.missing_counts.important >= 5 %}
741
+ <p><strong>Impact:</strong> Missing multiple critical and/or important fields will incur penalties
742
+ according to the Penalty Structure.</p>
743
+ {% endif %}
744
+ </div>
745
+ {% endif %}
746
+
747
+ <!-- Recommendations -->
748
+ {% if completeness_score.recommendations %}
749
+ <div class="recommendations">
750
+ <h4>General Recommendations to Improve AIBOM Completeness</h4>
751
+ <ul>
752
+ <li><strong>Required Fields:</strong> Ensure the model is published with a clear name, version,
753
+ and hosting platform information to allow proper SBOM structuring.</li>
754
+ <li><strong>Metadata:</strong> Include author or organization name, purpose of the model, and
755
+ relevant timestamps in the model repository or card.</li>
756
+ <li><strong>Component Basic:</strong> Provide a descriptive model title, a meaningful
757
+ description, a valid license, and a consistent version reference (e.g., tags or commits).
758
+ </li>
759
+ <li><strong>Model Card:</strong> Fill out structured sections for model parameters, evaluation
760
+ metrics, limitations, and ethical considerations to enable full transparency.</li>
761
+ <li><strong>External References:</strong> Add links to source code, datasets, documentation, and
762
+ versioned download locations to support traceability and reproducibility.</li>
763
+ </ul>
764
+ </div>
765
+
766
+ <!-- Generic Scoring Explanation -->
767
+ <div class="scoring-rubric">
768
+ <h4>How AIBOM Completeness is Scored</h4>
769
+ <p>The completeness score evaluates how well your AIBOM documents the model across five key
770
+ categories:</p>
771
+ <ul>
772
+ <li><strong>Required Fields ({{ completeness_score.category_details.required_fields.max_points
773
+ if completeness_score.category_details else 'N/A' }} points):</strong> Basic SBOM
774
+ structure mandated by CycloneDX
775
+ </li>
776
+ <li><strong>Metadata ({{ completeness_score.category_details.metadata.max_points if
777
+ completeness_score.category_details else 'N/A' }} points):</strong> Information about
778
+ the SBOM generation and model
779
+ purpose</li>
780
+ <li><strong>Component Basic ({{ completeness_score.category_details.component_basic.max_points
781
+ if completeness_score.category_details else 'N/A' }} points):</strong> Essential model
782
+ identification and licensing
783
+ </li>
784
+ <li><strong>Model Card ({{ completeness_score.category_details.component_model_card.max_points
785
+ if completeness_score.category_details else 'N/A' }} points):</strong> Detailed
786
+ AI-specific documentation for transparency
787
+ </li>
788
+ <li><strong>External References ({{
789
+ completeness_score.category_details.external_references.max_points if
790
+ completeness_score.category_details else 'N/A' }} points):</strong> Links to model
791
+ resources and documentation
792
+ </li>
793
+ </ul>
794
+
795
+ <p><strong>Calculation Method:</strong></p>
796
+ <p>Each category score = (Present Fields ÷ Total Fields) × Maximum Points</p>
797
+ <p>Subtotal = Sum of all category scores</p>
798
+ <p>Final Score = Subtotal × Penalty Factor (if applicable)</p>
799
+ <h4>Penalty Structure:</h4>
800
+ <p><strong>Critical Fields Missing:</strong></p>
801
+ <ul>
802
+ <li>0-1 missing: No penalty</li>
803
+ <li>2-3 missing: 10% penalty (×0.9)</li>
804
+ <li>4+ missing: 20% penalty (×0.8)</li>
805
+ </ul>
806
+
807
+ <p><strong>Important Fields Missing:</strong></p>
808
+ <ul>
809
+ <li>0-4 missing: No penalty</li>
810
+ <li>5+ missing: 5% penalty (×0.95)</li>
811
+ </ul>
812
+
813
+ <p><strong>Note:</strong> Penalties are cumulative and applied to the subtotal. For example, if you
814
+ have 3 critical fields missing AND 5 important fields missing, both penalties apply: Subtotal ×
815
+ 0.9 × 0.95 = Final Score.</p>
816
+
817
+ </div>
818
+ {% endif %}
819
+ </div>
820
+ </div>
821
+
822
+ <!-- JSON View Tab -->
823
+ <div id="json-view" class="tab-content">
824
+ <div class="content-section">
825
+ <h3>📄&nbsp;&nbsp;Raw JSON View</h3>
826
+ <p>This is the complete AIBOM components array in CycloneDX JSON format:</p>
827
+ <div class="json-view">
828
+ <pre>{{ components_json }}</pre>
829
+ </div>
830
+ </div>
831
+ </div>
832
+
833
+ <!-- Modern Footer -->
834
+ {% include 'includes/footer.html' %}
835
+ </div>
836
+
837
+ <script>
838
+ const AIBOM_CDX_JSON_1_6 = {{ aibom_cdx_json_1_6 | safe }};
839
+ const AIBOM_CDX_JSON_1_7 = {{ aibom_cdx_json_1_7 | safe }};
840
+ const FILENAME_BASE = "{{ model_id|replace('/', '_') }}";
841
+ </script>
842
+ <script src="{{ static_root|default('/static') }}/js/script.js"></script>
843
+ </body>
844
+
845
+ </html>
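The scoring rubric rendered in the template above (per-category points, cumulative penalty factors) can be sketched in Python. This is an illustrative reconstruction from the rubric text only, not the service's actual scoring code; all names here are hypothetical.

```python
# Category weights as stated in the template's rubric (illustrative).
CATEGORY_MAX = {
    "required_fields": 20,
    "metadata": 20,
    "component_basic": 20,
    "component_model_card": 30,
    "external_references": 10,
}

def category_score(present: int, total: int, max_points: int) -> float:
    """Each category score = (present fields / total fields) * max points."""
    return (present / total) * max_points if total else 0.0

def penalty_factor(missing_critical: int, missing_important: int) -> float:
    """Penalties are cumulative and applied to the subtotal:
    2-3 critical missing -> x0.9, 4+ -> x0.8; 5+ important missing -> x0.95."""
    factor = 1.0
    if missing_critical >= 4:
        factor *= 0.8
    elif missing_critical >= 2:
        factor *= 0.9
    if missing_important >= 5:
        factor *= 0.95
    return factor

def total_score(counts: dict, missing_critical: int, missing_important: int) -> float:
    """counts maps category key -> (present, total). Returns final 0-100 score."""
    subtotal = sum(
        category_score(p, t, CATEGORY_MAX[key]) for key, (p, t) in counts.items()
    )
    return subtotal * penalty_factor(missing_critical, missing_important)
```

For example, a fully populated AIBOM with 2 critical fields missing would score `100 × 0.9 = 90`, matching the "Subtotal × Penalty Factor" formula shown in the Score Report tab.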
src/utils/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .rate_limiting import RateLimitMiddleware, ConcurrencyLimitMiddleware, RequestSizeLimitMiddleware
2
+ from .captcha import verify_recaptcha
3
+ from .cleanup_utils import perform_cleanup
src/utils/analytics.py ADDED
@@ -0,0 +1,76 @@
+ import logging
+ from datetime import datetime
+ from datasets import Dataset, load_dataset, concatenate_datasets
+ from ..config import HF_REPO, HF_TOKEN
+
+ logger = logging.getLogger(__name__)
+
+ def log_sbom_generation(model_id: str):
+     """Logs a successful SBOM generation event to the Hugging Face dataset."""
+     if not HF_TOKEN:
+         logger.warning("HF_TOKEN not set. Skipping SBOM generation logging.")
+         return
+
+     try:
+         import asyncio
+         from concurrent.futures import ThreadPoolExecutor
+
+         # Define the synchronous task
+         def _push_log():
+             try:
+                 log_data = {
+                     "timestamp": [datetime.utcnow().isoformat()],
+                     "event": ["generated"],
+                     "model_id": [model_id]
+                 }
+                 ds_new_log = Dataset.from_dict(log_data)
+
+                 # Appending requires loading the existing dataset, which is heavy;
+                 # catch errors so the main thread never crashes.
+                 try:
+                     existing_ds = load_dataset(HF_REPO, token=HF_TOKEN, split='train', trust_remote_code=True)
+                     if len(existing_ds) > 0:
+                         ds_to_push = concatenate_datasets([existing_ds, ds_new_log])
+                     else:
+                         ds_to_push = ds_new_log
+                 except Exception as load_err:
+                     logger.info(f"Could not load existing dataset: {load_err}. Creating new.")
+                     ds_to_push = ds_new_log
+
+                 ds_to_push.push_to_hub(HF_REPO, token=HF_TOKEN, private=True)
+                 logger.info(f"Successfully logged SBOM generation for {model_id}")
+             except Exception as e:
+                 logger.error(f"Background analytics failed: {e}")
+
+         # Fire and forget: use the running event loop's executor if available,
+         # otherwise fall back to a dedicated thread (e.g. for CLI usage).
+         loop = None
+         try:
+             loop = asyncio.get_running_loop()
+         except RuntimeError:
+             pass
+
+         if loop and loop.is_running():
+             loop.run_in_executor(None, _push_log)
+         else:
+             ThreadPoolExecutor(max_workers=1).submit(_push_log)
+
+     except Exception as e:
+         logger.error(f"Failed to initiate analytics logging: {e}")
+
+ def get_sbom_count() -> str:
+     """Retrieves the total count of generated SBOMs."""
+     if not HF_TOKEN:
+         return "N/A"
+     try:
+         ds = load_dataset(HF_REPO, token=HF_TOKEN, split='train', trust_remote_code=True)
+         return f"{len(ds):,}"
+     except Exception as e:
+         logger.error(f"Failed to retrieve SBOM count: {e}")
+         return "N/A"
src/utils/captcha.py ADDED
@@ -0,0 +1,55 @@
+ import os
+ import requests
+ import logging
+ from typing import Optional
+
+ logger = logging.getLogger(__name__)
+
+ def verify_recaptcha(response_token: Optional[str]) -> bool:
+     if response_token:
+         logger.info(f"Starting reCAPTCHA verification with token: {response_token[:10]}...")
+     else:
+         logger.info("Starting reCAPTCHA verification with token: None")
+
+     # Check if the secret key is set
+     secret_key = os.environ.get("RECAPTCHA_SECRET_KEY")
+     if not secret_key:
+         logger.warning("RECAPTCHA_SECRET_KEY not set, bypassing verification")
+         return True
+     logger.info("RECAPTCHA_SECRET_KEY is set (not shown for security)")
+
+     # If no token was provided, verification fails
+     if not response_token:
+         logger.warning("No reCAPTCHA response token provided")
+         return False
+
+     try:
+         logger.info("Sending verification request to Google reCAPTCHA API")
+         verification_response = requests.post(
+             "https://www.google.com/recaptcha/api/siteverify",
+             data={
+                 "secret": secret_key,
+                 "response": response_token
+             },
+             timeout=10  # avoid hanging the request thread on network issues
+         )
+
+         result = verification_response.json()
+         logger.info(f"reCAPTCHA verification result: {result}")
+
+         if result.get("success"):
+             logger.info("reCAPTCHA verification successful")
+             return True
+         logger.warning(f"reCAPTCHA verification failed: {result.get('error-codes', [])}")
+         return False
+     except Exception as e:
+         logger.error(f"Error verifying reCAPTCHA: {str(e)}")
+         return False
src/utils/cleanup_utils.py ADDED
@@ -0,0 +1,86 @@
+ import os
+ import logging
+ from datetime import datetime, timedelta
+
+ logger = logging.getLogger(__name__)
+
+ def cleanup_old_files(directory, max_age_days=7):
+     """
+     Remove files older than max_age_days from the specified directory.
+     Optimized to use os.scandir for better performance.
+     """
+     if not os.path.exists(directory):
+         logger.warning(f"Directory does not exist: {directory}")
+         return 0
+
+     removed_count = 0
+     now = datetime.now()
+     cutoff_time = now - timedelta(days=max_age_days)
+
+     try:
+         with os.scandir(directory) as entries:
+             for entry in entries:
+                 if entry.is_file():
+                     try:
+                         # entry.stat().st_mtime is faster than os.path.getmtime
+                         file_mtime = datetime.fromtimestamp(entry.stat().st_mtime)
+                         if file_mtime < cutoff_time:
+                             os.remove(entry.path)
+                             removed_count += 1
+                             logger.info(f"Removed old file: {entry.path}")
+                     except OSError as e:
+                         logger.error(f"Error accessing/removing file {entry.path}: {e}")
+
+         if removed_count > 0:
+             logger.info(f"Cleanup completed: removed {removed_count} files older than {max_age_days} days from {directory}")
+         return removed_count
+     except Exception as e:
+         logger.error(f"Error during cleanup of directory {directory}: {e}")
+         return 0
+
+ def limit_file_count(directory, max_files=1000):
+     """
+     Ensure no more than max_files are kept in the directory (removes oldest first).
+     Optimized to use os.scandir.
+     """
+     if not os.path.exists(directory):
+         logger.warning(f"Directory does not exist: {directory}")
+         return 0
+
+     try:
+         files = []
+         with os.scandir(directory) as entries:
+             for entry in entries:
+                 if entry.is_file():
+                     files.append((entry.path, entry.stat().st_mtime))
+
+         # If we are within limits, return early
+         if len(files) <= max_files:
+             return 0
+
+         # Sort by modification time (oldest first)
+         files.sort(key=lambda x: x[1])
+
+         # Remove the oldest files that exceed the limit
+         files_to_remove = files[:-max_files]
+         removed_count = 0
+
+         for file_path, _ in files_to_remove:
+             try:
+                 os.remove(file_path)
+                 removed_count += 1
+                 logger.info(f"Removed excess file: {file_path}")
+             except OSError as e:
+                 logger.error(f"Error removing file {file_path}: {e}")
+
+         logger.info(f"File count limit enforced: removed {removed_count} oldest files, keeping max {max_files}")
+         return removed_count
+     except Exception as e:
+         logger.error(f"Error during file count limiting in directory {directory}: {e}")
+         return 0
+
+ def perform_cleanup(directory, max_age_days=7, max_files=1000):
+     """Perform both time-based and count-based cleanup."""
+     time_removed = cleanup_old_files(directory, max_age_days)
+     count_removed = limit_file_count(directory, max_files)
+     return time_removed + count_removed
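The age-based selection rule in `cleanup_old_files` can be checked by back-dating a file's mtime with `os.utime`. A minimal sketch of the same cutoff comparison, using raw timestamps instead of `datetime` for brevity:

```python
import os
import tempfile
import time

# Create two files and age one of them by back-dating its mtime to 10 days ago.
tmp = tempfile.mkdtemp()
for name in ("fresh.txt", "old.txt"):
    open(os.path.join(tmp, name), "w").close()
old_path = os.path.join(tmp, "old.txt")
ten_days_ago = time.time() - 10 * 86400
os.utime(old_path, (ten_days_ago, ten_days_ago))

# Same selection rule as cleanup_old_files: remove entries older than 7 days.
cutoff = time.time() - 7 * 86400
removed = []
with os.scandir(tmp) as entries:
    for entry in entries:
        if entry.is_file() and entry.stat().st_mtime < cutoff:
            os.remove(entry.path)
            removed.append(entry.name)
```

Only the back-dated file falls past the cutoff; the fresh one survives.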
src/utils/formatter.py ADDED
@@ -0,0 +1,25 @@
+ import json
+ import copy
+ from typing import Dict, Any
+
+ def export_aibom(aibom: Dict[str, Any], bom_type: str = "cyclonedx", spec_version: str = "1.6") -> str:
+     """
+     Exports the internal AIBOM object into a specified format and specification version.
+     Returns the generated SBOM as a formatted JSON string.
+     """
+     # Create a deep copy to avoid modifying the original unified object
+     output = copy.deepcopy(aibom)
+
+     if bom_type.lower() == "cyclonedx":
+         output["bomFormat"] = "CycloneDX"
+         output["specVersion"] = spec_version
+         # Any specific CycloneDX mappings or adjustments can be placed here over time.
+     elif bom_type.lower() == "spdx":
+         # Placeholder: SPDX-to-AIBOM mapping logic isn't built yet; this branch
+         # serves as the routing hook for future SPDX generation.
+         output["bomFormat"] = "SPDX"
+         output["specVersion"] = spec_version
+
+     return json.dumps(output, indent=2)
src/utils/license_utils.py ADDED
@@ -0,0 +1,129 @@
+ """
+ License utility functions for normalising and verifying SPDX license IDs.
+ """
+ import logging
+ from typing import Optional, Dict
+
+ logger = logging.getLogger(__name__)
+
+ # Common mapping of license names or incomplete IDs to generic URLs or valid SPDX
+ LICENSE_URLS: Dict[str, str] = {
+     "Apache-2.0": "https://www.apache.org/licenses/LICENSE-2.0.txt",
+     "MIT": "https://opensource.org/licenses/MIT",
+     "BSD-3-Clause": "https://opensource.org/licenses/BSD-3-Clause",
+     "BSD-2-Clause": "https://opensource.org/licenses/BSD-2-Clause",
+     "GPL-3.0-only": "https://www.gnu.org/licenses/gpl-3.0.txt",
+     "GPL-2.0-only": "https://www.gnu.org/licenses/gpl-2.0.txt",
+     "LGPL-3.0-only": "https://www.gnu.org/licenses/lgpl-3.0.txt",
+     "CC-BY-4.0": "https://creativecommons.org/licenses/by/4.0/legalcode",
+     "CC-BY-SA-4.0": "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
+     "CC-BY-NC-4.0": "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
+     "CC-BY-ND-4.0": "https://creativecommons.org/licenses/by-nd/4.0/legalcode",
+     "CC-BY-NC-SA-4.0": "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
+     "CC-BY-NC-ND-4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
+     "CC0-1.0": "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
+     "MPL-2.0": "https://www.mozilla.org/en-US/MPL/2.0/",
+     "Unlicense": "https://unlicense.org/",
+     "nvidia-open-model-license": "https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/",
+ }
+
+ # Mapping common variations to valid SPDX IDs
+ LICENSE_MAPPING: Dict[str, str] = {
+     "apache license 2.0": "Apache-2.0",
+     "apache-2.0": "Apache-2.0",
+     "mit": "MIT",
+     "mit license": "MIT",
+     "bsd-3-clause": "BSD-3-Clause",
+     "cc-by-4.0": "CC-BY-4.0",
+     "cc-by-nc-4.0": "CC-BY-NC-4.0",
+     "cc0-1.0": "CC0-1.0",
+     "gpl-3.0": "GPL-3.0-only",
+     "nvidia open model license agreement": "nvidia-open-model-license",
+     # Add more as needed
+ }
+
+ def normalize_license_id(license_id: str) -> Optional[str]:
+     """
+     Normalize a license string to a valid SPDX ID if possible.
+     Returns None if no clear mapping is found.
+     """
+     if not license_id:
+         return None
+
+     # Exact match in our known list
+     if license_id in LICENSE_URLS:
+         return license_id
+
+     lower_id = license_id.lower()
+
+     # Check the mapping of common variations
+     if lower_id in LICENSE_MAPPING:
+         return LICENSE_MAPPING[lower_id]
+
+     # Case-insensitive match against the known list
+     for valid_id in LICENSE_URLS:
+         if valid_id.lower() == lower_id:
+             return valid_id
+
+     # Simple heuristic: if it looks like an ID (no spaces, reasonably short),
+     # return it as-is and rely on validation warnings downstream.
+     if " " not in license_id and len(license_id) < 50:
+         return license_id
+
+     return None
+
+ def get_license_url(license_id: str, fallback: bool = True) -> Optional[str]:
+     """Get the URL for a license based on its ID.
+     If fallback is False, returns None when the ID is not in the known list.
+     """
+     if license_id in LICENSE_URLS:
+         return LICENSE_URLS[license_id]
+
+     # Case-insensitive fallback
+     lower_id = license_id.lower()
+     for valid_id, url in LICENSE_URLS.items():
+         if valid_id.lower() == lower_id:
+             return url
+
+     return f"https://spdx.org/licenses/{license_id}.html" if fallback else None
+
+ # Cached licensing instance from the license-expression library
+ _licensing = None
+
+ def is_valid_spdx_license_id(license_id: str) -> bool:
+     """Check if the license ID is a valid, simple SPDX ID (no AND/OR/WITH)."""
+     global _licensing
+     try:
+         from license_expression import get_spdx_licensing
+         if _licensing is None:
+             _licensing = get_spdx_licensing()
+
+         # Must be a valid SPDX expression...
+         res = _licensing.validate(license_id)
+         if len(res.errors) > 0:
+             return False
+
+         # ...and a single license, not a compound expression. The CycloneDX
+         # 'id' field only accepts a plain SPDX ID (e.g. "MIT"); compound
+         # expressions ("MIT OR Apache-2.0") belong in the separate
+         # 'expression' field. A simple license parses to a LicenseSymbol
+         # (which has a 'key' and no 'children'), whereas AND/OR/WITH parse
+         # to expression nodes with children.
+         parsed = _licensing.parse(license_id)
+         return hasattr(parsed, "key") and not hasattr(parsed, "children")
+     except ImportError:
+         logger.warning("license-expression library not found, skipping validation")
+         return True
+     except Exception as e:
+         logger.debug(f"License validation error: {e}")
+         return False
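The lookup order in `normalize_license_id` (exact match, lowercase mapping, case-insensitive scan, then the no-spaces heuristic) can be exercised against a trimmed-down copy of the tables. The abbreviated dictionaries below are illustrative stand-ins, not the full lists:

```python
# Trimmed stand-ins for LICENSE_URLS / LICENSE_MAPPING (values elided).
LICENSE_URLS = {"Apache-2.0": "url", "MIT": "url", "GPL-3.0-only": "url"}
LICENSE_MAPPING = {"apache license 2.0": "Apache-2.0", "mit license": "MIT"}

def normalize(license_id):
    if not license_id:
        return None
    if license_id in LICENSE_URLS:          # 1. exact match
        return license_id
    lower_id = license_id.lower()
    if lower_id in LICENSE_MAPPING:         # 2. known variation
        return LICENSE_MAPPING[lower_id]
    for valid_id in LICENSE_URLS:           # 3. case-insensitive scan
        if valid_id.lower() == lower_id:
            return valid_id
    if " " not in license_id and len(license_id) < 50:
        return license_id                   # 4. looks like an ID; validated later
    return None
```

Note how step 3 recovers casing ("mit" becomes "MIT") while step 4 passes unknown but ID-shaped strings through unchanged.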
src/utils/rate_limiting.py ADDED
@@ -0,0 +1,129 @@
+ import time
+ from collections import defaultdict
+ from fastapi import Request
+ from fastapi.responses import JSONResponse
+ from starlette.middleware.base import BaseHTTPMiddleware
+ import logging
+ import asyncio  # Concurrency limiting
+
+ logger = logging.getLogger(__name__)
+
+ class RateLimitMiddleware(BaseHTTPMiddleware):
+     def __init__(
+         self,
+         app,
+         rate_limit_per_minute=10,
+         rate_limit_window=60,
+         protected_routes=None
+     ):
+         super().__init__(app)
+         self.rate_limit_per_minute = rate_limit_per_minute
+         self.rate_limit_window = rate_limit_window
+         # None default avoids a mutable default argument shared across instances
+         self.protected_routes = protected_routes or ["/generate", "/api/generate", "/api/generate-with-report"]
+         self.ip_requests = defaultdict(list)
+         logger.info(f"Rate limit middleware initialized: {rate_limit_per_minute} requests per {rate_limit_window}s")
+
+     async def dispatch(self, request: Request, call_next):
+         client_ip = request.client.host if request.client else "unknown"
+         current_time = time.time()
+
+         # Only apply rate limiting to protected routes
+         if any(request.url.path.startswith(route) for route in self.protected_routes):
+             # Drop timestamps for this IP that fell out of the window
+             self.ip_requests[client_ip] = [t for t in self.ip_requests[client_ip]
+                                            if current_time - t < self.rate_limit_window]
+
+             # Periodic cleanup of all IPs (sampled to avoid per-request overhead).
+             # In a production app, use a background task or Redis.
+             if len(self.ip_requests) > 1000 and hash(client_ip) % 100 == 0:
+                 self._cleanup_all_ips(current_time)
+
+             # Check if the rate limit is exceeded
+             if len(self.ip_requests[client_ip]) >= self.rate_limit_per_minute:
+                 logger.warning(f"Rate limit exceeded for IP {client_ip} on {request.url.path}")
+                 return JSONResponse(
+                     status_code=429,
+                     content={"detail": "Rate limit exceeded. Please try again later."}
+                 )
+
+             # Record the current request timestamp
+             self.ip_requests[client_ip].append(current_time)
+
+         # Process the request
+         response = await call_next(request)
+         return response
+
+     def _cleanup_all_ips(self, current_time):
+         """Remove IPs that haven't made requests within the window"""
+         to_remove = []
+         for ip, timestamps in self.ip_requests.items():
+             # If the latest timestamp is older than the window, drop the IP
+             if not timestamps or (current_time - timestamps[-1] > self.rate_limit_window):
+                 to_remove.append(ip)
+         for ip in to_remove:
+             del self.ip_requests[ip]
+
+ class ConcurrencyLimitMiddleware(BaseHTTPMiddleware):
+     def __init__(
+         self,
+         app,
+         max_concurrent_requests=5,
+         timeout=5.0,
+         protected_routes=None
+     ):
+         super().__init__(app)
+         self.semaphore = asyncio.Semaphore(max_concurrent_requests)
+         self.timeout = timeout
+         self.protected_routes = protected_routes or ["/generate", "/api/generate", "/api/generate-with-report"]
+         logger.info(f"Concurrency limit middleware initialized: {max_concurrent_requests} concurrent requests")
+
+     async def dispatch(self, request, call_next):
+         try:
+             # Only apply to protected routes
+             if any(request.url.path.startswith(route) for route in self.protected_routes):
+                 try:
+                     acquired = False
+                     try:
+                         # Use wait_for instead of a timeout context manager for compatibility
+                         await asyncio.wait_for(self.semaphore.acquire(), timeout=self.timeout)
+                         acquired = True
+                         return await call_next(request)
+                     finally:
+                         if acquired:
+                             self.semaphore.release()
+                 except asyncio.TimeoutError:
+                     # Timed out waiting for the semaphore
+                     logger.warning(f"Concurrency limit reached for {request.url.path}")
+                     return JSONResponse(
+                         status_code=503,
+                         content={"detail": "Server is at capacity. Please try again later."}
+                     )
+             else:
+                 # For non-protected routes, proceed normally
+                 return await call_next(request)
+         except Exception as e:
+             logger.error(f"Error in ConcurrencyLimitMiddleware: {str(e)}")
+             return JSONResponse(
+                 status_code=500,
+                 content={"detail": f"Internal server error in middleware: {str(e)}"}
+             )
+
+ # Protection against large request payloads
+ class RequestSizeLimitMiddleware(BaseHTTPMiddleware):
+     def __init__(self, app, max_content_length=1024*1024):  # 1MB default
+         super().__init__(app)
+         self.max_content_length = max_content_length
+         logger.info(f"Request size limit middleware initialized: {max_content_length} bytes")
+
+     async def dispatch(self, request: Request, call_next):
+         content_length = request.headers.get('content-length')
+         if content_length:
+             try:
+                 too_large = int(content_length) > self.max_content_length
+             except ValueError:
+                 too_large = True  # treat a malformed header as oversized
+             if too_large:
+                 logger.warning(f"Request too large: {content_length} bytes")
+                 return JSONResponse(
+                     status_code=413,
+                     content={"detail": "Request too large"}
+                 )
+         return await call_next(request)
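The sliding-window bookkeeping in `RateLimitMiddleware.dispatch` can be tested without the ASGI plumbing. This sketch reproduces the same prune-then-count logic with an injectable clock so the behavior is deterministic:

```python
import time
from collections import defaultdict

class SlidingWindowLimiter:
    """Same bookkeeping as RateLimitMiddleware, minus the HTTP layer."""
    def __init__(self, limit=3, window=60.0):
        self.limit = limit
        self.window = window
        self.hits = defaultdict(list)

    def allow(self, ip, now=None):
        now = time.time() if now is None else now
        # Prune timestamps that fell out of the window, then check the count.
        self.hits[ip] = [t for t in self.hits[ip] if now - t < self.window]
        if len(self.hits[ip]) >= self.limit:
            return False  # would map to HTTP 429 in the middleware
        self.hits[ip].append(now)
        return True

limiter = SlidingWindowLimiter(limit=3, window=60.0)
decisions = [limiter.allow("1.2.3.4", now=t) for t in (0, 1, 2, 3)]
later = limiter.allow("1.2.3.4", now=65)  # earlier hits have expired by now
```

The fourth call within the window is rejected, and once the window slides past the recorded timestamps the same IP is admitted again.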
src/utils/summarizer.py ADDED
@@ -0,0 +1,266 @@
+ import logging
+ import re
+ from typing import Optional, List
+
+ logger = logging.getLogger(__name__)
+
+ class LocalSummarizer:
+     """
+     Singleton-style wrapper for local LLM summarization.
+     Enhances extraction using robust heuristic rules and LLM generation with retry logic.
+     """
+     _tokenizer = None
+     _model = None
+     _model_name = "google/flan-t5-small"
+
+     @classmethod
+     def _load_model(cls):
+         """Lazily load the model and tokenizer"""
+         if cls._model is None:
+             try:
+                 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+                 import transformers
+                 logger.info(f"⏳ Loading summarization model ({cls._model_name})...")
+
+                 old_verbosity = transformers.logging.get_verbosity()
+                 transformers.logging.set_verbosity_error()
+
+                 cls._tokenizer = AutoTokenizer.from_pretrained(cls._model_name)
+                 cls._model = AutoModelForSeq2SeqLM.from_pretrained(cls._model_name)
+
+                 transformers.logging.set_verbosity(old_verbosity)
+                 logger.info("✅ Summarization model loaded successfully")
+             except Exception as e:
+                 logger.error(f"❌ Failed to load summarization model: {e}")
+                 cls._model = False  # Mark as failed
+
+     @staticmethod
+     def _strip_yaml_frontmatter(text: str) -> str:
+         """Strip the YAML frontmatter enclosed in ---"""
+         return re.sub(r'^---\s*\n.*?\n---\s*\n', '', text, flags=re.MULTILINE | re.DOTALL)
+
+     @staticmethod
+     def _extract_candidates(text: str) -> List[str]:
+         candidates = []
+
+         # 1. Section headers (including numbered ones like "1. Introduction")
+         heading_matches = re.finditer(r'^#+\s*(?:\d+[\.\)]?\s*)?(Description|Model [dD]escription|Model Overview|Overview|Introduction|Summary|モデル概要|Model Details)[^\n]*\n(.*?)(?=\n#+\s|\Z)', text, flags=re.MULTILINE | re.DOTALL)
+         for match in heading_matches:
+             if match.group(2).strip():
+                 candidates.append(match.group(2).strip())
+
+         # 2. Inline labels
+         inline_matches = re.finditer(r'(?:Description:|Overview:|### Description:)\s*(.*?)(?=\n\n|\Z)', text, flags=re.DOTALL | re.IGNORECASE)
+         for match in inline_matches:
+             if match.group(1).strip():
+                 candidates.append(match.group(1).strip())
+
+         # 3. Auto-generated fine-tuned leading sentences
+         tuned_matches = re.finditer(r'^(?:The .*model is a .*|This model is a fine-tuned version of.*|This is a fine-tuned.*)', text, flags=re.MULTILINE | re.IGNORECASE)
+         for match in tuned_matches:
+             candidates.append(match.group(0).strip())
+
+         # 4. Fallback: first meaningful paragraph.
+         # Strip some HTML first, just for this fallback rule.
+         html_stripped = re.sub(r'<[^>]+>', '', text)
+         paragraphs = re.split(r'\n\s*\n', html_stripped)
+         for p in paragraphs:
+             p = p.strip()
+             if not p:
+                 continue
+             if p.startswith('#'):
+                 continue
+             # Skip heavy markdown like links/images/badges and GitHub alerts
+             if p.startswith('[!') or p.startswith('<a href') or p.startswith('> [!'):
+                 continue
+             # Skip paragraphs with many links (e.g. a table of contents / link directory)
+             if p.count('](') > 3 or p.count('http') > 3:
+                 continue
+             if len(p) > 50:
+                 candidates.append(p)
+                 break
+
+         return candidates
+
+     @staticmethod
+     def _score_candidate(text: str) -> float:
+         score = 0.0
+         text_lower = text.lower()
+
+         # Length score (sweet spot between 50 and 1000 chars)
+         if 50 < len(text) < 1000:
+             score += 10.0
+
+         # Reward definitional patterns
+         if "is a" in text_lower or "fine-tuned version of" in text_lower or "trained on" in text_lower or "designed for" in text_lower:
+             score += 20.0
+
+         # Penalize bad patterns
+         if "leaderboard" in text_lower or "benchmark" in text_lower or "results" in text_lower:
+             score -= 50.0
+         if "install" in text_lower or "how to run" in text_lower or "pip install" in text_lower or "read our guide" in text_lower:
+             score -= 30.0
+
+         # Penalize table/code-heavy paragraphs and bullet points
+         if text.count('|') > 5 or text.count('```') >= 1 or text.count('\n- ') > 2 or text.count('\n* ') > 2:
+             score -= 50.0
+
+         return score
+
+     @staticmethod
+     def _clean_text(text: str) -> str:
+         # Remove HTML (bs4 is optional; fall back to the raw text if unavailable)
+         try:
+             from bs4 import BeautifulSoup
+             soup = BeautifulSoup(text, "html.parser")
+             for tag in soup(["style", "script"]):
+                 tag.decompose()
+             text = soup.get_text(separator=' ')
+         except Exception:
+             pass
+
+         # Remove markdown images
+         text = re.sub(r'!\[.*?\]\([^)]+\)', '', text)
+         # Convert links to just their text
+         text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
+         # Remove code blocks
+         text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)
+         # Remove inline code
+         text = re.sub(r'`[^`]*`', '', text)
+         # Remove tables
+         text = re.sub(r'\|.*?\|', '', text)
+         text = re.sub(r'(?m)^[-:| ]+$', '', text)  # table separators
+
+         # Remove boilerplate line by line
+         lines = text.split('\n')
+         clean_lines = []
+         for line in lines:
+             line_lower = line.lower()
+             if 'generated automatically' in line_lower and 'model card' in line_lower:
+                 continue
+             if 'completed by the model author' in line_lower:
+                 continue
+             if 'model cards for model reporting' in line_lower:
+                 continue
+             clean_lines.append(line)
+         text = '\n'.join(clean_lines)
+
+         # Clean up whitespace
+         text = re.sub(r'\s+', ' ', text).strip()
+
+         return text
+
+     @classmethod
+     def _generate(cls, prompt: str, max_output_chars: int) -> Optional[str]:
+         if cls._model is None:
+             cls._load_model()
+         if not cls._model or not cls._tokenizer:
+             return None
+
+         try:
+             inputs = cls._tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
+             generate_kwargs = {
+                 "max_length": 128,  # doubled from 64 to allow fuller sentences
+                 "min_length": 15,   # avoid single-word outputs
+                 "do_sample": False,
+                 "num_beams": 4,
+                 "early_stopping": True,
+                 "repetition_penalty": 2.0
+             }
+             summary_ids = cls._model.generate(inputs["input_ids"], **generate_kwargs)
+             summary = cls._tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+
+             summary = summary.strip()
+
+             # Remove an "Output:" prefix if present
+             if summary.lower().startswith("output:"):
+                 summary = re.sub(r'^Output:\s*', '', summary, flags=re.IGNORECASE)
+
+             if len(summary) > max_output_chars:
+                 return summary[:max_output_chars-3] + "..."
+             return summary
+         except Exception as e:
+             logger.warning(f"⚠️ Generation failed: {e}")
+             return None
+
+     @staticmethod
+     def _is_valid_summary(summary: str, model_id: str) -> bool:
+         if not summary or len(summary) < 15:
+             return False
+
+         summary_lower = summary.lower()
+         model_name = model_id.split('/')[-1].lower()
+
+         if summary_lower == model_name or summary_lower == f"{model_name} model":
+             return False
+
+         # Check for markdown/HTML artifacts
+         if '#' in summary or '<' in summary or '>' in summary or '*' in summary:
+             return False
+
+         # Check for instruction-like text
+         if summary_lower.startswith("to install") or summary_lower.startswith("how to") or "pip install" in summary_lower:
+             return False
+
+         # Refuse literal copies of bullet points (e.g. from a metadata table)
+         if "- type:" in summary_lower or "number of parameters:" in summary_lower:
+             return False
+
+         return True
+
+     @classmethod
+     def summarize(cls, text: str, max_output_chars: int = 332, model_id: str = "") -> Optional[str]:
+         """
+         Robustly extract and summarize a model description.
+         """
+         if not text or not text.strip():
+             return None
+
+         # 1. Strip YAML safely
+         text_without_yaml = cls._strip_yaml_frontmatter(text)
+
+         # 2. Extract multiple candidate description blocks
+         candidates = cls._extract_candidates(text_without_yaml)
+
+         if not candidates:
+             # Fallback when no candidates were found at all
+             candidates = [text_without_yaml[:1000]]
+
+         # 3. Score candidates and pick the best
+         scored_candidates = [(c, cls._score_candidate(c)) for c in candidates]
+         best_candidate = max(scored_candidates, key=lambda x: x[1])[0]
+
+         # 4. Clean aggressively
+         cleaned_text = cls._clean_text(best_candidate)
+
+         if not cleaned_text.strip():
+             return None
+
+         # Use only the first few sentences of the cleaned text, to avoid confusing
+         # the small model with training details that usually appear at the end of
+         # the paragraph.
+         sentences = re.split(r'(?<=[.!?])\s+', cleaned_text)
+         short_text = " ".join(sentences[:3])
+
+         # 5-7. Summarize, validate, retry, fall back
+         prompt1 = f"In one sentence, explain what this AI model is designed to do based on this description:\n\n{short_text}"
+
+         summary = cls._generate(prompt1, max_output_chars)
+
+         if summary and cls._is_valid_summary(summary, model_id):
+             return summary
+
+         # Retry with a stricter prompt
+         logger.info("⚠️ First summary invalid, retrying with stricter prompt.")
+         prompt2 = f"Summarize the main purpose of this AI model in one complete sentence:\n\n{cleaned_text}"
+         summary2 = cls._generate(prompt2, max_output_chars)
+
+         if summary2 and cls._is_valid_summary(summary2, model_id):
+             return summary2
+
+         # Fall back to the first 1-2 sentences of the cleaned text
+         logger.info("⚠️ Both LLM summaries invalid, falling back to cleaned extracted text.")
+         fallback_summary = " ".join(sentences[:2])
+         if len(fallback_summary) > max_output_chars:
+             return fallback_summary[:max_output_chars-3] + "..."
+         return fallback_summary
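The sentence-window step in `summarize` relies on a lookbehind split that keeps each terminator attached to its sentence. A quick check of how it segments a typical model-card opening (the sample text is illustrative):

```python
import re

text = "BERT is a transformer model. It was pretrained on English text! Use it for fine-tuning?"
# Same pattern as in summarize(): split on whitespace preceded by ., ! or ?
sentences = re.split(r'(?<=[.!?])\s+', text)
short = " ".join(sentences[:2])  # the "first few sentences" window
```

Because the lookbehind consumes no characters, the punctuation stays with each sentence and rejoining the window reproduces the original prefix verbatim.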
src/utils/validation.py ADDED
@@ -0,0 +1,158 @@
+ """
+ CycloneDX 1.6 Schema Validation for AIBOM Generator.
+
+ This module provides validation of generated AIBOMs against the official
+ CycloneDX 1.6 JSON schema to ensure compliance and interoperability.
+ """
+ import json
+ import logging
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Tuple
+
+ # requests is a core dependency of this project
+ import requests
+ import jsonschema
+ from jsonschema import Draft7Validator, ValidationError
+ from referencing import Registry, Resource
+
+ # Module-level logger
+ logger = logging.getLogger(__name__)
+
+ # CycloneDX schema configuration
+ CYCLONEDX_1_6_SCHEMA_URL = "https://raw.githubusercontent.com/CycloneDX/specification/master/schema/bom-1.6.schema.json"
+ # Path relative to this file: src/utils/../schemas -> src/schemas
+ SCHEMA_CACHE_DIR = Path(__file__).parent.parent / "schemas"
+ SCHEMA_CACHE_FILE = SCHEMA_CACHE_DIR / "bom-1.6.schema.json"
+
+ # Global schema cache
+ _cached_schema: Optional[Dict[str, Any]] = None
+
+
+ def _ensure_cache_dir() -> None:
+     """Ensure the schema cache directory exists."""
+     SCHEMA_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+
+ def _load_schema_from_cache() -> Optional[Dict[str, Any]]:
+     """Load the schema from the local cache if available."""
+     if SCHEMA_CACHE_FILE.exists():
+         try:
+             with open(SCHEMA_CACHE_FILE, "r", encoding="utf-8") as f:
+                 schema = json.load(f)
+             logger.debug("Loaded CycloneDX 1.6 schema from cache")
+             return schema
+         except (json.JSONDecodeError, IOError) as e:
+             logger.warning("Failed to load cached schema: %s", e)
+     return None
+
+
+ def _download_schema() -> Optional[Dict[str, Any]]:
+     """Download the CycloneDX 1.6 schema from the official repository."""
+     try:
+         logger.info("Downloading CycloneDX 1.6 schema from %s", CYCLONEDX_1_6_SCHEMA_URL)
+         response = requests.get(CYCLONEDX_1_6_SCHEMA_URL, timeout=30)
+         response.raise_for_status()
+         schema = response.json()
+
+         # Cache the schema locally
+         _ensure_cache_dir()
+         with open(SCHEMA_CACHE_FILE, "w", encoding="utf-8") as f:
+             json.dump(schema, f, indent=2)
+         logger.info("CycloneDX 1.6 schema downloaded and cached")
+
+         return schema
+     except requests.RequestException as e:
+         logger.error("Failed to download CycloneDX schema: %s", e)
+         return None
+     except (json.JSONDecodeError, IOError) as e:
+         logger.error("Failed to parse or cache schema: %s", e)
+         return None
+
+
+ def load_schema(force_download: bool = False) -> Optional[Dict[str, Any]]:
+     """
+     Load the CycloneDX 1.6 JSON schema.
+
+     Uses the in-memory cache first, then the file cache, then downloads if needed.
+
+     Args:
+         force_download: If True, download a fresh schema even if cached.
+
+     Returns:
+         The schema dictionary, or None if loading failed.
+     """
+     global _cached_schema
+
+     # Return the in-memory cache if available
+     if _cached_schema is not None and not force_download:
88
+ return _cached_schema
89
+
90
+ # Try loading from file cache
91
+ if not force_download:
92
+ schema = _load_schema_from_cache()
93
+ if schema:
94
+ _cached_schema = schema
95
+ return schema
96
+
97
+ # Download fresh schema
98
+ schema = _download_schema()
99
+ if schema:
100
+ _cached_schema = schema
101
+
102
+ return schema
103
+
104
+
105
+ def _format_validation_error(error: ValidationError) -> str:
106
+ """Format a validation error into a readable message."""
107
+ path = " -> ".join(str(p) for p in error.absolute_path) if error.absolute_path else "root"
108
+ return f"[{path}] {error.message}"
109
+
110
+
111
+ def validate_aibom(aibom: Dict[str, Any], strict: bool = False) -> Tuple[bool, List[str]]:
112
+ """
113
+ Validate an AIBOM against the CycloneDX 1.6 schema.
114
+
115
+ Args:
116
+ aibom: The AIBOM dictionary to validate.
117
+ strict: Reserved for future use; all schema errors are currently collected.
118
+
119
+ Returns:
120
+ Tuple of (is_valid, list of error messages).
121
+ If valid, returns (True, []).
+ If invalid, returns (False, [error1, error2, ...]).
+ If the schema cannot be loaded, validation is skipped and (True, ["Schema unavailable"]) is returned.
123
+ """
124
+ schema = load_schema()
125
+
126
+ if schema is None:
127
+ logger.warning("Could not load CycloneDX schema - skipping validation")
128
+ return True, ["Schema unavailable"]
129
+
130
+ # Load SPDX schema for reference resolution
131
+ spdx_path = SCHEMA_CACHE_DIR / "spdx.schema.json"
132
+ registry = Registry()
133
+ if spdx_path.exists():
134
+ try:
135
+ with open(spdx_path, "r", encoding="utf-8") as f:
136
+ spdx_schema = json.load(f)
137
+ resource = Resource.from_contents(spdx_schema)
138
+ registry = registry.with_resource(uri="spdx.schema.json", resource=resource)
139
+ except Exception as e:
140
+ logger.warning("Failed to load SPDX schema for validation: %s", e)
141
+
142
+ validator = Draft7Validator(schema, registry=registry)
143
+ # Note: error.path is a deque, which is not orderable; sort by its string form
+ errors = sorted(validator.iter_errors(aibom), key=lambda e: str(list(e.absolute_path)))
144
+
145
+ if not errors:
146
+ return True, []
147
+
148
+ error_messages = [_format_validation_error(e) for e in errors]
149
+ return False, error_messages
150
+
+
151
+ def get_validation_summary(aibom: Dict[str, Any]) -> Dict[str, Any]:
152
+ """Get a summary of schema validation results."""
153
+ is_valid, errors = validate_aibom(aibom)
154
+ return {
155
+ "valid": is_valid,
156
+ "error_count": len(errors),
157
+ "errors": errors[:10] if not is_valid else [] # Limit to first 10
158
+ }