Deploy OpenClaw PR API
Changed files:
- pyproject.toml +1 -8
- src/slop_farmer/__init__.py +1 -1
- src/slop_farmer/app/cli.py +307 -0
- src/slop_farmer/app/dataset_refresh.py +1021 -0
- src/slop_farmer/app/dataset_status.py +182 -0
- src/slop_farmer/app/deploy.py +11 -2
- src/slop_farmer/app/hf_checkpoint_import.py +10 -70
- src/slop_farmer/app/pipeline.py +12 -90
- src/slop_farmer/app/pr_search.py +74 -0
- src/slop_farmer/app/pr_search_api.py +61 -3
- src/slop_farmer/app/workflow.py +3 -0
- src/slop_farmer/app_config.py +22 -0
- src/slop_farmer/config.py +38 -0
- src/slop_farmer/data/dataset_card.py +107 -0
- src/slop_farmer/data/hf_dataset_repo.py +94 -0
- src/slop_farmer/data/search_duckdb.py +146 -0
- src/slop_farmer/data/snapshot_source.py +31 -0
- src/slop_farmer/reports/analysis.py +9 -17
- src/slop_farmer/reports/analysis_service.py +97 -25
- src/slop_farmer/reports/dashboard.py +9 -2
- src/slop_farmer/reports/new_contributor_report.py +11 -3
- src/slop_farmer/reports/pr_scope.py +9 -16
- src/slop_farmer/reports/pr_search_scope.py +12 -16
- src/slop_farmer/reports/pr_search_service.py +166 -1
- uv.lock +136 -136
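This PR wires two new top-level commands, refresh-dataset and dataset-status, into the existing argparse CLI (see the cli.py diff below). A minimal sketch of parsing one of them, assuming the package is importable and that the per-command defaults resolve without a config file; the flag names come from the diff, and the dataset repo id is illustrative:

# Sketch only: exercises the new "dataset-status" subparser added in this PR.
# "evalstate/transformers-pr" appears elsewhere in this repo's config; treat it
# as a placeholder for your own dataset repo id.
from slop_farmer.app.cli import build_parser

parser = build_parser()
args = parser.parse_args(
    [
        "dataset-status",
        "--hf-repo-id", "evalstate/transformers-pr",
        "--json",
    ]
)
print(args.command, args.hf_repo_id)  # -> dataset-status evalstate/transformers-pr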
pyproject.toml
CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "slop-farmer"
-version = "0.1.
+version = "0.1.1"
 description = "GitHub-to-Hub data pipeline for transformers issue and PR triage research."
 readme = "README.md"
 requires-python = ">=3.13.5"
@@ -60,13 +60,6 @@ select = [
 ]
 ignore = ["E501"]
 
-[tool.slop-farmer.analyze]
-output-dir = "eval_data"
-hf-repo-id = "evalstate/transformers-pr"
-ranking-backend = "hybrid"
-model = "gpt-5.4-mini"
-max-clusters = 10
-
 [tool.slop-farmer.dashboard-data]
 output-dir = "web/public/data"
 window-days = 14
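The [tool.slop-farmer.*] tables above feed the CLI through command_defaults (imported from slop_farmer.app_config in the diffs below). A rough sketch of the lookup, under the assumption that the helper surfaces these pyproject tables and returns the hyphenated keys used throughout cli.py:

from slop_farmer.app_config import command_defaults

# Assumption: command_defaults("dashboard-data", config_path=None) reads the
# [tool.slop-farmer.dashboard-data] table shown above.
defaults = command_defaults("dashboard-data", config_path=None)
print(defaults.get("output-dir"))   # expected: "web/public/data"
print(defaults.get("window-days"))  # expected: 14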
src/slop_farmer/__init__.py
CHANGED
@@ -1,3 +1,3 @@
 __all__ = ["__version__"]
 
-__version__ = "0.1.
+__version__ = "0.1.1"
src/slop_farmer/app/cli.py
CHANGED
@@ -13,6 +13,8 @@ from slop_farmer.config import (
     AnalysisOptions,
     CheckpointImportOptions,
     DashboardDataOptions,
+    DatasetRefreshOptions,
+    DatasetStatusOptions,
     DeployDashboardOptions,
     FullPipelineOptions,
     MarkdownReportOptions,
@@ -41,6 +43,7 @@ def build_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser:
     subparsers = parser.add_subparsers(dest="command", required=True)
 
     _add_scrape_parser(subparsers, defaults["scrape"])
+    _add_refresh_dataset_parser(subparsers, defaults["refresh-dataset"])
     _add_analyze_parser(subparsers, defaults["analyze"])
     _add_pr_scope_parser(subparsers, defaults["pr-scope"])
     _add_checkpoint_import_parser(subparsers, defaults["import-hf-checkpoint"])
@@ -52,6 +55,7 @@ def build_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser:
     _add_dashboard_data_parser(subparsers, defaults["dashboard-data"])
     _add_publish_snapshot_parser(subparsers, defaults["publish-snapshot"])
     _add_deploy_dashboard_parser(subparsers, defaults["deploy-dashboard"])
+    _add_dataset_status_parser(subparsers, defaults["dataset-status"])
     _add_full_pipeline_parser(subparsers, defaults["full-pipeline"])
     return parser
 
@@ -59,6 +63,7 @@ def build_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser:
 def _load_parser_defaults(config_path: Path | None) -> dict[str, dict[str, Any]]:
     commands = (
         "scrape",
+        "refresh-dataset",
         "analyze",
         "import-hf-checkpoint",
         "pr-scope",
@@ -68,6 +73,7 @@ def _load_parser_defaults(config_path: Path | None) -> dict[str, dict[str, Any]]:
         "dashboard-data",
         "publish-snapshot",
         "deploy-dashboard",
+        "dataset-status",
         "full-pipeline",
     )
     return {command: command_defaults(command, config_path=config_path) for command in commands}
@@ -184,6 +190,80 @@ def _add_scrape_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     )
 
 
+def _add_refresh_dataset_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
+    refresh = subparsers.add_parser(
+        "refresh-dataset",
+        help="Refresh the canonical Hugging Face dataset repo from remote watermark state.",
+    )
+    refresh.add_argument(
+        "--repo",
+        default=defaults.get("repo", "huggingface/transformers"),
+        help="GitHub repository in owner/name form.",
+    )
+    refresh.add_argument(
+        "--hf-repo-id",
+        default=defaults.get("hf-repo-id"),
+        required=defaults.get("hf-repo-id") is None,
+        help="Canonical Hugging Face dataset repo id to refresh.",
+    )
+    refresh.add_argument("--max-issues", type=int, default=defaults.get("max-issues"))
+    refresh.add_argument("--max-prs", type=int, default=defaults.get("max-prs"))
+    refresh.add_argument(
+        "--max-issue-comments", type=int, default=defaults.get("max-issue-comments")
+    )
+    refresh.add_argument(
+        "--max-reviews-per-pr", type=int, default=defaults.get("max-reviews-per-pr")
+    )
+    refresh.add_argument(
+        "--max-review-comments-per-pr",
+        type=int,
+        default=defaults.get("max-review-comments-per-pr"),
+    )
+    refresh.add_argument(
+        "--fetch-timeline",
+        action="store_true",
+        default=bool(defaults.get("fetch-timeline", False)),
+    )
+    refresh.add_argument(
+        "--new-contributor-report",
+        dest="new_contributor_report",
+        action="store_true",
+        default=bool(defaults.get("new-contributor-report", True)),
+    )
+    refresh.add_argument(
+        "--no-new-contributor-report",
+        dest="new_contributor_report",
+        action="store_false",
+    )
+    refresh.add_argument(
+        "--new-contributor-window-days",
+        type=int,
+        default=int(defaults.get("new-contributor-window-days", 42)),
+    )
+    refresh.add_argument(
+        "--new-contributor-max-authors",
+        type=int,
+        default=int(defaults.get("new-contributor-max-authors", 25)),
+    )
+    refresh.add_argument("--http-timeout", type=int, default=300)
+    refresh.add_argument("--http-max-retries", type=int, default=8)
+    refresh.add_argument("--checkpoint-every-comments", type=int, default=1000)
+    refresh.add_argument("--checkpoint-every-prs", type=int, default=25)
+    refresh.add_argument(
+        "--private-hf-repo",
+        dest="private_hf_repo",
+        action="store_true",
+        default=bool(defaults.get("private-hf-repo", False)),
+        help="Create the target dataset repo as private if needed.",
+    )
+    refresh.add_argument(
+        "--private",
+        dest="private_hf_repo",
+        action="store_true",
+        help=argparse.SUPPRESS,
+    )
+
+
 def _add_analyze_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     analyze = subparsers.add_parser(
         "analyze", help="Analyze a local snapshot and write a shortlist JSON report."
@@ -637,6 +717,61 @@ def _add_pr_search_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     status.add_argument("--repo", help="Optional repo override.")
     status.add_argument("--json", action="store_true", help="Emit JSON.")
 
+    contributor = pr_search_subparsers.add_parser(
+        "contributor", help="Show indexed contributor summary for one author login."
+    )
+    contributor.add_argument("login", help="GitHub author login to query.")
+    contributor.add_argument(
+        "--db",
+        type=Path,
+        default=Path(defaults["db"]) if defaults.get("db") else None,
+        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
+    )
+    contributor.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path(defaults.get("output-dir", "data")),
+    )
+    contributor.add_argument("--repo", help="Optional repo override.")
+    contributor.add_argument("--json", action="store_true", help="Emit JSON.")
+
+    contributor_prs = pr_search_subparsers.add_parser(
+        "contributor-prs", help="List indexed PRs for one contributor login."
+    )
+    contributor_prs.add_argument("login", help="GitHub author login to query.")
+    contributor_prs.add_argument(
+        "--db",
+        type=Path,
+        default=Path(defaults["db"]) if defaults.get("db") else None,
+        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
+    )
+    contributor_prs.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path(defaults.get("output-dir", "data")),
+    )
+    contributor_prs.add_argument("--repo", help="Optional repo override.")
+    contributor_prs.add_argument("--limit", type=int, default=20, help="Maximum rows to show.")
+    contributor_prs.add_argument("--json", action="store_true", help="Emit JSON.")
+
+    pr_contributor = pr_search_subparsers.add_parser(
+        "pr-contributor", help="Show contributor summary for the author of one indexed PR."
+    )
+    pr_contributor.add_argument("pr_number", type=int, help="Pull request number to query.")
+    pr_contributor.add_argument(
+        "--db",
+        type=Path,
+        default=Path(defaults["db"]) if defaults.get("db") else None,
+        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
+    )
+    pr_contributor.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path(defaults.get("output-dir", "data")),
+    )
+    pr_contributor.add_argument("--repo", help="Optional repo override.")
+    pr_contributor.add_argument("--json", action="store_true", help="Emit JSON.")
+
 
 def _add_new_contributor_report_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     new_contributor = subparsers.add_parser(
@@ -659,6 +794,24 @@ def _add_new_contributor_report_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     new_contributor.add_argument(
         "--json-output", type=Path, help="Optional JSON output path. Defaults next to the snapshot."
     )
+    new_contributor.add_argument(
+        "--hf-repo-id",
+        default=defaults.get("hf-repo-id"),
+        help="Analyze a Hugging Face dataset repo by materializing its parquet export locally.",
+    )
+    new_contributor.add_argument(
+        "--hf-revision",
+        default=defaults.get("hf-revision"),
+        help="Optional Hub revision for metadata and README download.",
+    )
+    new_contributor.add_argument(
+        "--hf-materialize-dir",
+        type=Path,
+        default=Path(defaults["hf-materialize-dir"])
+        if defaults.get("hf-materialize-dir")
+        else None,
+        help="Optional local directory used when materializing an HF dataset snapshot.",
+    )
     new_contributor.add_argument(
         "--window-days",
         type=int,
@@ -702,6 +855,24 @@ def _add_dashboard_data_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
         type=Path,
         help="Optional PR scope cluster JSON. Defaults to pr-scope-clusters.json in the snapshot.",
     )
+    dashboard.add_argument(
+        "--hf-repo-id",
+        default=defaults.get("hf-repo-id"),
+        help="Analyze a Hugging Face dataset repo by materializing its parquet export locally.",
+    )
+    dashboard.add_argument(
+        "--hf-revision",
+        default=defaults.get("hf-revision"),
+        help="Optional Hub revision for metadata and README download.",
+    )
+    dashboard.add_argument(
+        "--hf-materialize-dir",
+        type=Path,
+        default=Path(defaults["hf-materialize-dir"])
+        if defaults.get("hf-materialize-dir")
+        else None,
+        help="Optional local directory used when materializing an HF dataset snapshot.",
+    )
     dashboard.add_argument(
         "--window-days",
         type=int,
@@ -761,6 +932,24 @@ def _add_deploy_dashboard_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     deploy_dashboard.add_argument(
         "--contributors-input", type=Path, help="Optional contributor report JSON override."
     )
+    deploy_dashboard.add_argument(
+        "--hf-repo-id",
+        default=defaults.get("hf-repo-id"),
+        help="Materialize a Hugging Face dataset repo instead of using the latest local snapshot.",
+    )
+    deploy_dashboard.add_argument(
+        "--hf-revision",
+        default=defaults.get("hf-revision"),
+        help="Optional Hub revision for metadata and README download.",
+    )
+    deploy_dashboard.add_argument(
+        "--hf-materialize-dir",
+        type=Path,
+        default=Path(defaults["hf-materialize-dir"])
+        if defaults.get("hf-materialize-dir")
+        else None,
+        help="Optional local directory used when materializing an HF dataset snapshot.",
+    )
     deploy_dashboard.add_argument(
         "--refresh-contributors",
         action="store_true",
@@ -817,6 +1006,31 @@ def _add_deploy_dashboard_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     )
 
 
+def _add_dataset_status_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
+    dataset_status = subparsers.add_parser(
+        "dataset-status",
+        help="Inspect canonical dataset freshness and the local latest pointer.",
+    )
+    dataset_status.add_argument("--repo", default=defaults.get("repo"))
+    dataset_status.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path(defaults.get("output-dir", "data")),
+        help="Local workspace root containing snapshots/latest.json.",
+    )
+    dataset_status.add_argument(
+        "--hf-repo-id",
+        default=defaults.get("hf-repo-id"),
+        help="Canonical Hugging Face dataset repo id to inspect.",
+    )
+    dataset_status.add_argument(
+        "--hf-revision",
+        default=defaults.get("hf-revision"),
+        help="Optional Hub revision for metadata and README download.",
+    )
+    dataset_status.add_argument("--json", action="store_true", help="Emit machine-readable JSON.")
+
+
 def _add_full_pipeline_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     full_pipeline = subparsers.add_parser(
         "full-pipeline",
@@ -933,6 +1147,33 @@ def _run_scrape(args: argparse.Namespace, config_path: Path | None) -> None:
     print(run_pipeline(options))
 
 
+def _run_refresh_dataset(args: argparse.Namespace, config_path: Path | None) -> None:
+    del config_path
+    from slop_farmer.app.dataset_refresh import run_dataset_refresh
+
+    result = run_dataset_refresh(
+        DatasetRefreshOptions(
+            repo=RepoRef.parse(args.repo),
+            hf_repo_id=args.hf_repo_id,
+            private_hf_repo=args.private_hf_repo,
+            max_issues=args.max_issues,
+            max_prs=args.max_prs,
+            max_issue_comments=args.max_issue_comments,
+            max_reviews_per_pr=args.max_reviews_per_pr,
+            max_review_comments_per_pr=args.max_review_comments_per_pr,
+            fetch_timeline=args.fetch_timeline,
+            new_contributor_report=args.new_contributor_report,
+            new_contributor_window_days=args.new_contributor_window_days,
+            new_contributor_max_authors=args.new_contributor_max_authors,
+            http_timeout=args.http_timeout,
+            http_max_retries=args.http_max_retries,
+            checkpoint_every_comments=args.checkpoint_every_comments,
+            checkpoint_every_prs=args.checkpoint_every_prs,
+        )
+    )
+    print(json.dumps(result, indent=2))
+
+
 def _run_analyze(args: argparse.Namespace, config_path: Path | None) -> None:
     from slop_farmer.reports.analysis import run_analysis
 
@@ -1041,12 +1282,18 @@ def _run_pr_search(args: argparse.Namespace, config_path: Path | None) -> None:
         explain_pr_search_pair,
         format_pr_search_candidate_clusters,
        format_pr_search_cluster,
+        format_pr_search_contributor,
+        format_pr_search_contributor_pulls,
         format_pr_search_pair,
         format_pr_search_probe,
+        format_pr_search_pull_contributor,
         format_pr_search_similar,
         format_pr_search_status,
         get_pr_search_candidate_clusters,
         get_pr_search_cluster,
+        get_pr_search_contributor,
+        get_pr_search_contributor_pulls,
+        get_pr_search_pull_contributor,
         get_pr_search_similar,
         get_pr_search_status,
         probe_pr_search_github,
@@ -1140,6 +1387,36 @@ def _run_pr_search(args: argparse.Namespace, config_path: Path | None) -> None:
         print(json.dumps(result, indent=2) if args.json else format_pr_search_status(result))
         return
 
+    if args.pr_search_command == "contributor":
+        result = get_pr_search_contributor(db_path, author_login=args.login, repo=args.repo)
+        print(json.dumps(result, indent=2) if args.json else format_pr_search_contributor(result))
+        return
+
+    if args.pr_search_command == "contributor-prs":
+        result = get_pr_search_contributor_pulls(
+            db_path,
+            author_login=args.login,
+            repo=args.repo,
+            limit=args.limit,
+        )
+        print(
+            json.dumps(result, indent=2)
+            if args.json
+            else format_pr_search_contributor_pulls(result)
+        )
+        return
+
+    if args.pr_search_command == "pr-contributor":
+        result = get_pr_search_pull_contributor(
+            db_path,
+            pr_number=args.pr_number,
+            repo=args.repo,
+        )
+        print(
+            json.dumps(result, indent=2) if args.json else format_pr_search_pull_contributor(result)
+        )
+        return
+
     raise ValueError(f"Unsupported pr-search command: {args.pr_search_command}")
 
 
@@ -1181,6 +1458,7 @@ def _run_new_contributor_report(args: argparse.Namespace, config_path: Path | None) -> None:
     del config_path
     from slop_farmer.reports.new_contributor_report import run_new_contributor_report
 
+    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
     print(
         run_new_contributor_report(
             NewContributorReportOptions(
@@ -1188,6 +1466,9 @@ def _run_new_contributor_report(args: argparse.Namespace, config_path: Path | None) -> None:
                 output_dir=args.output_dir,
                 output=args.output,
                 json_output=args.json_output,
+                hf_repo_id=hf_repo_id,
+                hf_revision=hf_revision,
+                hf_materialize_dir=hf_materialize_dir,
                 window_days=args.window_days,
                 max_authors=args.max_authors,
             )
@@ -1199,6 +1480,7 @@ def _run_dashboard_data(args: argparse.Namespace, config_path: Path | None) -> None:
     from slop_farmer.reports.dashboard import run_dashboard_data
 
     dashboard_defaults = command_defaults("dashboard-data", config_path=config_path)
+    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
     print(
         run_dashboard_data(
             DashboardDataOptions(
@@ -1207,6 +1489,9 @@ def _run_dashboard_data(args: argparse.Namespace, config_path: Path | None) -> None:
                 analysis_input=args.analysis_input,
                 contributors_input=args.contributors_input,
                 pr_scope_input=args.pr_scope_input,
+                hf_repo_id=hf_repo_id,
+                hf_revision=hf_revision,
+                hf_materialize_dir=hf_materialize_dir,
                 window_days=args.window_days,
                 snapshot_root=(
                     Path(dashboard_defaults["snapshot-root"])
@@ -1222,6 +1507,7 @@ def _run_deploy_dashboard(args: argparse.Namespace, config_path: Path | None) -> None:
     del config_path
     from slop_farmer.app.deploy import run_deploy_dashboard
 
+    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
    run_deploy_dashboard(
         DeployDashboardOptions(
             pipeline_data_dir=args.pipeline_data_dir,
@@ -1229,6 +1515,9 @@ def _run_deploy_dashboard(args: argparse.Namespace, config_path: Path | None) -> None:
             snapshot_dir=args.snapshot_dir,
             analysis_input=args.analysis_input,
             contributors_input=args.contributors_input,
+            hf_repo_id=hf_repo_id,
+            hf_revision=hf_revision,
+            hf_materialize_dir=hf_materialize_dir,
             refresh_contributors=args.refresh_contributors,
             dashboard_window_days=args.dashboard_window_days,
             contributor_window_days=args.contributor_window_days,
@@ -1247,6 +1536,22 @@ def _run_deploy_dashboard(args: argparse.Namespace, config_path: Path | None) -> None:
     )
 
 
+def _run_dataset_status(args: argparse.Namespace, config_path: Path | None) -> None:
+    del config_path
+    from slop_farmer.app.dataset_status import format_dataset_status, get_dataset_status
+
+    result = get_dataset_status(
+        DatasetStatusOptions(
+            repo=args.repo,
+            output_dir=args.output_dir,
+            hf_repo_id=args.hf_repo_id,
+            hf_revision=args.hf_revision,
+            json_output=args.json,
+        )
+    )
+    print(json.dumps(result, indent=2) if args.json else format_dataset_status(result))
+
+
 def _run_publish_snapshot(args: argparse.Namespace, config_path: Path | None) -> None:
     del config_path
     from slop_farmer.app.publish import run_publish_snapshot
@@ -1296,6 +1601,7 @@ def main() -> None:
 
     handlers: dict[str, CommandHandler] = {
         "scrape": _run_scrape,
+        "refresh-dataset": _run_refresh_dataset,
         "analyze": _run_analyze,
         "markdown-report": _run_markdown_report,
         "duplicate-prs": _run_duplicate_prs,
@@ -1306,6 +1612,7 @@ def main() -> None:
         "new-contributor-report": _run_new_contributor_report,
         "dashboard-data": _run_dashboard_data,
         "deploy-dashboard": _run_deploy_dashboard,
+        "dataset-status": _run_dataset_status,
         "publish-snapshot": _run_publish_snapshot,
         "full-pipeline": _run_full_pipeline,
     }
src/slop_farmer/app/dataset_refresh.py
ADDED
@@ -0,0 +1,1021 @@
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import shutil
+import tempfile
+import time
+from collections import defaultdict
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from huggingface_hub import HfApi
+
+from slop_farmer.app_config import command_defaults, extract_cli_config_path
+from slop_farmer.config import (
+    DatasetRefreshOptions,
+    NewContributorReportOptions,
+    RepoRef,
+    resolve_github_token,
+)
+from slop_farmer.data.dataset_card import build_hf_dataset_card
+from slop_farmer.data.github_api import GitHubClient
+from slop_farmer.data.hf_dataset_repo import (
+    list_remote_paths,
+    load_remote_file,
+    load_remote_json_file,
+    stable_snapshot_candidates,
+)
+from slop_farmer.data.links import build_pr_duplicate_candidate_rows, build_text_link_rows
+from slop_farmer.data.normalize import (
+    issue_url_to_number,
+    normalize_comment,
+    normalize_issue,
+    normalize_pr_diff,
+    normalize_pr_file,
+    normalize_pull_request,
+    normalize_review,
+    normalize_review_comment,
+    normalize_timeline_event,
+)
+from slop_farmer.data.parquet_io import (
+    SCHEMAS,
+    read_parquet_rows,
+    write_json,
+    write_parquet,
+    write_text,
+)
+from slop_farmer.reports.new_contributor_report import run_new_contributor_report
+
+PRIMARY_KEYS: dict[str, tuple[str, ...]] = {
+    "issues": ("github_id",),
+    "pull_requests": ("github_id",),
+    "comments": ("github_id",),
+    "reviews": ("github_id",),
+    "review_comments": ("github_id",),
+    "pr_files": ("repo", "pull_request_number", "filename"),
+    "pr_diffs": ("repo", "pull_request_number"),
+    "links": (
+        "repo",
+        "source_type",
+        "source_number",
+        "source_github_id",
+        "target_owner",
+        "target_repo",
+        "target_number",
+        "link_type",
+        "link_origin",
+    ),
+    "events": (
+        "repo",
+        "parent_kind",
+        "parent_number",
+        "event",
+        "created_at",
+        "actor_login",
+        "source_issue_number",
+        "source_issue_url",
+        "commit_id",
+        "label_name",
+    ),
+}
+CHECKPOINT_PREFIXES = ("_checkpoints", "checkpoints")
+
+
+def log(message: str) -> None:
+    stamp = datetime.now(tz=UTC).strftime("%H:%M:%SZ")
+    print(f"[{stamp}] {message}", flush=True)
+
+
+def iso_now() -> str:
+    return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+
+
+def snapshot_id() -> str:
+    return datetime.now(tz=UTC).strftime("%Y%m%dT%H%M%SZ")
+
+
+def row_key(row: dict[str, Any], fields: tuple[str, ...]) -> str:
+    return json.dumps([row.get(field) for field in fields], default=str)
+
+
+def merge_rows(
+    table_name: str,
+    previous_rows: list[dict[str, Any]],
+    delta_rows: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    if table_name == "pr_files":
+        refreshed_prs = {
+            (row.get("repo"), row.get("pull_request_number"))
+            for row in delta_rows
+            if row.get("pull_request_number") is not None
+        }
+        previous_rows = [
+            row
+            for row in previous_rows
+            if (row.get("repo"), row.get("pull_request_number")) not in refreshed_prs
+        ]
+    merged: dict[str, dict[str, Any]] = {}
+    for row in previous_rows:
+        merged[row_key(row, PRIMARY_KEYS[table_name])] = row
+    for row in delta_rows:
+        merged[row_key(row, PRIMARY_KEYS[table_name])] = row
+    return list(merged.values())
+
+
+def checkpoint_dirs(remote_paths: set[str]) -> list[tuple[str, str]]:
+    by_snapshot_id: dict[str, str] = {}
+    for path in remote_paths:
+        parts = path.split("/")
+        if len(parts) < 3 or parts[0] not in CHECKPOINT_PREFIXES:
+            continue
+        snapshot_key = parts[1]
+        prefix = parts[0]
+        current = by_snapshot_id.get(snapshot_key)
+        if current is None or current.startswith("checkpoints/"):
+            by_snapshot_id[snapshot_key] = f"{prefix}/{snapshot_key}"
+    return [(sid, by_snapshot_id[sid]) for sid in sorted(by_snapshot_id)]
+
+
+def copy_remote_file_from_candidates(
+    api: HfApi,
+    repo_id: str,
+    local_dir: Path,
+    destination: Path,
+    candidate_paths: list[str],
+) -> bool:
+    for candidate in candidate_paths:
+        downloaded = load_remote_file(api, repo_id, candidate, local_dir)
+        if downloaded is None:
+            continue
+        destination.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(downloaded, destination)
+        return True
+    return False
+
+
+def materialize_previous_snapshot_dir(
+    *,
+    api: Any,
+    repo_id: str,
+    previous_root: Path,
+    stable_snapshot_id: str | None,
+    latest_pointer: dict[str, Any] | None,
+    previous_tables: dict[str, list[dict[str, Any]]],
+) -> Path | None:
+    if not stable_snapshot_id:
+        return None
+    snapshot_dir = (previous_root / "materialized-snapshots" / stable_snapshot_id).resolve()
+    snapshot_dir.mkdir(parents=True, exist_ok=True)
+    for table_name, rows in previous_tables.items():
+        write_parquet(rows, snapshot_dir / f"{table_name}.parquet", table_name)
+    for artifact_name in (
+        "manifest.json",
+        "new_contributors.parquet",
+        "new-contributors-report.json",
+        "new-contributors-report.md",
+    ):
+        copy_remote_file_from_candidates(
+            api,
+            repo_id,
+            previous_root,
+            snapshot_dir / artifact_name,
+            stable_snapshot_candidates(latest_pointer, artifact_name),
+        )
+    return snapshot_dir
+
+
+def load_remote_table_from_candidates(
+    api: HfApi,
+    repo_id: str,
+    table_name: str,
+    local_dir: Path,
+    candidate_paths: list[str],
+) -> list[dict[str, Any]]:
+    for candidate in candidate_paths:
+        downloaded = load_remote_file(api, repo_id, candidate, local_dir)
+        if downloaded is not None:
+            return read_parquet_rows(downloaded)
+    return []
+
+
+def viewer_comment_rows(
+    comments: list[dict[str, Any]],
+    pull_requests: list[dict[str, Any]],
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    pr_numbers = {int(row["number"]) for row in pull_requests if row.get("number") is not None}
+    issue_comments: list[dict[str, Any]] = []
+    pr_comments: list[dict[str, Any]] = []
+    for row in comments:
+        parent_number = row.get("parent_number")
+        parent_kind = row.get("parent_kind")
+        if parent_kind == "pull_request" or parent_number in pr_numbers:
+            pr_comments.append(row)
+        else:
+            issue_comments.append(row)
+    return issue_comments, pr_comments
+
+
+def upload_delta_checkpoint(
+    *,
+    api: HfApi,
+    repo_id: str,
+    work_dir: Path,
+    repo_slug: str,
+    sid: str,
+    stage: str,
+    delta_tables: dict[str, list[dict[str, Any]]],
+    progress: dict[str, Any],
+) -> None:
+    checkpoint_root = work_dir / "checkpoint_upload"
+    if checkpoint_root.exists():
+        shutil.rmtree(checkpoint_root)
+    checkpoint_root.mkdir(parents=True, exist_ok=True)
+
+    for table_name, rows in delta_tables.items():
+        write_parquet(rows, checkpoint_root / f"{table_name}.parquet", table_name)
+    write_json(
+        {"repo": repo_slug, "snapshot_id": sid, **progress}, checkpoint_root / "progress.json"
+    )
+    write_json(
+        {"repo": repo_slug, "snapshot_id": sid, **progress},
+        checkpoint_root / "state" / "in_progress.json",
+    )
+    api.upload_folder(
+        folder_path=str(checkpoint_root),
+        path_in_repo=f"_checkpoints/{sid}",
+        repo_id=repo_id,
+        repo_type="dataset",
+        commit_message=f"Checkpoint {sid} ({stage})",
+    )
+
+
+def remaining_limit(limit: int | None, used: int) -> int | None:
+    if limit is None:
+        return None
+    return max(limit - used, 0)
+
+
+def _build_argument_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser:
+    defaults = command_defaults("refresh-dataset", config_path=config_path)
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", type=Path, help="Optional repo config file.")
+    parser.add_argument("--repo", default=defaults.get("repo", "huggingface/transformers"))
+    parser.add_argument("--hf-repo-id", default=defaults.get("hf-repo-id"))
+    parser.add_argument("--max-issues", type=int, default=defaults.get("max-issues"))
+    parser.add_argument("--max-prs", type=int, default=defaults.get("max-prs"))
+    parser.add_argument(
+        "--max-issue-comments",
+        type=int,
+        default=defaults.get("max-issue-comments"),
+    )
+    parser.add_argument(
+        "--max-reviews-per-pr",
+        type=int,
+        default=defaults.get("max-reviews-per-pr"),
+    )
+    parser.add_argument(
+        "--max-review-comments-per-pr",
+        type=int,
+        default=defaults.get("max-review-comments-per-pr"),
+    )
+    parser.add_argument(
+        "--fetch-timeline",
+        action="store_true",
+        default=bool(defaults.get("fetch-timeline", False)),
+    )
+    parser.add_argument(
+        "--new-contributor-report",
+        dest="new_contributor_report",
+        action="store_true",
+        default=bool(defaults.get("new-contributor-report", True)),
+    )
+    parser.add_argument(
+        "--no-new-contributor-report",
+        dest="new_contributor_report",
+        action="store_false",
+    )
+    parser.add_argument(
+        "--new-contributor-window-days",
+        type=int,
+        default=int(defaults.get("new-contributor-window-days", 42)),
+    )
+    parser.add_argument(
+        "--new-contributor-max-authors",
+        type=int,
+        default=int(defaults.get("new-contributor-max-authors", 25)),
+    )
+    parser.add_argument("--http-timeout", type=int, default=300)
+    parser.add_argument("--http-max-retries", type=int, default=8)
+    parser.add_argument("--checkpoint-every-comments", type=int, default=1000)
+    parser.add_argument("--checkpoint-every-prs", type=int, default=25)
+    parser.add_argument(
+        "--private-hf-repo",
+        dest="private_hf_repo",
+        action="store_true",
+        default=bool(defaults.get("private-hf-repo", False)),
+    )
+    parser.add_argument("--private", dest="private_hf_repo", action="store_true")
+    return parser
+
+
+def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
+    config_path = extract_cli_config_path(argv)
+    parser = _build_argument_parser(config_path=config_path)
+    args = parser.parse_args(argv)
+    if not args.hf_repo_id:
+        parser.error("--hf-repo-id is required (or set dataset_id in --config)")
+    return args
+
+
+def run_dataset_refresh(options: DatasetRefreshOptions) -> dict[str, Any]:
+    hf_token = os.getenv("HF_TOKEN")
+    github_token = resolve_github_token()
+    if not github_token:
+        raise RuntimeError("GITHUB_TOKEN must be set or resolvable via gh auth/.env")
+
+    repo_slug = options.repo.slug
+    owner, repo_name = options.repo.owner, options.repo.name
+    sid = snapshot_id()
+    crawl_started_at = iso_now()
+    extracted_at = iso_now()
+
+    api = HfApi(token=hf_token)
+    api.create_repo(
+        repo_id=options.hf_repo_id,
+        repo_type="dataset",
+        private=options.private_hf_repo,
+        exist_ok=True,
+    )
+
+    with tempfile.TemporaryDirectory(prefix="slop-farmer-job-") as tmp:
+        root = Path(tmp)
+        previous_root = root / "previous"
+        output_root = root / "output"
+        previous_root.mkdir(parents=True, exist_ok=True)
+        output_root.mkdir(parents=True, exist_ok=True)
+
+        remote_paths = list_remote_paths(api, options.hf_repo_id)
+        previous_watermark = load_remote_json_file(
+            api, options.hf_repo_id, "state/watermark.json", previous_root
+        )
+        remote_manifest = load_remote_json_file(
+            api, options.hf_repo_id, "manifest.json", previous_root
+        )
+        latest_pointer = (
+            load_remote_json_file(api, options.hf_repo_id, "snapshots/latest.json", previous_root)
+            if "snapshots/latest.json" in remote_paths
+            else None
+        )
+        stable_snapshot_id = None
+        if previous_watermark:
+            stable_snapshot_id = previous_watermark.get("last_successful_snapshot_id")
+        elif latest_pointer:
+            stable_snapshot_id = latest_pointer.get("latest_snapshot_id")
+        elif remote_manifest:
+            stable_snapshot_id = remote_manifest.get("snapshot_id")
+
+        log(f"Starting dataset refresh for {repo_slug}")
+        log(f"Target dataset repo: {options.hf_repo_id}")
+        previous_tables = {
+            table_name: [] for table_name in SCHEMAS if table_name != "new_contributors"
+        }
+        for table_name in previous_tables:
+            previous_tables[table_name] = load_remote_table_from_candidates(
+                api,
+                options.hf_repo_id,
+                table_name,
+                previous_root,
+                stable_snapshot_candidates(latest_pointer, f"{table_name}.parquet"),
+            )
+
+        checkpoint_progress: dict[str, Any] | None = None
+        best_comment_checkpoint_progress: dict[str, Any] | None = None
+        for checkpoint_sid, checkpoint_dir in checkpoint_dirs(remote_paths):
+            if stable_snapshot_id is not None and checkpoint_sid <= str(stable_snapshot_id):
+                continue
+            progress_payload = load_remote_json_file(
+                api, options.hf_repo_id, f"{checkpoint_dir}/progress.json", previous_root
+            ) or load_remote_json_file(
+                api,
+                options.hf_repo_id,
+                f"{checkpoint_dir}/state/in_progress.json",
+                previous_root,
+            )
+            if progress_payload is not None:
+                checkpoint_progress = progress_payload
+                if (
+                    progress_payload.get("effective_since") is None
+                    and (progress_payload.get("counts") or {}).get("comments", 0) > 0
+                    and (
+                        best_comment_checkpoint_progress is None
+                        or (progress_payload.get("counts") or {}).get("comments", 0)
+                        > (best_comment_checkpoint_progress.get("counts") or {}).get("comments", 0)
+                    )
+                ):
+                    best_comment_checkpoint_progress = progress_payload
+            for table_name in previous_tables:
+                checkpoint_rows = load_remote_table_from_candidates(
+                    api,
+                    options.hf_repo_id,
+                    table_name,
+                    previous_root,
+                    [f"{checkpoint_dir}/{table_name}.parquet"],
+                )
+                if checkpoint_rows:
+                    previous_tables[table_name] = merge_rows(
+                        table_name,
+                        previous_tables[table_name],
+                        checkpoint_rows,
+                    )
+
+        effective_since = None
+        if checkpoint_progress and checkpoint_progress.get("effective_since") is not None:
+            effective_since = checkpoint_progress.get("effective_since")
+            log(f"Resuming from incomplete checkpoint window starting at {effective_since}")
+        elif previous_watermark and previous_watermark.get("next_since") is not None:
+            effective_since = previous_watermark.get("next_since")
+            log(f"Resuming from remote watermark {effective_since}")
+        elif (
+            remote_manifest
+            and isinstance(remote_manifest.get("watermark"), dict)
+            and remote_manifest["watermark"].get("next_since") is not None
+        ):
+            effective_since = remote_manifest["watermark"].get("next_since")
+            log(f"Bootstrapping remote watermark from root manifest {effective_since}")
+        else:
+            log("No successful watermark found; running full snapshot")
+
+        client = GitHubClient(
+            token=github_token,
+            timeout=options.http_timeout,
+            max_retries=options.http_max_retries,
+            log=log,
+        )
+        previous_snapshot_dir = materialize_previous_snapshot_dir(
+            api=api,
+            repo_id=options.hf_repo_id,
+            previous_root=previous_root,
+            stable_snapshot_id=str(stable_snapshot_id) if stable_snapshot_id is not None else None,
+            latest_pointer=latest_pointer,
+            previous_tables=previous_tables,
+        )
+
+        rate_limit = client.get_json("/rate_limit")
+        core = (rate_limit.get("resources") or {}).get("core") or {}
+        limit = core.get("limit")
+        remaining = core.get("remaining")
+        reset_at = core.get("reset")
+        log(f"GitHub core rate limit: limit={limit} remaining={remaining} reset={reset_at}")
+        if limit is not None and int(limit) <= 60:
+            raise RuntimeError("GITHUB_TOKEN appears to be missing, invalid, or not being applied")
+        if remaining == 0 and reset_at:
+            sleep_for = max(int(reset_at) - int(time.time()), 1)
+            log(f"GitHub token exhausted before bootstrap; sleeping {sleep_for}s until reset")
+            time.sleep(sleep_for)
+
+        log("Fetching changed issue and pull request stubs from GitHub")
+        issue_stubs = list(
+            client.iter_repo_issues(owner, repo_name, effective_since, options.max_issues)
+        )
+        issues = [item for item in issue_stubs if "pull_request" not in item]
+        pr_stubs = [item for item in issue_stubs if "pull_request" in item]
+        if options.max_prs is not None:
+            pr_stubs = pr_stubs[: options.max_prs]
+        log(f"Fetched {len(issue_stubs)} changed stubs")
+
+        issue_number_to_kind = {
+            item["number"]: ("pull_request" if "pull_request" in item else "issue")
+            for item in issue_stubs
+        }
+        issue_rows = [normalize_issue(repo_slug, item, sid, extracted_at) for item in issues]
+
+        comment_rows: list[dict[str, Any]] = []
+        next_comment_checkpoint = options.checkpoint_every_comments
+        reuse_checkpoint_comments = (
+            stable_snapshot_id is None
+            and effective_since is None
+            and best_comment_checkpoint_progress is not None
+            and bool(previous_tables["comments"])
+        )
+        if reuse_checkpoint_comments:
+            log(
+                f"Reusing {len(previous_tables['comments'])} checkpoint comments from prior partial runs"
+            )
+        else:
+            for index, item in enumerate(issue_stubs, start=1):
+                if not item.get("comments"):
+                    continue
+                remaining_comments = remaining_limit(options.max_issue_comments, len(comment_rows))
+                if remaining_comments == 0:
+                    break
+                if index == 1 or index % 25 == 0:
+                    log(f"Collecting discussion comments; {len(comment_rows)} collected so far")
+                for comment in client.iter_issue_comments_for_number(
+                    owner,
+                    repo_name,
+                    int(item["number"]),
+                    effective_since,
+                    remaining_comments,
+                ):
+                    parent_number = issue_url_to_number(comment.get("issue_url"))
+                    parent_kind = issue_number_to_kind.get(parent_number, "issue_or_pr")
parent_kind = issue_number_to_kind.get(parent_number, "issue_or_pr")
|
| 525 |
+
comment_rows.append(
|
| 526 |
+
normalize_comment(
|
| 527 |
+
repo_slug,
|
| 528 |
+
comment,
|
| 529 |
+
parent_kind,
|
| 530 |
+
parent_number,
|
| 531 |
+
sid,
|
| 532 |
+
extracted_at,
|
| 533 |
+
)
|
| 534 |
+
)
|
| 535 |
+
remaining_comments = remaining_limit(
|
| 536 |
+
options.max_issue_comments,
|
| 537 |
+
len(comment_rows),
|
| 538 |
+
)
|
| 539 |
+
if (
|
| 540 |
+
options.checkpoint_every_comments
|
| 541 |
+
and len(comment_rows) >= next_comment_checkpoint
|
| 542 |
+
):
|
| 543 |
+
log(f"Pushing comment checkpoint to Hub at {len(comment_rows)} comments")
|
| 544 |
+
upload_delta_checkpoint(
|
| 545 |
+
api=api,
|
| 546 |
+
repo_id=options.hf_repo_id,
|
| 547 |
+
work_dir=root,
|
| 548 |
+
repo_slug=repo_slug,
|
| 549 |
+
sid=sid,
|
| 550 |
+
stage="comments",
|
| 551 |
+
delta_tables={
|
| 552 |
+
"issues": issue_rows,
|
| 553 |
+
"pull_requests": [],
|
| 554 |
+
"comments": comment_rows,
|
| 555 |
+
"reviews": [],
|
| 556 |
+
"review_comments": [],
|
| 557 |
+
"pr_files": [],
|
| 558 |
+
"pr_diffs": [],
|
| 559 |
+
"links": [],
|
| 560 |
+
"events": [],
|
| 561 |
+
},
|
| 562 |
+
progress={
|
| 563 |
+
"stage": "comments",
|
| 564 |
+
"effective_since": effective_since,
|
| 565 |
+
"counts": {
|
| 566 |
+
"issues": len(issue_rows),
|
| 567 |
+
"comments": len(comment_rows),
|
| 568 |
+
"pull_requests": 0,
|
| 569 |
+
"reviews": 0,
|
| 570 |
+
"review_comments": 0,
|
| 571 |
+
"pr_files": 0,
|
| 572 |
+
"pr_diffs": 0,
|
| 573 |
+
"links": 0,
|
| 574 |
+
"events": 0,
|
| 575 |
+
},
|
| 576 |
+
},
|
| 577 |
+
)
|
| 578 |
+
next_comment_checkpoint += options.checkpoint_every_comments
|
| 579 |
+
if remaining_comments == 0:
|
| 580 |
+
break
|
| 581 |
+
|
| 582 |
+
pr_rows: list[dict[str, Any]] = []
|
| 583 |
+
review_rows: list[dict[str, Any]] = []
|
| 584 |
+
review_comment_rows: list[dict[str, Any]] = []
|
| 585 |
+
pr_file_rows: list[dict[str, Any]] = []
|
| 586 |
+
pr_diff_rows: list[dict[str, Any]] = []
|
| 587 |
+
event_rows: list[dict[str, Any]] = []
|
| 588 |
+
next_pr_checkpoint = options.checkpoint_every_prs
|
| 589 |
+
|
| 590 |
+
previous_pr_rows_by_number = {
|
| 591 |
+
int(row["number"]): row
|
| 592 |
+
for row in previous_tables["pull_requests"]
|
| 593 |
+
if row.get("number") is not None
|
| 594 |
+
}
|
| 595 |
+
previous_review_rows_by_number: defaultdict[int, list[dict[str, Any]]] = defaultdict(list)
|
| 596 |
+
for row in previous_tables["reviews"]:
|
| 597 |
+
if row.get("pull_request_number") is not None:
|
| 598 |
+
previous_review_rows_by_number[int(row["pull_request_number"])].append(row)
|
| 599 |
+
previous_review_comment_rows_by_number: defaultdict[int, list[dict[str, Any]]] = (
|
| 600 |
+
defaultdict(list)
|
| 601 |
+
)
|
| 602 |
+
for row in previous_tables["review_comments"]:
|
| 603 |
+
if row.get("pull_request_number") is not None:
|
| 604 |
+
previous_review_comment_rows_by_number[int(row["pull_request_number"])].append(row)
|
| 605 |
+
previous_pr_file_rows_by_number: defaultdict[int, list[dict[str, Any]]] = defaultdict(list)
|
| 606 |
+
for row in previous_tables["pr_files"]:
|
| 607 |
+
if row.get("pull_request_number") is not None:
|
| 608 |
+
previous_pr_file_rows_by_number[int(row["pull_request_number"])].append(row)
|
| 609 |
+
previous_pr_diff_rows_by_number = {
|
| 610 |
+
int(row["pull_request_number"]): row
|
| 611 |
+
for row in previous_tables["pr_diffs"]
|
| 612 |
+
if row.get("pull_request_number") is not None
|
| 613 |
+
}
|
| 614 |
+
previous_pr_event_rows_by_number: defaultdict[int, list[dict[str, Any]]] = defaultdict(list)
|
| 615 |
+
for row in previous_tables["events"]:
|
| 616 |
+
if row.get("parent_kind") == "pull_request" and row.get("parent_number") is not None:
|
| 617 |
+
previous_pr_event_rows_by_number[int(row["parent_number"])].append(row)
|
| 618 |
+
|
| 619 |
+
hydration_pr_stubs: list[dict[str, Any]] = []
|
| 620 |
+
for pr_stub in pr_stubs:
|
| 621 |
+
number = int(pr_stub["number"])
|
| 622 |
+
previous_pr_row = previous_pr_rows_by_number.get(number)
|
| 623 |
+
if previous_pr_row and previous_pr_row.get("updated_at") == pr_stub.get("updated_at"):
|
| 624 |
+
pr_rows.append(previous_pr_row)
|
| 625 |
+
review_rows.extend(previous_review_rows_by_number[number])
|
| 626 |
+
review_comment_rows.extend(previous_review_comment_rows_by_number[number])
|
| 627 |
+
pr_file_rows.extend(previous_pr_file_rows_by_number[number])
|
| 628 |
+
if number in previous_pr_diff_rows_by_number:
|
| 629 |
+
pr_diff_rows.append(previous_pr_diff_rows_by_number[number])
|
| 630 |
+
event_rows.extend(previous_pr_event_rows_by_number[number])
|
| 631 |
+
continue
|
| 632 |
+
hydration_pr_stubs.append(pr_stub)
|
| 633 |
+
|
| 634 |
+
reused_pr_count = len(pr_rows)
|
| 635 |
+
if reused_pr_count:
|
| 636 |
+
log(f"Reusing hydrated data for {reused_pr_count} pull requests from prior checkpoints")
|
| 637 |
+
if options.checkpoint_every_prs:
|
| 638 |
+
while reused_pr_count >= next_pr_checkpoint:
|
| 639 |
+
next_pr_checkpoint += options.checkpoint_every_prs
|
| 640 |
+
|
| 641 |
+
total_prs = len(pr_stubs)
|
| 642 |
+
remaining_prs = len(hydration_pr_stubs)
|
| 643 |
+
for index, pr_stub in enumerate(hydration_pr_stubs, start=1):
|
| 644 |
+
number = int(pr_stub["number"])
|
| 645 |
+
hydrated_count = reused_pr_count + index
|
| 646 |
+
if index == 1 or hydrated_count % 10 == 0 or index == remaining_prs:
|
| 647 |
+
log(f"Hydrating pull requests: {hydrated_count}/{total_prs}")
|
| 648 |
+
detail = client.get_pull_request(owner, repo_name, number)
|
| 649 |
+
pr_rows.append(normalize_pull_request(repo_slug, pr_stub, detail, sid, extracted_at))
|
| 650 |
+
for review in client.iter_pull_reviews(
|
| 651 |
+
owner, repo_name, number, options.max_reviews_per_pr
|
| 652 |
+
):
|
| 653 |
+
review_rows.append(normalize_review(repo_slug, number, review, sid, extracted_at))
|
| 654 |
+
for comment in client.iter_pull_review_comments(
|
| 655 |
+
owner,
|
| 656 |
+
repo_name,
|
| 657 |
+
number,
|
| 658 |
+
options.max_review_comments_per_pr,
|
| 659 |
+
):
|
| 660 |
+
review_comment_rows.append(
|
| 661 |
+
normalize_review_comment(repo_slug, number, comment, sid, extracted_at)
|
| 662 |
+
)
|
| 663 |
+
for pr_file in client.iter_pull_files(owner, repo_name, number):
|
| 664 |
+
pr_file_rows.append(
|
| 665 |
+
normalize_pr_file(repo_slug, number, pr_file, sid, extracted_at)
|
| 666 |
+
)
|
| 667 |
+
pr_diff_rows.append(
|
| 668 |
+
normalize_pr_diff(
|
| 669 |
+
repo_slug,
|
| 670 |
+
number,
|
| 671 |
+
pr_stub.get("html_url"),
|
| 672 |
+
pr_stub.get("url"),
|
| 673 |
+
client.get_pull_request_diff(owner, repo_name, number),
|
| 674 |
+
sid,
|
| 675 |
+
extracted_at,
|
| 676 |
+
)
|
| 677 |
+
)
|
| 678 |
+
if options.fetch_timeline:
|
| 679 |
+
for event in client.iter_issue_timeline(owner, repo_name, number):
|
| 680 |
+
event_rows.append(
|
| 681 |
+
normalize_timeline_event(
|
| 682 |
+
repo_slug,
|
| 683 |
+
number,
|
| 684 |
+
"pull_request",
|
| 685 |
+
event,
|
| 686 |
+
sid,
|
| 687 |
+
extracted_at,
|
| 688 |
+
)
|
| 689 |
+
)
|
| 690 |
+
if options.checkpoint_every_prs and len(pr_rows) >= next_pr_checkpoint:
|
| 691 |
+
log(f"Pushing PR checkpoint to Hub at {len(pr_rows)} hydrated PRs")
|
| 692 |
+
upload_delta_checkpoint(
|
| 693 |
+
api=api,
|
| 694 |
+
repo_id=options.hf_repo_id,
|
| 695 |
+
work_dir=root,
|
| 696 |
+
repo_slug=repo_slug,
|
| 697 |
+
sid=sid,
|
| 698 |
+
stage="pull_requests",
|
| 699 |
+
delta_tables={
|
| 700 |
+
"issues": issue_rows,
|
| 701 |
+
"pull_requests": pr_rows,
|
| 702 |
+
"comments": comment_rows,
|
| 703 |
+
"reviews": review_rows,
|
| 704 |
+
"review_comments": review_comment_rows,
|
| 705 |
+
"pr_files": pr_file_rows,
|
| 706 |
+
"pr_diffs": pr_diff_rows,
|
| 707 |
+
"links": [],
|
| 708 |
+
"events": event_rows,
|
| 709 |
+
},
|
| 710 |
+
progress={
|
| 711 |
+
"stage": "pull_requests",
|
| 712 |
+
"effective_since": effective_since,
|
| 713 |
+
"counts": {
|
| 714 |
+
"issues": len(issue_rows),
|
| 715 |
+
"comments": len(comment_rows),
|
| 716 |
+
"pull_requests": len(pr_rows),
|
| 717 |
+
"reviews": len(review_rows),
|
| 718 |
+
"review_comments": len(review_comment_rows),
|
| 719 |
+
"pr_files": len(pr_file_rows),
|
| 720 |
+
"pr_diffs": len(pr_diff_rows),
|
| 721 |
+
"links": 0,
|
| 722 |
+
"events": len(event_rows),
|
| 723 |
+
},
|
| 724 |
+
},
|
| 725 |
+
)
|
| 726 |
+
next_pr_checkpoint += options.checkpoint_every_prs
|
| 727 |
+
|
| 728 |
+
if options.fetch_timeline:
|
| 729 |
+
log(f"Fetching issue timelines for {len(issues)} changed issues")
|
| 730 |
+
for issue in issues:
|
| 731 |
+
for event in client.iter_issue_timeline(owner, repo_name, int(issue["number"])):
|
| 732 |
+
event_rows.append(
|
| 733 |
+
normalize_timeline_event(
|
| 734 |
+
repo_slug,
|
| 735 |
+
int(issue["number"]),
|
| 736 |
+
"issue",
|
| 737 |
+
event,
|
| 738 |
+
sid,
|
| 739 |
+
extracted_at,
|
| 740 |
+
)
|
| 741 |
+
)
|
| 742 |
+
|
| 743 |
+
link_rows: list[dict[str, Any]] = []
|
| 744 |
+
for row in issue_rows:
|
| 745 |
+
link_rows.extend(
|
| 746 |
+
build_text_link_rows(
|
| 747 |
+
repo=repo_slug,
|
| 748 |
+
owner=owner,
|
| 749 |
+
repo_name=repo_name,
|
| 750 |
+
source_type="issue",
|
| 751 |
+
source_number=row["number"],
|
| 752 |
+
source_id=row["github_id"],
|
| 753 |
+
body=row["body"],
|
| 754 |
+
snapshot_id=sid,
|
| 755 |
+
extracted_at=extracted_at,
|
| 756 |
+
)
|
| 757 |
+
)
|
| 758 |
+
for row in pr_rows:
|
| 759 |
+
link_rows.extend(
|
| 760 |
+
build_text_link_rows(
|
| 761 |
+
repo=repo_slug,
|
| 762 |
+
owner=owner,
|
| 763 |
+
repo_name=repo_name,
|
| 764 |
+
source_type="pull_request",
|
| 765 |
+
source_number=row["number"],
|
| 766 |
+
source_id=row["github_id"],
|
| 767 |
+
body=row["body"],
|
| 768 |
+
snapshot_id=sid,
|
| 769 |
+
extracted_at=extracted_at,
|
| 770 |
+
)
|
| 771 |
+
)
|
| 772 |
+
for row in comment_rows or previous_tables["comments"]:
|
| 773 |
+
if row["parent_number"] is None:
|
| 774 |
+
continue
|
| 775 |
+
link_rows.extend(
|
| 776 |
+
build_text_link_rows(
|
| 777 |
+
repo=repo_slug,
|
| 778 |
+
owner=owner,
|
| 779 |
+
repo_name=repo_name,
|
| 780 |
+
source_type="comment",
|
| 781 |
+
source_number=row["parent_number"],
|
| 782 |
+
source_id=row["github_id"],
|
| 783 |
+
body=row["body"],
|
| 784 |
+
snapshot_id=sid,
|
| 785 |
+
extracted_at=extracted_at,
|
| 786 |
+
)
|
| 787 |
+
)
|
| 788 |
+
for row in review_rows:
|
| 789 |
+
link_rows.extend(
|
| 790 |
+
build_text_link_rows(
|
| 791 |
+
repo=repo_slug,
|
| 792 |
+
owner=owner,
|
| 793 |
+
repo_name=repo_name,
|
| 794 |
+
source_type="review",
|
| 795 |
+
source_number=row["pull_request_number"],
|
| 796 |
+
source_id=row["github_id"],
|
| 797 |
+
body=row["body"],
|
| 798 |
+
snapshot_id=sid,
|
| 799 |
+
extracted_at=extracted_at,
|
| 800 |
+
)
|
| 801 |
+
)
|
| 802 |
+
for row in review_comment_rows:
|
| 803 |
+
link_rows.extend(
|
| 804 |
+
build_text_link_rows(
|
| 805 |
+
repo=repo_slug,
|
| 806 |
+
owner=owner,
|
| 807 |
+
repo_name=repo_name,
|
| 808 |
+
source_type="review_comment",
|
| 809 |
+
source_number=row["pull_request_number"],
|
| 810 |
+
source_id=row["github_id"],
|
| 811 |
+
body=row["body"],
|
| 812 |
+
snapshot_id=sid,
|
| 813 |
+
extracted_at=extracted_at,
|
| 814 |
+
)
|
| 815 |
+
)
|
| 816 |
+
link_rows.extend(
|
| 817 |
+
build_pr_duplicate_candidate_rows(
|
| 818 |
+
repo=repo_slug,
|
| 819 |
+
pull_requests=pr_rows,
|
| 820 |
+
link_rows=link_rows,
|
| 821 |
+
snapshot_id=sid,
|
| 822 |
+
extracted_at=extracted_at,
|
| 823 |
+
)
|
| 824 |
+
)
|
| 825 |
+
for event in event_rows:
|
| 826 |
+
if event.get("source_issue_number"):
|
| 827 |
+
link_rows.append(
|
| 828 |
+
{
|
| 829 |
+
"repo": repo_slug,
|
| 830 |
+
"source_type": event["parent_kind"],
|
| 831 |
+
"source_number": event["parent_number"],
|
| 832 |
+
"source_github_id": None,
|
| 833 |
+
"target_owner": owner,
|
| 834 |
+
"target_repo": repo_name,
|
| 835 |
+
"target_number": event["source_issue_number"],
|
| 836 |
+
"link_type": f"timeline:{event['event']}",
|
| 837 |
+
"link_origin": "timeline",
|
| 838 |
+
"snapshot_id": sid,
|
| 839 |
+
"extracted_at": extracted_at,
|
| 840 |
+
}
|
| 841 |
+
)
|
| 842 |
+
|
| 843 |
+
delta_tables = {
|
| 844 |
+
"issues": issue_rows,
|
| 845 |
+
"pull_requests": pr_rows,
|
| 846 |
+
"comments": comment_rows,
|
| 847 |
+
"reviews": review_rows,
|
| 848 |
+
"review_comments": review_comment_rows,
|
| 849 |
+
"pr_files": pr_file_rows,
|
| 850 |
+
"pr_diffs": pr_diff_rows,
|
| 851 |
+
"links": link_rows,
|
| 852 |
+
"events": event_rows,
|
| 853 |
+
}
|
| 854 |
+
if any(delta_tables.values()):
|
| 855 |
+
log("Pushing final delta checkpoint to Hub before merge upload")
|
| 856 |
+
upload_delta_checkpoint(
|
| 857 |
+
api=api,
|
| 858 |
+
repo_id=options.hf_repo_id,
|
| 859 |
+
work_dir=root,
|
| 860 |
+
repo_slug=repo_slug,
|
| 861 |
+
sid=sid,
|
| 862 |
+
stage="final-delta",
|
| 863 |
+
delta_tables=delta_tables,
|
| 864 |
+
progress={
|
| 865 |
+
"stage": "final-delta",
|
| 866 |
+
"effective_since": effective_since,
|
| 867 |
+
"counts": {name: len(rows) for name, rows in delta_tables.items()},
|
| 868 |
+
},
|
| 869 |
+
)
|
| 870 |
+
|
| 871 |
+
final_tables = {
|
| 872 |
+
table_name: merge_rows(table_name, previous_tables[table_name], delta_rows)
|
| 873 |
+
for table_name, delta_rows in delta_tables.items()
|
| 874 |
+
}
|
| 875 |
+
manifest = {
|
| 876 |
+
"repo": repo_slug,
|
| 877 |
+
"snapshot_id": sid,
|
| 878 |
+
"crawl_started_at": crawl_started_at,
|
| 879 |
+
"extracted_at": extracted_at,
|
| 880 |
+
"watermark": {
|
| 881 |
+
"effective_since": effective_since,
|
| 882 |
+
"next_since": crawl_started_at,
|
| 883 |
+
"previous_snapshot_dir": (
|
| 884 |
+
str(previous_snapshot_dir) if previous_snapshot_dir is not None else None
|
| 885 |
+
),
|
| 886 |
+
},
|
| 887 |
+
"delta_counts": {
|
| 888 |
+
"issue_stubs": len(issue_stubs),
|
| 889 |
+
"issues": len(issue_rows),
|
| 890 |
+
"pull_requests": len(pr_rows),
|
| 891 |
+
"comments": len(comment_rows),
|
| 892 |
+
"reviews": len(review_rows),
|
| 893 |
+
"review_comments": len(review_comment_rows),
|
| 894 |
+
"pr_files": len(pr_file_rows),
|
| 895 |
+
"pr_diffs": len(pr_diff_rows),
|
| 896 |
+
"timeline_events": len(event_rows),
|
| 897 |
+
"links": len(link_rows),
|
| 898 |
+
},
|
| 899 |
+
"counts": {
|
| 900 |
+
"issues": len(final_tables["issues"]),
|
| 901 |
+
"pull_requests": len(final_tables["pull_requests"]),
|
| 902 |
+
"comments": len(final_tables["comments"]),
|
| 903 |
+
"reviews": len(final_tables["reviews"]),
|
| 904 |
+
"review_comments": len(final_tables["review_comments"]),
|
| 905 |
+
"pr_files": len(final_tables["pr_files"]),
|
| 906 |
+
"pr_diffs": len(final_tables["pr_diffs"]),
|
| 907 |
+
"timeline_events": len(final_tables["events"]),
|
| 908 |
+
"links": len(final_tables["links"]),
|
| 909 |
+
},
|
| 910 |
+
}
|
| 911 |
+
|
| 912 |
+
log("Writing updated dataset files")
|
| 913 |
+
for table_name, rows in final_tables.items():
|
| 914 |
+
write_parquet(rows, output_root / f"{table_name}.parquet", table_name)
|
| 915 |
+
issue_comment_rows, pr_comment_rows = viewer_comment_rows(
|
| 916 |
+
final_tables["comments"],
|
| 917 |
+
final_tables["pull_requests"],
|
| 918 |
+
)
|
| 919 |
+
write_parquet(issue_comment_rows, output_root / "issue_comments.parquet", "comments")
|
| 920 |
+
write_parquet(pr_comment_rows, output_root / "pr_comments.parquet", "comments")
|
| 921 |
+
if options.new_contributor_report:
|
| 922 |
+
write_json(manifest, output_root / "manifest.json")
|
| 923 |
+
log("Generating new contributor dataset/report artifacts")
|
| 924 |
+
run_new_contributor_report(
|
| 925 |
+
NewContributorReportOptions(
|
| 926 |
+
snapshot_dir=output_root,
|
| 927 |
+
output_dir=output_root,
|
| 928 |
+
output=None,
|
| 929 |
+
json_output=None,
|
| 930 |
+
hf_repo_id=None,
|
| 931 |
+
hf_revision=None,
|
| 932 |
+
hf_materialize_dir=None,
|
| 933 |
+
window_days=options.new_contributor_window_days,
|
| 934 |
+
max_authors=options.new_contributor_max_authors,
|
| 935 |
+
)
|
| 936 |
+
)
|
| 937 |
+
manifest["counts"]["new_contributors"] = len(
|
| 938 |
+
read_parquet_rows(output_root / "new_contributors.parquet")
|
| 939 |
+
)
|
| 940 |
+
manifest["artifacts"] = {
|
| 941 |
+
"new_contributors_parquet": "new_contributors.parquet",
|
| 942 |
+
"new_contributors_json": "new-contributors-report.json",
|
| 943 |
+
"new_contributors_markdown": "new-contributors-report.md",
|
| 944 |
+
}
|
| 945 |
+
manifest["watermark"].pop("previous_snapshot_dir", None)
|
| 946 |
+
write_json(manifest, output_root / "manifest.json")
|
| 947 |
+
write_text(
|
| 948 |
+
build_hf_dataset_card(
|
| 949 |
+
repo_slug,
|
| 950 |
+
sid,
|
| 951 |
+
include_new_contributors=options.new_contributor_report,
|
| 952 |
+
),
|
| 953 |
+
output_root / "README.md",
|
| 954 |
+
)
|
| 955 |
+
write_json(
|
| 956 |
+
{
|
| 957 |
+
"repo": repo_slug,
|
| 958 |
+
"last_successful_snapshot_id": sid,
|
| 959 |
+
"effective_since": effective_since,
|
| 960 |
+
"next_since": crawl_started_at,
|
| 961 |
+
"updated_at": extracted_at,
|
| 962 |
+
},
|
| 963 |
+
output_root / "state" / "watermark.json",
|
| 964 |
+
)
|
| 965 |
+
write_json(manifest, output_root / "snapshots" / sid / "manifest.json")
|
| 966 |
+
write_json(
|
| 967 |
+
{
|
| 968 |
+
"repo": repo_slug,
|
| 969 |
+
"latest_snapshot_id": sid,
|
| 970 |
+
"snapshot_dir": f"snapshots/{sid}",
|
| 971 |
+
"manifest_path": "manifest.json",
|
| 972 |
+
"archived_manifest_path": f"snapshots/{sid}/manifest.json",
|
| 973 |
+
"next_since": crawl_started_at,
|
| 974 |
+
},
|
| 975 |
+
output_root / "snapshots" / "latest.json",
|
| 976 |
+
)
|
| 977 |
+
|
| 978 |
+
log("Uploading updated dataset to the Hub")
|
| 979 |
+
api.upload_folder(
|
| 980 |
+
folder_path=str(output_root),
|
| 981 |
+
repo_id=options.hf_repo_id,
|
| 982 |
+
repo_type="dataset",
|
| 983 |
+
commit_message=f"Refresh {repo_name} dataset snapshot {sid}",
|
| 984 |
+
)
|
| 985 |
+
log(f"Dataset refresh complete for {options.hf_repo_id}")
|
| 986 |
+
return {
|
| 987 |
+
"repo": repo_slug,
|
| 988 |
+
"dataset_id": options.hf_repo_id,
|
| 989 |
+
"snapshot_id": sid,
|
| 990 |
+
"effective_since": effective_since,
|
| 991 |
+
"counts": manifest["counts"],
|
| 992 |
+
}
|
| 993 |
+
|
| 994 |
+
|
| 995 |
+
def main(argv: list[str] | None = None) -> None:
|
| 996 |
+
args = parse_args(argv)
|
| 997 |
+
result = run_dataset_refresh(
|
| 998 |
+
DatasetRefreshOptions(
|
| 999 |
+
repo=RepoRef.parse(args.repo),
|
| 1000 |
+
hf_repo_id=args.hf_repo_id,
|
| 1001 |
+
private_hf_repo=args.private_hf_repo,
|
| 1002 |
+
max_issues=args.max_issues,
|
| 1003 |
+
max_prs=args.max_prs,
|
| 1004 |
+
max_issue_comments=args.max_issue_comments,
|
| 1005 |
+
max_reviews_per_pr=args.max_reviews_per_pr,
|
| 1006 |
+
max_review_comments_per_pr=args.max_review_comments_per_pr,
|
| 1007 |
+
fetch_timeline=args.fetch_timeline,
|
| 1008 |
+
new_contributor_report=args.new_contributor_report,
|
| 1009 |
+
new_contributor_window_days=args.new_contributor_window_days,
|
| 1010 |
+
new_contributor_max_authors=args.new_contributor_max_authors,
|
| 1011 |
+
http_timeout=args.http_timeout,
|
| 1012 |
+
http_max_retries=args.http_max_retries,
|
| 1013 |
+
checkpoint_every_comments=args.checkpoint_every_comments,
|
| 1014 |
+
checkpoint_every_prs=args.checkpoint_every_prs,
|
| 1015 |
+
)
|
| 1016 |
+
)
|
| 1017 |
+
print(json.dumps(result, indent=2))
|
| 1018 |
+
|
| 1019 |
+
|
| 1020 |
+
if __name__ == "__main__":
|
| 1021 |
+
main()
|
src/slop_farmer/app/dataset_status.py
ADDED
@@ -0,0 +1,182 @@
+from __future__ import annotations
+
+import tempfile
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from huggingface_hub import HfApi
+
+from slop_farmer.config import DatasetStatusOptions
+from slop_farmer.data.hf_dataset_repo import (
+    list_remote_paths,
+    load_remote_file,
+    load_remote_json_file,
+    stable_snapshot_candidates,
+)
+from slop_farmer.data.parquet_io import read_json
+
+
+def _coerce_datetime(value: Any) -> datetime | None:
+    if not isinstance(value, str) or not value:
+        return None
+    try:
+        return datetime.fromisoformat(value.replace("Z", "+00:00"))
+    except ValueError:
+        return None
+
+
+def _age_summary(value: str | None) -> dict[str, Any]:
+    timestamp = _coerce_datetime(value)
+    if timestamp is None:
+        return {"seconds": None, "summary": "unknown", "staleness": "unknown"}
+    age_seconds = max(int((datetime.now(tz=UTC) - timestamp).total_seconds()), 0)
+    if age_seconds <= 6 * 3600:
+        staleness = "fresh"
+    elif age_seconds <= 24 * 3600:
+        staleness = "aging"
+    else:
+        staleness = "stale"
+    if age_seconds < 3600:
+        summary = f"{age_seconds // 60}m"
+    elif age_seconds < 24 * 3600:
+        summary = f"{age_seconds // 3600}h"
+    else:
+        summary = f"{age_seconds // 86400}d"
+    return {"seconds": age_seconds, "summary": summary, "staleness": staleness}
+
+
+def _local_status(output_dir: Path) -> dict[str, Any] | None:
+    latest_path = output_dir.resolve() / "snapshots" / "latest.json"
+    if not latest_path.exists():
+        return None
+    payload = read_json(latest_path)
+    snapshot_dir = payload.get("snapshot_dir")
+    manifest = {}
+    if isinstance(snapshot_dir, str) and snapshot_dir:
+        manifest_path = Path(snapshot_dir).resolve() / "manifest.json"
+        if manifest_path.exists():
+            manifest = read_json(manifest_path)
+    return {
+        "latest_path": str(latest_path),
+        "latest_pointer": payload,
+        "snapshot_dir": snapshot_dir,
+        "snapshot_id": manifest.get("snapshot_id") or payload.get("latest_snapshot_id"),
+    }
+
+
+def _remote_status(repo_id: str, revision: str | None) -> dict[str, Any]:
+    api = HfApi()
+    with tempfile.TemporaryDirectory(prefix="slop-farmer-dataset-status-") as tmp:
+        root = Path(tmp)
+        remote_paths = list_remote_paths(api, repo_id, revision=revision)
+        latest_pointer = load_remote_json_file(
+            api,
+            repo_id,
+            "snapshots/latest.json",
+            root,
+            revision=revision,
+        )
+        watermark = load_remote_json_file(
+            api,
+            repo_id,
+            "state/watermark.json",
+            root,
+            revision=revision,
+        )
+        manifest = None
+        if latest_pointer is not None:
+            for candidate in stable_snapshot_candidates(latest_pointer, "manifest.json"):
+                downloaded = load_remote_file(
+                    api,
+                    repo_id,
+                    candidate,
+                    root,
+                    revision=revision,
+                )
+                if downloaded is None:
+                    continue
+                manifest = read_json(downloaded)
+                break
+        snapshot_prefix = (
+            str(latest_pointer.get("snapshot_dir") or "").strip("/")
+            if isinstance(latest_pointer, dict)
+            else ""
+        )
+        contributors_present = any(
+            path in remote_paths
+            for path in (
+                "new_contributors.parquet",
+                "new-contributors-report.json",
+                "new-contributors-report.md",
+            )
+        )
+        if snapshot_prefix:
+            contributors_present = contributors_present or any(
+                path in remote_paths
+                for path in (
+                    f"{snapshot_prefix}/new_contributors.parquet",
+                    f"{snapshot_prefix}/new-contributors-report.json",
+                    f"{snapshot_prefix}/new-contributors-report.md",
+                )
+            )
+        extracted_at = manifest.get("extracted_at") if manifest else None
+        return {
+            "dataset_id": repo_id,
+            "revision": revision,
+            "latest_pointer": latest_pointer,
+            "watermark": watermark,
+            "manifest": manifest,
+            "contributors_present": contributors_present,
+            "remote_path_count": len(remote_paths),
+            "age": _age_summary(extracted_at),
+        }
+
+
+def get_dataset_status(options: DatasetStatusOptions) -> dict[str, Any]:
+    remote = _remote_status(options.hf_repo_id, options.hf_revision) if options.hf_repo_id else None
+    local = _local_status(options.output_dir)
+    repo = options.repo
+    if repo is None and remote and remote.get("manifest"):
+        repo = remote["manifest"].get("repo")
+    if repo is None and local and isinstance(local.get("latest_pointer"), dict):
+        repo = local["latest_pointer"].get("repo")
+    return {
+        "repo": repo,
+        "dataset_id": options.hf_repo_id,
+        "remote": remote,
+        "local": local,
+    }
+
+
+def format_dataset_status(status: dict[str, Any]) -> str:
+    remote = status.get("remote") or {}
+    local = status.get("local") or {}
+    manifest = remote.get("manifest") or {}
+    watermark = remote.get("watermark") or {}
+    latest_pointer = remote.get("latest_pointer") or {}
+    age = remote.get("age") or {}
+    lines = [
+        f"Repo: {status.get('repo') or '?'}",
+        f"Dataset: {status.get('dataset_id') or 'not configured'}",
+    ]
+    if remote:
+        lines.extend(
+            [
+                f"Remote latest snapshot: {manifest.get('snapshot_id') or latest_pointer.get('latest_snapshot_id') or '?'}",
+                f"Remote extracted at: {manifest.get('extracted_at') or '?'}",
+                f"Remote next_since: {watermark.get('next_since') or latest_pointer.get('next_since') or '?'}",
+                f"Contributor artifacts: {'yes' if remote.get('contributors_present') else 'no'}",
+                f"Freshness: {age.get('summary') or 'unknown'} ({age.get('staleness') or 'unknown'})",
+            ]
+        )
+    if local:
+        lines.extend(
+            [
+                f"Local latest pointer: {local.get('latest_path')}",
+                f"Local snapshot id: {local.get('snapshot_id') or '?'}",
+            ]
+        )
+    else:
+        lines.append("Local latest pointer: none")
+    return "\n".join(lines)
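
A matching sketch for the status helper. `DatasetStatusOptions` is assumed to carry exactly the four fields `get_dataset_status` reads above (`repo`, `hf_repo_id`, `hf_revision`, `output_dir`); the dataset id and output dir are placeholders.

# Sketch: field names mirror what get_dataset_status reads; values are placeholders.
from pathlib import Path

from slop_farmer.app.dataset_status import format_dataset_status, get_dataset_status
from slop_farmer.config import DatasetStatusOptions

status = get_dataset_status(
    DatasetStatusOptions(
        repo=None,  # resolved from the remote manifest or local pointer when omitted
        hf_repo_id="your-org/transformers-pr-data",
        hf_revision=None,
        output_dir=Path("pipeline_data"),
    )
)
print(format_dataset_status(status))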
src/slop_farmer/app/deploy.py
CHANGED
@@ -5,6 +5,7 @@ import subprocess
 from pathlib import Path
 
 from slop_farmer.config import DeployDashboardOptions
+from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
 
 
 def run_deploy_dashboard(options: DeployDashboardOptions) -> None:
@@ -17,6 +18,16 @@ def run_deploy_dashboard(options: DeployDashboardOptions) -> None:
         {
             "PIPELINE_DATA_DIR": str(options.pipeline_data_dir),
             "WEB_DIR": str(options.web_dir),
+            "SNAPSHOT_DIR": str(
+                resolve_snapshot_source_dir(
+                    snapshot_dir=options.snapshot_dir,
+                    local_snapshots_root=options.pipeline_data_dir.resolve() / "snapshots",
+                    hf_repo_id=options.hf_repo_id,
+                    hf_revision=options.hf_revision,
+                    hf_materialize_dir=options.hf_materialize_dir,
+                    hf_output_dir=options.pipeline_data_dir,
+                )
+            ),
             "DASHBOARD_WINDOW_DAYS": str(options.dashboard_window_days),
             "CONTRIBUTOR_WINDOW_DAYS": str(options.contributor_window_days),
             "CONTRIBUTOR_MAX_AUTHORS": str(options.contributor_max_authors),
@@ -28,8 +39,6 @@ def run_deploy_dashboard(options: DeployDashboardOptions) -> None:
             "SPACE_SHORT_DESCRIPTION": options.space_short_description,
         }
     )
-    if options.snapshot_dir is not None:
-        env["SNAPSHOT_DIR"] = str(options.snapshot_dir)
     if options.analysis_input is not None:
        env["ANALYSIS_INPUT"] = str(options.analysis_input)
    if options.contributors_input is not None:
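
The deploy change swaps the old conditional `SNAPSHOT_DIR` export for an unconditional resolution step, so the dashboard build always receives a concrete snapshot directory, whether it comes from an explicit path, the local snapshots root, or a materialized HF dataset. A sketch of the call in isolation (paths are placeholders; the fallback order is inferred from the parameter names, not confirmed by this diff):

# Sketch: mirrors the call site added above; fallback behavior is an assumption.
from pathlib import Path

from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir

pipeline_data_dir = Path("pipeline_data")  # placeholder
snapshot_dir = resolve_snapshot_source_dir(
    snapshot_dir=None,  # no explicit override
    local_snapshots_root=pipeline_data_dir.resolve() / "snapshots",
    hf_repo_id=None,
    hf_revision=None,
    hf_materialize_dir=None,
    hf_output_dir=pipeline_data_dir,
)
print(snapshot_dir)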
src/slop_farmer/app/hf_checkpoint_import.py
CHANGED
@@ -28,6 +28,7 @@ from huggingface_hub import HfApi, hf_hub_download
 
 from slop_farmer.app.publish import publish_snapshot
 from slop_farmer.config import CheckpointImportOptions
+from slop_farmer.data.dataset_card import build_hf_dataset_card
 from slop_farmer.data.links import build_pr_duplicate_candidate_rows, build_text_link_rows
 from slop_farmer.data.parquet_io import (
     SCHEMAS,
@@ -455,76 +456,15 @@ def _viewer_comment_rows(
 def _dataset_card(
     repo_slug: str, snapshot_id: str, source_repo_id: str, checkpoint_root: str
 ) -> str:
-    return f"""---
-pretty_name: Transformers PR Slop Dataset
-configs:
-- config_name: issues
-  data_files:
-  - split: train
-    path: issues.parquet
-  default: true
-- config_name: prs
-  data_files:
-  - split: train
-    path: pull_requests.parquet
-- config_name: issue_comments
-  data_files:
-  - split: train
-    path: issue_comments.parquet
-- config_name: pr_comments
-  data_files:
-  - split: train
-    path: pr_comments.parquet
-- config_name: pr_reviews
-  data_files:
-  - split: train
-    path: reviews.parquet
-- config_name: pr_files
-  data_files:
-  - split: train
-    path: pr_files.parquet
-- config_name: pr_diffs
-  data_files:
-  - split: train
-    path: pr_diffs.parquet
-- config_name: review_comments
-  data_files:
-  - split: train
-    path: review_comments.parquet
-- config_name: links
-  data_files:
-  - split: train
-    path: links.parquet
-- config_name: events
-  data_files:
-  - split: train
-    path: events.parquet
----
----
-
-# Transformers PR Slop Dataset
-
-Imported checkpoint snapshot for `{repo_slug}`.
-
-Files:
-- `issues.parquet`
-- `pull_requests.parquet`
-- `comments.parquet`
-- `issue_comments.parquet`
-- `pr_comments.parquet`
-- `reviews.parquet`
-- `pr_files.parquet`
-- `pr_diffs.parquet`
-- `review_comments.parquet`
-- `links.parquet`
-- `events.parquet`
-
-Notes:
-- source HF dataset: `{source_repo_id}`
-- source checkpoint root: `{checkpoint_root}`
-- latest imported checkpoint: `{snapshot_id}`
-- links were regenerated locally from text references and timeline events
-"""
+    return build_hf_dataset_card(
+        repo_slug,
+        snapshot_id,
+        notes=[
+            f"source HF dataset: `{source_repo_id}`",
+            f"source checkpoint root: `{checkpoint_root}`",
+            "links were regenerated locally from text references and timeline events",
+        ],
+    )
 
 
 def _snapshot_dir_name(source_repo_id: str, checkpoint_id: str) -> str:
src/slop_farmer/app/pipeline.py
CHANGED
@@ -9,6 +9,7 @@ from typing import Any, Protocol
 
 from slop_farmer.app.publish import publish_snapshot
 from slop_farmer.config import NewContributorReportOptions, PipelineOptions, resolve_github_token
+from slop_farmer.data.dataset_card import build_hf_dataset_card
 from slop_farmer.data.github_api import GitHubClient
 from slop_farmer.data.links import build_pr_duplicate_candidate_rows, build_text_link_rows
 from slop_farmer.data.normalize import (
@@ -112,96 +113,14 @@ def _reference_time_for_age_caps(crawl_started_at: str) -> datetime:
 def _dataset_card(
     repo: str, snapshot_id: str, manifest: dict[str, Any], *, include_new_contributors: bool = False
 ) -> str:
-    new_contributor_config = ""
-    new_contributor_file = ""
-    if include_new_contributors:
-        new_contributor_config = """- config_name: new_contributors
-  data_files:
-  - split: train
-    path: new_contributors.parquet
-"""
-        new_contributor_file = """- `new_contributors.parquet`
-- `new-contributors-report.json`
-- `new-contributors-report.md`
-"""
-    return f"""---
-pretty_name: Transformers PR Slop Dataset
-configs:
-- config_name: issues
-  data_files:
-  - split: train
-    path: issues.parquet
-  default: true
-- config_name: prs
-  data_files:
-  - split: train
-    path: pull_requests.parquet
-- config_name: issue_comments
-  data_files:
-  - split: train
-    path: issue_comments.parquet
-- config_name: pr_comments
-  data_files:
-  - split: train
-    path: pr_comments.parquet
-- config_name: pr_reviews
-  data_files:
-  - split: train
-    path: reviews.parquet
-- config_name: pr_files
-  data_files:
-  - split: train
-    path: pr_files.parquet
-- config_name: pr_diffs
-  data_files:
-  - split: train
-    path: pr_diffs.parquet
-- config_name: review_comments
-  data_files:
-  - split: train
-    path: review_comments.parquet
-- config_name: links
-  data_files:
-  - split: train
-    path: links.parquet
-- config_name: events
-  data_files:
-  - split: train
-    path: events.parquet
-{new_contributor_config}---
----
-
-# Transformers PR Slop Dataset
-
-Normalized snapshots of issues, pull requests, comments, reviews, and linkage data from `{repo}`.
-
-Files:
-- `issues.parquet`
-- `pull_requests.parquet`
-- `comments.parquet`
-- `issue_comments.parquet` (derived view of issue discussion comments)
-- `pr_comments.parquet` (derived view of pull request discussion comments)
-- `reviews.parquet`
-- `pr_files.parquet`
-- `pr_diffs.parquet`
-- `review_comments.parquet`
-- `links.parquet`
-- `events.parquet`
-{new_contributor_file}
-
-Use:
-- duplicate PR and issue analysis
-- triage and ranking experiments
-- eval set creation
-
-Notes:
-- updated daily
-- latest snapshot: `{snapshot_id}`
-- raw data only; no labels or moderation decisions
-- PR metadata, file-level patch hunks, and full unified diffs are included
-- new contributor reviewer artifacts are included when generated for the snapshot
-- full file contents for changed files are not included
-"""
+    notes = ["new contributor reviewer artifacts are included"] if include_new_contributors else []
+    del manifest
+    return build_hf_dataset_card(
+        repo,
+        snapshot_id,
+        include_new_contributors=include_new_contributors,
+        notes=notes,
+    )
 
 
 def _viewer_comment_rows(
@@ -1045,6 +964,9 @@ def run_pipeline(options: PipelineOptions, client: GitHubClientLike | None = None
             output_dir=options.output_dir,
             output=None,
             json_output=None,
+            hf_repo_id=None,
+            hf_revision=None,
+            hf_materialize_dir=None,
             window_days=options.new_contributor_window_days,
             max_authors=options.new_contributor_max_authors,
         )
src/slop_farmer/app/pr_search.py
CHANGED
@@ -10,9 +10,12 @@ get_pr_search_status = pr_search_service.get_pr_search_status
 get_pr_search_similar = pr_search_service.get_pr_search_similar
 get_pr_search_similar_lookup = pr_search_service.get_pr_search_similar_lookup
 get_pr_search_candidate_clusters = pr_search_service.get_pr_search_candidate_clusters
+get_pr_search_contributor = pr_search_service.get_pr_search_contributor
+get_pr_search_contributor_pulls = pr_search_service.get_pr_search_contributor_pulls
 get_pr_search_clusters = pr_search_service.get_pr_search_clusters
 list_pr_search_clusters = pr_search_service.list_pr_search_clusters
 get_pr_search_cluster = pr_search_service.get_pr_search_cluster
+get_pr_search_pull_contributor = pr_search_service.get_pr_search_pull_contributor
 explain_pr_search_pair = pr_search_service.explain_pr_search_pair
 probe_pr_search_live = pr_search_service.probe_pr_search_live
 probe_pr_search_github = pr_search_service.probe_pr_search_github
@@ -31,6 +34,7 @@ def format_pr_search_status(result: Mapping[str, Any]) -> str:
         (
             "Rows: "
             f"documents={counts['documents']} "
+            f"contributors={counts.get('contributors', 0)} "
            f"features={counts['features']} "
             f"neighbors={counts['neighbors']} "
             f"clusters={counts['clusters']} "
@@ -245,3 +249,73 @@ def format_pr_search_probe(result: Mapping[str, Any]) -> str:
         if row.get("reason"):
             lines.append(f" reason: {row['reason']}")
     return "\n".join(lines)
+
+
+def format_pr_search_contributor(result: Mapping[str, Any]) -> str:
+    contributor = result["contributor"]
+    lines = [
+        f"Contributor {contributor['author_login']}",
+        f"Repo: {result['repo']}",
+        f"Snapshot: {result['snapshot_id']}",
+        f"Name: {contributor.get('name') or '-'}",
+        f"Profile: {contributor.get('profile_url') or '-'}",
+        f"Association: {contributor.get('repo_association') or '-'}",
+        f"First seen in snapshot: {'yes' if contributor.get('first_seen_in_snapshot') else 'no'}",
+        (
+            "Scores: "
+            f"follow-through={contributor.get('follow_through_score') or '-'} "
+            f"breadth={contributor.get('breadth_score') or '-'} "
+            f"risk={contributor.get('automation_risk_signal') or '-'}"
+        ),
+        f"Heuristic: {contributor.get('heuristic_note') or '-'}",
+        f"Public orgs: {', '.join(contributor.get('public_orgs') or []) or '-'}",
+        "",
+        "Recent indexed PRs:",
+    ]
+    pulls = result.get("pulls") or []
+    if not pulls:
+        lines.append("- none")
+        return "\n".join(lines)
+    for row in pulls:
+        lines.append(
+            f"- PR #{row['pr_number']}: {row.get('title') or ''} "
+            f"[state={row.get('state') or '-'} merged={'yes' if row.get('merged') else 'no'}]"
+        )
+    return "\n".join(lines)
+
+
+def format_pr_search_contributor_pulls(result: Mapping[str, Any]) -> str:
+    contributor = result["contributor"]
+    lines = [
+        f"Contributor PRs: {contributor['author_login']}",
+        f"Repo: {result['repo']}",
+        f"Snapshot: {result['snapshot_id']}",
+        f"Pull requests: {result.get('pull_count', len(result.get('pulls') or []))}",
+        "",
+    ]
+    pulls = result.get("pulls") or []
+    if not pulls:
+        lines.append("No indexed PRs found for that contributor.")
+        return "\n".join(lines)
+    for row in pulls:
+        lines.append(
+            f"- PR #{row['pr_number']}: {row.get('title') or ''} "
+            f"(updated={row.get('updated_at') or '-'}, state={row.get('state') or '-'})"
+        )
+    return "\n".join(lines)
+
+
+def format_pr_search_pull_contributor(result: Mapping[str, Any]) -> str:
+    pr = result["pr"]
+    contributor = result["contributor"]
+    return "\n".join(
+        [
+            f"PR #{pr['pr_number']}: {pr.get('title') or ''}",
+            f"Author: {contributor['author_login']}",
+            f"Risk: {contributor.get('automation_risk_signal') or '-'}",
+            f"Follow-through: {contributor.get('follow_through_score') or '-'}",
+            f"Breadth: {contributor.get('breadth_score') or '-'}",
+            f"Heuristic: {contributor.get('heuristic_note') or '-'}",
+            f"Profile: {contributor.get('profile_url') or '-'}",
+        ]
+    )
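
The new formatters only read from plain mappings, so they can be smoke-tested without a built index. A minimal payload for `format_pr_search_contributor` (every value below is made up; the keys are exactly the ones the function reads above):

# Demo payload: values are fabricated; keys mirror what the formatter reads.
from slop_farmer.app.pr_search import format_pr_search_contributor

demo = {
    "repo": "huggingface/transformers",
    "snapshot_id": "2025-01-01T00-00-00Z",  # made-up snapshot id format
    "contributor": {
        "author_login": "octocat",
        "name": "Octo Cat",
        "profile_url": "https://github.com/octocat",
        "repo_association": "NONE",
        "first_seen_in_snapshot": True,
        "follow_through_score": 0.4,
        "breadth_score": 0.2,
        "automation_risk_signal": "medium",
        "heuristic_note": "many small doc-only PRs",
        "public_orgs": [],
    },
    "pulls": [
        {"pr_number": 123, "title": "Fix typo in docs", "state": "closed", "merged": True},
    ],
}
print(format_pr_search_contributor(demo))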
src/slop_farmer/app/pr_search_api.py
CHANGED
|
@@ -22,6 +22,9 @@ from slop_farmer.reports.analysis_service import (
|
|
| 22 |
from slop_farmer.reports.pr_search_service import (
|
| 23 |
get_pr_search_cluster,
|
| 24 |
get_pr_search_clusters,
|
|
|
|
|
|
|
|
|
|
| 25 |
get_pr_search_similar_lookup,
|
| 26 |
get_pr_search_status,
|
| 27 |
list_pr_search_clusters,
|
|
@@ -34,6 +37,7 @@ class PrSearchApiSettings:
|
|
| 34 |
default_repo: str | None
|
| 35 |
index_path: Path
|
| 36 |
output_dir: Path
|
|
|
|
| 37 |
snapshot_dir: Path | None = None
|
| 38 |
hf_repo_id: str | None = None
|
| 39 |
hf_revision: str | None = None
|
|
@@ -66,6 +70,7 @@ class PrSearchApiSettings:
|
|
| 66 |
default_repo=os.environ.get("DEFAULT_REPO"),
|
| 67 |
index_path=index_path,
|
| 68 |
output_dir=output_dir,
|
|
|
|
| 69 |
snapshot_dir=snapshot_dir,
|
| 70 |
hf_repo_id=os.environ.get("HF_REPO_ID"),
|
| 71 |
hf_revision=os.environ.get("HF_REVISION"),
|
|
@@ -103,7 +108,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 103 |
app.state.startup_error = str(exc)
|
| 104 |
yield
|
| 105 |
|
| 106 |
-
app = FastAPI(title="slop PR search API", version="0.1.
|
| 107 |
|
| 108 |
@app.exception_handler(ValueError)
|
| 109 |
async def handle_value_error(_request: Request, exc: ValueError) -> JSONResponse:
|
|
@@ -212,6 +217,44 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 212 |
),
|
| 213 |
)
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
@app.get("/v1/repos/{owner}/{repo}/analysis/status")
|
| 216 |
async def analysis_status(
|
| 217 |
owner: str,
|
|
@@ -221,7 +264,12 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 221 |
) -> dict[str, Any]:
|
| 222 |
settings = request.app.state.settings
|
| 223 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 224 |
-
return get_analysis_status(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
@app.get("/v1/repos/{owner}/{repo}/pulls/{number}/analysis")
|
| 227 |
async def pr_analysis(
|
|
@@ -238,6 +286,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 238 |
repo=repo_slug,
|
| 239 |
pr_number=number,
|
| 240 |
variant=variant,
|
|
|
|
| 241 |
)
|
| 242 |
|
| 243 |
@app.get("/v1/repos/{owner}/{repo}/analysis/meta-bugs")
|
|
@@ -254,6 +303,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 254 |
settings.index_path,
|
| 255 |
repo=repo_slug,
|
| 256 |
variant=variant,
|
|
|
|
| 257 |
limit=_limit(
|
| 258 |
limit,
|
| 259 |
default=settings.cluster_list_limit_default,
|
|
@@ -276,6 +326,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 276 |
repo=repo_slug,
|
| 277 |
cluster_id=cluster_id,
|
| 278 |
variant=variant,
|
|
|
|
| 279 |
)
|
| 280 |
|
| 281 |
@app.get("/v1/repos/{owner}/{repo}/analysis/duplicate-prs")
|
|
@@ -292,6 +343,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 292 |
settings.index_path,
|
| 293 |
repo=repo_slug,
|
| 294 |
variant=variant,
|
|
|
|
| 295 |
limit=_limit(
|
| 296 |
limit,
|
| 297 |
default=settings.cluster_list_limit_default,
|
|
@@ -308,7 +360,12 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
|
|
| 308 |
) -> dict[str, Any]:
|
| 309 |
settings = request.app.state.settings
|
| 310 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 311 |
-
return get_analysis_best(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
return app
|
| 314 |
|
|
@@ -395,6 +452,7 @@ def _looks_not_found(exc: ValueError) -> bool:
|
|
| 395 |
message = str(exc).lower()
|
| 396 |
return (
|
| 397 |
"not found" in message
|
|
|
|
| 398 |
or "no analysis report was found" in message
|
| 399 |
or "no active pr search run" in message
|
| 400 |
or "was not found in the active indexed universe" in message
|
|
|
|
| 22 |
from slop_farmer.reports.pr_search_service import (
|
| 23 |
get_pr_search_cluster,
|
| 24 |
get_pr_search_clusters,
|
| 25 |
+
get_pr_search_contributor,
|
| 26 |
+
get_pr_search_contributor_pulls,
|
| 27 |
+
get_pr_search_pull_contributor,
|
| 28 |
get_pr_search_similar_lookup,
|
| 29 |
get_pr_search_status,
|
| 30 |
list_pr_search_clusters,
|
|
|
|
| 37 |
default_repo: str | None
|
| 38 |
index_path: Path
|
| 39 |
output_dir: Path
|
| 40 |
+
analysis_dir: Path | None = None
|
| 41 |
snapshot_dir: Path | None = None
|
| 42 |
hf_repo_id: str | None = None
|
| 43 |
hf_revision: str | None = None
|
|
|
|
| 70 |
default_repo=os.environ.get("DEFAULT_REPO"),
|
| 71 |
index_path=index_path,
|
| 72 |
output_dir=output_dir,
|
| 73 |
+
analysis_dir=_env_path("ANALYSIS_DIR") or (output_dir / "analysis"),
|
| 74 |
snapshot_dir=snapshot_dir,
|
| 75 |
hf_repo_id=os.environ.get("HF_REPO_ID"),
|
| 76 |
hf_revision=os.environ.get("HF_REVISION"),
|
|
|
|
| 108 |
app.state.startup_error = str(exc)
|
| 109 |
yield
|
| 110 |
|
| 111 |
+
app = FastAPI(title="slop PR search API", version="0.1.1", lifespan=lifespan)
|
| 112 |
|
| 113 |
@app.exception_handler(ValueError)
|
| 114 |
async def handle_value_error(_request: Request, exc: ValueError) -> JSONResponse:
|
|
|
|
| 217 |
),
|
| 218 |
)
|
| 219 |
|
| 220 |
+
@app.get("/v1/repos/{owner}/{repo}/contributors/{login}")
|
| 221 |
+
async def contributor_view(
|
| 222 |
+
owner: str, repo: str, login: str, request: Request
|
| 223 |
+
) -> dict[str, Any]:
|
| 224 |
+
settings = request.app.state.settings
|
| 225 |
+
repo_slug = _repo_slug(settings, owner, repo)
|
| 226 |
+
return get_pr_search_contributor(settings.index_path, repo=repo_slug, author_login=login)
|
| 227 |
+
|
| 228 |
+
@app.get("/v1/repos/{owner}/{repo}/contributors/{login}/pulls")
|
| 229 |
+
async def contributor_pulls(
|
| 230 |
+
owner: str,
|
| 231 |
+
repo: str,
|
| 232 |
+
login: str,
|
| 233 |
+
request: Request,
|
| 234 |
+
limit: int | None = None,
|
| 235 |
+
) -> dict[str, Any]:
|
| 236 |
+
settings = request.app.state.settings
|
| 237 |
+
repo_slug = _repo_slug(settings, owner, repo)
|
| 238 |
+
return get_pr_search_contributor_pulls(
|
| 239 |
+
settings.index_path,
|
| 240 |
+
repo=repo_slug,
|
| 241 |
+
author_login=login,
|
| 242 |
+
limit=_limit(
|
| 243 |
+
limit, default=settings.similar_limit_default, maximum=settings.similar_limit_max
|
| 244 |
+
),
|
| 245 |
+
)
|
| 246 |
+
|
| 247 |
+
@app.get("/v1/repos/{owner}/{repo}/pulls/{number}/contributor")
|
| 248 |
+
async def pull_contributor(
|
| 249 |
+
owner: str,
|
| 250 |
+
repo: str,
|
| 251 |
+
number: int,
|
| 252 |
+
request: Request,
|
| 253 |
+
) -> dict[str, Any]:
|
| 254 |
+
settings = request.app.state.settings
|
| 255 |
+
repo_slug = _repo_slug(settings, owner, repo)
|
| 256 |
+
return get_pr_search_pull_contributor(settings.index_path, repo=repo_slug, pr_number=number)
|
| 257 |
+
|
| 258 |
@app.get("/v1/repos/{owner}/{repo}/analysis/status")
|
| 259 |
async def analysis_status(
|
| 260 |
owner: str,
|
|
|
|
| 264 |
) -> dict[str, Any]:
|
| 265 |
settings = request.app.state.settings
|
| 266 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 267 |
+
return get_analysis_status(
|
| 268 |
+
settings.index_path,
|
| 269 |
+
repo=repo_slug,
|
| 270 |
+
variant=variant,
|
| 271 |
+
analysis_root=settings.analysis_dir,
|
| 272 |
+
)
|
| 273 |
|
| 274 |
@app.get("/v1/repos/{owner}/{repo}/pulls/{number}/analysis")
|
| 275 |
async def pr_analysis(
|
|
|
|
| 286 |
repo=repo_slug,
|
| 287 |
pr_number=number,
|
| 288 |
variant=variant,
|
| 289 |
+
analysis_root=settings.analysis_dir,
|
| 290 |
)
|
| 291 |
|
| 292 |
@app.get("/v1/repos/{owner}/{repo}/analysis/meta-bugs")
|
|
|
|
| 303 |
settings.index_path,
|
| 304 |
repo=repo_slug,
|
| 305 |
variant=variant,
|
| 306 |
+
analysis_root=settings.analysis_dir,
|
| 307 |
limit=_limit(
|
| 308 |
limit,
|
| 309 |
default=settings.cluster_list_limit_default,
|
|
|
|
| 326 |
repo=repo_slug,
|
| 327 |
cluster_id=cluster_id,
|
| 328 |
variant=variant,
|
| 329 |
+
analysis_root=settings.analysis_dir,
|
| 330 |
)
|
| 331 |
|
| 332 |
@app.get("/v1/repos/{owner}/{repo}/analysis/duplicate-prs")
|
|
|
|
| 343 |
settings.index_path,
|
| 344 |
repo=repo_slug,
|
| 345 |
variant=variant,
|
| 346 |
+
analysis_root=settings.analysis_dir,
|
| 347 |
limit=_limit(
|
| 348 |
limit,
|
| 349 |
default=settings.cluster_list_limit_default,
|
|
|
|
| 360 |
) -> dict[str, Any]:
|
| 361 |
settings = request.app.state.settings
|
| 362 |
repo_slug = _repo_slug(settings, owner, repo)
|
| 363 |
+
return get_analysis_best(
|
| 364 |
+
settings.index_path,
|
| 365 |
+
repo=repo_slug,
|
| 366 |
+
variant=variant,
|
| 367 |
+
analysis_root=settings.analysis_dir,
|
| 368 |
+
)
|
| 369 |
|
| 370 |
return app
|
| 371 |
|
|
|
|
| 452 |
message = str(exc).lower()
|
| 453 |
return (
|
| 454 |
"not found" in message
|
| 455 |
+
or "analysis report was not found" in message
|
| 456 |
or "no analysis report was found" in message
|
| 457 |
or "no active pr search run" in message
|
| 458 |
or "was not found in the active indexed universe" in message
|
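The three contributor routes added above are plain GET endpoints, so a quick smoke test is FastAPI's TestClient. A minimal sketch, not part of this commit: the repo slug, login, and PR number are placeholders, and it assumes the environment variables read by create_app() (index path, output dir, and so on) already point at a populated index.

# Sketch only; names below are placeholders, not values from this commit.
from fastapi.testclient import TestClient

from slop_farmer.app.pr_search_api import create_app

# The "with" block runs the app lifespan so settings are loaded.
with TestClient(create_app()) as client:
    # Contributor profile row for one author in the active run.
    profile = client.get("/v1/repos/huggingface/transformers/contributors/octocat")
    # That author's PRs, newest first, capped via the similar-limit settings.
    pulls = client.get(
        "/v1/repos/huggingface/transformers/contributors/octocat/pulls",
        params={"limit": 20},
    )
    # Reverse lookup: contributor record for the author of a given PR number.
    author = client.get("/v1/repos/huggingface/transformers/pulls/12345/contributor")
    for response in (profile, pulls, author):
        print(response.status_code, response.json())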
src/slop_farmer/app/workflow.py CHANGED
@@ -74,6 +74,9 @@ def run_full_pipeline(options: FullPipelineOptions) -> str:
  74              analysis_input=analysis_path,
  75              contributors_input=snapshot_dir / "new-contributors-report.json",
  76              pr_scope_input=snapshot_dir / "pr-scope-clusters.json",
  77 +            hf_repo_id=None,
  78 +            hf_revision=None,
  79 +            hf_materialize_dir=None,
  80              window_days=options.dashboard_window_days,
  81          )
  82      )
src/slop_farmer/app_config.py CHANGED
@@ -184,6 +184,18 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
 184              "new-contributor-window-days": contributor_window_days,
 185              "new-contributor-max-authors": contributor_max_authors,
 186          },
 187 +        "refresh-dataset": {
 188 +            "repo": repo,
 189 +            "hf-repo-id": dataset_id,
 190 +            "fetch-timeline": scrape.get("fetch-timeline"),
 191 +            "max-issues": scrape.get("max-issues"),
 192 +            "max-prs": scrape.get("max-prs"),
 193 +            "max-issue-comments": scrape.get("max-issue-comments"),
 194 +            "max-reviews-per-pr": scrape.get("max-reviews-per-pr"),
 195 +            "max-review-comments-per-pr": scrape.get("max-review-comments-per-pr"),
 196 +            "new-contributor-window-days": contributor_window_days,
 197 +            "new-contributor-max-authors": contributor_max_authors,
 198 +        },
 199          "analyze": {
 200              "output-dir": str(data_dir) if data_dir else None,
 201              "hf-repo-id": analysis.get("hf-repo-id", dataset_id),

@@ -201,6 +213,7 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
 213          },
 214          "pr-scope": {
 215              "output-dir": str(data_dir) if data_dir else None,
 216 +            "hf-repo-id": dataset_id,
 217              "cluster-suppression-rules": cluster_suppression_rules,
 218          },
 219          "pr-search": {

@@ -210,12 +223,14 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
 223          },
 224          "new-contributor-report": {
 225              "output-dir": str(data_dir) if data_dir else None,
 226 +            "hf-repo-id": dataset_id,
 227              "window-days": contributor_window_days,
 228              "max-authors": contributor_max_authors,
 229          },
 230          "dashboard-data": {
 231              "output-dir": str(dashboard_dir) if dashboard_dir else None,
 232              "snapshot-root": str(data_dir / "snapshots") if data_dir else None,
 233 +            "hf-repo-id": dataset_id,
 234              "window-days": dashboard_window_days,
 235          },
 236          "publish-snapshot": {

@@ -236,6 +251,7 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
 251          "deploy-dashboard": {
 252              "pipeline-data-dir": str(data_dir) if data_dir else None,
 253              "web-dir": str(web_dir) if web_dir else None,
 254 +            "hf-repo-id": dataset_id,
 255              "dashboard-window-days": dashboard_window_days,
 256              "contributor-window-days": contributor_window_days,
 257              "contributor-max-authors": contributor_max_authors,

@@ -248,6 +264,11 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
 264              "dataset-id": dataset_id,
 265              "space-tags": tags_value,
 266          },
 267 +        "dataset-status": {
 268 +            "repo": repo,
 269 +            "output-dir": str(data_dir) if data_dir else None,
 270 +            "hf-repo-id": dataset_id,
 271 +        },
 272      }
 273      for command, values in defaults.items():
 274          defaults[command] = {key: value for key, value in values.items() if value is not None}

@@ -259,6 +280,7 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
 280          defaults[command].update(_resolve_command_paths(config_path, values))
 281
 282      defaults["scrape"].update(_resolve_command_paths(config_path, scrape))
 283 +    defaults["refresh-dataset"].update(_resolve_command_paths(config_path, scrape))
 284      defaults["analyze"].update(_resolve_command_paths(config_path, analysis))
 285      defaults["full-pipeline"].update(_resolve_command_paths(config_path, full_pipeline))
 286      return defaults
src/slop_farmer/config.py CHANGED
@@ -127,6 +127,9 @@ class NewContributorReportOptions:
 127      json_output: Path | None
 128      window_days: int
 129      max_authors: int
 130 +    hf_repo_id: str | None = None
 131 +    hf_revision: str | None = None
 132 +    hf_materialize_dir: Path | None = None
 133
 134
 135  @dataclass(slots=True)

@@ -137,6 +140,9 @@ class DashboardDataOptions:
 140      contributors_input: Path | None
 141      pr_scope_input: Path | None
 142      window_days: int
 143 +    hf_repo_id: str | None = None
 144 +    hf_revision: str | None = None
 145 +    hf_materialize_dir: Path | None = None
 146      snapshot_root: Path | None = None
 147
 148

@@ -155,6 +161,9 @@ class DeployDashboardOptions:
 161      snapshot_dir: Path | None
 162      analysis_input: Path | None
 163      contributors_input: Path | None
 164 +    hf_repo_id: str | None
 165 +    hf_revision: str | None
 166 +    hf_materialize_dir: Path | None
 167      refresh_contributors: bool
 168      dashboard_window_days: int
 169      contributor_window_days: int

@@ -233,3 +242,32 @@ class FullPipelineOptions:
 242      max_issues: int | None
 243      max_prs: int | None
 244      open_prs_only: bool = False
 245 +
 246 +
 247 +@dataclass(slots=True)
 248 +class DatasetRefreshOptions:
 249 +    repo: RepoRef
 250 +    hf_repo_id: str
 251 +    private_hf_repo: bool
 252 +    max_issues: int | None
 253 +    max_prs: int | None
 254 +    max_issue_comments: int | None
 255 +    max_reviews_per_pr: int | None
 256 +    max_review_comments_per_pr: int | None
 257 +    fetch_timeline: bool
 258 +    new_contributor_report: bool
 259 +    new_contributor_window_days: int
 260 +    new_contributor_max_authors: int
 261 +    http_timeout: int
 262 +    http_max_retries: int
 263 +    checkpoint_every_comments: int
 264 +    checkpoint_every_prs: int
 265 +
 266 +
 267 +@dataclass(slots=True)
 268 +class DatasetStatusOptions:
 269 +    output_dir: Path
 270 +    hf_repo_id: str | None
 271 +    hf_revision: str | None
 272 +    repo: str | None = None
 273 +    json_output: bool = False
src/slop_farmer/data/dataset_card.py ADDED
@@ -0,0 +1,107 @@
   1 +from __future__ import annotations
   2 +
   3 +
   4 +def _repo_title(repo_slug: str) -> str:
   5 +    name = repo_slug.split("/", 1)[-1]
   6 +    return name.replace("-", " ").replace("_", " ").title()
   7 +
   8 +
   9 +def build_hf_dataset_card(
  10 +    repo_slug: str,
  11 +    snapshot_id: str,
  12 +    *,
  13 +    include_new_contributors: bool = False,
  14 +    notes: list[str] | None = None,
  15 +) -> str:
  16 +    repo_title = _repo_title(repo_slug)
  17 +    dataset_title = f"{repo_title} PR Dataset"
  18 +    new_contributor_config = ""
  19 +    new_contributor_files = ""
  20 +    if include_new_contributors:
  21 +        new_contributor_config = """- config_name: new_contributors
  22 +  data_files:
  23 +    - split: train
  24 +      path: new_contributors.parquet
  25 +"""
  26 +        new_contributor_files = """- `new_contributors.parquet`
  27 +- `new-contributors-report.json`
  28 +- `new-contributors-report.md`
  29 +"""
  30 +    note_lines = "\n".join(f"- {note}" for note in (notes or []))
  31 +    if note_lines:
  32 +        note_lines = f"{note_lines}\n"
  33 +    return f"""---
  34 +pretty_name: {dataset_title}
  35 +configs:
  36 +- config_name: issues
  37 +  data_files:
  38 +    - split: train
  39 +      path: issues.parquet
  40 +  default: true
  41 +- config_name: prs
  42 +  data_files:
  43 +    - split: train
  44 +      path: pull_requests.parquet
  45 +- config_name: issue_comments
  46 +  data_files:
  47 +    - split: train
  48 +      path: issue_comments.parquet
  49 +- config_name: pr_comments
  50 +  data_files:
  51 +    - split: train
  52 +      path: pr_comments.parquet
  53 +- config_name: pr_reviews
  54 +  data_files:
  55 +    - split: train
  56 +      path: reviews.parquet
  57 +- config_name: pr_files
  58 +  data_files:
  59 +    - split: train
  60 +      path: pr_files.parquet
  61 +- config_name: pr_diffs
  62 +  data_files:
  63 +    - split: train
  64 +      path: pr_diffs.parquet
  65 +- config_name: review_comments
  66 +  data_files:
  67 +    - split: train
  68 +      path: review_comments.parquet
  69 +- config_name: links
  70 +  data_files:
  71 +    - split: train
  72 +      path: links.parquet
  73 +- config_name: events
  74 +  data_files:
  75 +    - split: train
  76 +      path: events.parquet
  77 +{new_contributor_config}---
  78 +---
  79 +
  80 +# {dataset_title}
  81 +
  82 +Normalized snapshots of issues, pull requests, comments, reviews, and linkage data from `{repo_slug}`.
  83 +
  84 +Files:
  85 +- `issues.parquet`
  86 +- `pull_requests.parquet`
  87 +- `comments.parquet`
  88 +- `issue_comments.parquet` (derived view of issue discussion comments)
  89 +- `pr_comments.parquet` (derived view of pull request discussion comments)
  90 +- `reviews.parquet`
  91 +- `pr_files.parquet`
  92 +- `pr_diffs.parquet`
  93 +- `review_comments.parquet`
  94 +- `links.parquet`
  95 +- `events.parquet`
  96 +{new_contributor_files}
  97 +Use:
  98 +- duplicate PR and issue analysis
  99 +- triage and ranking experiments
 100 +- eval set creation
 101 +
 102 +Notes:
 103 +- latest snapshot: `{snapshot_id}`
 104 +- raw data only; no labels or moderation decisions
 105 +- PR metadata, file-level patch hunks, and full unified diffs are included
 106 +- full file contents for changed files are not included
 107 +{note_lines}"""
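A sketch of how the new card builder might be invoked; the slug, snapshot id, and note below are placeholders:

# Sketch only; arguments are illustrative placeholders.
from slop_farmer.data.dataset_card import build_hf_dataset_card

card = build_hf_dataset_card(
    "huggingface/transformers",
    "2024-06-01T000000Z",
    include_new_contributors=True,
    notes=["refreshed nightly"],
)
# The YAML front matter opens with the derived title, e.g.
# "pretty_name: Transformers PR Dataset".
print(card.splitlines()[1])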
src/slop_farmer/data/hf_dataset_repo.py ADDED
@@ -0,0 +1,94 @@
   1 +from __future__ import annotations
   2 +
   3 +import json
   4 +import os
   5 +from pathlib import Path
   6 +from typing import Any
   7 +
   8 +from huggingface_hub import HfApi, hf_hub_download
   9 +
  10 +
  11 +def load_remote_file(
  12 +    api: HfApi,
  13 +    repo_id: str,
  14 +    path_in_repo: str,
  15 +    local_dir: Path,
  16 +    *,
  17 +    revision: str | None = None,
  18 +) -> Path | None:
  19 +    del api
  20 +    try:
  21 +        downloaded = hf_hub_download(
  22 +            repo_id=repo_id,
  23 +            filename=path_in_repo,
  24 +            repo_type="dataset",
  25 +            revision=revision,
  26 +            local_dir=str(local_dir),
  27 +            token=os.getenv("HF_TOKEN"),
  28 +        )
  29 +    except Exception:
  30 +        return None
  31 +    return Path(downloaded)
  32 +
  33 +
  34 +def load_remote_json_file(
  35 +    api: HfApi,
  36 +    repo_id: str,
  37 +    path_in_repo: str,
  38 +    local_dir: Path,
  39 +    *,
  40 +    revision: str | None = None,
  41 +) -> dict[str, Any] | None:
  42 +    downloaded = load_remote_file(
  43 +        api,
  44 +        repo_id,
  45 +        path_in_repo,
  46 +        local_dir,
  47 +        revision=revision,
  48 +    )
  49 +    if downloaded is None:
  50 +        return None
  51 +    return json.loads(downloaded.read_text(encoding="utf-8"))
  52 +
  53 +
  54 +def list_remote_paths(api: HfApi, repo_id: str, *, revision: str | None = None) -> set[str]:
  55 +    try:
  56 +        info = api.dataset_info(repo_id=repo_id, revision=revision, files_metadata=True)
  57 +    except TypeError:
  58 +        info = api.dataset_info(repo_id=repo_id, revision=revision)
  59 +    except Exception:
  60 +        return set()
  61 +    return {sibling.rfilename for sibling in getattr(info, "siblings", [])}
  62 +
  63 +
  64 +def stable_snapshot_candidates(latest_payload: dict[str, Any] | None, filename: str) -> list[str]:
  65 +    if latest_payload is None:
  66 +        return [filename]
  67 +    candidates: list[str] = []
  68 +    manifest_path = str(latest_payload.get("manifest_path") or "").strip("/")
  69 +    snapshot_dir = str(latest_payload.get("snapshot_dir") or "").strip("/")
  70 +    latest_snapshot_id = str(latest_payload.get("latest_snapshot_id") or "").strip()
  71 +
  72 +    if filename == "manifest.json" and manifest_path:
  73 +        candidates.append(manifest_path)
  74 +    if snapshot_dir and snapshot_dir not in {".", "/"}:
  75 +        candidates.append(f"{snapshot_dir}/{filename}")
  76 +    archived_manifest_path = str(latest_payload.get("archived_manifest_path") or "").strip("/")
  77 +    if filename == "manifest.json" and archived_manifest_path:
  78 +        candidates.append(archived_manifest_path)
  79 +    if manifest_path and "/" in manifest_path:
  80 +        manifest_dir = manifest_path.rsplit("/", 1)[0]
  81 +        candidates.append(f"{manifest_dir}/{filename}")
  82 +    if latest_snapshot_id:
  83 +        candidates.append(f"snapshots/{latest_snapshot_id}/{filename}")
  84 +    candidates.append(filename)
  85 +
  86 +    deduped: list[str] = []
  87 +    seen: set[str] = set()
  88 +    for candidate in candidates:
  89 +        normalized = candidate.lstrip("./")
  90 +        if not normalized or normalized in seen:
  91 +            continue
  92 +        seen.add(normalized)
  93 +        deduped.append(normalized)
  94 +    return deduped
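stable_snapshot_candidates turns a latest.json-style payload into an ordered, deduplicated list of repo paths to try. A sketch with a hypothetical payload:

# Sketch only; the payload values are hypothetical.
from slop_farmer.data.hf_dataset_repo import stable_snapshot_candidates

payload = {
    "manifest_path": "snapshots/2024-06-01/manifest.json",
    "snapshot_dir": "snapshots/2024-06-01",
    "latest_snapshot_id": "2024-06-01",
}
# Directory-qualified candidates come first, the bare filename last:
# ['snapshots/2024-06-01/issues.parquet', 'issues.parquet']
print(stable_snapshot_candidates(payload, "issues.parquet"))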
src/slop_farmer/data/search_duckdb.py CHANGED
@@ -31,6 +31,7 @@ TABLE_COLUMNS: dict[str, tuple[str, ...]] = {
  31          "repo",
  32          "pr_number",
  33          "github_id",
  34 +        "author_login",
  35          "state",
  36          "draft",
  37          "merged",

@@ -46,6 +47,48 @@ TABLE_COLUMNS: dict[str, tuple[str, ...]] = {
  47          "review_comments_count",
  48          "html_url",
  49      ),
  50 +    "pr_search_contributors": (
  51 +        "run_id",
  52 +        "repo",
  53 +        "snapshot_id",
  54 +        "report_generated_at",
  55 +        "window_days",
  56 +        "author_login",
  57 +        "name",
  58 +        "profile_url",
  59 +        "repo_pull_requests_url",
  60 +        "repo_issues_url",
  61 +        "repo_first_seen_at",
  62 +        "repo_last_seen_at",
  63 +        "repo_primary_artifact_count",
  64 +        "repo_artifact_count",
  65 +        "snapshot_issue_count",
  66 +        "snapshot_pr_count",
  67 +        "snapshot_comment_count",
  68 +        "snapshot_review_count",
  69 +        "snapshot_review_comment_count",
  70 +        "repo_association",
  71 +        "new_to_repo",
  72 +        "first_seen_in_snapshot",
  73 +        "report_reason",
  74 +        "account_age_days",
  75 +        "young_account",
  76 +        "follow_through_score",
  77 +        "breadth_score",
  78 +        "automation_risk_signal",
  79 +        "heuristic_note",
  80 +        "public_orgs_json",
  81 +        "visible_authored_pr_count",
  82 +        "merged_pr_count",
  83 +        "closed_unmerged_pr_count",
  84 +        "open_pr_count",
  85 +        "merged_pr_rate",
  86 +        "closed_unmerged_pr_rate",
  87 +        "still_open_pr_rate",
  88 +        "distinct_repos_with_authored_prs",
  89 +        "distinct_repos_with_open_prs",
  90 +        "fetch_error",
  91 +    ),
  92      "pr_scope_features": (
  93          "run_id",
  94          "repo",

@@ -144,6 +187,7 @@ CREATE TABLE IF NOT EXISTS pr_search_documents (
 187      repo VARCHAR,
 188      pr_number BIGINT,
 189      github_id BIGINT,
 190 +    author_login VARCHAR,
 191      state VARCHAR,
 192      draft BOOLEAN,
 193      merged BOOLEAN,

@@ -159,6 +203,48 @@ CREATE TABLE IF NOT EXISTS pr_search_documents (
 203      review_comments_count BIGINT,
 204      html_url VARCHAR
 205  );
 206 +CREATE TABLE IF NOT EXISTS pr_search_contributors (
 207 +    run_id VARCHAR,
 208 +    repo VARCHAR,
 209 +    snapshot_id VARCHAR,
 210 +    report_generated_at VARCHAR,
 211 +    window_days BIGINT,
 212 +    author_login VARCHAR,
 213 +    name VARCHAR,
 214 +    profile_url VARCHAR,
 215 +    repo_pull_requests_url VARCHAR,
 216 +    repo_issues_url VARCHAR,
 217 +    repo_first_seen_at VARCHAR,
 218 +    repo_last_seen_at VARCHAR,
 219 +    repo_primary_artifact_count BIGINT,
 220 +    repo_artifact_count BIGINT,
 221 +    snapshot_issue_count BIGINT,
 222 +    snapshot_pr_count BIGINT,
 223 +    snapshot_comment_count BIGINT,
 224 +    snapshot_review_count BIGINT,
 225 +    snapshot_review_comment_count BIGINT,
 226 +    repo_association VARCHAR,
 227 +    new_to_repo BOOLEAN,
 228 +    first_seen_in_snapshot BOOLEAN,
 229 +    report_reason VARCHAR,
 230 +    account_age_days BIGINT,
 231 +    young_account BOOLEAN,
 232 +    follow_through_score VARCHAR,
 233 +    breadth_score VARCHAR,
 234 +    automation_risk_signal VARCHAR,
 235 +    heuristic_note VARCHAR,
 236 +    public_orgs_json VARCHAR,
 237 +    visible_authored_pr_count BIGINT,
 238 +    merged_pr_count BIGINT,
 239 +    closed_unmerged_pr_count BIGINT,
 240 +    open_pr_count BIGINT,
 241 +    merged_pr_rate DOUBLE,
 242 +    closed_unmerged_pr_rate DOUBLE,
 243 +    still_open_pr_rate DOUBLE,
 244 +    distinct_repos_with_authored_prs BIGINT,
 245 +    distinct_repos_with_open_prs BIGINT,
 246 +    fetch_error VARCHAR
 247 +);
 248  CREATE TABLE IF NOT EXISTS pr_scope_features (
 249      run_id VARCHAR,
 250      repo VARCHAR,

@@ -232,6 +318,8 @@ CREATE TABLE IF NOT EXISTS pr_scope_cluster_candidates (
 318  CREATE INDEX IF NOT EXISTS idx_pr_search_active_run_repo ON pr_search_active_run (repo);
 319  CREATE INDEX IF NOT EXISTS idx_pr_search_runs_repo_status ON pr_search_runs (repo, status);
 320  CREATE INDEX IF NOT EXISTS idx_pr_search_documents_run_pr ON pr_search_documents (run_id, pr_number);
 321 +CREATE INDEX IF NOT EXISTS idx_pr_search_documents_run_author ON pr_search_documents (run_id, author_login);
 322 +CREATE INDEX IF NOT EXISTS idx_pr_search_contributors_run_author ON pr_search_contributors (run_id, author_login);
 323  CREATE INDEX IF NOT EXISTS idx_pr_scope_features_run_pr ON pr_scope_features (run_id, pr_number);
 324  CREATE INDEX IF NOT EXISTS idx_pr_scope_run_artifacts_run ON pr_scope_run_artifacts (run_id);
 325  CREATE INDEX IF NOT EXISTS idx_pr_scope_neighbors_run_left ON pr_scope_neighbors (run_id, left_pr_number);

@@ -256,6 +344,9 @@ def connect_pr_search_db(path: Path, *, read_only: bool = False) -> duckdb.DuckD
 344
 345  def ensure_pr_search_schema(connection: duckdb.DuckDBPyConnection) -> None:
 346      connection.execute(SCHEMA_SQL)
 347 +    connection.execute(
 348 +        "ALTER TABLE pr_search_documents ADD COLUMN IF NOT EXISTS author_login VARCHAR"
 349 +    )
 350
 351
 352  def insert_rows(

@@ -353,6 +444,7 @@ def resolve_active_run(
 444  def get_run_counts(connection: duckdb.DuckDBPyConnection, *, run_id: str) -> dict[str, int]:
 445      return {
 446          "documents": _count(connection, "pr_search_documents", run_id),
 447 +        "contributors": _count(connection, "pr_search_contributors", run_id),
 448          "features": _count(connection, "pr_scope_features", run_id),
 449          "run_artifacts": _count(connection, "pr_scope_run_artifacts", run_id),
 450          "neighbors": _count(connection, "pr_scope_neighbors", run_id),

@@ -375,6 +467,60 @@ def get_document(
 467      )
 468
 469
 470 +def get_contributor(
 471 +    connection: duckdb.DuckDBPyConnection,
 472 +    *,
 473 +    run_id: str,
 474 +    author_login: str,
 475 +) -> dict[str, Any] | None:
 476 +    return fetch_one(
 477 +        connection,
 478 +        """
 479 +        SELECT *
 480 +        FROM pr_search_contributors
 481 +        WHERE run_id = ? AND lower(author_login) = lower(?)
 482 +        """,
 483 +        [run_id, author_login],
 484 +    )
 485 +
 486 +
 487 +def get_contributor_pulls(
 488 +    connection: duckdb.DuckDBPyConnection,
 489 +    *,
 490 +    run_id: str,
 491 +    author_login: str,
 492 +    limit: int,
 493 +) -> list[dict[str, Any]]:
 494 +    return fetch_rows(
 495 +        connection,
 496 +        """
 497 +        SELECT
 498 +            pr_number,
 499 +            github_id,
 500 +            author_login,
 501 +            state,
 502 +            draft,
 503 +            merged,
 504 +            title,
 505 +            base_ref,
 506 +            created_at,
 507 +            updated_at,
 508 +            merged_at,
 509 +            additions,
 510 +            deletions,
 511 +            changed_files,
 512 +            comments_count,
 513 +            review_comments_count,
 514 +            html_url
 515 +        FROM pr_search_documents
 516 +        WHERE run_id = ? AND lower(author_login) = lower(?)
 517 +        ORDER BY updated_at DESC NULLS LAST, pr_number DESC
 518 +        LIMIT ?
 519 +        """,
 520 +        [run_id, author_login, limit],
 521 +    )
 522 +
 523 +
 524  def get_feature(
 525      connection: duckdb.DuckDBPyConnection,
 526      *,
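A sketch of querying the new contributor tables directly; the database path and run id below are placeholders, and an already indexed run (with the new schema) is assumed. Note that both lookups match author_login case-insensitively via lower() on each side.

# Sketch only; path and run_id are placeholders.
from pathlib import Path

from slop_farmer.data.search_duckdb import (
    connect_pr_search_db,
    get_contributor,
    get_contributor_pulls,
)

connection = connect_pr_search_db(Path("data/pr_search.duckdb"), read_only=True)
# One profile row, or None if the login is not in this run.
row = get_contributor(connection, run_id="run-123", author_login="OctoCat")
# Up to 10 of the author's PRs, most recently updated first.
pulls = get_contributor_pulls(
    connection, run_id="run-123", author_login="OctoCat", limit=10
)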
src/slop_farmer/data/snapshot_source.py ADDED
@@ -0,0 +1,31 @@
   1 +from __future__ import annotations
   2 +
   3 +from pathlib import Path
   4 +
   5 +from slop_farmer.data.snapshot_materialize import materialize_hf_dataset_snapshot
   6 +from slop_farmer.data.snapshot_paths import (
   7 +    default_hf_materialize_dir,
   8 +    resolve_snapshot_dir_from_snapshots_root,
   9 +)
  10 +
  11 +
  12 +def resolve_snapshot_source_dir(
  13 +    *,
  14 +    snapshot_dir: Path | None,
  15 +    local_snapshots_root: Path,
  16 +    hf_repo_id: str | None,
  17 +    hf_revision: str | None,
  18 +    hf_materialize_dir: Path | None,
  19 +    hf_output_dir: Path | None = None,
  20 +) -> Path:
  21 +    if snapshot_dir is not None:
  22 +        return snapshot_dir.resolve()
  23 +    if hf_repo_id:
  24 +        output_dir = (hf_output_dir or local_snapshots_root.parent).resolve()
  25 +        return materialize_hf_dataset_snapshot(
  26 +            repo_id=hf_repo_id,
  27 +            local_dir=hf_materialize_dir
  28 +            or default_hf_materialize_dir(output_dir, hf_repo_id, hf_revision),
  29 +            revision=hf_revision,
  30 +        ).resolve()
  31 +    return resolve_snapshot_dir_from_snapshots_root(local_snapshots_root.resolve(), None)
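resolve_snapshot_source_dir centralizes the precedence the per-report resolvers previously duplicated: an explicit snapshot_dir wins, then a Hub dataset is materialized locally, and otherwise the local snapshots root is used. A sketch with placeholder paths and a placeholder dataset id:

# Sketch only; paths and the dataset id are placeholders.
from pathlib import Path

from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir

snapshot = resolve_snapshot_source_dir(
    snapshot_dir=None,                       # no explicit override
    local_snapshots_root=Path("data/snapshots"),
    hf_repo_id="your-org/your-pr-dataset",   # set -> materialize from the Hub
    hf_revision=None,
    hf_materialize_dir=None,                 # default dir derived from the repo id
    hf_output_dir=Path("data"),
)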
src/slop_farmer/reports/analysis.py CHANGED
@@ -19,11 +19,7 @@ from rank_bm25 import BM25Okapi
  19  from slop_farmer.config import AnalysisOptions, MarkdownReportOptions
  20  from slop_farmer.data.links import build_text_link_rows
  21  from slop_farmer.data.parquet_io import read_json, read_parquet_rows, write_text
  22 -from slop_farmer.data.
  23 -from slop_farmer.data.snapshot_paths import (
  24 -    default_hf_materialize_dir,
  25 -    resolve_snapshot_dir_from_output,
  26 -)
  22 +from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
  23  from slop_farmer.reports.analysis_cache import (
  24      HYBRID_REVIEW_CACHE_SCHEMA_VERSION,
  25      PREPARED_REVIEW_UNIT_SCHEMA_VERSION,

@@ -766,18 +762,14 @@ def _artifact_suffix(row: dict[str, Any] | None, kind: str) -> str:
 762
 763
 764  def _resolve_snapshot_dir(options: AnalysisOptions) -> Path:
 769 -
 770 -
 771 -
 772 -
 773 -
 774 -
 775 -
 776 -
 777 -            local_dir=materialize_dir,
 778 -            revision=options.hf_revision,
 779 -        ).resolve()
 780 -    return resolve_snapshot_dir_from_output(options.output_dir, options.snapshot_dir)
 765 +    return resolve_snapshot_source_dir(
 766 +        snapshot_dir=options.snapshot_dir,
 767 +        local_snapshots_root=options.output_dir.resolve() / "snapshots",
 768 +        hf_repo_id=options.hf_repo_id,
 769 +        hf_revision=options.hf_revision,
 770 +        hf_materialize_dir=options.hf_materialize_dir,
 771 +        hf_output_dir=options.output_dir,
 772 +    )
 773
 774
 775  def _load_snapshot(snapshot_dir: Path) -> SnapshotData:
src/slop_farmer/reports/analysis_service.py CHANGED
@@ -24,6 +24,8 @@ class ActiveSnapshotContext:
  24  class AnalysisContext:
  25      active_run: dict[str, Any]
  26      report: dict[str, Any]
  27 +    report_path: Path
  28 +    report_source: str
  29      variant_requested: str
  30      variant_used: str
  31

@@ -33,26 +35,31 @@ def get_analysis_status(
  35      *,
  36      repo: str | None = None,
  37      variant: str = "auto",
  38 +    analysis_root: Path | None = None,
  39  ) -> dict[str, Any]:
  40      active = _resolve_active_snapshot_context(db_path, repo=repo)
  38 -    report_path, variant_used = _resolve_analysis_report_path(
  41 +    report_path, variant_used, report_source = _resolve_analysis_report_path(
  42          active.snapshot_dir,
  43 +        str(active.active_run["repo"]),
  44          variant,
  45 +        analysis_root=analysis_root,
  46          required=False,
  47      )
  48      payload = {
  49          "repo": str(active.active_run["repo"]),
  45 -        "
  50 +        "active_snapshot_id": str(active.active_run["snapshot_id"]),
  51          "run_id": str(active.active_run["id"]),
  52          "variant_requested": _normalize_analysis_variant(variant),
  53          "available": report_path is not None,
  54      }
  50 -    if report_path is None or variant_used is None:
  55 +    if report_path is None or variant_used is None or report_source is None:
  56          return payload
  57      report = _load_report(report_path)
  58      return {
  59          **payload,
  60 +        "snapshot_id": str(report.get("snapshot_id") or active.active_run["snapshot_id"]),
  61          "variant_used": variant_used,
  62 +        "analysis_source": report_source,
  63          "llm_enrichment": bool(report.get("llm_enrichment")),
  64          "generated_at": report.get("generated_at"),
  65          "counts": _analysis_counts(report),

@@ -65,8 +72,14 @@ def get_pr_analysis(
  72      pr_number: int,
  73      repo: str | None = None,
  74      variant: str = "auto",
  75 +    analysis_root: Path | None = None,
  76  ) -> dict[str, Any]:
  69 -    context = _load_analysis_context(
  77 +    context = _load_analysis_context(
  78 +        db_path,
  79 +        repo=repo,
  80 +        variant=variant,
  81 +        analysis_root=analysis_root,
  82 +    )
  83      meta_bug, rank = _find_meta_bug_for_pr(context.report, pr_number)
  84      duplicate_pr = _find_duplicate_pr_for_pr(context.report, pr_number)
  85      return {

@@ -84,8 +97,14 @@ def list_analysis_meta_bugs(
  97      repo: str | None = None,
  98      variant: str = "auto",
  99      limit: int = 50,
 100 +    analysis_root: Path | None = None,
 101  ) -> dict[str, Any]:
  88 -    context = _load_analysis_context(
 102 +    context = _load_analysis_context(
 103 +        db_path,
 104 +        repo=repo,
 105 +        variant=variant,
 106 +        analysis_root=analysis_root,
 107 +    )
 108      meta_bugs = [
 109          _meta_bug_payload(cluster, rank=index)
 110          for index, cluster in enumerate(context.report.get("meta_bugs", [])[:limit], start=1)

@@ -103,8 +122,14 @@ def get_analysis_meta_bug(
 122      cluster_id: str,
 123      repo: str | None = None,
 124      variant: str = "auto",
 125 +    analysis_root: Path | None = None,
 126  ) -> dict[str, Any]:
 107 -    context = _load_analysis_context(
 127 +    context = _load_analysis_context(
 128 +        db_path,
 129 +        repo=repo,
 130 +        variant=variant,
 131 +        analysis_root=analysis_root,
 132 +    )
 133      for index, cluster in enumerate(context.report.get("meta_bugs", []), start=1):
 134          if str(cluster.get("cluster_id")) != cluster_id:
 135              continue

@@ -113,7 +138,7 @@
 138              "meta_bug": _meta_bug_payload(cluster, rank=index),
 139              "duplicate_pr": _find_duplicate_pr_by_cluster_id(context.report, cluster_id),
 140          }
 116 -    raise ValueError(f"Analysis cluster {cluster_id!r} was not found in the active
 141 +    raise ValueError(f"Analysis cluster {cluster_id!r} was not found in the active analysis view.")
 142
 143
 144  def list_analysis_duplicate_prs(

@@ -122,8 +147,14 @@ def list_analysis_duplicate_prs(
 147      repo: str | None = None,
 148      variant: str = "auto",
 149      limit: int = 50,
 150 +    analysis_root: Path | None = None,
 151  ) -> dict[str, Any]:
 126 -    context = _load_analysis_context(
 152 +    context = _load_analysis_context(
 153 +        db_path,
 154 +        repo=repo,
 155 +        variant=variant,
 156 +        analysis_root=analysis_root,
 157 +    )
 158      duplicate_prs = [
 159          {"rank": index, **dict(entry)}
 160          for index, entry in enumerate(context.report.get("duplicate_prs", [])[:limit], start=1)

@@ -140,8 +171,14 @@ def get_analysis_best(
 171      *,
 172      repo: str | None = None,
 173      variant: str = "auto",
 174 +    analysis_root: Path | None = None,
 175  ) -> dict[str, Any]:
 144 -    context = _load_analysis_context(
 176 +    context = _load_analysis_context(
 177 +        db_path,
 178 +        repo=repo,
 179 +        variant=variant,
 180 +        analysis_root=analysis_root,
 181 +    )
 182      return {
 183          **_analysis_base_payload(context),
 184          "best_issue": _best_entry_with_cluster_id(

@@ -180,18 +217,24 @@ def _load_analysis_context(
 217      *,
 218      repo: str | None,
 219      variant: str,
 220 +    analysis_root: Path | None,
 221  ) -> AnalysisContext:
 222      active = _resolve_active_snapshot_context(db_path, repo=repo)
 185 -    report_path, variant_used = _resolve_analysis_report_path(
 223 +    report_path, variant_used, report_source = _resolve_analysis_report_path(
 224          active.snapshot_dir,
 225 +        str(active.active_run["repo"]),
 226          variant,
 227 +        analysis_root=analysis_root,
 228          required=True,
 229      )
 230      assert report_path is not None
 231      assert variant_used is not None
 232 +    assert report_source is not None
 233      return AnalysisContext(
 234          active_run=active.active_run,
 235          report=_load_report(report_path),
 236 +        report_path=report_path,
 237 +        report_source=report_source,
 238          variant_requested=_normalize_analysis_variant(variant),
 239          variant_used=variant_used,
 240      )

@@ -199,31 +242,56 @@ def _load_analysis_context(
 242
 243  def _resolve_analysis_report_path(
 244      snapshot_dir: Path,
 245 +    repo: str,
 246      variant: str,
 247      *,
 248 +    analysis_root: Path | None,
 249      required: bool,
 205 -) -> tuple[Path | None, str | None]:
 250 +) -> tuple[Path | None, str | None, str | None]:
 251      normalized = _normalize_analysis_variant(variant)
 252 +    candidate_dirs = _candidate_analysis_dirs(
 253 +        snapshot_dir=snapshot_dir,
 254 +        repo=repo,
 255 +        analysis_root=analysis_root,
 256 +    )
 257      if normalized == "auto":
 208 -
 209 -
 210 -
 211 -
 212 -
 213 -
 258 +        for source, directory in candidate_dirs:
 259 +            hybrid_path = directory / ANALYSIS_REPORT_FILENAMES["hybrid"]
 260 +            if hybrid_path.exists():
 261 +                return hybrid_path, "hybrid", source
 262 +            deterministic_path = directory / ANALYSIS_REPORT_FILENAMES["deterministic"]
 263 +            if deterministic_path.exists():
 264 +                return deterministic_path, "deterministic", source
 265          if not required:
 215 -            return None, None
 266 +            return None, None, None
 216 -        raise ValueError(
 217 -
 218 -
 219 -
 267 +        raise ValueError(
 268 +            "No analysis report was found for the current analysis path or active snapshot."
 269 +        )
 270 +    for source, directory in candidate_dirs:
 271 +        report_path = directory / ANALYSIS_REPORT_FILENAMES[normalized]
 272 +        if report_path.exists():
 273 +            return report_path, normalized, source
 274      if not required:
 221 -        return None, None
 275 +        return None, None, None
 276      raise ValueError(
 223 -        f"{normalized.capitalize()} analysis report was not found for the active snapshot."
 277 +        f"{normalized.capitalize()} analysis report was not found for the current analysis path or active snapshot."
 278      )
 279
 280
 281 +def _candidate_analysis_dirs(
 282 +    *,
 283 +    snapshot_dir: Path,
 284 +    repo: str,
 285 +    analysis_root: Path | None,
 286 +) -> list[tuple[str, Path]]:
 287 +    owner, name = repo.split("/", 1)
 288 +    candidates: list[tuple[str, Path]] = []
 289 +    if analysis_root is not None:
 290 +        candidates.append(("current", analysis_root / owner / name / "current"))
 291 +    candidates.append(("snapshot", snapshot_dir))
 292 +    return candidates
 293 +
 294 +
 295  def _normalize_analysis_variant(variant: str) -> str:
 296      normalized = variant.strip().lower()
 297      if normalized not in ANALYSIS_VARIANTS:

@@ -234,12 +302,16 @@ def _normalize_analysis_variant(variant: str) -> str:
 302
 303
 304  def _analysis_base_payload(context: AnalysisContext) -> dict[str, Any]:
 305 +    active_snapshot_id = str(context.active_run["snapshot_id"])
 306 +    snapshot_id = str(context.report.get("snapshot_id") or active_snapshot_id)
 307      return {
 308          "repo": str(context.active_run["repo"]),
 239 -        "snapshot_id":
 309 +        "snapshot_id": snapshot_id,
 310 +        "active_snapshot_id": active_snapshot_id,
 311          "run_id": str(context.active_run["id"]),
 312          "variant_requested": context.variant_requested,
 313          "variant_used": context.variant_used,
 314 +        "analysis_source": context.report_source,
 315          "llm_enrichment": bool(context.report.get("llm_enrichment")),
 316          "generated_at": context.report.get("generated_at"),
 317      }
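The effect of _candidate_analysis_dirs is a two-step lookup: a "current" directory under the configured analysis root (when one is set) is consulted before the active snapshot directory, and within each directory the hybrid report beats the deterministic one in "auto" mode. A standalone sketch of that ordering; the paths and report filenames here are illustrative, not the module's constants:

# Sketch only; directories and filenames are illustrative.
from pathlib import Path

candidate_dirs = [
    ("current", Path("analysis/huggingface/transformers/current")),
    ("snapshot", Path("data/snapshots/2024-06-01")),
]
for source, directory in candidate_dirs:
    for variant in ("hybrid", "deterministic"):
        report = directory / f"analysis-report-{variant}.json"  # illustrative name
        if report.exists():
            print(f"would serve the {variant} report from the {source} dir: {report}")
            break
    else:
        continue  # nothing in this directory; try the next candidate
    break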
src/slop_farmer/reports/dashboard.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any
   8
   9  from slop_farmer.config import DashboardDataOptions
  10  from slop_farmer.data.parquet_io import read_json, read_parquet_rows
  11 -from slop_farmer.data.
  11 +from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
  12
  13
  14  def run_dashboard_data(options: DashboardDataOptions) -> Path:

@@ -88,7 +88,14 @@ def _resolve_snapshot_dir(options: DashboardDataOptions) -> Path:
  88          if options.snapshot_root is not None
  89          else (Path("data") / "snapshots").resolve()
  90      )
  91 -    return
  91 +    return resolve_snapshot_source_dir(
  92 +        snapshot_dir=options.snapshot_dir,
  93 +        local_snapshots_root=snapshots_root,
  94 +        hf_repo_id=options.hf_repo_id,
  95 +        hf_revision=options.hf_revision,
  96 +        hf_materialize_dir=options.hf_materialize_dir,
  97 +        hf_output_dir=snapshots_root.parent,
  98 +    )
  99
 100
 101  def _read_optional_json(path: Path) -> dict[str, Any]:
src/slop_farmer/reports/new_contributor_report.py CHANGED
@@ -12,7 +12,7 @@
  12  from slop_farmer.config import NewContributorReportOptions, resolve_github_token
  13  from slop_farmer.data.http import urlopen_with_retry
  14  from slop_farmer.data.parquet_io import read_json, read_parquet_rows, write_parquet, write_text
  15 -from slop_farmer.data.
  15 +from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
  16  from slop_farmer.reports.user_activity import summarize_user
  17
  18  GRAPHQL_URL = "https://api.github.com/graphql"

@@ -131,7 +131,14 @@ def run_new_contributor_report(options: NewContributorReportOptions) -> Path:
 131
 132
 133  def _resolve_snapshot_dir(options: NewContributorReportOptions) -> Path:
 134 -    return
 134 +    return resolve_snapshot_source_dir(
 135 +        snapshot_dir=options.snapshot_dir,
 136 +        local_snapshots_root=options.output_dir.resolve() / "snapshots",
 137 +        hf_repo_id=options.hf_repo_id,
 138 +        hf_revision=options.hf_revision,
 139 +        hf_materialize_dir=options.hf_materialize_dir,
 140 +        hf_output_dir=options.output_dir,
 141 +    )
 142
 143
 144  def _load_snapshot(snapshot_dir: Path) -> dict[str, Any]:

@@ -244,7 +251,6 @@ def _report_contributors(
 251              previous_report_reusable
 252              and previous_entry is not None
 253              and not previous_entry.get("fetch_error")
 247 -            and not known_via_prior_merged_pr
 254          ):
 255              contributors.append(
 256                  _reused_previous_report_entry(

@@ -256,6 +262,8 @@
 262                  )
 263              )
 264              reused_previous_report += 1
 265 +            if known_via_prior_merged_pr:
 266 +                reused_known_merged += 1
 267              continue
 268          try:
 269              summary = summarize_user(row["author_login"], options.window_days, None)
src/slop_farmer/reports/pr_scope.py CHANGED
@@ -42,11 +42,7 @@
  42  from pydantic import BaseModel, Field
  43
  44  from slop_farmer.data.parquet_io import read_json, read_parquet_rows
  45 -from slop_farmer.data.
  46 -from slop_farmer.data.snapshot_paths import (
  47 -    default_hf_materialize_dir,
  48 -    resolve_snapshot_dir_from_output,
  49 -)
  45 +from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
  46  from slop_farmer.reports.pr_heuristics import (
  47      compile_cluster_suppression_rules,
  48      suppressed_pull_request_reasons,

@@ -260,17 +256,14 @@ def run_pr_scope_report(options: Any) -> Path:
 256
 257
 258  def _resolve_snapshot_dir(options: Any) -> Path:
 263 -
 264 -        snapshot_dir
 265 -
 266 -
 267 -
 268 -
 269 -
 270 -
 271 -        )
 272 -        return snapshot_dir.resolve()
 273 -    return resolve_snapshot_dir_from_output(options.output_dir, options.snapshot_dir)
 259 +    return resolve_snapshot_source_dir(
 260 +        snapshot_dir=options.snapshot_dir,
 261 +        local_snapshots_root=options.output_dir.resolve() / "snapshots",
 262 +        hf_repo_id=options.hf_repo_id,
 263 +        hf_revision=options.hf_revision,
 264 +        hf_materialize_dir=options.hf_materialize_dir,
 265 +        hf_output_dir=options.output_dir,
 266 +    )
 267
 268
 269  def _load_snapshot_context(snapshot_dir: Path) -> dict[str, Any]:
src/slop_farmer/reports/pr_search_scope.py
CHANGED

@@ -10,11 +10,7 @@ from typing import Any

 from slop_farmer.config import PrSearchRefreshOptions
 from slop_farmer.data.parquet_io import read_json, read_parquet_rows
-from slop_farmer.data.
-from slop_farmer.data.snapshot_paths import (
-    default_hf_materialize_dir,
-    resolve_snapshot_dir_from_output,
-)
+from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
 from slop_farmer.reports.pr_heuristics import (
     compile_cluster_suppression_rules,
     suppressed_pull_request_reasons,

@@ -36,17 +32,14 @@ DEFAULT_CANDIDATE_LIMIT = 5


 def resolve_pr_search_snapshot_dir(options: PrSearchRefreshOptions) -> Path:
-
-    snapshot_dir
-
-
-
-
-
-
-    )
-    return snapshot_dir.resolve()
-    return resolve_snapshot_dir_from_output(options.output_dir, options.snapshot_dir)
+    return resolve_snapshot_source_dir(
+        snapshot_dir=options.snapshot_dir,
+        local_snapshots_root=options.output_dir.resolve() / "snapshots",
+        hf_repo_id=options.hf_repo_id,
+        hf_revision=options.hf_revision,
+        hf_materialize_dir=options.hf_materialize_dir,
+        hf_output_dir=options.output_dir,
+    )


 def load_pr_search_snapshot(snapshot_dir: Path) -> dict[str, Any]:

@@ -54,6 +47,7 @@ def load_pr_search_snapshot(snapshot_dir: Path) -> dict[str, Any]:
     manifest = read_json(manifest_path) if manifest_path.exists() else {}
     pull_requests = read_parquet_rows(snapshot_dir / "pull_requests.parquet")
     pr_files = read_parquet_rows(snapshot_dir / "pr_files.parquet")
+    contributors = read_parquet_rows(snapshot_dir / "new_contributors.parquet")
     repo = manifest.get("repo") or (pull_requests[0].get("repo") if pull_requests else None) or ""
     snapshot_id = manifest.get("snapshot_id") or snapshot_dir.name
     return {

@@ -62,6 +56,7 @@ def load_pr_search_snapshot(snapshot_dir: Path) -> dict[str, Any]:
         "manifest": manifest,
         "pull_requests": pull_requests,
         "pr_files": pr_files,
+        "contributors": contributors,
     }


@@ -412,6 +407,7 @@ def _document_row(row: Mapping[str, Any]) -> dict[str, Any]:
     return {
         "pr_number": int(row["number"]),
         "github_id": row.get("github_id"),
+        "author_login": row.get("author_login"),
         "state": row.get("state"),
         "draft": bool(row.get("draft")),
         "merged": bool(row.get("merged")),
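With the new `contributors` key and the `author_login` field on documents, a caller can join contributor rows onto PR rows straight from a snapshot. A small usage sketch, assuming the snapshot path shown is illustrative and that the `pull_requests` parquet carries an `author_login` column as the document rows suggest:

```python
from pathlib import Path

from slop_farmer.reports.pr_search_scope import load_pr_search_snapshot

# Illustrative path; snapshots normally sit under <output_dir>/snapshots/.
snapshot = load_pr_search_snapshot(Path("out/snapshots/latest"))

# Index contributors by login so each PR row can be paired with its author.
by_login = {c.get("author_login"): c for c in snapshot["contributors"]}
for pr in snapshot["pull_requests"]:
    author = by_login.get(pr.get("author_login"))
    if author is not None:
        print(pr.get("number"), author.get("repo_association"))
```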
src/slop_farmer/reports/pr_search_service.py
CHANGED

@@ -1,7 +1,7 @@
 from __future__ import annotations

 import json
-from collections.abc import Iterable, Mapping
+from collections.abc import Iterable, Mapping, Sequence
 from contextlib import suppress
 from pathlib import Path
 from typing import Any, Protocol

@@ -17,6 +17,8 @@ from slop_farmer.data.search_duckdb import (
     get_cluster,
     get_cluster_ids_for_prs,
     get_cluster_members,
+    get_contributor,
+    get_contributor_pulls,
     get_document,
     get_feature,
     get_pair_neighbor_row,

@@ -99,6 +101,16 @@ def run_pr_search_refresh(options: PrSearchRefreshOptions) -> dict[str, Any]:
         "pr_search_documents",
         _scoped_rows(artifacts["documents"], run_id=run_id, repo=repo),
     )
+    insert_rows(
+        connection,
+        "pr_search_contributors",
+        _contributor_rows(
+            snapshot["contributors"],
+            run_id=run_id,
+            repo=repo,
+            snapshot_id=str(snapshot["snapshot_id"]),
+        ),
+    )
     insert_rows(
         connection,
         "pr_scope_features",

@@ -290,6 +302,85 @@ def get_pr_search_candidate_clusters(
         connection.close()


+def get_pr_search_contributor(
+    db_path: Path,
+    *,
+    author_login: str,
+    repo: str | None = None,
+) -> dict[str, Any]:
+    connection = connect_pr_search_db(db_path, read_only=True)
+    try:
+        active_run = resolve_active_run(connection, repo=repo)
+        run_id = str(active_run["id"])
+        contributor = _require_contributor(connection, run_id=run_id, author_login=author_login)
+        pulls = _document_rows(
+            get_contributor_pulls(connection, run_id=run_id, author_login=author_login, limit=20)
+        )
+        return {
+            "repo": active_run["repo"],
+            "snapshot_id": active_run["snapshot_id"],
+            "run_id": run_id,
+            "contributor": contributor,
+            "pulls": pulls,
+            "pull_count": len(pulls),
+        }
+    finally:
+        connection.close()
+
+
+def get_pr_search_contributor_pulls(
+    db_path: Path,
+    *,
+    author_login: str,
+    repo: str | None = None,
+    limit: int = 20,
+) -> dict[str, Any]:
+    connection = connect_pr_search_db(db_path, read_only=True)
+    try:
+        active_run = resolve_active_run(connection, repo=repo)
+        run_id = str(active_run["id"])
+        contributor = _require_contributor(connection, run_id=run_id, author_login=author_login)
+        pulls = _document_rows(
+            get_contributor_pulls(connection, run_id=run_id, author_login=author_login, limit=limit)
+        )
+        return {
+            "repo": active_run["repo"],
+            "snapshot_id": active_run["snapshot_id"],
+            "run_id": run_id,
+            "contributor": contributor,
+            "pulls": pulls,
+            "pull_count": len(pulls),
+        }
+    finally:
+        connection.close()
+
+
+def get_pr_search_pull_contributor(
+    db_path: Path,
+    *,
+    pr_number: int,
+    repo: str | None = None,
+) -> dict[str, Any]:
+    connection = connect_pr_search_db(db_path, read_only=True)
+    try:
+        active_run = resolve_active_run(connection, repo=repo)
+        run_id = str(active_run["id"])
+        document = _require_document(connection, run_id=run_id, pr_number=pr_number)
+        author_login = str(document.get("author_login") or "").strip()
+        if not author_login:
+            raise ValueError(f"PR #{pr_number} does not have an indexed author_login.")
+        contributor = _require_contributor(connection, run_id=run_id, author_login=author_login)
+        return {
+            "repo": active_run["repo"],
+            "snapshot_id": active_run["snapshot_id"],
+            "run_id": run_id,
+            "pr": _without_json_fields(document),
+            "contributor": contributor,
+        }
+    finally:
+        connection.close()
+
+
 def get_pr_search_similar_lookup(
     db_path: Path,
     *,

@@ -801,6 +892,15 @@ def _require_feature(connection: Any, *, run_id: str, pr_number: int) -> dict[str, Any]:
     return feature


+def _require_contributor(connection: Any, *, run_id: str, author_login: str) -> dict[str, Any]:
+    contributor = get_contributor(connection, run_id=run_id, author_login=author_login)
+    if contributor is None:
+        raise ValueError(
+            f"Contributor {author_login!r} was not found in the active indexed universe."
+        )
+    return _contributor_row(contributor)
+
+
 def _json_list(raw: Any) -> list[str]:
     if isinstance(raw, list):
         return [str(item) for item in raw]

@@ -838,6 +938,71 @@ def _without_json_fields(row: Mapping[str, Any]) -> dict[str, Any]:
     return {str(key): value for key, value in row.items() if not str(key).endswith("_json")}


+def _document_rows(rows: Sequence[Mapping[str, Any]]) -> list[dict[str, Any]]:
+    return [_without_json_fields(row) for row in rows]
+
+
+def _contributor_rows(
+    rows: list[Mapping[str, Any]],
+    *,
+    run_id: str,
+    repo: str,
+    snapshot_id: str,
+) -> list[dict[str, Any]]:
+    return [
+        {
+            "run_id": run_id,
+            "repo": repo,
+            "snapshot_id": snapshot_id,
+            "report_generated_at": row.get("report_generated_at"),
+            "window_days": row.get("window_days"),
+            "author_login": row.get("author_login"),
+            "name": row.get("name"),
+            "profile_url": row.get("profile_url"),
+            "repo_pull_requests_url": row.get("repo_pull_requests_url"),
+            "repo_issues_url": row.get("repo_issues_url"),
+            "repo_first_seen_at": row.get("repo_first_seen_at"),
+            "repo_last_seen_at": row.get("repo_last_seen_at"),
+            "repo_primary_artifact_count": row.get("repo_primary_artifact_count"),
+            "repo_artifact_count": row.get("repo_artifact_count"),
+            "snapshot_issue_count": row.get("snapshot_issue_count"),
+            "snapshot_pr_count": row.get("snapshot_pr_count"),
+            "snapshot_comment_count": row.get("snapshot_comment_count"),
+            "snapshot_review_count": row.get("snapshot_review_count"),
+            "snapshot_review_comment_count": row.get("snapshot_review_comment_count"),
+            "repo_association": row.get("repo_association"),
+            "new_to_repo": row.get("new_to_repo"),
+            "first_seen_in_snapshot": row.get("first_seen_in_snapshot"),
+            "report_reason": row.get("report_reason"),
+            "account_age_days": row.get("account_age_days"),
+            "young_account": row.get("young_account"),
+            "follow_through_score": row.get("follow_through_score"),
+            "breadth_score": row.get("breadth_score"),
+            "automation_risk_signal": row.get("automation_risk_signal"),
+            "heuristic_note": row.get("heuristic_note"),
+            "public_orgs_json": row.get("public_orgs"),
+            "visible_authored_pr_count": row.get("visible_authored_pr_count"),
+            "merged_pr_count": row.get("merged_pr_count"),
+            "closed_unmerged_pr_count": row.get("closed_unmerged_pr_count"),
+            "open_pr_count": row.get("open_pr_count"),
+            "merged_pr_rate": row.get("merged_pr_rate"),
+            "closed_unmerged_pr_rate": row.get("closed_unmerged_pr_rate"),
+            "still_open_pr_rate": row.get("still_open_pr_rate"),
+            "distinct_repos_with_authored_prs": row.get("distinct_repos_with_authored_prs"),
+            "distinct_repos_with_open_prs": row.get("distinct_repos_with_open_prs"),
+            "fetch_error": row.get("fetch_error"),
+        }
+        for row in rows
+    ]
+
+
+def _contributor_row(row: Mapping[str, Any]) -> dict[str, Any]:
+    return {
+        **_without_json_fields(row),
+        "public_orgs": _json_list(row.get("public_orgs_json")),
+    }
+
+
 def _normalize_lookup_mode(mode: str) -> str:
     normalized = mode.strip().lower()
     if normalized not in {"auto", "indexed", "live"}:
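The three new read paths share one shape: open the DuckDB file read-only, resolve the active run, and return plain dicts. A usage sketch (the database path, login, and PR number below are illustrative, not values from this commit):

```python
from pathlib import Path

from slop_farmer.reports.pr_search_service import (
    get_pr_search_contributor,
    get_pr_search_pull_contributor,
)

DB = Path("out/pr_search.duckdb")  # illustrative location for the index file

# Look a contributor up by login within the active indexed run...
profile = get_pr_search_contributor(DB, author_login="octocat")
print(profile["pull_count"], profile["contributor"].get("merged_pr_rate"))

# ...or start from a PR number and hop to its author's profile.
linked = get_pr_search_pull_contributor(DB, pr_number=1234)
print(linked["pr"]["pr_number"], linked["contributor"].get("author_login"))
```

Note the design choice: `get_pr_search_contributor` is effectively `get_pr_search_contributor_pulls` with `limit` fixed at 20, so the two functions share the same response shape and can back separate API endpoints without divergent payloads.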
uv.lock
CHANGED
|
@@ -4,7 +4,7 @@ requires-python = ">=3.13.5"
|
|
| 4 |
|
| 5 |
[[package]]
|
| 6 |
name = "a2a-sdk"
|
| 7 |
-
version = "0.3.
|
| 8 |
source = { registry = "https://pypi.org/simple" }
|
| 9 |
dependencies = [
|
| 10 |
{ name = "google-api-core" },
|
|
@@ -13,9 +13,9 @@ dependencies = [
|
|
| 13 |
{ name = "protobuf" },
|
| 14 |
{ name = "pydantic" },
|
| 15 |
]
|
| 16 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 17 |
wheels = [
|
| 18 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 19 |
]
|
| 20 |
|
| 21 |
[[package]]
|
|
@@ -53,7 +53,7 @@ wheels = [
|
|
| 53 |
|
| 54 |
[[package]]
|
| 55 |
name = "aiohttp"
|
| 56 |
-
version = "3.13.
|
| 57 |
source = { registry = "https://pypi.org/simple" }
|
| 58 |
dependencies = [
|
| 59 |
{ name = "aiohappyeyeballs" },
|
|
@@ -64,59 +64,59 @@ dependencies = [
|
|
| 64 |
{ name = "propcache" },
|
| 65 |
{ name = "yarl" },
|
| 66 |
]
|
| 67 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 68 |
-
wheels = [
|
| 69 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 70 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 71 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 72 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 73 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 74 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 75 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 76 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 77 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 78 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 79 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 80 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 81 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 82 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 83 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 84 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 85 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 86 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 87 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 88 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 89 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 90 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 91 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 92 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 93 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 94 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 95 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 96 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 97 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 98 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 99 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 100 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 101 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 102 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 103 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 104 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 105 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 106 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 107 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 108 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 109 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 110 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 111 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 112 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 113 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 114 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 115 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 116 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 117 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 118 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 119 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 120 |
]
|
| 121 |
|
| 122 |
[[package]]
|
|
@@ -151,7 +151,7 @@ wheels = [
|
|
| 151 |
|
| 152 |
[[package]]
|
| 153 |
name = "anthropic"
|
| 154 |
-
version = "0.
|
| 155 |
source = { registry = "https://pypi.org/simple" }
|
| 156 |
dependencies = [
|
| 157 |
{ name = "anyio" },
|
|
@@ -163,9 +163,9 @@ dependencies = [
|
|
| 163 |
{ name = "sniffio" },
|
| 164 |
{ name = "typing-extensions" },
|
| 165 |
]
|
| 166 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 167 |
wheels = [
|
| 168 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 169 |
]
|
| 170 |
|
| 171 |
[package.optional-dependencies]
|
|
@@ -539,15 +539,15 @@ wheels = [
|
|
| 539 |
|
| 540 |
[[package]]
|
| 541 |
name = "email-validator"
|
| 542 |
-
version = "2.
|
| 543 |
source = { registry = "https://pypi.org/simple" }
|
| 544 |
dependencies = [
|
| 545 |
{ name = "dnspython" },
|
| 546 |
{ name = "idna" },
|
| 547 |
]
|
| 548 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 549 |
wheels = [
|
| 550 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 551 |
]
|
| 552 |
|
| 553 |
[[package]]
|
|
@@ -561,7 +561,7 @@ wheels = [
|
|
| 561 |
|
| 562 |
[[package]]
|
| 563 |
name = "fast-agent-mcp"
|
| 564 |
-
version = "0.6.
|
| 565 |
source = { registry = "https://pypi.org/simple" }
|
| 566 |
dependencies = [
|
| 567 |
{ name = "a2a-sdk" },
|
|
@@ -598,14 +598,14 @@ dependencies = [
|
|
| 598 |
{ name = "uvloop", marker = "sys_platform != 'win32'" },
|
| 599 |
{ name = "watchfiles" },
|
| 600 |
]
|
| 601 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 602 |
wheels = [
|
| 603 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 604 |
]
|
| 605 |
|
| 606 |
[[package]]
|
| 607 |
name = "fastapi"
|
| 608 |
-
version = "0.
|
| 609 |
source = { registry = "https://pypi.org/simple" }
|
| 610 |
dependencies = [
|
| 611 |
{ name = "annotated-doc" },
|
|
@@ -614,14 +614,14 @@ dependencies = [
|
|
| 614 |
{ name = "typing-extensions" },
|
| 615 |
{ name = "typing-inspection" },
|
| 616 |
]
|
| 617 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 618 |
wheels = [
|
| 619 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 620 |
]
|
| 621 |
|
| 622 |
[[package]]
|
| 623 |
name = "fastmcp"
|
| 624 |
-
version = "3.2.
|
| 625 |
source = { registry = "https://pypi.org/simple" }
|
| 626 |
dependencies = [
|
| 627 |
{ name = "authlib" },
|
|
@@ -646,9 +646,9 @@ dependencies = [
|
|
| 646 |
{ name = "watchfiles" },
|
| 647 |
{ name = "websockets" },
|
| 648 |
]
|
| 649 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 650 |
wheels = [
|
| 651 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 652 |
]
|
| 653 |
|
| 654 |
[[package]]
|
|
@@ -778,7 +778,7 @@ requests = [
|
|
| 778 |
|
| 779 |
[[package]]
|
| 780 |
name = "google-genai"
|
| 781 |
-
version = "1.
|
| 782 |
source = { registry = "https://pypi.org/simple" }
|
| 783 |
dependencies = [
|
| 784 |
{ name = "anyio" },
|
|
@@ -792,9 +792,9 @@ dependencies = [
|
|
| 792 |
{ name = "typing-extensions" },
|
| 793 |
{ name = "websockets" },
|
| 794 |
]
|
| 795 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 796 |
wheels = [
|
| 797 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 798 |
]
|
| 799 |
|
| 800 |
[[package]]
|
|
@@ -1082,7 +1082,7 @@ wheels = [
|
|
| 1082 |
|
| 1083 |
[[package]]
|
| 1084 |
name = "jsonschema"
|
| 1085 |
-
version = "4.
|
| 1086 |
source = { registry = "https://pypi.org/simple" }
|
| 1087 |
dependencies = [
|
| 1088 |
{ name = "attrs" },
|
|
@@ -1090,9 +1090,9 @@ dependencies = [
|
|
| 1090 |
{ name = "referencing" },
|
| 1091 |
{ name = "rpds-py" },
|
| 1092 |
]
|
| 1093 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1094 |
wheels = [
|
| 1095 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1096 |
]
|
| 1097 |
|
| 1098 |
[[package]]
|
|
@@ -1870,7 +1870,7 @@ wheels = [
|
|
| 1870 |
|
| 1871 |
[[package]]
|
| 1872 |
name = "pydantic"
|
| 1873 |
-
version = "2.
|
| 1874 |
source = { registry = "https://pypi.org/simple" }
|
| 1875 |
dependencies = [
|
| 1876 |
{ name = "annotated-types" },
|
|
@@ -1878,9 +1878,9 @@ dependencies = [
|
|
| 1878 |
{ name = "typing-extensions" },
|
| 1879 |
{ name = "typing-inspection" },
|
| 1880 |
]
|
| 1881 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1882 |
wheels = [
|
| 1883 |
-
{ url = "https://files.pythonhosted.org/packages/5a/
|
| 1884 |
]
|
| 1885 |
|
| 1886 |
[package.optional-dependencies]
|
|
@@ -1890,69 +1890,72 @@ email = [
|
|
| 1890 |
|
| 1891 |
[[package]]
|
| 1892 |
name = "pydantic-core"
|
| 1893 |
-
version = "2.
|
| 1894 |
source = { registry = "https://pypi.org/simple" }
|
| 1895 |
dependencies = [
|
| 1896 |
{ name = "typing-extensions" },
|
| 1897 |
]
|
| 1898 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1899 |
-
wheels = [
|
| 1900 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1901 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1902 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1903 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1904 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1905 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1906 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1907 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1908 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1909 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1910 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1911 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1912 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1913 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1914 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1915 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1916 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1917 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1918 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1919 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1920 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1921 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1922 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1923 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1924 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1925 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1926 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1927 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1928 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1929 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1930 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1931 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1932 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1933 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1934 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1935 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1936 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1937 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1938 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1939 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1940 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1941 |
-
{ url = "https://files.pythonhosted.org/packages/
|
|
|
|
|
|
|
|
|
|
| 1942 |
]
|
| 1943 |
|
| 1944 |
[[package]]
|
| 1945 |
name = "pydantic-settings"
|
| 1946 |
-
version = "2.13.
|
| 1947 |
source = { registry = "https://pypi.org/simple" }
|
| 1948 |
dependencies = [
|
| 1949 |
{ name = "pydantic" },
|
| 1950 |
{ name = "python-dotenv" },
|
| 1951 |
{ name = "typing-inspection" },
|
| 1952 |
]
|
| 1953 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1954 |
wheels = [
|
| 1955 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1956 |
]
|
| 1957 |
|
| 1958 |
[[package]]
|
|
@@ -1993,12 +1996,9 @@ crypto = [
|
|
| 1993 |
|
| 1994 |
[[package]]
|
| 1995 |
name = "pyperclip"
|
| 1996 |
-
version = "1.
|
| 1997 |
source = { registry = "https://pypi.org/simple" }
|
| 1998 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1999 |
-
wheels = [
|
| 2000 |
-
{ url = "https://files.pythonhosted.org/packages/df/80/fc9d01d5ed37ba4c42ca2b55b4339ae6e200b456be3a1aaddf4a9fa99b8c/pyperclip-1.11.0-py3-none-any.whl", hash = "sha256:299403e9ff44581cb9ba2ffeed69c7aa96a008622ad0c46cb575ca75b5b84273", size = 11063, upload-time = "2025-09-26T14:40:36.069Z" },
|
| 2001 |
-
]
|
| 2002 |
|
| 2003 |
[[package]]
|
| 2004 |
name = "pytest"
|
|
@@ -2366,7 +2366,7 @@ wheels = [
|
|
| 2366 |
|
| 2367 |
[[package]]
|
| 2368 |
name = "slop-farmer"
|
| 2369 |
-
version = "0.1.
|
| 2370 |
source = { editable = "." }
|
| 2371 |
dependencies = [
|
| 2372 |
{ name = "duckdb" },
|
|
@@ -2394,7 +2394,7 @@ llm = [
|
|
| 2394 |
[package.metadata]
|
| 2395 |
requires-dist = [
|
| 2396 |
{ name = "duckdb", specifier = ">=1.2.2" },
|
| 2397 |
-
{ name = "fast-agent-mcp", specifier = ">=0.6.
|
| 2398 |
{ name = "fast-agent-mcp", marker = "python_full_version >= '3.13.5' and extra == 'llm'", specifier = ">=0.6.16" },
|
| 2399 |
{ name = "fastapi", specifier = ">=0.115.0" },
|
| 2400 |
{ name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" },
|
|
|
|
| 4 |
|
| 5 |
[[package]]
|
| 6 |
name = "a2a-sdk"
|
| 7 |
+
version = "0.3.26"
|
| 8 |
source = { registry = "https://pypi.org/simple" }
|
| 9 |
dependencies = [
|
| 10 |
{ name = "google-api-core" },
|
|
|
|
| 13 |
{ name = "protobuf" },
|
| 14 |
{ name = "pydantic" },
|
| 15 |
]
|
| 16 |
+
sdist = { url = "https://files.pythonhosted.org/packages/be/97/a6840e01795b182ce751ca165430d46459927cde9bfab838087cbb24aef7/a2a_sdk-0.3.26.tar.gz", hash = "sha256:44068e2d037afbb07ab899267439e9bc7eaa7ac2af94f1e8b239933c993ad52d", size = 274598, upload-time = "2026-04-09T15:21:13.902Z" }
|
| 17 |
wheels = [
|
| 18 |
+
{ url = "https://files.pythonhosted.org/packages/dd/d5/51f4ee1bf3b736add42a542d3c8a3fd3fa85f3d36c17972127defc46c26f/a2a_sdk-0.3.26-py3-none-any.whl", hash = "sha256:754e0573f6d33b225c1d8d51f640efa69cbbed7bdfb06ce9c3540ea9f58d4a91", size = 151016, upload-time = "2026-04-09T15:21:12.35Z" },
|
| 19 |
]
|
| 20 |
|
| 21 |
[[package]]
|
|
|
|
| 53 |
|
| 54 |
[[package]]
|
| 55 |
name = "aiohttp"
|
| 56 |
+
version = "3.13.5"
|
| 57 |
source = { registry = "https://pypi.org/simple" }
|
| 58 |
dependencies = [
|
| 59 |
{ name = "aiohappyeyeballs" },
|
|
|
|
| 64 |
{ name = "propcache" },
|
| 65 |
{ name = "yarl" },
|
| 66 |
]
|
| 67 |
+
sdist = { url = "https://files.pythonhosted.org/packages/77/9a/152096d4808df8e4268befa55fba462f440f14beab85e8ad9bf990516918/aiohttp-3.13.5.tar.gz", hash = "sha256:9d98cc980ecc96be6eb4c1994ce35d28d8b1f5e5208a23b421187d1209dbb7d1", size = 7858271, upload-time = "2026-03-31T22:01:03.343Z" }
|
| 68 |
+
wheels = [
|
| 69 |
+
{ url = "https://files.pythonhosted.org/packages/78/e9/d76bf503005709e390122d34e15256b88f7008e246c4bdbe915cd4f1adce/aiohttp-3.13.5-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5029cc80718bbd545123cd8fe5d15025eccaaaace5d0eeec6bd556ad6163d61", size = 742930, upload-time = "2026-03-31T21:58:13.155Z" },
|
| 70 |
+
{ url = "https://files.pythonhosted.org/packages/57/00/4b7b70223deaebd9bb85984d01a764b0d7bd6526fcdc73cca83bcbe7243e/aiohttp-3.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4bb6bf5811620003614076bdc807ef3b5e38244f9d25ca5fe888eaccea2a9832", size = 496927, upload-time = "2026-03-31T21:58:15.073Z" },
|
| 71 |
+
{ url = "https://files.pythonhosted.org/packages/9c/f5/0fb20fb49f8efdcdce6cd8127604ad2c503e754a8f139f5e02b01626523f/aiohttp-3.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a84792f8631bf5a94e52d9cc881c0b824ab42717165a5579c760b830d9392ac9", size = 497141, upload-time = "2026-03-31T21:58:17.009Z" },
|
| 72 |
+
{ url = "https://files.pythonhosted.org/packages/3b/86/b7c870053e36a94e8951b803cb5b909bfbc9b90ca941527f5fcafbf6b0fa/aiohttp-3.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57653eac22c6a4c13eb22ecf4d673d64a12f266e72785ab1c8b8e5940d0e8090", size = 1732476, upload-time = "2026-03-31T21:58:18.925Z" },
|
| 73 |
+
{ url = "https://files.pythonhosted.org/packages/b5/e5/4e161f84f98d80c03a238671b4136e6530453d65262867d989bbe78244d0/aiohttp-3.13.5-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5e5f7debc7a57af53fdf5c5009f9391d9f4c12867049d509bf7bb164a6e295b", size = 1706507, upload-time = "2026-03-31T21:58:21.094Z" },
|
| 74 |
+
{ url = "https://files.pythonhosted.org/packages/d4/56/ea11a9f01518bd5a2a2fcee869d248c4b8a0cfa0bb13401574fa31adf4d4/aiohttp-3.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c719f65bebcdf6716f10e9eff80d27567f7892d8988c06de12bbbd39307c6e3a", size = 1773465, upload-time = "2026-03-31T21:58:23.159Z" },
|
| 75 |
+
{ url = "https://files.pythonhosted.org/packages/eb/40/333ca27fb74b0383f17c90570c748f7582501507307350a79d9f9f3c6eb1/aiohttp-3.13.5-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d97f93fdae594d886c5a866636397e2bcab146fd7a132fd6bb9ce182224452f8", size = 1873523, upload-time = "2026-03-31T21:58:25.59Z" },
|
| 76 |
+
{ url = "https://files.pythonhosted.org/packages/f0/d2/e2f77eef1acb7111405433c707dc735e63f67a56e176e72e9e7a2cd3f493/aiohttp-3.13.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3df334e39d4c2f899a914f1dba283c1aadc311790733f705182998c6f7cae665", size = 1754113, upload-time = "2026-03-31T21:58:27.624Z" },
|
| 77 |
+
{ url = "https://files.pythonhosted.org/packages/fb/56/3f653d7f53c89669301ec9e42c95233e2a0c0a6dd051269e6e678db4fdb0/aiohttp-3.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fe6970addfea9e5e081401bcbadf865d2b6da045472f58af08427e108d618540", size = 1562351, upload-time = "2026-03-31T21:58:29.918Z" },
|
| 78 |
+
{ url = "https://files.pythonhosted.org/packages/ec/a6/9b3e91eb8ae791cce4ee736da02211c85c6f835f1bdfac0594a8a3b7018c/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7becdf835feff2f4f335d7477f121af787e3504b48b449ff737afb35869ba7bb", size = 1693205, upload-time = "2026-03-31T21:58:32.214Z" },
|
| 79 |
+
{ url = "https://files.pythonhosted.org/packages/98/fc/bfb437a99a2fcebd6b6eaec609571954de2ed424f01c352f4b5504371dd3/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:676e5651705ad5d8a70aeb8eb6936c436d8ebbd56e63436cb7dd9bb36d2a9a46", size = 1730618, upload-time = "2026-03-31T21:58:34.728Z" },
|
| 80 |
+
{ url = "https://files.pythonhosted.org/packages/e4/b6/c8534862126191a034f68153194c389addc285a0f1347d85096d349bbc15/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9b16c653d38eb1a611cc898c41e76859ca27f119d25b53c12875fd0474ae31a8", size = 1745185, upload-time = "2026-03-31T21:58:36.909Z" },
|
| 81 |
+
{ url = "https://files.pythonhosted.org/packages/0b/93/4ca8ee2ef5236e2707e0fd5fecb10ce214aee1ff4ab307af9c558bda3b37/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:999802d5fa0389f58decd24b537c54aa63c01c3219ce17d1214cbda3c2b22d2d", size = 1557311, upload-time = "2026-03-31T21:58:39.38Z" },
|
| 82 |
+
{ url = "https://files.pythonhosted.org/packages/57/ae/76177b15f18c5f5d094f19901d284025db28eccc5ae374d1d254181d33f4/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ec707059ee75732b1ba130ed5f9580fe10ff75180c812bc267ded039db5128c6", size = 1773147, upload-time = "2026-03-31T21:58:41.476Z" },
|
| 83 |
+
{ url = "https://files.pythonhosted.org/packages/01/a4/62f05a0a98d88af59d93b7fcac564e5f18f513cb7471696ac286db970d6a/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2d6d44a5b48132053c2f6cd5c8cb14bc67e99a63594e336b0f2af81e94d5530c", size = 1730356, upload-time = "2026-03-31T21:58:44.049Z" },
|
| 84 |
+
{ url = "https://files.pythonhosted.org/packages/e4/85/fc8601f59dfa8c9523808281f2da571f8b4699685f9809a228adcc90838d/aiohttp-3.13.5-cp313-cp313-win32.whl", hash = "sha256:329f292ed14d38a6c4c435e465f48bebb47479fd676a0411936cc371643225cc", size = 432637, upload-time = "2026-03-31T21:58:46.167Z" },
|
| 85 |
+
{ url = "https://files.pythonhosted.org/packages/c0/1b/ac685a8882896acf0f6b31d689e3792199cfe7aba37969fa91da63a7fa27/aiohttp-3.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:69f571de7500e0557801c0b51f4780482c0ec5fe2ac851af5a92cfce1af1cb83", size = 458896, upload-time = "2026-03-31T21:58:48.119Z" },
|
| 86 |
+
{ url = "https://files.pythonhosted.org/packages/5d/ce/46572759afc859e867a5bc8ec3487315869013f59281ce61764f76d879de/aiohttp-3.13.5-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:eb4639f32fd4a9904ab8fb45bf3383ba71137f3d9d4ba25b3b3f3109977c5b8c", size = 745721, upload-time = "2026-03-31T21:58:50.229Z" },
|
| 87 |
+
{ url = "https://files.pythonhosted.org/packages/13/fe/8a2efd7626dbe6049b2ef8ace18ffda8a4dfcbe1bcff3ac30c0c7575c20b/aiohttp-3.13.5-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:7e5dc4311bd5ac493886c63cbf76ab579dbe4641268e7c74e48e774c74b6f2be", size = 497663, upload-time = "2026-03-31T21:58:52.232Z" },
|
| 88 |
+
{ url = "https://files.pythonhosted.org/packages/9b/91/cc8cc78a111826c54743d88651e1687008133c37e5ee615fee9b57990fac/aiohttp-3.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:756c3c304d394977519824449600adaf2be0ccee76d206ee339c5e76b70ded25", size = 499094, upload-time = "2026-03-31T21:58:54.566Z" },
|
| 89 |
+
{ url = "https://files.pythonhosted.org/packages/0a/33/a8362cb15cf16a3af7e86ed11962d5cd7d59b449202dc576cdc731310bde/aiohttp-3.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecc26751323224cf8186efcf7fbcbc30f4e1d8c7970659daf25ad995e4032a56", size = 1726701, upload-time = "2026-03-31T21:58:56.864Z" },
|
| 90 |
+
{ url = "https://files.pythonhosted.org/packages/45/0c/c091ac5c3a17114bd76cbf85d674650969ddf93387876cf67f754204bd77/aiohttp-3.13.5-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10a75acfcf794edf9d8db50e5a7ec5fc818b2a8d3f591ce93bc7b1210df016d2", size = 1683360, upload-time = "2026-03-31T21:58:59.072Z" },
|
| 91 |
+
{ url = "https://files.pythonhosted.org/packages/23/73/bcee1c2b79bc275e964d1446c55c54441a461938e70267c86afaae6fba27/aiohttp-3.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0f7a18f258d124cd678c5fe072fe4432a4d5232b0657fca7c1847f599233c83a", size = 1773023, upload-time = "2026-03-31T21:59:01.776Z" },
|
| 92 |
+
{ url = "https://files.pythonhosted.org/packages/c7/ef/720e639df03004fee2d869f771799d8c23046dec47d5b81e396c7cda583a/aiohttp-3.13.5-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:df6104c009713d3a89621096f3e3e88cc323fd269dbd7c20afe18535094320be", size = 1853795, upload-time = "2026-03-31T21:59:04.568Z" },
|
| 93 |
+
{ url = "https://files.pythonhosted.org/packages/bd/c9/989f4034fb46841208de7aeeac2c6d8300745ab4f28c42f629ba77c2d916/aiohttp-3.13.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:241a94f7de7c0c3b616627aaad530fe2cb620084a8b144d3be7b6ecfe95bae3b", size = 1730405, upload-time = "2026-03-31T21:59:07.221Z" },
|
| 94 |
+
{ url = "https://files.pythonhosted.org/packages/ce/75/ee1fd286ca7dc599d824b5651dad7b3be7ff8d9a7e7b3fe9820d9180f7db/aiohttp-3.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c974fb66180e58709b6fc402846f13791240d180b74de81d23913abe48e96d94", size = 1558082, upload-time = "2026-03-31T21:59:09.484Z" },
|
| 95 |
+
{ url = "https://files.pythonhosted.org/packages/c3/20/1e9e6650dfc436340116b7aa89ff8cb2bbdf0abc11dfaceaad8f74273a10/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6e27ea05d184afac78aabbac667450c75e54e35f62238d44463131bd3f96753d", size = 1692346, upload-time = "2026-03-31T21:59:12.068Z" },
|
| 96 |
+
{ url = "https://files.pythonhosted.org/packages/d8/40/8ebc6658d48ea630ac7903912fe0dd4e262f0e16825aa4c833c56c9f1f56/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a79a6d399cef33a11b6f004c67bb07741d91f2be01b8d712d52c75711b1e07c7", size = 1698891, upload-time = "2026-03-31T21:59:14.552Z" },
|
| 97 |
+
{ url = "https://files.pythonhosted.org/packages/d8/78/ea0ae5ec8ba7a5c10bdd6e318f1ba5e76fcde17db8275188772afc7917a4/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c632ce9c0b534fbe25b52c974515ed674937c5b99f549a92127c85f771a78772", size = 1742113, upload-time = "2026-03-31T21:59:17.068Z" },
|
| 98 |
+
{ url = "https://files.pythonhosted.org/packages/8a/66/9d308ed71e3f2491be1acb8769d96c6f0c47d92099f3bc9119cada27b357/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:fceedde51fbd67ee2bcc8c0b33d0126cc8b51ef3bbde2f86662bd6d5a6f10ec5", size = 1553088, upload-time = "2026-03-31T21:59:19.541Z" },
|
| 99 |
+
{ url = "https://files.pythonhosted.org/packages/da/a6/6cc25ed8dfc6e00c90f5c6d126a98e2cf28957ad06fa1036bd34b6f24a2c/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f92995dfec9420bb69ae629abf422e516923ba79ba4403bc750d94fb4a6c68c1", size = 1757976, upload-time = "2026-03-31T21:59:22.311Z" },
|
| 100 |
+
{ url = "https://files.pythonhosted.org/packages/c1/2b/cce5b0ffe0de99c83e5e36d8f828e4161e415660a9f3e58339d07cce3006/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20ae0ff08b1f2c8788d6fb85afcb798654ae6ba0b747575f8562de738078457b", size = 1712444, upload-time = "2026-03-31T21:59:24.635Z" },
|
| 101 |
+
{ url = "https://files.pythonhosted.org/packages/6c/cf/9e1795b4160c58d29421eafd1a69c6ce351e2f7c8d3c6b7e4ca44aea1a5b/aiohttp-3.13.5-cp314-cp314-win32.whl", hash = "sha256:b20df693de16f42b2472a9c485e1c948ee55524786a0a34345511afdd22246f3", size = 438128, upload-time = "2026-03-31T21:59:27.291Z" },
|
| 102 |
+
{ url = "https://files.pythonhosted.org/packages/22/4d/eaedff67fc805aeba4ba746aec891b4b24cebb1a7d078084b6300f79d063/aiohttp-3.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:f85c6f327bf0b8c29da7d93b1cabb6363fb5e4e160a32fa241ed2dce21b73162", size = 464029, upload-time = "2026-03-31T21:59:29.429Z" },
|
| 103 |
+
{ url = "https://files.pythonhosted.org/packages/79/11/c27d9332ee20d68dd164dc12a6ecdef2e2e35ecc97ed6cf0d2442844624b/aiohttp-3.13.5-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:1efb06900858bb618ff5cee184ae2de5828896c448403d51fb633f09e109be0a", size = 778758, upload-time = "2026-03-31T21:59:31.547Z" },
|
| 104 |
+
{ url = "https://files.pythonhosted.org/packages/04/fb/377aead2e0a3ba5f09b7624f702a964bdf4f08b5b6728a9799830c80041e/aiohttp-3.13.5-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:fee86b7c4bd29bdaf0d53d14739b08a106fdda809ca5fe032a15f52fae5fe254", size = 512883, upload-time = "2026-03-31T21:59:34.098Z" },
|
| 105 |
+
{ url = "https://files.pythonhosted.org/packages/bb/a6/aa109a33671f7a5d3bd78b46da9d852797c5e665bfda7d6b373f56bff2ec/aiohttp-3.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:20058e23909b9e65f9da62b396b77dfa95965cbe840f8def6e572538b1d32e36", size = 516668, upload-time = "2026-03-31T21:59:36.497Z" },
|
| 106 |
+
{ url = "https://files.pythonhosted.org/packages/79/b3/ca078f9f2fa9563c36fb8ef89053ea2bb146d6f792c5104574d49d8acb63/aiohttp-3.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cf20a8d6868cb15a73cab329ffc07291ba8c22b1b88176026106ae39aa6df0f", size = 1883461, upload-time = "2026-03-31T21:59:38.723Z" },
|
| 107 |
+
{ url = "https://files.pythonhosted.org/packages/b7/e3/a7ad633ca1ca497b852233a3cce6906a56c3225fb6d9217b5e5e60b7419d/aiohttp-3.13.5-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:330f5da04c987f1d5bdb8ae189137c77139f36bd1cb23779ca1a354a4b027800", size = 1747661, upload-time = "2026-03-31T21:59:41.187Z" },
|
| 108 |
+
{ url = "https://files.pythonhosted.org/packages/33/b9/cd6fe579bed34a906d3d783fe60f2fa297ef55b27bb4538438ee49d4dc41/aiohttp-3.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6f1cbf0c7926d315c3c26c2da41fd2b5d2fe01ac0e157b78caefc51a782196cf", size = 1863800, upload-time = "2026-03-31T21:59:43.84Z" },
|
| 109 |
+
{ url = "https://files.pythonhosted.org/packages/c0/3f/2c1e2f5144cefa889c8afd5cf431994c32f3b29da9961698ff4e3811b79a/aiohttp-3.13.5-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:53fc049ed6390d05423ba33103ded7281fe897cf97878f369a527070bd95795b", size = 1958382, upload-time = "2026-03-31T21:59:46.187Z" },
|
| 110 |
+
{ url = "https://files.pythonhosted.org/packages/66/1d/f31ec3f1013723b3babe3609e7f119c2c2fb6ef33da90061a705ef3e1bc8/aiohttp-3.13.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:898703aa2667e3c5ca4c54ca36cd73f58b7a38ef87a5606414799ebce4d3fd3a", size = 1803724, upload-time = "2026-03-31T21:59:48.656Z" },
|
| 111 |
+
{ url = "https://files.pythonhosted.org/packages/0e/b4/57712dfc6f1542f067daa81eb61da282fab3e6f1966fca25db06c4fc62d5/aiohttp-3.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0494a01ca9584eea1e5fbd6d748e61ecff218c51b576ee1999c23db7066417d8", size = 1640027, upload-time = "2026-03-31T21:59:51.284Z" },
|
| 112 |
+
{ url = "https://files.pythonhosted.org/packages/25/3c/734c878fb43ec083d8e31bf029daae1beafeae582d1b35da234739e82ee7/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6cf81fe010b8c17b09495cbd15c1d35afbc8fb405c0c9cf4738e5ae3af1d65be", size = 1806644, upload-time = "2026-03-31T21:59:53.753Z" },
|
| 113 |
+
{ url = "https://files.pythonhosted.org/packages/20/a5/f671e5cbec1c21d044ff3078223f949748f3a7f86b14e34a365d74a5d21f/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:c564dd5f09ddc9d8f2c2d0a301cd30a79a2cc1b46dd1a73bef8f0038863d016b", size = 1791630, upload-time = "2026-03-31T21:59:56.239Z" },
|
| 114 |
+
{ url = "https://files.pythonhosted.org/packages/0b/63/fb8d0ad63a0b8a99be97deac8c04dacf0785721c158bdf23d679a87aa99e/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:2994be9f6e51046c4f864598fd9abeb4fba6e88f0b2152422c9666dcd4aea9c6", size = 1809403, upload-time = "2026-03-31T21:59:59.103Z" },
|
| 115 |
+
{ url = "https://files.pythonhosted.org/packages/59/0c/bfed7f30662fcf12206481c2aac57dedee43fe1c49275e85b3a1e1742294/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:157826e2fa245d2ef46c83ea8a5faf77ca19355d278d425c29fda0beb3318037", size = 1634924, upload-time = "2026-03-31T22:00:02.116Z" },
|
| 116 |
+
{ url = "https://files.pythonhosted.org/packages/17/d6/fd518d668a09fd5a3319ae5e984d4d80b9a4b3df4e21c52f02251ef5a32e/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:a8aca50daa9493e9e13c0f566201a9006f080e7c50e5e90d0b06f53146a54500", size = 1836119, upload-time = "2026-03-31T22:00:04.756Z" },
|
| 117 |
+
{ url = "https://files.pythonhosted.org/packages/78/b7/15fb7a9d52e112a25b621c67b69c167805cb1f2ab8f1708a5c490d1b52fe/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3b13560160d07e047a93f23aaa30718606493036253d5430887514715b67c9d9", size = 1772072, upload-time = "2026-03-31T22:00:07.494Z" },
|
| 118 |
+
{ url = "https://files.pythonhosted.org/packages/7e/df/57ba7f0c4a553fc2bd8b6321df236870ec6fd64a2a473a8a13d4f733214e/aiohttp-3.13.5-cp314-cp314t-win32.whl", hash = "sha256:9a0f4474b6ea6818b41f82172d799e4b3d29e22c2c520ce4357856fced9af2f8", size = 471819, upload-time = "2026-03-31T22:00:10.277Z" },
|
| 119 |
+
{ url = "https://files.pythonhosted.org/packages/62/29/2f8418269e46454a26171bfdd6a055d74febf32234e474930f2f60a17145/aiohttp-3.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:18a2f6c1182c51baa1d28d68fea51513cb2a76612f038853c0ad3c145423d3d9", size = 505441, upload-time = "2026-03-31T22:00:12.791Z" },
|
| 120 |
]
|
| 121 |
|
| 122 |
[[package]]
|
|
|
|
| 151 |
|
| 152 |
[[package]]
|
| 153 |
name = "anthropic"
|
| 154 |
+
version = "0.96.0"
|
| 155 |
source = { registry = "https://pypi.org/simple" }
|
| 156 |
dependencies = [
|
| 157 |
{ name = "anyio" },
|
|
|
|
| 163 |
{ name = "sniffio" },
|
| 164 |
{ name = "typing-extensions" },
|
| 165 |
]
|
| 166 |
+
sdist = { url = "https://files.pythonhosted.org/packages/b9/7e/672f533dee813028d2c699bfd2a7f52c9118d7353680d9aa44b9e23f717f/anthropic-0.96.0.tar.gz", hash = "sha256:9de947b737f39452f68aa520f1c2239d44119c9b73b0fb6d4e6ca80f00279ee6", size = 658210, upload-time = "2026-04-16T14:28:02.846Z" }
|
| 167 |
wheels = [
|
| 168 |
+
{ url = "https://files.pythonhosted.org/packages/48/5a/72f33204064b6e87601a71a6baf8d855769f8a0c1eaae8d06a1094872371/anthropic-0.96.0-py3-none-any.whl", hash = "sha256:9a6e335a354602a521cd9e777e92bfd46ba6e115bf9bbfe6135311e8fb2015b2", size = 635930, upload-time = "2026-04-16T14:28:01.436Z" },
|
| 169 |
]
|
| 170 |
|
| 171 |
[package.optional-dependencies]
|
|
|
|
| 539 |
|
| 540 |
[[package]]
|
| 541 |
name = "email-validator"
|
| 542 |
+
version = "2.2.0"
|
| 543 |
source = { registry = "https://pypi.org/simple" }
|
| 544 |
dependencies = [
|
| 545 |
{ name = "dnspython" },
|
| 546 |
{ name = "idna" },
|
| 547 |
]
|
| 548 |
+
sdist = { url = "https://files.pythonhosted.org/packages/48/ce/13508a1ec3f8bb981ae4ca79ea40384becc868bfae97fd1c942bb3a001b1/email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7", size = 48967, upload-time = "2024-06-20T11:30:30.034Z" }
|
| 549 |
wheels = [
|
| 550 |
+
{ url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521, upload-time = "2024-06-20T11:30:28.248Z" },
|
| 551 |
]
|
| 552 |
|
| 553 |
[[package]]
|
|
|
|
| 561 |
|
| 562 |
[[package]]
|
| 563 |
name = "fast-agent-mcp"
|
| 564 |
+
version = "0.6.17"
|
| 565 |
source = { registry = "https://pypi.org/simple" }
|
| 566 |
dependencies = [
|
| 567 |
{ name = "a2a-sdk" },
|
|
|
|
| 598 |
{ name = "uvloop", marker = "sys_platform != 'win32'" },
|
| 599 |
{ name = "watchfiles" },
|
| 600 |
]
|
| 601 |
+
sdist = { url = "https://files.pythonhosted.org/packages/8c/a1/b6b1045345d38b342da3def7723a2dc6a44faff9c01fee6d81afbd272d62/fast_agent_mcp-0.6.17.tar.gz", hash = "sha256:a920113d47ef2ab82be1bd63b77d3bf78f8f862a5a6e91f1fd0aa931850fb25f", size = 2091401, upload-time = "2026-04-16T21:48:43.334Z" }
|
| 602 |
wheels = [
|
| 603 |
+
{ url = "https://files.pythonhosted.org/packages/b4/ef/47e05d6fa95e04ed8ad60afac3ae29d8205894fb220ffde193bd33578f3a/fast_agent_mcp-0.6.17-py3-none-any.whl", hash = "sha256:a23c5a5ed8924e38809dabd31f994e5cc81b8c084e84632bb1eb246b257c4752", size = 1573794, upload-time = "2026-04-16T21:48:38.999Z" },
|
| 604 |
]
|
| 605 |
|
| 606 |
[[package]]
|
| 607 |
name = "fastapi"
|
| 608 |
+
version = "0.136.0"
|
| 609 |
source = { registry = "https://pypi.org/simple" }
|
| 610 |
dependencies = [
|
| 611 |
{ name = "annotated-doc" },
|
|
|
|
| 614 |
{ name = "typing-extensions" },
|
| 615 |
{ name = "typing-inspection" },
|
| 616 |
]
|
| 617 |
+
sdist = { url = "https://files.pythonhosted.org/packages/4e/d9/e66315807e41e69e7f6a1b42a162dada2f249c5f06ad3f1a95f84ab336ef/fastapi-0.136.0.tar.gz", hash = "sha256:cf08e067cc66e106e102d9ba659463abfac245200752f8a5b7b1e813de4ff73e", size = 396607, upload-time = "2026-04-16T11:47:13.623Z" }
|
| 618 |
wheels = [
|
| 619 |
+
{ url = "https://files.pythonhosted.org/packages/26/a3/0bd5f0cdb0bbc92650e8dc457e9250358411ee5d1b65e42b6632387daf81/fastapi-0.136.0-py3-none-any.whl", hash = "sha256:8793d44ec7378e2be07f8a013cf7f7aa47d6327d0dfe9804862688ec4541a6b4", size = 117556, upload-time = "2026-04-16T11:47:11.922Z" },
|
| 620 |
]
|
| 621 |
|
| 622 |
[[package]]
|
| 623 |
name = "fastmcp"
|
| 624 |
+
version = "3.2.3"
|
| 625 |
source = { registry = "https://pypi.org/simple" }
|
| 626 |
dependencies = [
|
| 627 |
{ name = "authlib" },
|
|
|
|
| 646 |
{ name = "watchfiles" },
|
| 647 |
{ name = "websockets" },
|
| 648 |
]
|
| 649 |
+
sdist = { url = "https://files.pythonhosted.org/packages/b9/42/7eed0a38e3b7a386805fecacf8a5a9353a2b3040395ef9e30e585d8549ac/fastmcp-3.2.3.tar.gz", hash = "sha256:4f02ae8b00227285a0cf6544dea1db29b022c8cdd8d3dfdec7118540210ae60a", size = 26328743, upload-time = "2026-04-09T22:05:03.402Z" }
|
| 650 |
wheels = [
|
| 651 |
+
{ url = "https://files.pythonhosted.org/packages/f5/48/84b6dcba793178a44b9d99b4def6cd62f870dcfc5bb7b9153ac390135812/fastmcp-3.2.3-py3-none-any.whl", hash = "sha256:cc50af6eed1f62ed8b6ebf4987286d8d1d006f08d5bec739d5c7fb76160e0911", size = 707260, upload-time = "2026-04-09T22:05:01.225Z" },
|
| 652 |
]
|
| 653 |
|
| 654 |
[[package]]
|
|
|
|
| 778 |
|
| 779 |
[[package]]
|
| 780 |
name = "google-genai"
|
| 781 |
+
version = "1.66.0"
|
| 782 |
source = { registry = "https://pypi.org/simple" }
|
| 783 |
dependencies = [
|
| 784 |
{ name = "anyio" },
|
|
|
|
| 792 |
{ name = "typing-extensions" },
|
| 793 |
{ name = "websockets" },
|
| 794 |
]
|
| 795 |
+
sdist = { url = "https://files.pythonhosted.org/packages/9b/ba/0b343b0770d4710ad2979fd9301d7caa56c940174d5361ed4a7cc4979241/google_genai-1.66.0.tar.gz", hash = "sha256:ffc01647b65046bca6387320057aa51db0ad64bcc72c8e3e914062acfa5f7c49", size = 504386, upload-time = "2026-03-04T22:15:28.156Z" }
|
| 796 |
wheels = [
|
| 797 |
+
{ url = "https://files.pythonhosted.org/packages/d1/dd/403949d922d4e261b08b64aaa132af4e456c3b15c8e2a2d9e6ef693f66e2/google_genai-1.66.0-py3-none-any.whl", hash = "sha256:7f127a39cf695277104ce4091bb26e417c59bb46e952ff3699c3a982d9c474ee", size = 732174, upload-time = "2026-03-04T22:15:26.63Z" },
|
| 798 |
]
|
| 799 |
|
| 800 |
[[package]]
|
|
|
|
| 1082 |
|
| 1083 |
[[package]]
|
| 1084 |
name = "jsonschema"
|
| 1085 |
+
version = "4.25.1"
|
| 1086 |
source = { registry = "https://pypi.org/simple" }
|
| 1087 |
dependencies = [
|
| 1088 |
{ name = "attrs" },
|
|
|
|
| 1090 |
{ name = "referencing" },
|
| 1091 |
{ name = "rpds-py" },
|
| 1092 |
]
|
| 1093 |
+
sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" }
|
| 1094 |
wheels = [
|
| 1095 |
+
{ url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" },
|
| 1096 |
]
|
| 1097 |
|
| 1098 |
[[package]]
|
|
|
|
| 1870 |
|
| 1871 |
[[package]]
|
| 1872 |
name = "pydantic"
|
| 1873 |
+
version = "2.13.1"
|
| 1874 |
source = { registry = "https://pypi.org/simple" }
|
| 1875 |
dependencies = [
|
| 1876 |
{ name = "annotated-types" },
|
|
|
|
| 1878 |
{ name = "typing-extensions" },
|
| 1879 |
{ name = "typing-inspection" },
|
| 1880 |
]
|
| 1881 |
+
sdist = { url = "https://files.pythonhosted.org/packages/f3/6b/1353beb3d1cd5cf61cdec5b6f87a9872399de3bc5cae0b7ce07ff4de2ab0/pydantic-2.13.1.tar.gz", hash = "sha256:a0f829b279ddd1e39291133fe2539d2aa46cc6b150c1706a270ff0879e3774d2", size = 843746, upload-time = "2026-04-15T14:57:19.398Z" }
|
| 1882 |
wheels = [
|
| 1883 |
+
{ url = "https://files.pythonhosted.org/packages/81/5a/2225f4c176dbfed0d809e848b50ef08f70e61daa667b7fa14b0d311ae44d/pydantic-2.13.1-py3-none-any.whl", hash = "sha256:9557ecc2806faaf6037f85b1fbd963d01e30511c48085f0d573650fdeaad378a", size = 471917, upload-time = "2026-04-15T14:57:17.277Z" },
|
| 1884 |
]
|
| 1885 |
|
| 1886 |
[package.optional-dependencies]
|
|
|
|
| 1890 |
|
| 1891 |
[[package]]
|
| 1892 |
name = "pydantic-core"
|
| 1893 |
+
version = "2.46.1"
|
| 1894 |
source = { registry = "https://pypi.org/simple" }
|
| 1895 |
dependencies = [
|
| 1896 |
{ name = "typing-extensions" },
|
| 1897 |
]
|
| 1898 |
+
sdist = { url = "https://files.pythonhosted.org/packages/a1/93/f97a86a7eb28faa1d038af2fd5d6166418b4433659108a4c311b57128b2d/pydantic_core-2.46.1.tar.gz", hash = "sha256:d408153772d9f298098fb5d620f045bdf0f017af0d5cb6e309ef8c205540caa4", size = 471230, upload-time = "2026-04-15T14:49:34.52Z" }
|
| 1899 |
+
wheels = [
|
| 1900 |
+
{ url = "https://files.pythonhosted.org/packages/ff/d2/bda39bad2f426cb5078e6ad28076614d3926704196efe0d7a2a19a99025d/pydantic_core-2.46.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:cdc8a5762a9c4b9d86e204d555444e3227507c92daba06259ee66595834de47a", size = 2119092, upload-time = "2026-04-15T14:49:50.392Z" },
|
| 1901 |
+
{ url = "https://files.pythonhosted.org/packages/ee/f3/69631e64d69cb3481494b2bddefe0ddd07771209f74e9106d066f9138c2a/pydantic_core-2.46.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ba381dfe9c85692c566ecb60fa5a77a697a2a8eebe274ec5e4d6ec15fafad799", size = 1951400, upload-time = "2026-04-15T14:51:06.588Z" },
|
| 1902 |
+
{ url = "https://files.pythonhosted.org/packages/53/1c/21cb3db6ae997df31be8e91f213081f72ffa641cb45c89b8a1986832b1f9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1593d8de98207466dc070118322fef68307a0cc6a5625e7b386f6fdae57f9ab6", size = 1976864, upload-time = "2026-04-15T14:50:54.804Z" },
|
| 1903 |
+
{ url = "https://files.pythonhosted.org/packages/91/9c/05c819f734318ce5a6ca24da300d93696c105af4adb90494ee571303afd8/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8262c74a1af5b0fdf795f5537f7145785a63f9fbf9e15405f547440c30017ed8", size = 2066669, upload-time = "2026-04-15T14:51:42.346Z" },
|
| 1904 |
+
{ url = "https://files.pythonhosted.org/packages/cb/23/fadddf1c7f2f517f58731aea9b35c914e6005250f08dac9b8e53904cdbaa/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b88949a24182e83fbbb3f7ca9b7858d0d37b735700ea91081434b7d37b3b444", size = 2238737, upload-time = "2026-04-15T14:50:45.558Z" },
|
| 1905 |
+
{ url = "https://files.pythonhosted.org/packages/23/07/0cd4f95cb0359c8b1ec71e89c3777e7932c8dfeb9cd54740289f310aaead/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8f3708cd55537aeaf3fd0ea55df0d68d0da51dcb07cbc8508745b34acc4c6e0", size = 2316258, upload-time = "2026-04-15T14:51:08.471Z" },
|
| 1906 |
+
{ url = "https://files.pythonhosted.org/packages/0c/40/6fc24c3766a19c222a0d60d652b78f0283339d4cd4c173fab06b7ee76571/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f79292435fff1d4f0c18d9cfaf214025cc88e4f5104bfaed53f173621da1c743", size = 2097474, upload-time = "2026-04-15T14:49:56.543Z" },
|
| 1907 |
+
{ url = "https://files.pythonhosted.org/packages/4b/af/f39795d1ce549e35d0841382b9c616ae211caffb88863147369a8d74fba9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:a2e607aeb59cf4575bb364470288db3b9a1f0e7415d053a322e3e154c1a0802e", size = 2168383, upload-time = "2026-04-15T14:51:29.269Z" },
|
| 1908 |
+
{ url = "https://files.pythonhosted.org/packages/e6/32/0d563f74582795779df6cc270c3fc220f49f4daf7860d74a5a6cda8491ff/pydantic_core-2.46.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec5ca190b75878a9f6ae1fc8f5eb678497934475aef3d93204c9fa01e97370b6", size = 2186182, upload-time = "2026-04-15T14:50:19.097Z" },
|
| 1909 |
+
{ url = "https://files.pythonhosted.org/packages/5c/07/1c10d5ce312fc4cf86d1e50bdcdbb8ef248409597b099cab1b4bb3a093f7/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:1f80535259dcdd517d7b8ca588d5ca24b4f337228e583bebedf7a3adcdf5f721", size = 2187859, upload-time = "2026-04-15T14:49:22.974Z" },
|
| 1910 |
+
{ url = "https://files.pythonhosted.org/packages/92/01/e1f62d4cb39f0913dbf5c95b9b119ef30ddba9493dff8c2b012f0cdd67dc/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:24820b3c82c43df61eca30147e42853e6c127d8b868afdc0c162df829e011eb4", size = 2338372, upload-time = "2026-04-15T14:49:53.316Z" },
|
| 1911 |
+
{ url = "https://files.pythonhosted.org/packages/44/ed/218dfeea6127fb1781a6ceca241ec6edf00e8a8933ff331af2215975a534/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f12794b1dd8ac9fb66619e0b3a0427189f5d5638e55a3de1385121a9b7bf9b39", size = 2384039, upload-time = "2026-04-15T14:53:04.929Z" },
|
| 1912 |
+
{ url = "https://files.pythonhosted.org/packages/6c/1e/011e763cd059238249fbd5780e0f8d0b04b47f86c8925e22784f3e5fc977/pydantic_core-2.46.1-cp313-cp313-win32.whl", hash = "sha256:9bc09aed935cdf50f09e908923f9efbcca54e9244bd14a5a0e2a6c8d2c21b4e9", size = 1977943, upload-time = "2026-04-15T14:52:17.969Z" },
|
| 1913 |
+
{ url = "https://files.pythonhosted.org/packages/8c/06/b559a490d3ed106e9b1777b8d5c8112dd8d31716243cd662616f66c1f8ea/pydantic_core-2.46.1-cp313-cp313-win_amd64.whl", hash = "sha256:fac2d6c8615b8b42bee14677861ba09d56ee076ba4a65cfb9c3c3d0cc89042f2", size = 2068729, upload-time = "2026-04-15T14:53:07.288Z" },
|
| 1914 |
+
{ url = "https://files.pythonhosted.org/packages/9f/52/32a198946e2e19508532aa9da02a61419eb15bd2d96bab57f810f2713e31/pydantic_core-2.46.1-cp313-cp313-win_arm64.whl", hash = "sha256:f978329f12ace9f3cb814a5e44d98bbeced2e36f633132bafa06d2d71332e33e", size = 2029550, upload-time = "2026-04-15T14:52:22.707Z" },
|
| 1915 |
+
{ url = "https://files.pythonhosted.org/packages/bd/2b/6793fe89ab66cb2d3d6e5768044eab80bba1d0fae8fd904d0a1574712e17/pydantic_core-2.46.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:9917cb61effac7ec0f448ef491ec7584526d2193be84ff981e85cbf18b68c42a", size = 2118110, upload-time = "2026-04-15T14:50:52.947Z" },
|
| 1916 |
+
{ url = "https://files.pythonhosted.org/packages/d2/87/e9a905ddfcc2fd7bd862b340c02be6ab1f827922822d425513635d0ac774/pydantic_core-2.46.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0e749679ca9f8a9d0bff95fb7f6b57bb53f2207fa42ffcc1ec86de7e0029ab89", size = 1948645, upload-time = "2026-04-15T14:51:55.577Z" },
|
| 1917 |
+
{ url = "https://files.pythonhosted.org/packages/15/23/26e67f86ed62ac9d6f7f3091ee5220bf14b5ac36fb811851d601365ef896/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2ecacee70941e233a2dad23f7796a06f86cc10cc2fbd1c97c7dd5b5a79ffa4f", size = 1977576, upload-time = "2026-04-15T14:49:37.58Z" },
|
| 1918 |
+
{ url = "https://files.pythonhosted.org/packages/b8/78/813c13c0de323d4de54ee2e6fdd69a0271c09ac8dd65a8a000931aa487a5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:647d0a2475b8ed471962eed92fa69145b864942f9c6daa10f95ac70676637ae7", size = 2060358, upload-time = "2026-04-15T14:51:40.087Z" },
|
| 1919 |
+
{ url = "https://files.pythonhosted.org/packages/09/5e/4caf2a15149271fbd2b4d968899a450853c800b85152abcf54b11531417f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac9cde61965b0697fce6e6cc372df9e1ad93734828aac36e9c1c42a22ad02897", size = 2235980, upload-time = "2026-04-15T14:50:34.535Z" },
|
| 1920 |
+
{ url = "https://files.pythonhosted.org/packages/c2/c1/a2cdabb5da6f5cb63a3558bcafffc20f790fa14ccffbefbfb1370fadc93f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a2eb0864085f8b641fb3f54a2fb35c58aff24b175b80bc8a945050fcde03204", size = 2316800, upload-time = "2026-04-15T14:52:46.999Z" },
|
| 1921 |
+
{ url = "https://files.pythonhosted.org/packages/76/fd/19d711e4e9331f9d77f222bffc202bf30ea0d74f6419046376bb82f244c8/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b83ce9fede4bc4fb649281d9857f06d30198b8f70168f18b987518d713111572", size = 2101762, upload-time = "2026-04-15T14:49:24.278Z" },
|
| 1922 |
+
{ url = "https://files.pythonhosted.org/packages/dc/64/ce95625448e1a4e219390a2923fd594f3fa368599c6b42ac71a5df7238c9/pydantic_core-2.46.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:cb33192753c60f269d2f4a1db8253c95b0df6e04f2989631a8cc1b0f4f6e2e92", size = 2167737, upload-time = "2026-04-15T14:50:41.637Z" },
|
| 1923 |
+
{ url = "https://files.pythonhosted.org/packages/ad/31/413572d03ca3e73b408f00f54418b91a8be6401451bc791eaeff210328e5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:96611d51f953f87e1ae97637c01ee596a08b7f494ea00a5afb67ea6547b9f53b", size = 2185658, upload-time = "2026-04-15T14:51:46.799Z" },
|
| 1924 |
+
{ url = "https://files.pythonhosted.org/packages/36/09/e4f581353bdf3f0c7de8a8b27afd14fc761da29d78146376315a6fedc487/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9b176fa55f9107db5e6c86099aa5bfd934f1d3ba6a8b43f714ddeebaed3f42b7", size = 2184154, upload-time = "2026-04-15T14:52:49.629Z" },
|
| 1925 |
+
{ url = "https://files.pythonhosted.org/packages/1a/a4/d0d52849933f5a4bf1ad9d8da612792f96469b37e286a269e3ee9c60bbb1/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:79a59f63a4ce4f3330e27e6f3ce281dd1099453b637350e97d7cf24c207cd120", size = 2332379, upload-time = "2026-04-15T14:49:55.009Z" },
|
| 1926 |
+
{ url = "https://files.pythonhosted.org/packages/30/93/25bfb08fdbef419f73290e573899ce938a327628c34e8f3a4bafeea30126/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:f200fce071808a385a314b7343f5e3688d7c45746be3d64dc71ee2d3e2a13268", size = 2377964, upload-time = "2026-04-15T14:51:59.649Z" },
|
| 1927 |
+
{ url = "https://files.pythonhosted.org/packages/15/36/b777766ff83fef1cf97473d64764cd44f38e0d8c269ed06faace9ae17666/pydantic_core-2.46.1-cp314-cp314-win32.whl", hash = "sha256:3a07eccc0559fb9acc26d55b16bf8ebecd7f237c74a9e2c5741367db4e6d8aff", size = 1976450, upload-time = "2026-04-15T14:51:57.665Z" },
|
| 1928 |
+
{ url = "https://files.pythonhosted.org/packages/7b/4b/4cd19d2437acfc18ca166db5a2067040334991eb862c4ecf2db098c91fbf/pydantic_core-2.46.1-cp314-cp314-win_amd64.whl", hash = "sha256:1706d270309ac7d071ffe393988c471363705feb3d009186e55d17786ada9622", size = 2067750, upload-time = "2026-04-15T14:49:38.941Z" },
|
| 1929 |
+
{ url = "https://files.pythonhosted.org/packages/7f/a0/490751c0ef8f5b27aae81731859aed1508e72c1a9b5774c6034269db773b/pydantic_core-2.46.1-cp314-cp314-win_arm64.whl", hash = "sha256:22d4e7457ade8af06528012f382bc994a97cc2ce6e119305a70b3deff1e409d6", size = 2021109, upload-time = "2026-04-15T14:50:27.728Z" },
|
| 1930 |
+
{ url = "https://files.pythonhosted.org/packages/36/3a/2a018968245fffd25d5f1972714121ad309ff2de19d80019ad93494844f9/pydantic_core-2.46.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:607ff9db0b7e2012e7eef78465e69f9a0d7d1c3e7c6a84cf0c4011db0fcc3feb", size = 2111548, upload-time = "2026-04-15T14:52:08.273Z" },
|
| 1931 |
+
{ url = "https://files.pythonhosted.org/packages/77/5b/4103b6192213217e874e764e5467d2ff10d8873c1147d01fa432ac281880/pydantic_core-2.46.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8cda3eacaea13bd02a1bea7e457cc9fc30b91c5a91245cef9b215140f80dd78c", size = 1926745, upload-time = "2026-04-15T14:50:03.045Z" },
|
| 1932 |
+
{ url = "https://files.pythonhosted.org/packages/c3/70/602a667cf4be4bec6c3334512b12ae4ea79ce9bfe41dc51be1fd34434453/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9493279cdc7997fe19e5ed9b41f30cbc3806bd4722adb402fedb6f6d41bd72a", size = 1965922, upload-time = "2026-04-15T14:51:12.555Z" },
|
| 1933 |
+
{ url = "https://files.pythonhosted.org/packages/a9/24/06a89ce5323e755b7d2812189f9706b87aaebe49b34d247b380502f7992c/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3644e5e10059999202355b6c6616e624909e23773717d8f76deb8a6e2a72328c", size = 2043221, upload-time = "2026-04-15T14:51:18.995Z" },
|
| 1934 |
+
{ url = "https://files.pythonhosted.org/packages/2c/6e/b1d9ad907d9d76964903903349fd2e33c87db4b993cc44713edcad0fc488/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ad6c9de57683e26c92730991960c0c3571b8053263b042de2d3e105930b2767", size = 2243655, upload-time = "2026-04-15T14:50:10.718Z" },
|
| 1935 |
+
{ url = "https://files.pythonhosted.org/packages/ef/73/787abfaad51174641abb04c8aa125322279b40ad7ce23c495f5a69f76554/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:557ebaa27c7617e7088002318c679a8ce685fa048523417cd1ca52b7f516d955", size = 2295976, upload-time = "2026-04-15T14:53:09.694Z" },
|
| 1936 |
+
{ url = "https://files.pythonhosted.org/packages/56/0b/b7c5a631b6d5153d4a1ea4923b139aea256dc3bd99c8e6c7b312c7733146/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cd37e39b22b796ba0298fe81e9421dd7b65f97acfbb0fb19b33ffdda7b9a7b4", size = 2103439, upload-time = "2026-04-15T14:50:08.32Z" },
|
| 1937 |
+
{ url = "https://files.pythonhosted.org/packages/2a/3f/952ee470df69e5674cdec1cbde22331adf643b5cc2ff79f4292d80146ee4/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:6689443b59714992e67d62505cdd2f952d6cf1c14cc9fd9aeec6719befc6f23b", size = 2132871, upload-time = "2026-04-15T14:50:24.445Z" },
|
| 1938 |
+
{ url = "https://files.pythonhosted.org/packages/e3/8b/1dea3b1e683c60c77a60f710215f90f486755962aa8939dbcb7c0f975ac3/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f32c41ca1e3456b5dd691827b7c1433c12d5f0058cc186afbb3615bc07d97b8", size = 2168658, upload-time = "2026-04-15T14:52:24.897Z" },
|
| 1939 |
+
{ url = "https://files.pythonhosted.org/packages/67/97/32ae283810910d274d5ba9f48f856f5f2f612410b78b249f302d297816f5/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:88cd1355578852db83954dc36e4f58f299646916da976147c20cf6892ba5dc43", size = 2171184, upload-time = "2026-04-15T14:52:34.854Z" },
|
| 1940 |
+
{ url = "https://files.pythonhosted.org/packages/a2/57/c9a855527fe56c2072070640221f53095b0b19eaf651f3c77643c9cabbe3/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:a170fefdb068279a473cc9d34848b85e61d68bfcc2668415b172c5dfc6f213bf", size = 2316573, upload-time = "2026-04-15T14:52:12.871Z" },
|
| 1941 |
+
{ url = "https://files.pythonhosted.org/packages/37/b3/14c39ffc7399819c5448007c7bcb4e6da5669850cfb7dcbb727594290b48/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:556a63ff1006934dba4eed7ea31b58274c227e29298ec398e4275eda4b905e95", size = 2378340, upload-time = "2026-04-15T14:51:02.619Z" },
|
| 1942 |
+
{ url = "https://files.pythonhosted.org/packages/01/55/a37461fbb29c053ea4e62cfc5c2d56425cb5efbef8316e63f6d84ae45718/pydantic_core-2.46.1-cp314-cp314t-win32.whl", hash = "sha256:3b146d8336a995f7d7da6d36e4a779b7e7dff2719ac00a1eb8bd3ded00bec87b", size = 1960843, upload-time = "2026-04-15T14:52:06.103Z" },
|
| 1943 |
+
{ url = "https://files.pythonhosted.org/packages/22/d7/97e1221197d17a27f768363f87ec061519eeeed15bbd315d2e9d1429ff03/pydantic_core-2.46.1-cp314-cp314t-win_amd64.whl", hash = "sha256:f1bc856c958e6fe9ec071e210afe6feb695f2e2e81fd8d2b102f558d364c4c17", size = 2048696, upload-time = "2026-04-15T14:52:52.154Z" },
|
| 1944 |
+
{ url = "https://files.pythonhosted.org/packages/19/d5/4eac95255c7d35094b46a32ec1e4d80eac94729c694726ee1d69948bd5f0/pydantic_core-2.46.1-cp314-cp314t-win_arm64.whl", hash = "sha256:21a5bfd8a1aa4de60494cdf66b0c912b1495f26a8899896040021fbd6038d989", size = 2022343, upload-time = "2026-04-15T14:49:49.036Z" },
|
| 1945 |
]

[[package]]
name = "pydantic-settings"
+version = "2.13.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "pydantic" },
    { name = "python-dotenv" },
    { name = "typing-inspection" },
]
+sdist = { url = "https://files.pythonhosted.org/packages/96/a1/ae859ffac5a3338a66b74c5e29e244fd3a3cc483c89feaf9f56c39898d75/pydantic_settings-2.13.0.tar.gz", hash = "sha256:95d875514610e8595672800a5c40b073e99e4aae467fa7c8f9c263061ea2e1fe", size = 222450, upload-time = "2026-02-15T12:11:23.476Z" }
wheels = [
+    { url = "https://files.pythonhosted.org/packages/b0/1a/dd1b9d7e627486cf8e7523d09b70010e05a4bc41414f4ae6ce184cf0afb6/pydantic_settings-2.13.0-py3-none-any.whl", hash = "sha256:d67b576fff39cd086b595441bf9c75d4193ca9c0ed643b90360694d0f1240246", size = 58429, upload-time = "2026-02-15T12:11:22.133Z" },
]

[[package]]
...

[[package]]
name = "pyperclip"
+version = "1.9.0"
source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/30/23/2f0a3efc4d6a32f3b63cdff36cd398d9701d26cda58e3ab97ac79fb5e60d/pyperclip-1.9.0.tar.gz", hash = "sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310", size = 20961, upload-time = "2024-06-18T20:38:48.401Z" }

[[package]]
name = "pytest"
...

[[package]]
name = "slop-farmer"
+version = "0.1.1"
source = { editable = "." }
dependencies = [
    { name = "duckdb" },
...

[package.metadata]
requires-dist = [
    { name = "duckdb", specifier = ">=1.2.2" },
+    { name = "fast-agent-mcp", specifier = ">=0.6.17" },
    { name = "fast-agent-mcp", marker = "python_full_version >= '3.13.5' and extra == 'llm'", specifier = ">=0.6.16" },
    { name = "fastapi", specifier = ">=0.115.0" },
    { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" },