evalstate (HF Staff) committed
Commit d09c394 · verified · 1 Parent(s): 41a8c52

Deploy OpenClaw PR API
pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "slop-farmer"
-version = "0.1.0"
+version = "0.1.1"
 description = "GitHub-to-Hub data pipeline for transformers issue and PR triage research."
 readme = "README.md"
 requires-python = ">=3.13.5"
@@ -60,13 +60,6 @@ select = [
 ]
 ignore = ["E501"]
 
-[tool.slop-farmer.analyze]
-output-dir = "eval_data"
-hf-repo-id = "evalstate/transformers-pr"
-ranking-backend = "hybrid"
-model = "gpt-5.4-mini"
-max-clusters = 10
-
 [tool.slop-farmer.dashboard-data]
 output-dir = "web/public/data"
 window-days = 14
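
Note on the pyproject change above: with the [tool.slop-farmer.analyze] table dropped, analyze no longer ships packaged defaults; they now come from CLI flags or a user-supplied config. A minimal hedged sketch of how one might confirm this, assuming the package is importable and that command_defaults (the resolver this repo already uses) reads these tables:

    # Hedged sketch: "analyze" defaults are no longer seeded by pyproject.toml.
    from slop_farmer.app_config import command_defaults

    defaults = command_defaults("analyze")
    # Expect the removed keys (e.g. "hf-repo-id", "model") to be absent or None now.
    print(defaults.get("hf-repo-id"), defaults.get("model"))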
src/slop_farmer/__init__.py CHANGED
@@ -1,3 +1,3 @@
 __all__ = ["__version__"]
 
-__version__ = "0.1.0"
+__version__ = "0.1.1"
src/slop_farmer/app/cli.py CHANGED
@@ -13,6 +13,8 @@ from slop_farmer.config import (
     AnalysisOptions,
     CheckpointImportOptions,
     DashboardDataOptions,
+    DatasetRefreshOptions,
+    DatasetStatusOptions,
     DeployDashboardOptions,
     FullPipelineOptions,
     MarkdownReportOptions,
@@ -41,6 +43,7 @@ def build_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser:
     subparsers = parser.add_subparsers(dest="command", required=True)
 
     _add_scrape_parser(subparsers, defaults["scrape"])
+    _add_refresh_dataset_parser(subparsers, defaults["refresh-dataset"])
     _add_analyze_parser(subparsers, defaults["analyze"])
     _add_pr_scope_parser(subparsers, defaults["pr-scope"])
     _add_checkpoint_import_parser(subparsers, defaults["import-hf-checkpoint"])
@@ -52,6 +55,7 @@ def build_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser:
     _add_dashboard_data_parser(subparsers, defaults["dashboard-data"])
     _add_publish_snapshot_parser(subparsers, defaults["publish-snapshot"])
     _add_deploy_dashboard_parser(subparsers, defaults["deploy-dashboard"])
+    _add_dataset_status_parser(subparsers, defaults["dataset-status"])
     _add_full_pipeline_parser(subparsers, defaults["full-pipeline"])
     return parser
 
@@ -59,6 +63,7 @@ def build_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser:
 def _load_parser_defaults(config_path: Path | None) -> dict[str, dict[str, Any]]:
     commands = (
         "scrape",
+        "refresh-dataset",
         "analyze",
         "import-hf-checkpoint",
         "pr-scope",
@@ -68,6 +73,7 @@ def _load_parser_defaults(config_path: Path | None) -> dict[str, dict[str, Any]]
         "dashboard-data",
         "publish-snapshot",
         "deploy-dashboard",
+        "dataset-status",
         "full-pipeline",
     )
     return {command: command_defaults(command, config_path=config_path) for command in commands}
@@ -184,6 +190,80 @@ def _add_scrape_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     )
 
 
+def _add_refresh_dataset_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
+    refresh = subparsers.add_parser(
+        "refresh-dataset",
+        help="Refresh the canonical Hugging Face dataset repo from remote watermark state.",
+    )
+    refresh.add_argument(
+        "--repo",
+        default=defaults.get("repo", "huggingface/transformers"),
+        help="GitHub repository in owner/name form.",
+    )
+    refresh.add_argument(
+        "--hf-repo-id",
+        default=defaults.get("hf-repo-id"),
+        required=defaults.get("hf-repo-id") is None,
+        help="Canonical Hugging Face dataset repo id to refresh.",
+    )
+    refresh.add_argument("--max-issues", type=int, default=defaults.get("max-issues"))
+    refresh.add_argument("--max-prs", type=int, default=defaults.get("max-prs"))
+    refresh.add_argument(
+        "--max-issue-comments", type=int, default=defaults.get("max-issue-comments")
+    )
+    refresh.add_argument(
+        "--max-reviews-per-pr", type=int, default=defaults.get("max-reviews-per-pr")
+    )
+    refresh.add_argument(
+        "--max-review-comments-per-pr",
+        type=int,
+        default=defaults.get("max-review-comments-per-pr"),
+    )
+    refresh.add_argument(
+        "--fetch-timeline",
+        action="store_true",
+        default=bool(defaults.get("fetch-timeline", False)),
+    )
+    refresh.add_argument(
+        "--new-contributor-report",
+        dest="new_contributor_report",
+        action="store_true",
+        default=bool(defaults.get("new-contributor-report", True)),
+    )
+    refresh.add_argument(
+        "--no-new-contributor-report",
+        dest="new_contributor_report",
+        action="store_false",
+    )
+    refresh.add_argument(
+        "--new-contributor-window-days",
+        type=int,
+        default=int(defaults.get("new-contributor-window-days", 42)),
+    )
+    refresh.add_argument(
+        "--new-contributor-max-authors",
+        type=int,
+        default=int(defaults.get("new-contributor-max-authors", 25)),
+    )
+    refresh.add_argument("--http-timeout", type=int, default=300)
+    refresh.add_argument("--http-max-retries", type=int, default=8)
+    refresh.add_argument("--checkpoint-every-comments", type=int, default=1000)
+    refresh.add_argument("--checkpoint-every-prs", type=int, default=25)
+    refresh.add_argument(
+        "--private-hf-repo",
+        dest="private_hf_repo",
+        action="store_true",
+        default=bool(defaults.get("private-hf-repo", False)),
+        help="Create the target dataset repo as private if needed.",
+    )
+    refresh.add_argument(
+        "--private",
+        dest="private_hf_repo",
+        action="store_true",
+        help=argparse.SUPPRESS,
+    )
+
+
 def _add_analyze_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     analyze = subparsers.add_parser(
         "analyze", help="Analyze a local snapshot and write a shortlist JSON report."
@@ -637,6 +717,61 @@ def _add_pr_search_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     status.add_argument("--repo", help="Optional repo override.")
     status.add_argument("--json", action="store_true", help="Emit JSON.")
 
+    contributor = pr_search_subparsers.add_parser(
+        "contributor", help="Show indexed contributor summary for one author login."
+    )
+    contributor.add_argument("login", help="GitHub author login to query.")
+    contributor.add_argument(
+        "--db",
+        type=Path,
+        default=Path(defaults["db"]) if defaults.get("db") else None,
+        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
+    )
+    contributor.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path(defaults.get("output-dir", "data")),
+    )
+    contributor.add_argument("--repo", help="Optional repo override.")
+    contributor.add_argument("--json", action="store_true", help="Emit JSON.")
+
+    contributor_prs = pr_search_subparsers.add_parser(
+        "contributor-prs", help="List indexed PRs for one contributor login."
+    )
+    contributor_prs.add_argument("login", help="GitHub author login to query.")
+    contributor_prs.add_argument(
+        "--db",
+        type=Path,
+        default=Path(defaults["db"]) if defaults.get("db") else None,
+        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
+    )
+    contributor_prs.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path(defaults.get("output-dir", "data")),
+    )
+    contributor_prs.add_argument("--repo", help="Optional repo override.")
+    contributor_prs.add_argument("--limit", type=int, default=20, help="Maximum rows to show.")
+    contributor_prs.add_argument("--json", action="store_true", help="Emit JSON.")
+
+    pr_contributor = pr_search_subparsers.add_parser(
+        "pr-contributor", help="Show contributor summary for the author of one indexed PR."
+    )
+    pr_contributor.add_argument("pr_number", type=int, help="Pull request number to query.")
+    pr_contributor.add_argument(
+        "--db",
+        type=Path,
+        default=Path(defaults["db"]) if defaults.get("db") else None,
+        help="DuckDB file path. Defaults to <output-dir>/state/pr-search.duckdb.",
+    )
+    pr_contributor.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path(defaults.get("output-dir", "data")),
+    )
+    pr_contributor.add_argument("--repo", help="Optional repo override.")
+    pr_contributor.add_argument("--json", action="store_true", help="Emit JSON.")
+
 
 def _add_new_contributor_report_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
     new_contributor = subparsers.add_parser(
  new_contributor.add_argument(
795
  "--json-output", type=Path, help="Optional JSON output path. Defaults next to the snapshot."
796
  )
797
+ new_contributor.add_argument(
798
+ "--hf-repo-id",
799
+ default=defaults.get("hf-repo-id"),
800
+ help="Analyze a Hugging Face dataset repo by materializing its parquet export locally.",
801
+ )
802
+ new_contributor.add_argument(
803
+ "--hf-revision",
804
+ default=defaults.get("hf-revision"),
805
+ help="Optional Hub revision for metadata and README download.",
806
+ )
807
+ new_contributor.add_argument(
808
+ "--hf-materialize-dir",
809
+ type=Path,
810
+ default=Path(defaults["hf-materialize-dir"])
811
+ if defaults.get("hf-materialize-dir")
812
+ else None,
813
+ help="Optional local directory used when materializing an HF dataset snapshot.",
814
+ )
815
  new_contributor.add_argument(
816
  "--window-days",
817
  type=int,
 
@@ -702,6 +855,24 @@ def _add_dashboard_data_parser(subparsers: Any, defaults: dict[str, Any]) -> Non
         type=Path,
         help="Optional PR scope cluster JSON. Defaults to pr-scope-clusters.json in the snapshot.",
     )
+    dashboard.add_argument(
+        "--hf-repo-id",
+        default=defaults.get("hf-repo-id"),
+        help="Analyze a Hugging Face dataset repo by materializing its parquet export locally.",
+    )
+    dashboard.add_argument(
+        "--hf-revision",
+        default=defaults.get("hf-revision"),
+        help="Optional Hub revision for metadata and README download.",
+    )
+    dashboard.add_argument(
+        "--hf-materialize-dir",
+        type=Path,
+        default=Path(defaults["hf-materialize-dir"])
+        if defaults.get("hf-materialize-dir")
+        else None,
+        help="Optional local directory used when materializing an HF dataset snapshot.",
+    )
     dashboard.add_argument(
         "--window-days",
         type=int,
  deploy_dashboard.add_argument(
933
  "--contributors-input", type=Path, help="Optional contributor report JSON override."
934
  )
935
+ deploy_dashboard.add_argument(
936
+ "--hf-repo-id",
937
+ default=defaults.get("hf-repo-id"),
938
+ help="Materialize a Hugging Face dataset repo instead of using the latest local snapshot.",
939
+ )
940
+ deploy_dashboard.add_argument(
941
+ "--hf-revision",
942
+ default=defaults.get("hf-revision"),
943
+ help="Optional Hub revision for metadata and README download.",
944
+ )
945
+ deploy_dashboard.add_argument(
946
+ "--hf-materialize-dir",
947
+ type=Path,
948
+ default=Path(defaults["hf-materialize-dir"])
949
+ if defaults.get("hf-materialize-dir")
950
+ else None,
951
+ help="Optional local directory used when materializing an HF dataset snapshot.",
952
+ )
953
  deploy_dashboard.add_argument(
954
  "--refresh-contributors",
955
  action="store_true",
 
1006
  )
1007
 
1008
 
1009
+ def _add_dataset_status_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
1010
+ dataset_status = subparsers.add_parser(
1011
+ "dataset-status",
1012
+ help="Inspect canonical dataset freshness and the local latest pointer.",
1013
+ )
1014
+ dataset_status.add_argument("--repo", default=defaults.get("repo"))
1015
+ dataset_status.add_argument(
1016
+ "--output-dir",
1017
+ type=Path,
1018
+ default=Path(defaults.get("output-dir", "data")),
1019
+ help="Local workspace root containing snapshots/latest.json.",
1020
+ )
1021
+ dataset_status.add_argument(
1022
+ "--hf-repo-id",
1023
+ default=defaults.get("hf-repo-id"),
1024
+ help="Canonical Hugging Face dataset repo id to inspect.",
1025
+ )
1026
+ dataset_status.add_argument(
1027
+ "--hf-revision",
1028
+ default=defaults.get("hf-revision"),
1029
+ help="Optional Hub revision for metadata and README download.",
1030
+ )
1031
+ dataset_status.add_argument("--json", action="store_true", help="Emit machine-readable JSON.")
1032
+
1033
+
1034
  def _add_full_pipeline_parser(subparsers: Any, defaults: dict[str, Any]) -> None:
1035
  full_pipeline = subparsers.add_parser(
1036
  "full-pipeline",
 
1147
  print(run_pipeline(options))
1148
 
1149
 
1150
+ def _run_refresh_dataset(args: argparse.Namespace, config_path: Path | None) -> None:
1151
+ del config_path
1152
+ from slop_farmer.app.dataset_refresh import run_dataset_refresh
1153
+
1154
+ result = run_dataset_refresh(
1155
+ DatasetRefreshOptions(
1156
+ repo=RepoRef.parse(args.repo),
1157
+ hf_repo_id=args.hf_repo_id,
1158
+ private_hf_repo=args.private_hf_repo,
1159
+ max_issues=args.max_issues,
1160
+ max_prs=args.max_prs,
1161
+ max_issue_comments=args.max_issue_comments,
1162
+ max_reviews_per_pr=args.max_reviews_per_pr,
1163
+ max_review_comments_per_pr=args.max_review_comments_per_pr,
1164
+ fetch_timeline=args.fetch_timeline,
1165
+ new_contributor_report=args.new_contributor_report,
1166
+ new_contributor_window_days=args.new_contributor_window_days,
1167
+ new_contributor_max_authors=args.new_contributor_max_authors,
1168
+ http_timeout=args.http_timeout,
1169
+ http_max_retries=args.http_max_retries,
1170
+ checkpoint_every_comments=args.checkpoint_every_comments,
1171
+ checkpoint_every_prs=args.checkpoint_every_prs,
1172
+ )
1173
+ )
1174
+ print(json.dumps(result, indent=2))
1175
+
1176
+
1177
  def _run_analyze(args: argparse.Namespace, config_path: Path | None) -> None:
1178
  from slop_farmer.reports.analysis import run_analysis
1179
 
 
1282
  explain_pr_search_pair,
1283
  format_pr_search_candidate_clusters,
1284
  format_pr_search_cluster,
1285
+ format_pr_search_contributor,
1286
+ format_pr_search_contributor_pulls,
1287
  format_pr_search_pair,
1288
  format_pr_search_probe,
1289
+ format_pr_search_pull_contributor,
1290
  format_pr_search_similar,
1291
  format_pr_search_status,
1292
  get_pr_search_candidate_clusters,
1293
  get_pr_search_cluster,
1294
+ get_pr_search_contributor,
1295
+ get_pr_search_contributor_pulls,
1296
+ get_pr_search_pull_contributor,
1297
  get_pr_search_similar,
1298
  get_pr_search_status,
1299
  probe_pr_search_github,
 
1387
  print(json.dumps(result, indent=2) if args.json else format_pr_search_status(result))
1388
  return
1389
 
1390
+ if args.pr_search_command == "contributor":
1391
+ result = get_pr_search_contributor(db_path, author_login=args.login, repo=args.repo)
1392
+ print(json.dumps(result, indent=2) if args.json else format_pr_search_contributor(result))
1393
+ return
1394
+
1395
+ if args.pr_search_command == "contributor-prs":
1396
+ result = get_pr_search_contributor_pulls(
1397
+ db_path,
1398
+ author_login=args.login,
1399
+ repo=args.repo,
1400
+ limit=args.limit,
1401
+ )
1402
+ print(
1403
+ json.dumps(result, indent=2)
1404
+ if args.json
1405
+ else format_pr_search_contributor_pulls(result)
1406
+ )
1407
+ return
1408
+
1409
+ if args.pr_search_command == "pr-contributor":
1410
+ result = get_pr_search_pull_contributor(
1411
+ db_path,
1412
+ pr_number=args.pr_number,
1413
+ repo=args.repo,
1414
+ )
1415
+ print(
1416
+ json.dumps(result, indent=2) if args.json else format_pr_search_pull_contributor(result)
1417
+ )
1418
+ return
1419
+
1420
  raise ValueError(f"Unsupported pr-search command: {args.pr_search_command}")
1421
 
1422
 
 
@@ -1181,6 +1458,7 @@ def _run_new_contributor_report(args: argparse.Namespace, config_path: Path | No
     del config_path
     from slop_farmer.reports.new_contributor_report import run_new_contributor_report
 
+    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
     print(
         run_new_contributor_report(
             NewContributorReportOptions(
@@ -1188,6 +1466,9 @@ def _run_new_contributor_report(args: argparse.Namespace, config_path: Path | No
                 output_dir=args.output_dir,
                 output=args.output,
                 json_output=args.json_output,
+                hf_repo_id=hf_repo_id,
+                hf_revision=hf_revision,
+                hf_materialize_dir=hf_materialize_dir,
                 window_days=args.window_days,
                 max_authors=args.max_authors,
             )
@@ -1199,6 +1480,7 @@ def _run_dashboard_data(args: argparse.Namespace, config_path: Path | None) -> N
     from slop_farmer.reports.dashboard import run_dashboard_data
 
     dashboard_defaults = command_defaults("dashboard-data", config_path=config_path)
+    hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
     print(
         run_dashboard_data(
             DashboardDataOptions(
@@ -1207,6 +1489,9 @@ def _run_dashboard_data(args: argparse.Namespace, config_path: Path | None) -> N
                 analysis_input=args.analysis_input,
                 contributors_input=args.contributors_input,
                 pr_scope_input=args.pr_scope_input,
+                hf_repo_id=hf_repo_id,
+                hf_revision=hf_revision,
+                hf_materialize_dir=hf_materialize_dir,
                 window_days=args.window_days,
                 snapshot_root=(
                     Path(dashboard_defaults["snapshot-root"])
1508
  from slop_farmer.app.deploy import run_deploy_dashboard
1509
 
1510
+ hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args)
1511
  run_deploy_dashboard(
1512
  DeployDashboardOptions(
1513
  pipeline_data_dir=args.pipeline_data_dir,
 
1515
  snapshot_dir=args.snapshot_dir,
1516
  analysis_input=args.analysis_input,
1517
  contributors_input=args.contributors_input,
1518
+ hf_repo_id=hf_repo_id,
1519
+ hf_revision=hf_revision,
1520
+ hf_materialize_dir=hf_materialize_dir,
1521
  refresh_contributors=args.refresh_contributors,
1522
  dashboard_window_days=args.dashboard_window_days,
1523
  contributor_window_days=args.contributor_window_days,
 
1536
  )
1537
 
1538
 
1539
+ def _run_dataset_status(args: argparse.Namespace, config_path: Path | None) -> None:
1540
+ del config_path
1541
+ from slop_farmer.app.dataset_status import format_dataset_status, get_dataset_status
1542
+
1543
+ result = get_dataset_status(
1544
+ DatasetStatusOptions(
1545
+ repo=args.repo,
1546
+ output_dir=args.output_dir,
1547
+ hf_repo_id=args.hf_repo_id,
1548
+ hf_revision=args.hf_revision,
1549
+ json_output=args.json,
1550
+ )
1551
+ )
1552
+ print(json.dumps(result, indent=2) if args.json else format_dataset_status(result))
1553
+
1554
+
1555
  def _run_publish_snapshot(args: argparse.Namespace, config_path: Path | None) -> None:
1556
  del config_path
1557
  from slop_farmer.app.publish import run_publish_snapshot
 
1601
 
1602
  handlers: dict[str, CommandHandler] = {
1603
  "scrape": _run_scrape,
1604
+ "refresh-dataset": _run_refresh_dataset,
1605
  "analyze": _run_analyze,
1606
  "markdown-report": _run_markdown_report,
1607
  "duplicate-prs": _run_duplicate_prs,
 
1612
  "new-contributor-report": _run_new_contributor_report,
1613
  "dashboard-data": _run_dashboard_data,
1614
  "deploy-dashboard": _run_deploy_dashboard,
1615
+ "dataset-status": _run_dataset_status,
1616
  "publish-snapshot": _run_publish_snapshot,
1617
  "full-pipeline": _run_full_pipeline,
1618
  }
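
For orientation, the new subcommands plug into the same argparse dispatch as the existing ones. A minimal hedged smoke test of the wiring added above (module path per this diff; the dataset repo id is just an illustrative value):

    # Hedged sketch: exercise the subparsers that build_parser() now registers.
    from slop_farmer.app.cli import build_parser

    parser = build_parser()
    args = parser.parse_args(
        ["dataset-status", "--hf-repo-id", "evalstate/transformers-pr", "--json"]
    )
    # The dispatch table in main() would route this to _run_dataset_status.
    assert args.command == "dataset-status" and args.json is True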
src/slop_farmer/app/dataset_refresh.py ADDED
@@ -0,0 +1,1021 @@
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import shutil
+import tempfile
+import time
+from collections import defaultdict
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from huggingface_hub import HfApi
+
+from slop_farmer.app_config import command_defaults, extract_cli_config_path
+from slop_farmer.config import (
+    DatasetRefreshOptions,
+    NewContributorReportOptions,
+    RepoRef,
+    resolve_github_token,
+)
+from slop_farmer.data.dataset_card import build_hf_dataset_card
+from slop_farmer.data.github_api import GitHubClient
+from slop_farmer.data.hf_dataset_repo import (
+    list_remote_paths,
+    load_remote_file,
+    load_remote_json_file,
+    stable_snapshot_candidates,
+)
+from slop_farmer.data.links import build_pr_duplicate_candidate_rows, build_text_link_rows
+from slop_farmer.data.normalize import (
+    issue_url_to_number,
+    normalize_comment,
+    normalize_issue,
+    normalize_pr_diff,
+    normalize_pr_file,
+    normalize_pull_request,
+    normalize_review,
+    normalize_review_comment,
+    normalize_timeline_event,
+)
+from slop_farmer.data.parquet_io import (
+    SCHEMAS,
+    read_parquet_rows,
+    write_json,
+    write_parquet,
+    write_text,
+)
+from slop_farmer.reports.new_contributor_report import run_new_contributor_report
+
+PRIMARY_KEYS: dict[str, tuple[str, ...]] = {
+    "issues": ("github_id",),
+    "pull_requests": ("github_id",),
+    "comments": ("github_id",),
+    "reviews": ("github_id",),
+    "review_comments": ("github_id",),
+    "pr_files": ("repo", "pull_request_number", "filename"),
+    "pr_diffs": ("repo", "pull_request_number"),
+    "links": (
+        "repo",
+        "source_type",
+        "source_number",
+        "source_github_id",
+        "target_owner",
+        "target_repo",
+        "target_number",
+        "link_type",
+        "link_origin",
+    ),
+    "events": (
+        "repo",
+        "parent_kind",
+        "parent_number",
+        "event",
+        "created_at",
+        "actor_login",
+        "source_issue_number",
+        "source_issue_url",
+        "commit_id",
+        "label_name",
+    ),
+}
+CHECKPOINT_PREFIXES = ("_checkpoints", "checkpoints")
+
+
+def log(message: str) -> None:
+    stamp = datetime.now(tz=UTC).strftime("%H:%M:%SZ")
+    print(f"[{stamp}] {message}", flush=True)
+
+
+def iso_now() -> str:
+    return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+
+
+def snapshot_id() -> str:
+    return datetime.now(tz=UTC).strftime("%Y%m%dT%H%M%SZ")
+
+
+def row_key(row: dict[str, Any], fields: tuple[str, ...]) -> str:
+    return json.dumps([row.get(field) for field in fields], default=str)
+
+
+def merge_rows(
+    table_name: str,
+    previous_rows: list[dict[str, Any]],
+    delta_rows: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    if table_name == "pr_files":
+        refreshed_prs = {
+            (row.get("repo"), row.get("pull_request_number"))
+            for row in delta_rows
+            if row.get("pull_request_number") is not None
+        }
+        previous_rows = [
+            row
+            for row in previous_rows
+            if (row.get("repo"), row.get("pull_request_number")) not in refreshed_prs
+        ]
+    merged: dict[str, dict[str, Any]] = {}
+    for row in previous_rows:
+        merged[row_key(row, PRIMARY_KEYS[table_name])] = row
+    for row in delta_rows:
+        merged[row_key(row, PRIMARY_KEYS[table_name])] = row
+    return list(merged.values())
+
+
+def checkpoint_dirs(remote_paths: set[str]) -> list[tuple[str, str]]:
+    by_snapshot_id: dict[str, str] = {}
+    for path in remote_paths:
+        parts = path.split("/")
+        if len(parts) < 3 or parts[0] not in CHECKPOINT_PREFIXES:
+            continue
+        snapshot_key = parts[1]
+        prefix = parts[0]
+        current = by_snapshot_id.get(snapshot_key)
+        if current is None or current.startswith("checkpoints/"):
+            by_snapshot_id[snapshot_key] = f"{prefix}/{snapshot_key}"
+    return [(sid, by_snapshot_id[sid]) for sid in sorted(by_snapshot_id)]
+
+
+def copy_remote_file_from_candidates(
+    api: HfApi,
+    repo_id: str,
+    local_dir: Path,
+    destination: Path,
+    candidate_paths: list[str],
+) -> bool:
+    for candidate in candidate_paths:
+        downloaded = load_remote_file(api, repo_id, candidate, local_dir)
+        if downloaded is None:
+            continue
+        destination.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(downloaded, destination)
+        return True
+    return False
+
+
+def materialize_previous_snapshot_dir(
+    *,
+    api: Any,
+    repo_id: str,
+    previous_root: Path,
+    stable_snapshot_id: str | None,
+    latest_pointer: dict[str, Any] | None,
+    previous_tables: dict[str, list[dict[str, Any]]],
+) -> Path | None:
+    if not stable_snapshot_id:
+        return None
+    snapshot_dir = (previous_root / "materialized-snapshots" / stable_snapshot_id).resolve()
+    snapshot_dir.mkdir(parents=True, exist_ok=True)
+    for table_name, rows in previous_tables.items():
+        write_parquet(rows, snapshot_dir / f"{table_name}.parquet", table_name)
+    for artifact_name in (
+        "manifest.json",
+        "new_contributors.parquet",
+        "new-contributors-report.json",
+        "new-contributors-report.md",
+    ):
+        copy_remote_file_from_candidates(
+            api,
+            repo_id,
+            previous_root,
+            snapshot_dir / artifact_name,
+            stable_snapshot_candidates(latest_pointer, artifact_name),
+        )
+    return snapshot_dir
+
+
+def load_remote_table_from_candidates(
+    api: HfApi,
+    repo_id: str,
+    table_name: str,
+    local_dir: Path,
+    candidate_paths: list[str],
+) -> list[dict[str, Any]]:
+    for candidate in candidate_paths:
+        downloaded = load_remote_file(api, repo_id, candidate, local_dir)
+        if downloaded is not None:
+            return read_parquet_rows(downloaded)
+    return []
+
+
+def viewer_comment_rows(
+    comments: list[dict[str, Any]],
+    pull_requests: list[dict[str, Any]],
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    pr_numbers = {int(row["number"]) for row in pull_requests if row.get("number") is not None}
+    issue_comments: list[dict[str, Any]] = []
+    pr_comments: list[dict[str, Any]] = []
+    for row in comments:
+        parent_number = row.get("parent_number")
+        parent_kind = row.get("parent_kind")
+        if parent_kind == "pull_request" or parent_number in pr_numbers:
+            pr_comments.append(row)
+        else:
+            issue_comments.append(row)
+    return issue_comments, pr_comments
+
+
+def upload_delta_checkpoint(
+    *,
+    api: HfApi,
+    repo_id: str,
+    work_dir: Path,
+    repo_slug: str,
+    sid: str,
+    stage: str,
+    delta_tables: dict[str, list[dict[str, Any]]],
+    progress: dict[str, Any],
+) -> None:
+    checkpoint_root = work_dir / "checkpoint_upload"
+    if checkpoint_root.exists():
+        shutil.rmtree(checkpoint_root)
+    checkpoint_root.mkdir(parents=True, exist_ok=True)
+
+    for table_name, rows in delta_tables.items():
+        write_parquet(rows, checkpoint_root / f"{table_name}.parquet", table_name)
+    write_json(
+        {"repo": repo_slug, "snapshot_id": sid, **progress}, checkpoint_root / "progress.json"
+    )
+    write_json(
+        {"repo": repo_slug, "snapshot_id": sid, **progress},
+        checkpoint_root / "state" / "in_progress.json",
+    )
+    api.upload_folder(
+        folder_path=str(checkpoint_root),
+        path_in_repo=f"_checkpoints/{sid}",
+        repo_id=repo_id,
+        repo_type="dataset",
+        commit_message=f"Checkpoint {sid} ({stage})",
+    )
+
+
+def remaining_limit(limit: int | None, used: int) -> int | None:
+    if limit is None:
+        return None
+    return max(limit - used, 0)
+
+
+def _build_argument_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser:
+    defaults = command_defaults("refresh-dataset", config_path=config_path)
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", type=Path, help="Optional repo config file.")
+    parser.add_argument("--repo", default=defaults.get("repo", "huggingface/transformers"))
+    parser.add_argument("--hf-repo-id", default=defaults.get("hf-repo-id"))
+    parser.add_argument("--max-issues", type=int, default=defaults.get("max-issues"))
+    parser.add_argument("--max-prs", type=int, default=defaults.get("max-prs"))
+    parser.add_argument(
+        "--max-issue-comments",
+        type=int,
+        default=defaults.get("max-issue-comments"),
+    )
+    parser.add_argument(
+        "--max-reviews-per-pr",
+        type=int,
+        default=defaults.get("max-reviews-per-pr"),
+    )
+    parser.add_argument(
+        "--max-review-comments-per-pr",
+        type=int,
+        default=defaults.get("max-review-comments-per-pr"),
+    )
+    parser.add_argument(
+        "--fetch-timeline",
+        action="store_true",
+        default=bool(defaults.get("fetch-timeline", False)),
+    )
+    parser.add_argument(
+        "--new-contributor-report",
+        dest="new_contributor_report",
+        action="store_true",
+        default=bool(defaults.get("new-contributor-report", True)),
+    )
+    parser.add_argument(
+        "--no-new-contributor-report",
+        dest="new_contributor_report",
+        action="store_false",
+    )
+    parser.add_argument(
+        "--new-contributor-window-days",
+        type=int,
+        default=int(defaults.get("new-contributor-window-days", 42)),
+    )
+    parser.add_argument(
+        "--new-contributor-max-authors",
+        type=int,
+        default=int(defaults.get("new-contributor-max-authors", 25)),
+    )
+    parser.add_argument("--http-timeout", type=int, default=300)
+    parser.add_argument("--http-max-retries", type=int, default=8)
+    parser.add_argument("--checkpoint-every-comments", type=int, default=1000)
+    parser.add_argument("--checkpoint-every-prs", type=int, default=25)
+    parser.add_argument(
+        "--private-hf-repo",
+        dest="private_hf_repo",
+        action="store_true",
+        default=bool(defaults.get("private-hf-repo", False)),
+    )
+    parser.add_argument("--private", dest="private_hf_repo", action="store_true")
+    return parser
+
+
+def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
+    config_path = extract_cli_config_path(argv)
+    parser = _build_argument_parser(config_path=config_path)
+    args = parser.parse_args(argv)
+    if not args.hf_repo_id:
+        parser.error("--hf-repo-id is required (or set dataset_id in --config)")
+    return args
+
+
+def run_dataset_refresh(options: DatasetRefreshOptions) -> dict[str, Any]:
+    hf_token = os.getenv("HF_TOKEN")
+    github_token = resolve_github_token()
+    if not github_token:
+        raise RuntimeError("GITHUB_TOKEN must be set or resolvable via gh auth/.env")
+
+    repo_slug = options.repo.slug
+    owner, repo_name = options.repo.owner, options.repo.name
+    sid = snapshot_id()
+    crawl_started_at = iso_now()
+    extracted_at = iso_now()
+
+    api = HfApi(token=hf_token)
+    api.create_repo(
+        repo_id=options.hf_repo_id,
+        repo_type="dataset",
+        private=options.private_hf_repo,
+        exist_ok=True,
+    )
+
+    with tempfile.TemporaryDirectory(prefix="slop-farmer-job-") as tmp:
+        root = Path(tmp)
+        previous_root = root / "previous"
+        output_root = root / "output"
+        previous_root.mkdir(parents=True, exist_ok=True)
+        output_root.mkdir(parents=True, exist_ok=True)
+
+        remote_paths = list_remote_paths(api, options.hf_repo_id)
+        previous_watermark = load_remote_json_file(
+            api, options.hf_repo_id, "state/watermark.json", previous_root
+        )
+        remote_manifest = load_remote_json_file(
+            api, options.hf_repo_id, "manifest.json", previous_root
+        )
+        latest_pointer = (
+            load_remote_json_file(api, options.hf_repo_id, "snapshots/latest.json", previous_root)
+            if "snapshots/latest.json" in remote_paths
+            else None
+        )
+        stable_snapshot_id = None
+        if previous_watermark:
+            stable_snapshot_id = previous_watermark.get("last_successful_snapshot_id")
+        elif latest_pointer:
+            stable_snapshot_id = latest_pointer.get("latest_snapshot_id")
+        elif remote_manifest:
+            stable_snapshot_id = remote_manifest.get("snapshot_id")
+
+        log(f"Starting dataset refresh for {repo_slug}")
+        log(f"Target dataset repo: {options.hf_repo_id}")
+        previous_tables = {
+            table_name: [] for table_name in SCHEMAS if table_name != "new_contributors"
+        }
+        for table_name in previous_tables:
+            previous_tables[table_name] = load_remote_table_from_candidates(
+                api,
+                options.hf_repo_id,
+                table_name,
+                previous_root,
+                stable_snapshot_candidates(latest_pointer, f"{table_name}.parquet"),
+            )
+
+        checkpoint_progress: dict[str, Any] | None = None
+        best_comment_checkpoint_progress: dict[str, Any] | None = None
+        for checkpoint_sid, checkpoint_dir in checkpoint_dirs(remote_paths):
+            if stable_snapshot_id is not None and checkpoint_sid <= str(stable_snapshot_id):
+                continue
+            progress_payload = load_remote_json_file(
+                api, options.hf_repo_id, f"{checkpoint_dir}/progress.json", previous_root
+            ) or load_remote_json_file(
+                api,
+                options.hf_repo_id,
+                f"{checkpoint_dir}/state/in_progress.json",
+                previous_root,
+            )
+            if progress_payload is not None:
+                checkpoint_progress = progress_payload
+                if (
+                    progress_payload.get("effective_since") is None
+                    and (progress_payload.get("counts") or {}).get("comments", 0) > 0
+                    and (
+                        best_comment_checkpoint_progress is None
+                        or (progress_payload.get("counts") or {}).get("comments", 0)
+                        > (best_comment_checkpoint_progress.get("counts") or {}).get("comments", 0)
+                    )
+                ):
+                    best_comment_checkpoint_progress = progress_payload
+            for table_name in previous_tables:
+                checkpoint_rows = load_remote_table_from_candidates(
+                    api,
+                    options.hf_repo_id,
+                    table_name,
+                    previous_root,
+                    [f"{checkpoint_dir}/{table_name}.parquet"],
+                )
+                if checkpoint_rows:
+                    previous_tables[table_name] = merge_rows(
+                        table_name,
+                        previous_tables[table_name],
+                        checkpoint_rows,
+                    )
+
+        effective_since = None
+        if checkpoint_progress and checkpoint_progress.get("effective_since") is not None:
+            effective_since = checkpoint_progress.get("effective_since")
+            log(f"Resuming from incomplete checkpoint window starting at {effective_since}")
+        elif previous_watermark and previous_watermark.get("next_since") is not None:
+            effective_since = previous_watermark.get("next_since")
+            log(f"Resuming from remote watermark {effective_since}")
+        elif (
+            remote_manifest
+            and isinstance(remote_manifest.get("watermark"), dict)
+            and remote_manifest["watermark"].get("next_since") is not None
+        ):
+            effective_since = remote_manifest["watermark"].get("next_since")
+            log(f"Bootstrapping remote watermark from root manifest {effective_since}")
+        else:
+            log("No successful watermark found; running full snapshot")
+
+        client = GitHubClient(
+            token=github_token,
+            timeout=options.http_timeout,
+            max_retries=options.http_max_retries,
+            log=log,
+        )
+        previous_snapshot_dir = materialize_previous_snapshot_dir(
+            api=api,
+            repo_id=options.hf_repo_id,
+            previous_root=previous_root,
+            stable_snapshot_id=str(stable_snapshot_id) if stable_snapshot_id is not None else None,
+            latest_pointer=latest_pointer,
+            previous_tables=previous_tables,
+        )
+
+        rate_limit = client.get_json("/rate_limit")
+        core = (rate_limit.get("resources") or {}).get("core") or {}
+        limit = core.get("limit")
+        remaining = core.get("remaining")
+        reset_at = core.get("reset")
+        log(f"GitHub core rate limit: limit={limit} remaining={remaining} reset={reset_at}")
+        if limit is not None and int(limit) <= 60:
+            raise RuntimeError("GITHUB_TOKEN appears to be missing, invalid, or not being applied")
+        if remaining == 0 and reset_at:
+            sleep_for = max(int(reset_at) - int(time.time()), 1)
+            log(f"GitHub token exhausted before bootstrap; sleeping {sleep_for}s until reset")
+            time.sleep(sleep_for)
+
+        log("Fetching changed issue and pull request stubs from GitHub")
+        issue_stubs = list(
+            client.iter_repo_issues(owner, repo_name, effective_since, options.max_issues)
+        )
+        issues = [item for item in issue_stubs if "pull_request" not in item]
+        pr_stubs = [item for item in issue_stubs if "pull_request" in item]
+        if options.max_prs is not None:
+            pr_stubs = pr_stubs[: options.max_prs]
+        log(f"Fetched {len(issue_stubs)} changed stubs")
+
+        issue_number_to_kind = {
+            item["number"]: ("pull_request" if "pull_request" in item else "issue")
+            for item in issue_stubs
+        }
+        issue_rows = [normalize_issue(repo_slug, item, sid, extracted_at) for item in issues]
+
+        comment_rows: list[dict[str, Any]] = []
+        next_comment_checkpoint = options.checkpoint_every_comments
+        reuse_checkpoint_comments = (
+            stable_snapshot_id is None
+            and effective_since is None
+            and best_comment_checkpoint_progress is not None
+            and bool(previous_tables["comments"])
+        )
+        if reuse_checkpoint_comments:
+            log(
+                f"Reusing {len(previous_tables['comments'])} checkpoint comments from prior partial runs"
+            )
+        else:
+            for index, item in enumerate(issue_stubs, start=1):
+                if not item.get("comments"):
+                    continue
+                remaining_comments = remaining_limit(options.max_issue_comments, len(comment_rows))
+                if remaining_comments == 0:
+                    break
+                if index == 1 or index % 25 == 0:
+                    log(f"Collecting discussion comments; {len(comment_rows)} collected so far")
+                for comment in client.iter_issue_comments_for_number(
+                    owner,
+                    repo_name,
+                    int(item["number"]),
+                    effective_since,
+                    remaining_comments,
+                ):
+                    parent_number = issue_url_to_number(comment.get("issue_url"))
+                    parent_kind = issue_number_to_kind.get(parent_number, "issue_or_pr")
+                    comment_rows.append(
+                        normalize_comment(
+                            repo_slug,
+                            comment,
+                            parent_kind,
+                            parent_number,
+                            sid,
+                            extracted_at,
+                        )
+                    )
+                    remaining_comments = remaining_limit(
+                        options.max_issue_comments,
+                        len(comment_rows),
+                    )
+                    if (
+                        options.checkpoint_every_comments
+                        and len(comment_rows) >= next_comment_checkpoint
+                    ):
+                        log(f"Pushing comment checkpoint to Hub at {len(comment_rows)} comments")
+                        upload_delta_checkpoint(
+                            api=api,
+                            repo_id=options.hf_repo_id,
+                            work_dir=root,
+                            repo_slug=repo_slug,
+                            sid=sid,
+                            stage="comments",
+                            delta_tables={
+                                "issues": issue_rows,
+                                "pull_requests": [],
+                                "comments": comment_rows,
+                                "reviews": [],
+                                "review_comments": [],
+                                "pr_files": [],
+                                "pr_diffs": [],
+                                "links": [],
+                                "events": [],
+                            },
+                            progress={
+                                "stage": "comments",
+                                "effective_since": effective_since,
+                                "counts": {
+                                    "issues": len(issue_rows),
+                                    "comments": len(comment_rows),
+                                    "pull_requests": 0,
+                                    "reviews": 0,
+                                    "review_comments": 0,
+                                    "pr_files": 0,
+                                    "pr_diffs": 0,
+                                    "links": 0,
+                                    "events": 0,
+                                },
+                            },
+                        )
+                        next_comment_checkpoint += options.checkpoint_every_comments
+                    if remaining_comments == 0:
+                        break
+
+        pr_rows: list[dict[str, Any]] = []
+        review_rows: list[dict[str, Any]] = []
+        review_comment_rows: list[dict[str, Any]] = []
+        pr_file_rows: list[dict[str, Any]] = []
+        pr_diff_rows: list[dict[str, Any]] = []
+        event_rows: list[dict[str, Any]] = []
+        next_pr_checkpoint = options.checkpoint_every_prs
+
+        previous_pr_rows_by_number = {
+            int(row["number"]): row
+            for row in previous_tables["pull_requests"]
+            if row.get("number") is not None
+        }
+        previous_review_rows_by_number: defaultdict[int, list[dict[str, Any]]] = defaultdict(list)
+        for row in previous_tables["reviews"]:
+            if row.get("pull_request_number") is not None:
+                previous_review_rows_by_number[int(row["pull_request_number"])].append(row)
+        previous_review_comment_rows_by_number: defaultdict[int, list[dict[str, Any]]] = (
+            defaultdict(list)
+        )
+        for row in previous_tables["review_comments"]:
+            if row.get("pull_request_number") is not None:
+                previous_review_comment_rows_by_number[int(row["pull_request_number"])].append(row)
+        previous_pr_file_rows_by_number: defaultdict[int, list[dict[str, Any]]] = defaultdict(list)
+        for row in previous_tables["pr_files"]:
+            if row.get("pull_request_number") is not None:
+                previous_pr_file_rows_by_number[int(row["pull_request_number"])].append(row)
+        previous_pr_diff_rows_by_number = {
+            int(row["pull_request_number"]): row
+            for row in previous_tables["pr_diffs"]
+            if row.get("pull_request_number") is not None
+        }
+        previous_pr_event_rows_by_number: defaultdict[int, list[dict[str, Any]]] = defaultdict(list)
+        for row in previous_tables["events"]:
+            if row.get("parent_kind") == "pull_request" and row.get("parent_number") is not None:
+                previous_pr_event_rows_by_number[int(row["parent_number"])].append(row)
+
+        hydration_pr_stubs: list[dict[str, Any]] = []
+        for pr_stub in pr_stubs:
+            number = int(pr_stub["number"])
+            previous_pr_row = previous_pr_rows_by_number.get(number)
+            if previous_pr_row and previous_pr_row.get("updated_at") == pr_stub.get("updated_at"):
+                pr_rows.append(previous_pr_row)
+                review_rows.extend(previous_review_rows_by_number[number])
+                review_comment_rows.extend(previous_review_comment_rows_by_number[number])
+                pr_file_rows.extend(previous_pr_file_rows_by_number[number])
+                if number in previous_pr_diff_rows_by_number:
+                    pr_diff_rows.append(previous_pr_diff_rows_by_number[number])
+                event_rows.extend(previous_pr_event_rows_by_number[number])
+                continue
+            hydration_pr_stubs.append(pr_stub)
+
+        reused_pr_count = len(pr_rows)
+        if reused_pr_count:
+            log(f"Reusing hydrated data for {reused_pr_count} pull requests from prior checkpoints")
+            if options.checkpoint_every_prs:
+                while reused_pr_count >= next_pr_checkpoint:
+                    next_pr_checkpoint += options.checkpoint_every_prs
+
+        total_prs = len(pr_stubs)
+        remaining_prs = len(hydration_pr_stubs)
+        for index, pr_stub in enumerate(hydration_pr_stubs, start=1):
+            number = int(pr_stub["number"])
+            hydrated_count = reused_pr_count + index
+            if index == 1 or hydrated_count % 10 == 0 or index == remaining_prs:
+                log(f"Hydrating pull requests: {hydrated_count}/{total_prs}")
+            detail = client.get_pull_request(owner, repo_name, number)
+            pr_rows.append(normalize_pull_request(repo_slug, pr_stub, detail, sid, extracted_at))
+            for review in client.iter_pull_reviews(
+                owner, repo_name, number, options.max_reviews_per_pr
+            ):
+                review_rows.append(normalize_review(repo_slug, number, review, sid, extracted_at))
+            for comment in client.iter_pull_review_comments(
+                owner,
+                repo_name,
+                number,
+                options.max_review_comments_per_pr,
+            ):
+                review_comment_rows.append(
+                    normalize_review_comment(repo_slug, number, comment, sid, extracted_at)
+                )
+            for pr_file in client.iter_pull_files(owner, repo_name, number):
+                pr_file_rows.append(
+                    normalize_pr_file(repo_slug, number, pr_file, sid, extracted_at)
+                )
+            pr_diff_rows.append(
+                normalize_pr_diff(
+                    repo_slug,
+                    number,
+                    pr_stub.get("html_url"),
+                    pr_stub.get("url"),
+                    client.get_pull_request_diff(owner, repo_name, number),
+                    sid,
+                    extracted_at,
+                )
+            )
+            if options.fetch_timeline:
+                for event in client.iter_issue_timeline(owner, repo_name, number):
+                    event_rows.append(
+                        normalize_timeline_event(
+                            repo_slug,
+                            number,
+                            "pull_request",
+                            event,
+                            sid,
+                            extracted_at,
+                        )
+                    )
+            if options.checkpoint_every_prs and len(pr_rows) >= next_pr_checkpoint:
+                log(f"Pushing PR checkpoint to Hub at {len(pr_rows)} hydrated PRs")
+                upload_delta_checkpoint(
+                    api=api,
+                    repo_id=options.hf_repo_id,
+                    work_dir=root,
+                    repo_slug=repo_slug,
+                    sid=sid,
+                    stage="pull_requests",
+                    delta_tables={
+                        "issues": issue_rows,
+                        "pull_requests": pr_rows,
+                        "comments": comment_rows,
+                        "reviews": review_rows,
+                        "review_comments": review_comment_rows,
+                        "pr_files": pr_file_rows,
+                        "pr_diffs": pr_diff_rows,
+                        "links": [],
+                        "events": event_rows,
+                    },
+                    progress={
+                        "stage": "pull_requests",
+                        "effective_since": effective_since,
+                        "counts": {
+                            "issues": len(issue_rows),
+                            "comments": len(comment_rows),
+                            "pull_requests": len(pr_rows),
+                            "reviews": len(review_rows),
+                            "review_comments": len(review_comment_rows),
+                            "pr_files": len(pr_file_rows),
+                            "pr_diffs": len(pr_diff_rows),
+                            "links": 0,
+                            "events": len(event_rows),
+                        },
+                    },
+                )
+                next_pr_checkpoint += options.checkpoint_every_prs
+
+        if options.fetch_timeline:
+            log(f"Fetching issue timelines for {len(issues)} changed issues")
+            for issue in issues:
+                for event in client.iter_issue_timeline(owner, repo_name, int(issue["number"])):
+                    event_rows.append(
+                        normalize_timeline_event(
+                            repo_slug,
+                            int(issue["number"]),
+                            "issue",
+                            event,
+                            sid,
+                            extracted_at,
+                        )
+                    )
+
+        link_rows: list[dict[str, Any]] = []
+        for row in issue_rows:
+            link_rows.extend(
+                build_text_link_rows(
+                    repo=repo_slug,
+                    owner=owner,
+                    repo_name=repo_name,
+                    source_type="issue",
+                    source_number=row["number"],
+                    source_id=row["github_id"],
+                    body=row["body"],
+                    snapshot_id=sid,
+                    extracted_at=extracted_at,
+                )
+            )
+        for row in pr_rows:
+            link_rows.extend(
+                build_text_link_rows(
+                    repo=repo_slug,
+                    owner=owner,
+                    repo_name=repo_name,
+                    source_type="pull_request",
+                    source_number=row["number"],
+                    source_id=row["github_id"],
+                    body=row["body"],
+                    snapshot_id=sid,
+                    extracted_at=extracted_at,
+                )
+            )
+        for row in comment_rows or previous_tables["comments"]:
+            if row["parent_number"] is None:
+                continue
+            link_rows.extend(
+                build_text_link_rows(
+                    repo=repo_slug,
+                    owner=owner,
+                    repo_name=repo_name,
+                    source_type="comment",
+                    source_number=row["parent_number"],
+                    source_id=row["github_id"],
+                    body=row["body"],
+                    snapshot_id=sid,
+                    extracted_at=extracted_at,
+                )
+            )
+        for row in review_rows:
+            link_rows.extend(
+                build_text_link_rows(
+                    repo=repo_slug,
+                    owner=owner,
+                    repo_name=repo_name,
+                    source_type="review",
+                    source_number=row["pull_request_number"],
+                    source_id=row["github_id"],
+                    body=row["body"],
+                    snapshot_id=sid,
+                    extracted_at=extracted_at,
+                )
+            )
+        for row in review_comment_rows:
+            link_rows.extend(
+                build_text_link_rows(
+                    repo=repo_slug,
+                    owner=owner,
+                    repo_name=repo_name,
+                    source_type="review_comment",
+                    source_number=row["pull_request_number"],
+                    source_id=row["github_id"],
+                    body=row["body"],
+                    snapshot_id=sid,
+                    extracted_at=extracted_at,
+                )
+            )
+        link_rows.extend(
+            build_pr_duplicate_candidate_rows(
+                repo=repo_slug,
+                pull_requests=pr_rows,
+                link_rows=link_rows,
+                snapshot_id=sid,
+                extracted_at=extracted_at,
+            )
+        )
+        for event in event_rows:
+            if event.get("source_issue_number"):
+                link_rows.append(
+                    {
+                        "repo": repo_slug,
+                        "source_type": event["parent_kind"],
+                        "source_number": event["parent_number"],
+                        "source_github_id": None,
+                        "target_owner": owner,
+                        "target_repo": repo_name,
+                        "target_number": event["source_issue_number"],
+                        "link_type": f"timeline:{event['event']}",
+                        "link_origin": "timeline",
+                        "snapshot_id": sid,
+                        "extracted_at": extracted_at,
+                    }
+                )
+
+        delta_tables = {
+            "issues": issue_rows,
+            "pull_requests": pr_rows,
+            "comments": comment_rows,
+            "reviews": review_rows,
+            "review_comments": review_comment_rows,
+            "pr_files": pr_file_rows,
+            "pr_diffs": pr_diff_rows,
+            "links": link_rows,
+            "events": event_rows,
+        }
+        if any(delta_tables.values()):
+            log("Pushing final delta checkpoint to Hub before merge upload")
+            upload_delta_checkpoint(
+                api=api,
+                repo_id=options.hf_repo_id,
+                work_dir=root,
+                repo_slug=repo_slug,
+                sid=sid,
+                stage="final-delta",
+                delta_tables=delta_tables,
+                progress={
+                    "stage": "final-delta",
+                    "effective_since": effective_since,
+                    "counts": {name: len(rows) for name, rows in delta_tables.items()},
+                },
+            )
+
+        final_tables = {
+            table_name: merge_rows(table_name, previous_tables[table_name], delta_rows)
+            for table_name, delta_rows in delta_tables.items()
+        }
+        manifest = {
+            "repo": repo_slug,
+            "snapshot_id": sid,
+            "crawl_started_at": crawl_started_at,
+            "extracted_at": extracted_at,
+            "watermark": {
+                "effective_since": effective_since,
+                "next_since": crawl_started_at,
+                "previous_snapshot_dir": (
+                    str(previous_snapshot_dir) if previous_snapshot_dir is not None else None
+                ),
+            },
+            "delta_counts": {
+                "issue_stubs": len(issue_stubs),
+                "issues": len(issue_rows),
+                "pull_requests": len(pr_rows),
+                "comments": len(comment_rows),
+                "reviews": len(review_rows),
+                "review_comments": len(review_comment_rows),
+                "pr_files": len(pr_file_rows),
+                "pr_diffs": len(pr_diff_rows),
+                "timeline_events": len(event_rows),
+                "links": len(link_rows),
+            },
+            "counts": {
+                "issues": len(final_tables["issues"]),
+                "pull_requests": len(final_tables["pull_requests"]),
+                "comments": len(final_tables["comments"]),
+                "reviews": len(final_tables["reviews"]),
+                "review_comments": len(final_tables["review_comments"]),
+                "pr_files": len(final_tables["pr_files"]),
+                "pr_diffs": len(final_tables["pr_diffs"]),
+                "timeline_events": len(final_tables["events"]),
+                "links": len(final_tables["links"]),
+            },
+        }
+
+        log("Writing updated dataset files")
+        for table_name, rows in final_tables.items():
+            write_parquet(rows, output_root / f"{table_name}.parquet", table_name)
+        issue_comment_rows, pr_comment_rows = viewer_comment_rows(
+            final_tables["comments"],
+            final_tables["pull_requests"],
+        )
+        write_parquet(issue_comment_rows, output_root / "issue_comments.parquet", "comments")
+        write_parquet(pr_comment_rows, output_root / "pr_comments.parquet", "comments")
921
+ if options.new_contributor_report:
922
+ write_json(manifest, output_root / "manifest.json")
923
+ log("Generating new contributor dataset/report artifacts")
924
+ run_new_contributor_report(
925
+ NewContributorReportOptions(
926
+ snapshot_dir=output_root,
927
+ output_dir=output_root,
928
+ output=None,
929
+ json_output=None,
930
+ hf_repo_id=None,
931
+ hf_revision=None,
932
+ hf_materialize_dir=None,
933
+ window_days=options.new_contributor_window_days,
934
+ max_authors=options.new_contributor_max_authors,
935
+ )
936
+ )
937
+ manifest["counts"]["new_contributors"] = len(
938
+ read_parquet_rows(output_root / "new_contributors.parquet")
939
+ )
940
+ manifest["artifacts"] = {
941
+ "new_contributors_parquet": "new_contributors.parquet",
942
+ "new_contributors_json": "new-contributors-report.json",
943
+ "new_contributors_markdown": "new-contributors-report.md",
944
+ }
945
+ manifest["watermark"].pop("previous_snapshot_dir", None)
946
+ write_json(manifest, output_root / "manifest.json")
947
+ write_text(
948
+ build_hf_dataset_card(
949
+ repo_slug,
950
+ sid,
951
+ include_new_contributors=options.new_contributor_report,
952
+ ),
953
+ output_root / "README.md",
954
+ )
955
+ write_json(
956
+ {
957
+ "repo": repo_slug,
958
+ "last_successful_snapshot_id": sid,
959
+ "effective_since": effective_since,
960
+ "next_since": crawl_started_at,
961
+ "updated_at": extracted_at,
962
+ },
963
+ output_root / "state" / "watermark.json",
964
+ )
965
+ write_json(manifest, output_root / "snapshots" / sid / "manifest.json")
966
+ write_json(
967
+ {
968
+ "repo": repo_slug,
969
+ "latest_snapshot_id": sid,
970
+ "snapshot_dir": f"snapshots/{sid}",
971
+ "manifest_path": "manifest.json",
972
+ "archived_manifest_path": f"snapshots/{sid}/manifest.json",
973
+ "next_since": crawl_started_at,
974
+ },
975
+ output_root / "snapshots" / "latest.json",
976
+ )
977
+
978
+ log("Uploading updated dataset to the Hub")
979
+ api.upload_folder(
980
+ folder_path=str(output_root),
981
+ repo_id=options.hf_repo_id,
982
+ repo_type="dataset",
983
+ commit_message=f"Refresh {repo_name} dataset snapshot {sid}",
984
+ )
985
+ log(f"Dataset refresh complete for {options.hf_repo_id}")
986
+ return {
987
+ "repo": repo_slug,
988
+ "dataset_id": options.hf_repo_id,
989
+ "snapshot_id": sid,
990
+ "effective_since": effective_since,
991
+ "counts": manifest["counts"],
992
+ }
993
+
994
+
995
+ def main(argv: list[str] | None = None) -> None:
996
+ args = parse_args(argv)
997
+ result = run_dataset_refresh(
998
+ DatasetRefreshOptions(
999
+ repo=RepoRef.parse(args.repo),
1000
+ hf_repo_id=args.hf_repo_id,
1001
+ private_hf_repo=args.private_hf_repo,
1002
+ max_issues=args.max_issues,
1003
+ max_prs=args.max_prs,
1004
+ max_issue_comments=args.max_issue_comments,
1005
+ max_reviews_per_pr=args.max_reviews_per_pr,
1006
+ max_review_comments_per_pr=args.max_review_comments_per_pr,
1007
+ fetch_timeline=args.fetch_timeline,
1008
+ new_contributor_report=args.new_contributor_report,
1009
+ new_contributor_window_days=args.new_contributor_window_days,
1010
+ new_contributor_max_authors=args.new_contributor_max_authors,
1011
+ http_timeout=args.http_timeout,
1012
+ http_max_retries=args.http_max_retries,
1013
+ checkpoint_every_comments=args.checkpoint_every_comments,
1014
+ checkpoint_every_prs=args.checkpoint_every_prs,
1015
+ )
1016
+ )
1017
+ print(json.dumps(result, indent=2))
1018
+
1019
+
1020
+ if __name__ == "__main__":
1021
+ main()
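
The refresh entry point above is also callable in-process. A minimal sketch, assuming the module lives at `slop_farmer.app.refresh_dataset` (the file name is not visible in this hunk) and that `RepoRef` is importable from `slop_farmer.config`; every option value below is illustrative rather than a recommended default:

# Hypothetical in-process invocation of the dataset refresh.
from slop_farmer.app.refresh_dataset import run_dataset_refresh  # module path assumed
from slop_farmer.config import DatasetRefreshOptions, RepoRef  # RepoRef location assumed

result = run_dataset_refresh(
    DatasetRefreshOptions(
        repo=RepoRef.parse("huggingface/transformers"),
        hf_repo_id="evalstate/transformers-pr",
        private_hf_repo=False,
        max_issues=None,
        max_prs=None,
        max_issue_comments=None,
        max_reviews_per_pr=None,
        max_review_comments_per_pr=None,
        fetch_timeline=True,
        new_contributor_report=True,
        new_contributor_window_days=14,
        new_contributor_max_authors=50,
        http_timeout=30,
        http_max_retries=5,
        checkpoint_every_comments=500,  # hypothetical checkpoint cadences
        checkpoint_every_prs=200,
    )
)
print(result["snapshot_id"], result["counts"])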
src/slop_farmer/app/dataset_status.py ADDED
@@ -0,0 +1,182 @@
+from __future__ import annotations
+
+import tempfile
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from huggingface_hub import HfApi
+
+from slop_farmer.config import DatasetStatusOptions
+from slop_farmer.data.hf_dataset_repo import (
+    list_remote_paths,
+    load_remote_file,
+    load_remote_json_file,
+    stable_snapshot_candidates,
+)
+from slop_farmer.data.parquet_io import read_json
+
+
+def _coerce_datetime(value: Any) -> datetime | None:
+    if not isinstance(value, str) or not value:
+        return None
+    try:
+        return datetime.fromisoformat(value.replace("Z", "+00:00"))
+    except ValueError:
+        return None
+
+
+def _age_summary(value: str | None) -> dict[str, Any]:
+    timestamp = _coerce_datetime(value)
+    if timestamp is None:
+        return {"seconds": None, "summary": "unknown", "staleness": "unknown"}
+    age_seconds = max(int((datetime.now(tz=UTC) - timestamp).total_seconds()), 0)
+    if age_seconds <= 6 * 3600:
+        staleness = "fresh"
+    elif age_seconds <= 24 * 3600:
+        staleness = "aging"
+    else:
+        staleness = "stale"
+    if age_seconds < 3600:
+        summary = f"{age_seconds // 60}m"
+    elif age_seconds < 24 * 3600:
+        summary = f"{age_seconds // 3600}h"
+    else:
+        summary = f"{age_seconds // 86400}d"
+    return {"seconds": age_seconds, "summary": summary, "staleness": staleness}
+
+
+def _local_status(output_dir: Path) -> dict[str, Any] | None:
+    latest_path = output_dir.resolve() / "snapshots" / "latest.json"
+    if not latest_path.exists():
+        return None
+    payload = read_json(latest_path)
+    snapshot_dir = payload.get("snapshot_dir")
+    manifest = {}
+    if isinstance(snapshot_dir, str) and snapshot_dir:
+        manifest_path = Path(snapshot_dir).resolve() / "manifest.json"
+        if manifest_path.exists():
+            manifest = read_json(manifest_path)
+    return {
+        "latest_path": str(latest_path),
+        "latest_pointer": payload,
+        "snapshot_dir": snapshot_dir,
+        "snapshot_id": manifest.get("snapshot_id") or payload.get("latest_snapshot_id"),
+    }
+
+
+def _remote_status(repo_id: str, revision: str | None) -> dict[str, Any]:
+    api = HfApi()
+    with tempfile.TemporaryDirectory(prefix="slop-farmer-dataset-status-") as tmp:
+        root = Path(tmp)
+        remote_paths = list_remote_paths(api, repo_id, revision=revision)
+        latest_pointer = load_remote_json_file(
+            api,
+            repo_id,
+            "snapshots/latest.json",
+            root,
+            revision=revision,
+        )
+        watermark = load_remote_json_file(
+            api,
+            repo_id,
+            "state/watermark.json",
+            root,
+            revision=revision,
+        )
+        manifest = None
+        if latest_pointer is not None:
+            for candidate in stable_snapshot_candidates(latest_pointer, "manifest.json"):
+                downloaded = load_remote_file(
+                    api,
+                    repo_id,
+                    candidate,
+                    root,
+                    revision=revision,
+                )
+                if downloaded is None:
+                    continue
+                manifest = read_json(downloaded)
+                break
+        snapshot_prefix = (
+            str(latest_pointer.get("snapshot_dir") or "").strip("/")
+            if isinstance(latest_pointer, dict)
+            else ""
+        )
+        contributors_present = any(
+            path in remote_paths
+            for path in (
+                "new_contributors.parquet",
+                "new-contributors-report.json",
+                "new-contributors-report.md",
+            )
+        )
+        if snapshot_prefix:
+            contributors_present = contributors_present or any(
+                path in remote_paths
+                for path in (
+                    f"{snapshot_prefix}/new_contributors.parquet",
+                    f"{snapshot_prefix}/new-contributors-report.json",
+                    f"{snapshot_prefix}/new-contributors-report.md",
+                )
+            )
+        extracted_at = manifest.get("extracted_at") if manifest else None
+        return {
+            "dataset_id": repo_id,
+            "revision": revision,
+            "latest_pointer": latest_pointer,
+            "watermark": watermark,
+            "manifest": manifest,
+            "contributors_present": contributors_present,
+            "remote_path_count": len(remote_paths),
+            "age": _age_summary(extracted_at),
+        }
+
+
+def get_dataset_status(options: DatasetStatusOptions) -> dict[str, Any]:
+    remote = _remote_status(options.hf_repo_id, options.hf_revision) if options.hf_repo_id else None
+    local = _local_status(options.output_dir)
+    repo = options.repo
+    if repo is None and remote and remote.get("manifest"):
+        repo = remote["manifest"].get("repo")
+    if repo is None and local and isinstance(local.get("latest_pointer"), dict):
+        repo = local["latest_pointer"].get("repo")
+    return {
+        "repo": repo,
+        "dataset_id": options.hf_repo_id,
+        "remote": remote,
+        "local": local,
+    }
+
+
+def format_dataset_status(status: dict[str, Any]) -> str:
+    remote = status.get("remote") or {}
+    local = status.get("local") or {}
+    manifest = remote.get("manifest") or {}
+    watermark = remote.get("watermark") or {}
+    latest_pointer = remote.get("latest_pointer") or {}
+    age = remote.get("age") or {}
+    lines = [
+        f"Repo: {status.get('repo') or '?'}",
+        f"Dataset: {status.get('dataset_id') or 'not configured'}",
+    ]
+    if remote:
+        lines.extend(
+            [
+                f"Remote latest snapshot: {manifest.get('snapshot_id') or latest_pointer.get('latest_snapshot_id') or '?'}",
+                f"Remote extracted at: {manifest.get('extracted_at') or '?'}",
+                f"Remote next_since: {watermark.get('next_since') or latest_pointer.get('next_since') or '?'}",
+                f"Contributor artifacts: {'yes' if remote.get('contributors_present') else 'no'}",
+                f"Freshness: {age.get('summary') or 'unknown'} ({age.get('staleness') or 'unknown'})",
+            ]
+        )
+    if local:
+        lines.extend(
+            [
+                f"Local latest pointer: {local.get('latest_path')}",
+                f"Local snapshot id: {local.get('snapshot_id') or '?'}",
+            ]
+        )
+    else:
+        lines.append("Local latest pointer: none")
+    return "\n".join(lines)
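
A minimal usage sketch for the status helpers above; the dataset id and output directory are placeholders, and the remote half of the check needs Hub access at runtime:

from pathlib import Path

from slop_farmer.app.dataset_status import format_dataset_status, get_dataset_status
from slop_farmer.config import DatasetStatusOptions

status = get_dataset_status(
    DatasetStatusOptions(
        output_dir=Path("eval_data"),  # placeholder local pipeline dir
        hf_repo_id="evalstate/transformers-pr",
        hf_revision=None,
    )
)
print(format_dataset_status(status))  # e.g. ends with "Freshness: 3h (fresh)"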
src/slop_farmer/app/deploy.py CHANGED
@@ -5,6 +5,7 @@ import subprocess
 from pathlib import Path
 
 from slop_farmer.config import DeployDashboardOptions
+from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
 
 
 def run_deploy_dashboard(options: DeployDashboardOptions) -> None:
@@ -17,6 +18,16 @@ def run_deploy_dashboard(options: DeployDashboardOptions) -> None:
         {
             "PIPELINE_DATA_DIR": str(options.pipeline_data_dir),
             "WEB_DIR": str(options.web_dir),
+            "SNAPSHOT_DIR": str(
+                resolve_snapshot_source_dir(
+                    snapshot_dir=options.snapshot_dir,
+                    local_snapshots_root=options.pipeline_data_dir.resolve() / "snapshots",
+                    hf_repo_id=options.hf_repo_id,
+                    hf_revision=options.hf_revision,
+                    hf_materialize_dir=options.hf_materialize_dir,
+                    hf_output_dir=options.pipeline_data_dir,
+                )
+            ),
             "DASHBOARD_WINDOW_DAYS": str(options.dashboard_window_days),
             "CONTRIBUTOR_WINDOW_DAYS": str(options.contributor_window_days),
             "CONTRIBUTOR_MAX_AUTHORS": str(options.contributor_max_authors),
@@ -28,8 +39,6 @@ def run_deploy_dashboard(options: DeployDashboardOptions) -> None:
             "SPACE_SHORT_DESCRIPTION": options.space_short_description,
         }
     )
-    if options.snapshot_dir is not None:
-        env["SNAPSHOT_DIR"] = str(options.snapshot_dir)
     if options.analysis_input is not None:
        env["ANALYSIS_INPUT"] = str(options.analysis_input)
    if options.contributors_input is not None:
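
`SNAPSHOT_DIR` is now always computed instead of being set only when a directory is passed explicitly. A sketch of the same resolution call outside the deploy flow, with a hypothetical data directory; the call signature matches the hunk above, but `slop_farmer.data.snapshot_source` is a new module whose other behavior is not shown here:

from pathlib import Path

from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir

data_dir = Path("eval_data")  # hypothetical pipeline data dir
snapshot_dir = resolve_snapshot_source_dir(
    snapshot_dir=None,  # no explicit override; fall back to discovery
    local_snapshots_root=data_dir.resolve() / "snapshots",
    hf_repo_id="evalstate/transformers-pr",
    hf_revision=None,
    hf_materialize_dir=None,
    hf_output_dir=data_dir,
)
print(snapshot_dir)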
src/slop_farmer/app/hf_checkpoint_import.py CHANGED
@@ -28,6 +28,7 @@ from huggingface_hub import HfApi, hf_hub_download
 
 from slop_farmer.app.publish import publish_snapshot
 from slop_farmer.config import CheckpointImportOptions
+from slop_farmer.data.dataset_card import build_hf_dataset_card
 from slop_farmer.data.links import build_pr_duplicate_candidate_rows, build_text_link_rows
 from slop_farmer.data.parquet_io import (
     SCHEMAS,
@@ -455,76 +456,15 @@ def _viewer_comment_rows(
 def _dataset_card(
     repo_slug: str, snapshot_id: str, source_repo_id: str, checkpoint_root: str
 ) -> str:
-    return f"""---
-pretty_name: Transformers PR Slop Dataset
-configs:
-- config_name: issues
-  data_files:
-  - split: train
-    path: issues.parquet
-  default: true
-- config_name: prs
-  data_files:
-  - split: train
-    path: pull_requests.parquet
-- config_name: issue_comments
-  data_files:
-  - split: train
-    path: issue_comments.parquet
-- config_name: pr_comments
-  data_files:
-  - split: train
-    path: pr_comments.parquet
-- config_name: pr_reviews
-  data_files:
-  - split: train
-    path: reviews.parquet
-- config_name: pr_files
-  data_files:
-  - split: train
-    path: pr_files.parquet
-- config_name: pr_diffs
-  data_files:
-  - split: train
-    path: pr_diffs.parquet
-- config_name: review_comments
-  data_files:
-  - split: train
-    path: review_comments.parquet
-- config_name: links
-  data_files:
-  - split: train
-    path: links.parquet
-- config_name: events
-  data_files:
-  - split: train
-    path: events.parquet
----
----
-
-# Transformers PR Slop Dataset
-
-Imported checkpoint snapshot for `{repo_slug}`.
-
-Files:
-- `issues.parquet`
-- `pull_requests.parquet`
-- `comments.parquet`
-- `issue_comments.parquet`
-- `pr_comments.parquet`
-- `reviews.parquet`
-- `pr_files.parquet`
-- `pr_diffs.parquet`
-- `review_comments.parquet`
-- `links.parquet`
-- `events.parquet`
-
-Notes:
-- source HF dataset: `{source_repo_id}`
-- source checkpoint root: `{checkpoint_root}`
-- latest imported checkpoint: `{snapshot_id}`
-- links were regenerated locally from text references and timeline events
-"""
+    return build_hf_dataset_card(
+        repo_slug,
+        snapshot_id,
+        notes=[
+            f"source HF dataset: `{source_repo_id}`",
+            f"source checkpoint root: `{checkpoint_root}`",
+            "links were regenerated locally from text references and timeline events",
+        ],
+    )
 
 
 def _snapshot_dir_name(source_repo_id: str, checkpoint_id: str) -> str:
src/slop_farmer/app/pipeline.py CHANGED
@@ -9,6 +9,7 @@ from typing import Any, Protocol
 
 from slop_farmer.app.publish import publish_snapshot
 from slop_farmer.config import NewContributorReportOptions, PipelineOptions, resolve_github_token
+from slop_farmer.data.dataset_card import build_hf_dataset_card
 from slop_farmer.data.github_api import GitHubClient
 from slop_farmer.data.links import build_pr_duplicate_candidate_rows, build_text_link_rows
 from slop_farmer.data.normalize import (
@@ -112,96 +113,14 @@ def _reference_time_for_age_caps(crawl_started_at: str) -> datetime:
 def _dataset_card(
     repo: str, snapshot_id: str, manifest: dict[str, Any], *, include_new_contributors: bool = False
 ) -> str:
-    new_contributor_config = ""
-    new_contributor_file = ""
-    if include_new_contributors:
-        new_contributor_config = """- config_name: new_contributors
-  data_files:
-  - split: train
-    path: new_contributors.parquet
-"""
-        new_contributor_file = """- `new_contributors.parquet`
-- `new-contributors-report.json`
-- `new-contributors-report.md`
-"""
-    return f"""---
-pretty_name: Transformers PR Slop Dataset
-configs:
-- config_name: issues
-  data_files:
-  - split: train
-    path: issues.parquet
-  default: true
-- config_name: prs
-  data_files:
-  - split: train
-    path: pull_requests.parquet
-- config_name: issue_comments
-  data_files:
-  - split: train
-    path: issue_comments.parquet
-- config_name: pr_comments
-  data_files:
-  - split: train
-    path: pr_comments.parquet
-- config_name: pr_reviews
-  data_files:
-  - split: train
-    path: reviews.parquet
-- config_name: pr_files
-  data_files:
-  - split: train
-    path: pr_files.parquet
-- config_name: pr_diffs
-  data_files:
-  - split: train
-    path: pr_diffs.parquet
-- config_name: review_comments
-  data_files:
-  - split: train
-    path: review_comments.parquet
-- config_name: links
-  data_files:
-  - split: train
-    path: links.parquet
-- config_name: events
-  data_files:
-  - split: train
-    path: events.parquet
-{new_contributor_config}---
----
-
-# Transformers PR Slop Dataset
-
-Normalized snapshots of issues, pull requests, comments, reviews, and linkage data from `{repo}`.
-
-Files:
-- `issues.parquet`
-- `pull_requests.parquet`
-- `comments.parquet`
-- `issue_comments.parquet` (derived view of issue discussion comments)
-- `pr_comments.parquet` (derived view of pull request discussion comments)
-- `reviews.parquet`
-- `pr_files.parquet`
-- `pr_diffs.parquet`
-- `review_comments.parquet`
-- `links.parquet`
-- `events.parquet`
-{new_contributor_file}
-
-Use:
-- duplicate PR and issue analysis
-- triage and ranking experiments
-- eval set creation
-
-Notes:
-- updated daily
-- latest snapshot: `{snapshot_id}`
-- raw data only; no labels or moderation decisions
-- PR metadata, file-level patch hunks, and full unified diffs are included
-- new contributor reviewer artifacts are included when generated for the snapshot
-- full file contents for changed files are not included
-"""
+    notes = ["new contributor reviewer artifacts are included"] if include_new_contributors else []
+    del manifest
+    return build_hf_dataset_card(
+        repo,
+        snapshot_id,
+        include_new_contributors=include_new_contributors,
+        notes=notes,
+    )
 
 
 def _viewer_comment_rows(
@@ -1045,6 +964,9 @@ def run_pipeline(options: PipelineOptions, client: GitHubClientLike | None = None)
             output_dir=options.output_dir,
             output=None,
             json_output=None,
+            hf_repo_id=None,
+            hf_revision=None,
+            hf_materialize_dir=None,
             window_days=options.new_contributor_window_days,
             max_authors=options.new_contributor_max_authors,
         )
src/slop_farmer/app/pr_search.py CHANGED
@@ -10,9 +10,12 @@ get_pr_search_status = pr_search_service.get_pr_search_status
 get_pr_search_similar = pr_search_service.get_pr_search_similar
 get_pr_search_similar_lookup = pr_search_service.get_pr_search_similar_lookup
 get_pr_search_candidate_clusters = pr_search_service.get_pr_search_candidate_clusters
+get_pr_search_contributor = pr_search_service.get_pr_search_contributor
+get_pr_search_contributor_pulls = pr_search_service.get_pr_search_contributor_pulls
 get_pr_search_clusters = pr_search_service.get_pr_search_clusters
 list_pr_search_clusters = pr_search_service.list_pr_search_clusters
 get_pr_search_cluster = pr_search_service.get_pr_search_cluster
+get_pr_search_pull_contributor = pr_search_service.get_pr_search_pull_contributor
 explain_pr_search_pair = pr_search_service.explain_pr_search_pair
 probe_pr_search_live = pr_search_service.probe_pr_search_live
 probe_pr_search_github = pr_search_service.probe_pr_search_github
@@ -31,6 +34,7 @@ def format_pr_search_status(result: Mapping[str, Any]) -> str:
         (
             "Rows: "
             f"documents={counts['documents']} "
+            f"contributors={counts.get('contributors', 0)} "
             f"features={counts['features']} "
             f"neighbors={counts['neighbors']} "
             f"clusters={counts['clusters']} "
@@ -245,3 +249,73 @@ def format_pr_search_probe(result: Mapping[str, Any]) -> str:
         if row.get("reason"):
             lines.append(f" reason: {row['reason']}")
     return "\n".join(lines)
+
+
+def format_pr_search_contributor(result: Mapping[str, Any]) -> str:
+    contributor = result["contributor"]
+    lines = [
+        f"Contributor {contributor['author_login']}",
+        f"Repo: {result['repo']}",
+        f"Snapshot: {result['snapshot_id']}",
+        f"Name: {contributor.get('name') or '-'}",
+        f"Profile: {contributor.get('profile_url') or '-'}",
+        f"Association: {contributor.get('repo_association') or '-'}",
+        f"First seen in snapshot: {'yes' if contributor.get('first_seen_in_snapshot') else 'no'}",
+        (
+            "Scores: "
+            f"follow-through={contributor.get('follow_through_score') or '-'} "
+            f"breadth={contributor.get('breadth_score') or '-'} "
+            f"risk={contributor.get('automation_risk_signal') or '-'}"
+        ),
+        f"Heuristic: {contributor.get('heuristic_note') or '-'}",
+        f"Public orgs: {', '.join(contributor.get('public_orgs') or []) or '-'}",
+        "",
+        "Recent indexed PRs:",
+    ]
+    pulls = result.get("pulls") or []
+    if not pulls:
+        lines.append("- none")
+        return "\n".join(lines)
+    for row in pulls:
+        lines.append(
+            f"- PR #{row['pr_number']}: {row.get('title') or ''} "
+            f"[state={row.get('state') or '-'} merged={'yes' if row.get('merged') else 'no'}]"
+        )
+    return "\n".join(lines)
+
+
+def format_pr_search_contributor_pulls(result: Mapping[str, Any]) -> str:
+    contributor = result["contributor"]
+    lines = [
+        f"Contributor PRs: {contributor['author_login']}",
+        f"Repo: {result['repo']}",
+        f"Snapshot: {result['snapshot_id']}",
+        f"Pull requests: {result.get('pull_count', len(result.get('pulls') or []))}",
+        "",
+    ]
+    pulls = result.get("pulls") or []
+    if not pulls:
+        lines.append("No indexed PRs found for that contributor.")
+        return "\n".join(lines)
+    for row in pulls:
+        lines.append(
+            f"- PR #{row['pr_number']}: {row.get('title') or ''} "
+            f"(updated={row.get('updated_at') or '-'}, state={row.get('state') or '-'})"
+        )
+    return "\n".join(lines)
+
+
+def format_pr_search_pull_contributor(result: Mapping[str, Any]) -> str:
+    pr = result["pr"]
+    contributor = result["contributor"]
+    return "\n".join(
+        [
+            f"PR #{pr['pr_number']}: {pr.get('title') or ''}",
+            f"Author: {contributor['author_login']}",
+            f"Risk: {contributor.get('automation_risk_signal') or '-'}",
+            f"Follow-through: {contributor.get('follow_through_score') or '-'}",
+            f"Breadth: {contributor.get('breadth_score') or '-'}",
+            f"Heuristic: {contributor.get('heuristic_note') or '-'}",
+            f"Profile: {contributor.get('profile_url') or '-'}",
+        ]
+    )
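
A quick sketch of the formatter contract above: only the keys the function actually reads are required, and every missing optional field falls back to `-` or `no`:

from slop_farmer.app.pr_search import format_pr_search_contributor

# Minimal result payload; ids and logins are placeholders.
example = {
    "repo": "huggingface/transformers",
    "snapshot_id": "2026-01-01",
    "contributor": {"author_login": "octocat"},
    "pulls": [],
}
print(format_pr_search_contributor(example))  # ends with "Recent indexed PRs:" and "- none"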
src/slop_farmer/app/pr_search_api.py CHANGED
@@ -22,6 +22,9 @@ from slop_farmer.reports.analysis_service import (
 from slop_farmer.reports.pr_search_service import (
     get_pr_search_cluster,
     get_pr_search_clusters,
+    get_pr_search_contributor,
+    get_pr_search_contributor_pulls,
+    get_pr_search_pull_contributor,
     get_pr_search_similar_lookup,
     get_pr_search_status,
     list_pr_search_clusters,
@@ -34,6 +37,7 @@ class PrSearchApiSettings:
     default_repo: str | None
     index_path: Path
     output_dir: Path
+    analysis_dir: Path | None = None
     snapshot_dir: Path | None = None
     hf_repo_id: str | None = None
     hf_revision: str | None = None
@@ -66,6 +70,7 @@ class PrSearchApiSettings:
             default_repo=os.environ.get("DEFAULT_REPO"),
             index_path=index_path,
             output_dir=output_dir,
+            analysis_dir=_env_path("ANALYSIS_DIR") or (output_dir / "analysis"),
             snapshot_dir=snapshot_dir,
             hf_repo_id=os.environ.get("HF_REPO_ID"),
             hf_revision=os.environ.get("HF_REVISION"),
@@ -103,7 +108,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
         app.state.startup_error = str(exc)
         yield
 
-    app = FastAPI(title="slop PR search API", version="0.1.0", lifespan=lifespan)
+    app = FastAPI(title="slop PR search API", version="0.1.1", lifespan=lifespan)
 
     @app.exception_handler(ValueError)
     async def handle_value_error(_request: Request, exc: ValueError) -> JSONResponse:
@@ -212,6 +217,44 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
             ),
         )
 
+    @app.get("/v1/repos/{owner}/{repo}/contributors/{login}")
+    async def contributor_view(
+        owner: str, repo: str, login: str, request: Request
+    ) -> dict[str, Any]:
+        settings = request.app.state.settings
+        repo_slug = _repo_slug(settings, owner, repo)
+        return get_pr_search_contributor(settings.index_path, repo=repo_slug, author_login=login)
+
+    @app.get("/v1/repos/{owner}/{repo}/contributors/{login}/pulls")
+    async def contributor_pulls(
+        owner: str,
+        repo: str,
+        login: str,
+        request: Request,
+        limit: int | None = None,
+    ) -> dict[str, Any]:
+        settings = request.app.state.settings
+        repo_slug = _repo_slug(settings, owner, repo)
+        return get_pr_search_contributor_pulls(
+            settings.index_path,
+            repo=repo_slug,
+            author_login=login,
+            limit=_limit(
+                limit, default=settings.similar_limit_default, maximum=settings.similar_limit_max
+            ),
+        )
+
+    @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/contributor")
+    async def pull_contributor(
+        owner: str,
+        repo: str,
+        number: int,
+        request: Request,
+    ) -> dict[str, Any]:
+        settings = request.app.state.settings
+        repo_slug = _repo_slug(settings, owner, repo)
+        return get_pr_search_pull_contributor(settings.index_path, repo=repo_slug, pr_number=number)
+
     @app.get("/v1/repos/{owner}/{repo}/analysis/status")
     async def analysis_status(
         owner: str,
@@ -221,7 +264,12 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
     ) -> dict[str, Any]:
         settings = request.app.state.settings
         repo_slug = _repo_slug(settings, owner, repo)
-        return get_analysis_status(settings.index_path, repo=repo_slug, variant=variant)
+        return get_analysis_status(
+            settings.index_path,
+            repo=repo_slug,
+            variant=variant,
+            analysis_root=settings.analysis_dir,
+        )
 
     @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/analysis")
     async def pr_analysis(
@@ -238,6 +286,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
             repo=repo_slug,
             pr_number=number,
             variant=variant,
+            analysis_root=settings.analysis_dir,
         )
 
     @app.get("/v1/repos/{owner}/{repo}/analysis/meta-bugs")
@@ -254,6 +303,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
             settings.index_path,
             repo=repo_slug,
             variant=variant,
+            analysis_root=settings.analysis_dir,
             limit=_limit(
                 limit,
                 default=settings.cluster_list_limit_default,
@@ -276,6 +326,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
             repo=repo_slug,
             cluster_id=cluster_id,
             variant=variant,
+            analysis_root=settings.analysis_dir,
         )
 
     @app.get("/v1/repos/{owner}/{repo}/analysis/duplicate-prs")
@@ -292,6 +343,7 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
             settings.index_path,
             repo=repo_slug,
             variant=variant,
+            analysis_root=settings.analysis_dir,
             limit=_limit(
                 limit,
                 default=settings.cluster_list_limit_default,
@@ -308,7 +360,12 @@ def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI:
     ) -> dict[str, Any]:
         settings = request.app.state.settings
        repo_slug = _repo_slug(settings, owner, repo)
-        return get_analysis_best(settings.index_path, repo=repo_slug, variant=variant)
+        return get_analysis_best(
+            settings.index_path,
+            repo=repo_slug,
+            variant=variant,
+            analysis_root=settings.analysis_dir,
+        )
 
     return app
 
@@ -395,6 +452,7 @@ def _looks_not_found(exc: ValueError) -> bool:
     message = str(exc).lower()
     return (
         "not found" in message
+        or "analysis report was not found" in message
        or "no analysis report was found" in message
        or "no active pr search run" in message
        or "was not found in the active indexed universe" in message
src/slop_farmer/app/workflow.py CHANGED
@@ -74,6 +74,9 @@ def run_full_pipeline(options: FullPipelineOptions) -> str:
             analysis_input=analysis_path,
             contributors_input=snapshot_dir / "new-contributors-report.json",
             pr_scope_input=snapshot_dir / "pr-scope-clusters.json",
+            hf_repo_id=None,
+            hf_revision=None,
+            hf_materialize_dir=None,
             window_days=options.dashboard_window_days,
         )
     )
src/slop_farmer/app_config.py CHANGED
@@ -184,6 +184,18 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
             "new-contributor-window-days": contributor_window_days,
             "new-contributor-max-authors": contributor_max_authors,
         },
+        "refresh-dataset": {
+            "repo": repo,
+            "hf-repo-id": dataset_id,
+            "fetch-timeline": scrape.get("fetch-timeline"),
+            "max-issues": scrape.get("max-issues"),
+            "max-prs": scrape.get("max-prs"),
+            "max-issue-comments": scrape.get("max-issue-comments"),
+            "max-reviews-per-pr": scrape.get("max-reviews-per-pr"),
+            "max-review-comments-per-pr": scrape.get("max-review-comments-per-pr"),
+            "new-contributor-window-days": contributor_window_days,
+            "new-contributor-max-authors": contributor_max_authors,
+        },
         "analyze": {
             "output-dir": str(data_dir) if data_dir else None,
             "hf-repo-id": analysis.get("hf-repo-id", dataset_id),
@@ -201,6 +213,7 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
         },
         "pr-scope": {
             "output-dir": str(data_dir) if data_dir else None,
+            "hf-repo-id": dataset_id,
             "cluster-suppression-rules": cluster_suppression_rules,
         },
         "pr-search": {
@@ -210,12 +223,14 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
         },
         "new-contributor-report": {
             "output-dir": str(data_dir) if data_dir else None,
+            "hf-repo-id": dataset_id,
             "window-days": contributor_window_days,
             "max-authors": contributor_max_authors,
         },
         "dashboard-data": {
             "output-dir": str(dashboard_dir) if dashboard_dir else None,
             "snapshot-root": str(data_dir / "snapshots") if data_dir else None,
+            "hf-repo-id": dataset_id,
             "window-days": dashboard_window_days,
         },
         "publish-snapshot": {
@@ -236,6 +251,7 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
         "deploy-dashboard": {
             "pipeline-data-dir": str(data_dir) if data_dir else None,
             "web-dir": str(web_dir) if web_dir else None,
+            "hf-repo-id": dataset_id,
             "dashboard-window-days": dashboard_window_days,
             "contributor-window-days": contributor_window_days,
             "contributor-max-authors": contributor_max_authors,
@@ -248,6 +264,11 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
             "dataset-id": dataset_id,
             "space-tags": tags_value,
         },
+        "dataset-status": {
+            "repo": repo,
+            "output-dir": str(data_dir) if data_dir else None,
+            "hf-repo-id": dataset_id,
+        },
     }
     for command, values in defaults.items():
         defaults[command] = {key: value for key, value in values.items() if value is not None}
@@ -259,6 +280,7 @@ def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]:
         defaults[command].update(_resolve_command_paths(config_path, values))
 
     defaults["scrape"].update(_resolve_command_paths(config_path, scrape))
+    defaults["refresh-dataset"].update(_resolve_command_paths(config_path, scrape))
    defaults["analyze"].update(_resolve_command_paths(config_path, analysis))
    defaults["full-pipeline"].update(_resolve_command_paths(config_path, full_pipeline))
    return defaults
src/slop_farmer/config.py CHANGED
@@ -127,6 +127,9 @@ class NewContributorReportOptions:
     json_output: Path | None
     window_days: int
     max_authors: int
+    hf_repo_id: str | None = None
+    hf_revision: str | None = None
+    hf_materialize_dir: Path | None = None
 
 
 @dataclass(slots=True)
@@ -137,6 +140,9 @@ class DashboardDataOptions:
     contributors_input: Path | None
     pr_scope_input: Path | None
     window_days: int
+    hf_repo_id: str | None = None
+    hf_revision: str | None = None
+    hf_materialize_dir: Path | None = None
     snapshot_root: Path | None = None
 
 
@@ -155,6 +161,9 @@ class DeployDashboardOptions:
     snapshot_dir: Path | None
     analysis_input: Path | None
     contributors_input: Path | None
+    hf_repo_id: str | None
+    hf_revision: str | None
+    hf_materialize_dir: Path | None
     refresh_contributors: bool
     dashboard_window_days: int
     contributor_window_days: int
@@ -233,3 +242,32 @@ class FullPipelineOptions:
     max_issues: int | None
     max_prs: int | None
     open_prs_only: bool = False
+
+
+@dataclass(slots=True)
+class DatasetRefreshOptions:
+    repo: RepoRef
+    hf_repo_id: str
+    private_hf_repo: bool
+    max_issues: int | None
+    max_prs: int | None
+    max_issue_comments: int | None
+    max_reviews_per_pr: int | None
+    max_review_comments_per_pr: int | None
+    fetch_timeline: bool
+    new_contributor_report: bool
+    new_contributor_window_days: int
+    new_contributor_max_authors: int
+    http_timeout: int
+    http_max_retries: int
+    checkpoint_every_comments: int
+    checkpoint_every_prs: int
+
+
+@dataclass(slots=True)
+class DatasetStatusOptions:
+    output_dir: Path
+    hf_repo_id: str | None
+    hf_revision: str | None
+    repo: str | None = None
+    json_output: bool = False
src/slop_farmer/data/dataset_card.py ADDED
@@ -0,0 +1,107 @@
+from __future__ import annotations
+
+
+def _repo_title(repo_slug: str) -> str:
+    name = repo_slug.split("/", 1)[-1]
+    return name.replace("-", " ").replace("_", " ").title()
+
+
+def build_hf_dataset_card(
+    repo_slug: str,
+    snapshot_id: str,
+    *,
+    include_new_contributors: bool = False,
+    notes: list[str] | None = None,
+) -> str:
+    repo_title = _repo_title(repo_slug)
+    dataset_title = f"{repo_title} PR Dataset"
+    new_contributor_config = ""
+    new_contributor_files = ""
+    if include_new_contributors:
+        new_contributor_config = """- config_name: new_contributors
+  data_files:
+  - split: train
+    path: new_contributors.parquet
+"""
+        new_contributor_files = """- `new_contributors.parquet`
+- `new-contributors-report.json`
+- `new-contributors-report.md`
+"""
+    note_lines = "\n".join(f"- {note}" for note in (notes or []))
+    if note_lines:
+        note_lines = f"{note_lines}\n"
+    return f"""---
+pretty_name: {dataset_title}
+configs:
+- config_name: issues
+  data_files:
+  - split: train
+    path: issues.parquet
+  default: true
+- config_name: prs
+  data_files:
+  - split: train
+    path: pull_requests.parquet
+- config_name: issue_comments
+  data_files:
+  - split: train
+    path: issue_comments.parquet
+- config_name: pr_comments
+  data_files:
+  - split: train
+    path: pr_comments.parquet
+- config_name: pr_reviews
+  data_files:
+  - split: train
+    path: reviews.parquet
+- config_name: pr_files
+  data_files:
+  - split: train
+    path: pr_files.parquet
+- config_name: pr_diffs
+  data_files:
+  - split: train
+    path: pr_diffs.parquet
+- config_name: review_comments
+  data_files:
+  - split: train
+    path: review_comments.parquet
+- config_name: links
+  data_files:
+  - split: train
+    path: links.parquet
+- config_name: events
+  data_files:
+  - split: train
+    path: events.parquet
+{new_contributor_config}---
+---
+
+# {dataset_title}
+
+Normalized snapshots of issues, pull requests, comments, reviews, and linkage data from `{repo_slug}`.
+
+Files:
+- `issues.parquet`
+- `pull_requests.parquet`
+- `comments.parquet`
+- `issue_comments.parquet` (derived view of issue discussion comments)
+- `pr_comments.parquet` (derived view of pull request discussion comments)
+- `reviews.parquet`
+- `pr_files.parquet`
+- `pr_diffs.parquet`
+- `review_comments.parquet`
+- `links.parquet`
+- `events.parquet`
+{new_contributor_files}
+Use:
+- duplicate PR and issue analysis
+- triage and ranking experiments
+- eval set creation
+
+Notes:
+- latest snapshot: `{snapshot_id}`
+- raw data only; no labels or moderation decisions
+- PR metadata, file-level patch hunks, and full unified diffs are included
+- full file contents for changed files are not included
+{note_lines}"""
src/slop_farmer/data/hf_dataset_repo.py ADDED
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+from huggingface_hub import HfApi, hf_hub_download
+
+
+def load_remote_file(
+    api: HfApi,
+    repo_id: str,
+    path_in_repo: str,
+    local_dir: Path,
+    *,
+    revision: str | None = None,
+) -> Path | None:
+    del api
+    try:
+        downloaded = hf_hub_download(
+            repo_id=repo_id,
+            filename=path_in_repo,
+            repo_type="dataset",
+            revision=revision,
+            local_dir=str(local_dir),
+            token=os.getenv("HF_TOKEN"),
+        )
+    except Exception:
+        return None
+    return Path(downloaded)
+
+
+def load_remote_json_file(
+    api: HfApi,
+    repo_id: str,
+    path_in_repo: str,
+    local_dir: Path,
+    *,
+    revision: str | None = None,
+) -> dict[str, Any] | None:
+    downloaded = load_remote_file(
+        api,
+        repo_id,
+        path_in_repo,
+        local_dir,
+        revision=revision,
+    )
+    if downloaded is None:
+        return None
+    return json.loads(downloaded.read_text(encoding="utf-8"))
+
+
+def list_remote_paths(api: HfApi, repo_id: str, *, revision: str | None = None) -> set[str]:
+    try:
+        info = api.dataset_info(repo_id=repo_id, revision=revision, files_metadata=True)
+    except TypeError:
+        info = api.dataset_info(repo_id=repo_id, revision=revision)
+    except Exception:
+        return set()
+    return {sibling.rfilename for sibling in getattr(info, "siblings", [])}
+
+
+def stable_snapshot_candidates(latest_payload: dict[str, Any] | None, filename: str) -> list[str]:
+    if latest_payload is None:
+        return [filename]
+    candidates: list[str] = []
+    manifest_path = str(latest_payload.get("manifest_path") or "").strip("/")
+    snapshot_dir = str(latest_payload.get("snapshot_dir") or "").strip("/")
+    latest_snapshot_id = str(latest_payload.get("latest_snapshot_id") or "").strip()
+
+    if filename == "manifest.json" and manifest_path:
+        candidates.append(manifest_path)
+    if snapshot_dir and snapshot_dir not in {".", "/"}:
+        candidates.append(f"{snapshot_dir}/{filename}")
+    archived_manifest_path = str(latest_payload.get("archived_manifest_path") or "").strip("/")
+    if filename == "manifest.json" and archived_manifest_path:
+        candidates.append(archived_manifest_path)
+    if manifest_path and "/" in manifest_path:
+        manifest_dir = manifest_path.rsplit("/", 1)[0]
+        candidates.append(f"{manifest_dir}/{filename}")
+    if latest_snapshot_id:
+        candidates.append(f"snapshots/{latest_snapshot_id}/{filename}")
+    candidates.append(filename)
+
+    deduped: list[str] = []
+    seen: set[str] = set()
+    for candidate in candidates:
+        normalized = candidate.lstrip("./")
+        if not normalized or normalized in seen:
+            continue
+        seen.add(normalized)
+        deduped.append(normalized)
+    return deduped
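
A sketch of the candidate fallback order above, using a pointer payload shaped like the `snapshots/latest.json` written by the refresh pipeline; the snapshot id is a placeholder:

from slop_farmer.data.hf_dataset_repo import stable_snapshot_candidates

pointer = {
    "latest_snapshot_id": "20260101T000000Z",  # placeholder id
    "snapshot_dir": "snapshots/20260101T000000Z",
    "manifest_path": "manifest.json",
    "archived_manifest_path": "snapshots/20260101T000000Z/manifest.json",
}
print(stable_snapshot_candidates(pointer, "manifest.json"))
# -> ['manifest.json', 'snapshots/20260101T000000Z/manifest.json']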
src/slop_farmer/data/search_duckdb.py CHANGED
@@ -31,6 +31,7 @@ TABLE_COLUMNS: dict[str, tuple[str, ...]] = {
         "repo",
         "pr_number",
         "github_id",
+        "author_login",
         "state",
         "draft",
         "merged",
@@ -46,6 +47,48 @@ TABLE_COLUMNS: dict[str, tuple[str, ...]] = {
         "review_comments_count",
         "html_url",
     ),
+    "pr_search_contributors": (
+        "run_id",
+        "repo",
+        "snapshot_id",
+        "report_generated_at",
+        "window_days",
+        "author_login",
+        "name",
+        "profile_url",
+        "repo_pull_requests_url",
+        "repo_issues_url",
+        "repo_first_seen_at",
+        "repo_last_seen_at",
+        "repo_primary_artifact_count",
+        "repo_artifact_count",
+        "snapshot_issue_count",
+        "snapshot_pr_count",
+        "snapshot_comment_count",
+        "snapshot_review_count",
+        "snapshot_review_comment_count",
+        "repo_association",
+        "new_to_repo",
+        "first_seen_in_snapshot",
+        "report_reason",
+        "account_age_days",
+        "young_account",
+        "follow_through_score",
+        "breadth_score",
+        "automation_risk_signal",
+        "heuristic_note",
+        "public_orgs_json",
+        "visible_authored_pr_count",
+        "merged_pr_count",
+        "closed_unmerged_pr_count",
+        "open_pr_count",
+        "merged_pr_rate",
+        "closed_unmerged_pr_rate",
+        "still_open_pr_rate",
+        "distinct_repos_with_authored_prs",
+        "distinct_repos_with_open_prs",
+        "fetch_error",
+    ),
     "pr_scope_features": (
         "run_id",
         "repo",
@@ -144,6 +187,7 @@ CREATE TABLE IF NOT EXISTS pr_search_documents (
     repo VARCHAR,
     pr_number BIGINT,
     github_id BIGINT,
+    author_login VARCHAR,
     state VARCHAR,
     draft BOOLEAN,
     merged BOOLEAN,
@@ -159,6 +203,48 @@ CREATE TABLE IF NOT EXISTS pr_search_documents (
     review_comments_count BIGINT,
     html_url VARCHAR
 );
+CREATE TABLE IF NOT EXISTS pr_search_contributors (
+    run_id VARCHAR,
+    repo VARCHAR,
+    snapshot_id VARCHAR,
+    report_generated_at VARCHAR,
+    window_days BIGINT,
+    author_login VARCHAR,
+    name VARCHAR,
+    profile_url VARCHAR,
+    repo_pull_requests_url VARCHAR,
+    repo_issues_url VARCHAR,
+    repo_first_seen_at VARCHAR,
+    repo_last_seen_at VARCHAR,
+    repo_primary_artifact_count BIGINT,
+    repo_artifact_count BIGINT,
+    snapshot_issue_count BIGINT,
+    snapshot_pr_count BIGINT,
+    snapshot_comment_count BIGINT,
+    snapshot_review_count BIGINT,
+    snapshot_review_comment_count BIGINT,
+    repo_association VARCHAR,
+    new_to_repo BOOLEAN,
+    first_seen_in_snapshot BOOLEAN,
+    report_reason VARCHAR,
+    account_age_days BIGINT,
+    young_account BOOLEAN,
+    follow_through_score VARCHAR,
+    breadth_score VARCHAR,
+    automation_risk_signal VARCHAR,
+    heuristic_note VARCHAR,
+    public_orgs_json VARCHAR,
+    visible_authored_pr_count BIGINT,
+    merged_pr_count BIGINT,
+    closed_unmerged_pr_count BIGINT,
+    open_pr_count BIGINT,
+    merged_pr_rate DOUBLE,
+    closed_unmerged_pr_rate DOUBLE,
+    still_open_pr_rate DOUBLE,
+    distinct_repos_with_authored_prs BIGINT,
+    distinct_repos_with_open_prs BIGINT,
+    fetch_error VARCHAR
+);
 CREATE TABLE IF NOT EXISTS pr_scope_features (
     run_id VARCHAR,
     repo VARCHAR,
@@ -232,6 +318,8 @@ CREATE TABLE IF NOT EXISTS pr_scope_cluster_candidates (
 CREATE INDEX IF NOT EXISTS idx_pr_search_active_run_repo ON pr_search_active_run (repo);
 CREATE INDEX IF NOT EXISTS idx_pr_search_runs_repo_status ON pr_search_runs (repo, status);
 CREATE INDEX IF NOT EXISTS idx_pr_search_documents_run_pr ON pr_search_documents (run_id, pr_number);
+CREATE INDEX IF NOT EXISTS idx_pr_search_documents_run_author ON pr_search_documents (run_id, author_login);
+CREATE INDEX IF NOT EXISTS idx_pr_search_contributors_run_author ON pr_search_contributors (run_id, author_login);
 CREATE INDEX IF NOT EXISTS idx_pr_scope_features_run_pr ON pr_scope_features (run_id, pr_number);
 CREATE INDEX IF NOT EXISTS idx_pr_scope_run_artifacts_run ON pr_scope_run_artifacts (run_id);
 CREATE INDEX IF NOT EXISTS idx_pr_scope_neighbors_run_left ON pr_scope_neighbors (run_id, left_pr_number);
@@ -256,6 +344,9 @@ def connect_pr_search_db(path: Path, *, read_only: bool = False) -> duckdb.DuckDBPyConnection:
 
 def ensure_pr_search_schema(connection: duckdb.DuckDBPyConnection) -> None:
     connection.execute(SCHEMA_SQL)
+    connection.execute(
+        "ALTER TABLE pr_search_documents ADD COLUMN IF NOT EXISTS author_login VARCHAR"
+    )
 
 
 def insert_rows(
@@ -353,6 +444,7 @@ def resolve_active_run(
 def get_run_counts(connection: duckdb.DuckDBPyConnection, *, run_id: str) -> dict[str, int]:
     return {
         "documents": _count(connection, "pr_search_documents", run_id),
+        "contributors": _count(connection, "pr_search_contributors", run_id),
         "features": _count(connection, "pr_scope_features", run_id),
         "run_artifacts": _count(connection, "pr_scope_run_artifacts", run_id),
         "neighbors": _count(connection, "pr_scope_neighbors", run_id),
@@ -375,6 +467,60 @@ def get_document(
     )
 
 
+def get_contributor(
+    connection: duckdb.DuckDBPyConnection,
+    *,
+    run_id: str,
+    author_login: str,
+) -> dict[str, Any] | None:
+    return fetch_one(
+        connection,
+        """
+        SELECT *
+        FROM pr_search_contributors
+        WHERE run_id = ? AND lower(author_login) = lower(?)
+        """,
+        [run_id, author_login],
+    )
+
+
+def get_contributor_pulls(
+    connection: duckdb.DuckDBPyConnection,
+    *,
+    run_id: str,
+    author_login: str,
+    limit: int,
+) -> list[dict[str, Any]]:
+    return fetch_rows(
+        connection,
+        """
+ SELECT
498
+ pr_number,
499
+ github_id,
500
+ author_login,
501
+ state,
502
+ draft,
503
+ merged,
504
+ title,
505
+ base_ref,
506
+ created_at,
507
+ updated_at,
508
+ merged_at,
509
+ additions,
510
+ deletions,
511
+ changed_files,
512
+ comments_count,
513
+ review_comments_count,
514
+ html_url
515
+ FROM pr_search_documents
516
+ WHERE run_id = ? AND lower(author_login) = lower(?)
517
+ ORDER BY updated_at DESC NULLS LAST, pr_number DESC
518
+ LIMIT ?
519
+ """,
520
+ [run_id, author_login, limit],
521
+ )
522
+
523
+
524
  def get_feature(
525
  connection: duckdb.DuckDBPyConnection,
526
  *,
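The schema and helpers above give each indexed run a contributor dimension keyed by (run_id, author_login), with case-insensitive login matching. A minimal sketch of querying the new table directly with DuckDB, assuming a populated database; the file path, run id, and login below are illustrative placeholders, not values from this commit:

from pathlib import Path

import duckdb

db_path = Path("data/pr_search.duckdb")  # hypothetical location
connection = duckdb.connect(str(db_path), read_only=True)
try:
    # Same case-insensitive predicate that get_contributor uses above.
    row = connection.execute(
        """
        SELECT author_login, merged_pr_rate, automation_risk_signal
        FROM pr_search_contributors
        WHERE run_id = ? AND lower(author_login) = lower(?)
        """,
        ["run-0001", "octocat"],  # illustrative run id and login
    ).fetchone()
    print(row)
finally:
    connection.close()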
src/slop_farmer/data/snapshot_source.py ADDED
@@ -0,0 +1,31 @@
+ from __future__ import annotations
+ 
+ from pathlib import Path
+ 
+ from slop_farmer.data.snapshot_materialize import materialize_hf_dataset_snapshot
+ from slop_farmer.data.snapshot_paths import (
+     default_hf_materialize_dir,
+     resolve_snapshot_dir_from_snapshots_root,
+ )
+ 
+ 
+ def resolve_snapshot_source_dir(
+     *,
+     snapshot_dir: Path | None,
+     local_snapshots_root: Path,
+     hf_repo_id: str | None,
+     hf_revision: str | None,
+     hf_materialize_dir: Path | None,
+     hf_output_dir: Path | None = None,
+ ) -> Path:
+     if snapshot_dir is not None:
+         return snapshot_dir.resolve()
+     if hf_repo_id:
+         output_dir = (hf_output_dir or local_snapshots_root.parent).resolve()
+         return materialize_hf_dataset_snapshot(
+             repo_id=hf_repo_id,
+             local_dir=hf_materialize_dir
+             or default_hf_materialize_dir(output_dir, hf_repo_id, hf_revision),
+             revision=hf_revision,
+         ).resolve()
+     return resolve_snapshot_dir_from_snapshots_root(local_snapshots_root.resolve(), None)
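The new module centralizes the source-resolution order that the call sites below all switch to: an explicit snapshot directory wins, an HF dataset repo is materialized next, and otherwise the resolver falls back to the local snapshots root. A minimal usage sketch; the paths are examples, not values from this commit:

from pathlib import Path

from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir

# No explicit directory and no HF repo: resolution falls through to
# resolve_snapshot_dir_from_snapshots_root(Path("data/snapshots"), None).
snapshot_dir = resolve_snapshot_source_dir(
    snapshot_dir=None,
    local_snapshots_root=Path("data") / "snapshots",
    hf_repo_id=None,
    hf_revision=None,
    hf_materialize_dir=None,
)
print(snapshot_dir)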
src/slop_farmer/reports/analysis.py CHANGED
@@ -19,11 +19,7 @@ from rank_bm25 import BM25Okapi
  from slop_farmer.config import AnalysisOptions, MarkdownReportOptions
  from slop_farmer.data.links import build_text_link_rows
  from slop_farmer.data.parquet_io import read_json, read_parquet_rows, write_text
- from slop_farmer.data.snapshot_materialize import materialize_hf_dataset_snapshot
- from slop_farmer.data.snapshot_paths import (
-     default_hf_materialize_dir,
-     resolve_snapshot_dir_from_output,
- )
+ from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
  from slop_farmer.reports.analysis_cache import (
      HYBRID_REVIEW_CACHE_SCHEMA_VERSION,
      PREPARED_REVIEW_UNIT_SCHEMA_VERSION,
@@ -766,18 +762,14 @@ def _artifact_suffix(row: dict[str, Any] | None, kind: str) -> str:
  
  
  def _resolve_snapshot_dir(options: AnalysisOptions) -> Path:
-     if options.hf_repo_id:
-         materialize_dir = options.hf_materialize_dir or default_hf_materialize_dir(
-             options.output_dir,
-             options.hf_repo_id,
-             options.hf_revision,
-         )
-         return materialize_hf_dataset_snapshot(
-             repo_id=options.hf_repo_id,
-             local_dir=materialize_dir,
-             revision=options.hf_revision,
-         ).resolve()
-     return resolve_snapshot_dir_from_output(options.output_dir, options.snapshot_dir)
+     return resolve_snapshot_source_dir(
+         snapshot_dir=options.snapshot_dir,
+         local_snapshots_root=options.output_dir.resolve() / "snapshots",
+         hf_repo_id=options.hf_repo_id,
+         hf_revision=options.hf_revision,
+         hf_materialize_dir=options.hf_materialize_dir,
+         hf_output_dir=options.output_dir,
+     )
  
  
  def _load_snapshot(snapshot_dir: Path) -> SnapshotData:
src/slop_farmer/reports/analysis_service.py CHANGED
@@ -24,6 +24,8 @@ class ActiveSnapshotContext:
  class AnalysisContext:
      active_run: dict[str, Any]
      report: dict[str, Any]
+     report_path: Path
+     report_source: str
      variant_requested: str
      variant_used: str
  
@@ -33,26 +35,31 @@
      *,
      repo: str | None = None,
      variant: str = "auto",
+     analysis_root: Path | None = None,
  ) -> dict[str, Any]:
      active = _resolve_active_snapshot_context(db_path, repo=repo)
-     report_path, variant_used = _resolve_analysis_report_path(
+     report_path, variant_used, report_source = _resolve_analysis_report_path(
          active.snapshot_dir,
+         str(active.active_run["repo"]),
          variant,
+         analysis_root=analysis_root,
          required=False,
      )
      payload = {
          "repo": str(active.active_run["repo"]),
-         "snapshot_id": str(active.active_run["snapshot_id"]),
+         "active_snapshot_id": str(active.active_run["snapshot_id"]),
          "run_id": str(active.active_run["id"]),
          "variant_requested": _normalize_analysis_variant(variant),
          "available": report_path is not None,
      }
-     if report_path is None or variant_used is None:
+     if report_path is None or variant_used is None or report_source is None:
          return payload
      report = _load_report(report_path)
      return {
          **payload,
+         "snapshot_id": str(report.get("snapshot_id") or active.active_run["snapshot_id"]),
          "variant_used": variant_used,
+         "analysis_source": report_source,
          "llm_enrichment": bool(report.get("llm_enrichment")),
          "generated_at": report.get("generated_at"),
          "counts": _analysis_counts(report),
@@ -65,8 +72,14 @@
      pr_number: int,
      repo: str | None = None,
      variant: str = "auto",
+     analysis_root: Path | None = None,
  ) -> dict[str, Any]:
-     context = _load_analysis_context(db_path, repo=repo, variant=variant)
+     context = _load_analysis_context(
+         db_path,
+         repo=repo,
+         variant=variant,
+         analysis_root=analysis_root,
+     )
      meta_bug, rank = _find_meta_bug_for_pr(context.report, pr_number)
      duplicate_pr = _find_duplicate_pr_for_pr(context.report, pr_number)
      return {
@@ -84,8 +97,14 @@
      repo: str | None = None,
      variant: str = "auto",
      limit: int = 50,
+     analysis_root: Path | None = None,
  ) -> dict[str, Any]:
-     context = _load_analysis_context(db_path, repo=repo, variant=variant)
+     context = _load_analysis_context(
+         db_path,
+         repo=repo,
+         variant=variant,
+         analysis_root=analysis_root,
+     )
      meta_bugs = [
          _meta_bug_payload(cluster, rank=index)
          for index, cluster in enumerate(context.report.get("meta_bugs", [])[:limit], start=1)
@@ -103,8 +122,14 @@
      cluster_id: str,
      repo: str | None = None,
      variant: str = "auto",
+     analysis_root: Path | None = None,
  ) -> dict[str, Any]:
-     context = _load_analysis_context(db_path, repo=repo, variant=variant)
+     context = _load_analysis_context(
+         db_path,
+         repo=repo,
+         variant=variant,
+         analysis_root=analysis_root,
+     )
      for index, cluster in enumerate(context.report.get("meta_bugs", []), start=1):
          if str(cluster.get("cluster_id")) != cluster_id:
              continue
@@ -113,7 +138,7 @@
          "meta_bug": _meta_bug_payload(cluster, rank=index),
          "duplicate_pr": _find_duplicate_pr_by_cluster_id(context.report, cluster_id),
      }
-     raise ValueError(f"Analysis cluster {cluster_id!r} was not found in the active snapshot.")
+     raise ValueError(f"Analysis cluster {cluster_id!r} was not found in the active analysis view.")
  
  
  def list_analysis_duplicate_prs(
@@ -122,8 +147,14 @@
      repo: str | None = None,
      variant: str = "auto",
      limit: int = 50,
+     analysis_root: Path | None = None,
  ) -> dict[str, Any]:
-     context = _load_analysis_context(db_path, repo=repo, variant=variant)
+     context = _load_analysis_context(
+         db_path,
+         repo=repo,
+         variant=variant,
+         analysis_root=analysis_root,
+     )
      duplicate_prs = [
          {"rank": index, **dict(entry)}
          for index, entry in enumerate(context.report.get("duplicate_prs", [])[:limit], start=1)
@@ -140,8 +171,14 @@
      *,
      repo: str | None = None,
      variant: str = "auto",
+     analysis_root: Path | None = None,
  ) -> dict[str, Any]:
-     context = _load_analysis_context(db_path, repo=repo, variant=variant)
+     context = _load_analysis_context(
+         db_path,
+         repo=repo,
+         variant=variant,
+         analysis_root=analysis_root,
+     )
      return {
          **_analysis_base_payload(context),
          "best_issue": _best_entry_with_cluster_id(
@@ -180,18 +217,24 @@
      *,
      repo: str | None,
      variant: str,
+     analysis_root: Path | None,
  ) -> AnalysisContext:
      active = _resolve_active_snapshot_context(db_path, repo=repo)
-     report_path, variant_used = _resolve_analysis_report_path(
+     report_path, variant_used, report_source = _resolve_analysis_report_path(
          active.snapshot_dir,
+         str(active.active_run["repo"]),
          variant,
+         analysis_root=analysis_root,
          required=True,
      )
      assert report_path is not None
      assert variant_used is not None
+     assert report_source is not None
      return AnalysisContext(
          active_run=active.active_run,
          report=_load_report(report_path),
+         report_path=report_path,
+         report_source=report_source,
          variant_requested=_normalize_analysis_variant(variant),
          variant_used=variant_used,
      )
@@ -199,31 +242,56 @@
  
  def _resolve_analysis_report_path(
      snapshot_dir: Path,
+     repo: str,
      variant: str,
      *,
+     analysis_root: Path | None,
      required: bool,
- ) -> tuple[Path | None, str | None]:
+ ) -> tuple[Path | None, str | None, str | None]:
      normalized = _normalize_analysis_variant(variant)
+     candidate_dirs = _candidate_analysis_dirs(
+         snapshot_dir=snapshot_dir,
+         repo=repo,
+         analysis_root=analysis_root,
+     )
      if normalized == "auto":
-         hybrid_path = snapshot_dir / ANALYSIS_REPORT_FILENAMES["hybrid"]
-         if hybrid_path.exists():
-             return hybrid_path, "hybrid"
-         deterministic_path = snapshot_dir / ANALYSIS_REPORT_FILENAMES["deterministic"]
-         if deterministic_path.exists():
-             return deterministic_path, "deterministic"
+         for source, directory in candidate_dirs:
+             hybrid_path = directory / ANALYSIS_REPORT_FILENAMES["hybrid"]
+             if hybrid_path.exists():
+                 return hybrid_path, "hybrid", source
+             deterministic_path = directory / ANALYSIS_REPORT_FILENAMES["deterministic"]
+             if deterministic_path.exists():
+                 return deterministic_path, "deterministic", source
          if not required:
-             return None, None
-         raise ValueError("No analysis report was found for the active snapshot.")
-     report_path = snapshot_dir / ANALYSIS_REPORT_FILENAMES[normalized]
-     if report_path.exists():
-         return report_path, normalized
+             return None, None, None
+         raise ValueError(
+             "No analysis report was found for the current analysis path or active snapshot."
+         )
+     for source, directory in candidate_dirs:
+         report_path = directory / ANALYSIS_REPORT_FILENAMES[normalized]
+         if report_path.exists():
+             return report_path, normalized, source
      if not required:
-         return None, None
+         return None, None, None
      raise ValueError(
-         f"{normalized.capitalize()} analysis report was not found for the active snapshot."
+         f"{normalized.capitalize()} analysis report was not found for the current analysis path or active snapshot."
      )
  
  
+ def _candidate_analysis_dirs(
+     *,
+     snapshot_dir: Path,
+     repo: str,
+     analysis_root: Path | None,
+ ) -> list[tuple[str, Path]]:
+     owner, name = repo.split("/", 1)
+     candidates: list[tuple[str, Path]] = []
+     if analysis_root is not None:
+         candidates.append(("current", analysis_root / owner / name / "current"))
+     candidates.append(("snapshot", snapshot_dir))
+     return candidates
+ 
+ 
  def _normalize_analysis_variant(variant: str) -> str:
      normalized = variant.strip().lower()
      if normalized not in ANALYSIS_VARIANTS:
@@ -234,12 +302,16 @@
  
  
  def _analysis_base_payload(context: AnalysisContext) -> dict[str, Any]:
+     active_snapshot_id = str(context.active_run["snapshot_id"])
+     snapshot_id = str(context.report.get("snapshot_id") or active_snapshot_id)
      return {
          "repo": str(context.active_run["repo"]),
-         "snapshot_id": str(context.active_run["snapshot_id"]),
+         "snapshot_id": snapshot_id,
+         "active_snapshot_id": active_snapshot_id,
          "run_id": str(context.active_run["id"]),
          "variant_requested": context.variant_requested,
          "variant_used": context.variant_used,
+         "analysis_source": context.report_source,
          "llm_enrichment": bool(context.report.get("llm_enrichment")),
          "generated_at": context.report.get("generated_at"),
      }
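The report lookup now walks an ordered candidate list rather than a single snapshot directory: with an analysis_root set, root/owner/name/current is tried first (reported as analysis_source "current"), then the active snapshot directory ("snapshot"). A standalone sketch of that ordering, mirroring _candidate_analysis_dirs above; the repo string and paths are illustrative:

from pathlib import Path


def candidate_analysis_dirs(
    *, snapshot_dir: Path, repo: str, analysis_root: Path | None
) -> list[tuple[str, Path]]:
    # Standalone copy of the helper added above, for illustration only.
    owner, name = repo.split("/", 1)
    candidates: list[tuple[str, Path]] = []
    if analysis_root is not None:
        candidates.append(("current", analysis_root / owner / name / "current"))
    candidates.append(("snapshot", snapshot_dir))
    return candidates


for source, directory in candidate_analysis_dirs(
    snapshot_dir=Path("data/snapshots/2026-01-01"),
    repo="huggingface/transformers",
    analysis_root=Path("eval_data/analysis"),
):
    print(source, directory)
# current eval_data/analysis/huggingface/transformers/current
# snapshot data/snapshots/2026-01-01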
src/slop_farmer/reports/dashboard.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any
  
  from slop_farmer.config import DashboardDataOptions
  from slop_farmer.data.parquet_io import read_json, read_parquet_rows
- from slop_farmer.data.snapshot_paths import resolve_snapshot_dir_from_snapshots_root
+ from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
  
  
  def run_dashboard_data(options: DashboardDataOptions) -> Path:
@@ -88,7 +88,14 @@ def _resolve_snapshot_dir(options: DashboardDataOptions) -> Path:
          if options.snapshot_root is not None
          else (Path("data") / "snapshots").resolve()
      )
-     return resolve_snapshot_dir_from_snapshots_root(snapshots_root, options.snapshot_dir)
+     return resolve_snapshot_source_dir(
+         snapshot_dir=options.snapshot_dir,
+         local_snapshots_root=snapshots_root,
+         hf_repo_id=options.hf_repo_id,
+         hf_revision=options.hf_revision,
+         hf_materialize_dir=options.hf_materialize_dir,
+         hf_output_dir=snapshots_root.parent,
+     )
  
  
  def _read_optional_json(path: Path) -> dict[str, Any]:
src/slop_farmer/reports/new_contributor_report.py CHANGED
@@ -12,7 +12,7 @@ from typing import Any
  from slop_farmer.config import NewContributorReportOptions, resolve_github_token
  from slop_farmer.data.http import urlopen_with_retry
  from slop_farmer.data.parquet_io import read_json, read_parquet_rows, write_parquet, write_text
- from slop_farmer.data.snapshot_paths import resolve_snapshot_dir_from_output
+ from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
  from slop_farmer.reports.user_activity import summarize_user
  
  GRAPHQL_URL = "https://api.github.com/graphql"
@@ -131,7 +131,14 @@ def run_new_contributor_report(options: NewContributorReportOptions) -> Path:
  
  
  def _resolve_snapshot_dir(options: NewContributorReportOptions) -> Path:
-     return resolve_snapshot_dir_from_output(options.output_dir, options.snapshot_dir)
+     return resolve_snapshot_source_dir(
+         snapshot_dir=options.snapshot_dir,
+         local_snapshots_root=options.output_dir.resolve() / "snapshots",
+         hf_repo_id=options.hf_repo_id,
+         hf_revision=options.hf_revision,
+         hf_materialize_dir=options.hf_materialize_dir,
+         hf_output_dir=options.output_dir,
+     )
  
  
  def _load_snapshot(snapshot_dir: Path) -> dict[str, Any]:
@@ -244,7 +251,6 @@ def _report_contributors(
              previous_report_reusable
              and previous_entry is not None
              and not previous_entry.get("fetch_error")
-             and not known_via_prior_merged_pr
          ):
              contributors.append(
                  _reused_previous_report_entry(
@@ -256,6 +262,8 @@
                  )
              )
              reused_previous_report += 1
+             if known_via_prior_merged_pr:
+                 reused_known_merged += 1
              continue
          try:
              summary = summarize_user(row["author_login"], options.window_days, None)
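The reuse change above drops the known_via_prior_merged_pr exclusion: a previously reported contributor is now reused whenever the prior entry is intact, and known-merged contributors are simply tallied separately. A hypothetical helper distilling the new condition; the names follow the diff, but this function itself is not part of the commit:

from typing import Any


def should_reuse_previous_entry(
    previous_report_reusable: bool,
    previous_entry: dict[str, Any] | None,
) -> bool:
    # After this change, a contributor known via a prior merged PR no longer
    # forces a live re-fetch; only a missing or errored previous entry does.
    return (
        previous_report_reusable
        and previous_entry is not None
        and not previous_entry.get("fetch_error")
    )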
src/slop_farmer/reports/pr_scope.py CHANGED
@@ -42,11 +42,7 @@ from typing import Any
  from pydantic import BaseModel, Field
  
  from slop_farmer.data.parquet_io import read_json, read_parquet_rows
- from slop_farmer.data.snapshot_materialize import materialize_hf_dataset_snapshot
- from slop_farmer.data.snapshot_paths import (
-     default_hf_materialize_dir,
-     resolve_snapshot_dir_from_output,
- )
+ from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
  from slop_farmer.reports.pr_heuristics import (
      compile_cluster_suppression_rules,
      suppressed_pull_request_reasons,
@@ -260,17 +256,14 @@ def run_pr_scope_report(options: Any) -> Path:
  
  
  def _resolve_snapshot_dir(options: Any) -> Path:
-     if options.hf_repo_id:
-         snapshot_dir = materialize_hf_dataset_snapshot(
-             repo_id=options.hf_repo_id,
-             local_dir=options.hf_materialize_dir
-             or default_hf_materialize_dir(
-                 options.output_dir, options.hf_repo_id, options.hf_revision
-             ),
-             revision=options.hf_revision,
-         )
-         return snapshot_dir.resolve()
-     return resolve_snapshot_dir_from_output(options.output_dir, options.snapshot_dir)
+     return resolve_snapshot_source_dir(
+         snapshot_dir=options.snapshot_dir,
+         local_snapshots_root=options.output_dir.resolve() / "snapshots",
+         hf_repo_id=options.hf_repo_id,
+         hf_revision=options.hf_revision,
+         hf_materialize_dir=options.hf_materialize_dir,
+         hf_output_dir=options.output_dir,
+     )
  
  
  def _load_snapshot_context(snapshot_dir: Path) -> dict[str, Any]:
src/slop_farmer/reports/pr_search_scope.py CHANGED
@@ -10,11 +10,7 @@ from typing import Any
  
  from slop_farmer.config import PrSearchRefreshOptions
  from slop_farmer.data.parquet_io import read_json, read_parquet_rows
- from slop_farmer.data.snapshot_materialize import materialize_hf_dataset_snapshot
- from slop_farmer.data.snapshot_paths import (
-     default_hf_materialize_dir,
-     resolve_snapshot_dir_from_output,
- )
+ from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
  from slop_farmer.reports.pr_heuristics import (
      compile_cluster_suppression_rules,
      suppressed_pull_request_reasons,
@@ -36,17 +32,14 @@ DEFAULT_CANDIDATE_LIMIT = 5
  
  
  def resolve_pr_search_snapshot_dir(options: PrSearchRefreshOptions) -> Path:
-     if options.hf_repo_id:
-         snapshot_dir = materialize_hf_dataset_snapshot(
-             repo_id=options.hf_repo_id,
-             local_dir=options.hf_materialize_dir
-             or default_hf_materialize_dir(
-                 options.output_dir, options.hf_repo_id, options.hf_revision
-             ),
-             revision=options.hf_revision,
-         )
-         return snapshot_dir.resolve()
-     return resolve_snapshot_dir_from_output(options.output_dir, options.snapshot_dir)
+     return resolve_snapshot_source_dir(
+         snapshot_dir=options.snapshot_dir,
+         local_snapshots_root=options.output_dir.resolve() / "snapshots",
+         hf_repo_id=options.hf_repo_id,
+         hf_revision=options.hf_revision,
+         hf_materialize_dir=options.hf_materialize_dir,
+         hf_output_dir=options.output_dir,
+     )
  
  
  def load_pr_search_snapshot(snapshot_dir: Path) -> dict[str, Any]:
@@ -54,6 +47,7 @@ def load_pr_search_snapshot(snapshot_dir: Path) -> dict[str, Any]:
      manifest = read_json(manifest_path) if manifest_path.exists() else {}
      pull_requests = read_parquet_rows(snapshot_dir / "pull_requests.parquet")
      pr_files = read_parquet_rows(snapshot_dir / "pr_files.parquet")
+     contributors = read_parquet_rows(snapshot_dir / "new_contributors.parquet")
      repo = manifest.get("repo") or (pull_requests[0].get("repo") if pull_requests else None) or ""
      snapshot_id = manifest.get("snapshot_id") or snapshot_dir.name
      return {
@@ -62,6 +56,7 @@ def load_pr_search_snapshot(snapshot_dir: Path) -> dict[str, Any]:
          "manifest": manifest,
          "pull_requests": pull_requests,
          "pr_files": pr_files,
+         "contributors": contributors,
      }
  
  
@@ -412,6 +407,7 @@ def _document_row(row: Mapping[str, Any]) -> dict[str, Any]:
      return {
          "pr_number": int(row["number"]),
          "github_id": row.get("github_id"),
+         "author_login": row.get("author_login"),
          "state": row.get("state"),
          "draft": bool(row.get("draft")),
          "merged": bool(row.get("merged")),
src/slop_farmer/reports/pr_search_service.py CHANGED
@@ -1,7 +1,7 @@
  from __future__ import annotations
  
  import json
- from collections.abc import Iterable, Mapping
+ from collections.abc import Iterable, Mapping, Sequence
  from contextlib import suppress
  from pathlib import Path
  from typing import Any, Protocol
@@ -17,6 +17,8 @@ from slop_farmer.data.search_duckdb import (
      get_cluster,
      get_cluster_ids_for_prs,
      get_cluster_members,
+     get_contributor,
+     get_contributor_pulls,
      get_document,
      get_feature,
      get_pair_neighbor_row,
@@ -99,6 +101,16 @@ def run_pr_search_refresh(options: PrSearchRefreshOptions) -> dict[str, Any]:
          "pr_search_documents",
          _scoped_rows(artifacts["documents"], run_id=run_id, repo=repo),
      )
+     insert_rows(
+         connection,
+         "pr_search_contributors",
+         _contributor_rows(
+             snapshot["contributors"],
+             run_id=run_id,
+             repo=repo,
+             snapshot_id=str(snapshot["snapshot_id"]),
+         ),
+     )
      insert_rows(
          connection,
          "pr_scope_features",
@@ -290,6 +302,85 @@ def get_pr_search_candidate_clusters(
          connection.close()
  
  
+ def get_pr_search_contributor(
+     db_path: Path,
+     *,
+     author_login: str,
+     repo: str | None = None,
+ ) -> dict[str, Any]:
+     connection = connect_pr_search_db(db_path, read_only=True)
+     try:
+         active_run = resolve_active_run(connection, repo=repo)
+         run_id = str(active_run["id"])
+         contributor = _require_contributor(connection, run_id=run_id, author_login=author_login)
+         pulls = _document_rows(
+             get_contributor_pulls(connection, run_id=run_id, author_login=author_login, limit=20)
+         )
+         return {
+             "repo": active_run["repo"],
+             "snapshot_id": active_run["snapshot_id"],
+             "run_id": run_id,
+             "contributor": contributor,
+             "pulls": pulls,
+             "pull_count": len(pulls),
+         }
+     finally:
+         connection.close()
+ 
+ 
+ def get_pr_search_contributor_pulls(
+     db_path: Path,
+     *,
+     author_login: str,
+     repo: str | None = None,
+     limit: int = 20,
+ ) -> dict[str, Any]:
+     connection = connect_pr_search_db(db_path, read_only=True)
+     try:
+         active_run = resolve_active_run(connection, repo=repo)
+         run_id = str(active_run["id"])
+         contributor = _require_contributor(connection, run_id=run_id, author_login=author_login)
+         pulls = _document_rows(
+             get_contributor_pulls(connection, run_id=run_id, author_login=author_login, limit=limit)
+         )
+         return {
+             "repo": active_run["repo"],
+             "snapshot_id": active_run["snapshot_id"],
+             "run_id": run_id,
+             "contributor": contributor,
+             "pulls": pulls,
+             "pull_count": len(pulls),
+         }
+     finally:
+         connection.close()
+ 
+ 
+ def get_pr_search_pull_contributor(
+     db_path: Path,
+     *,
+     pr_number: int,
+     repo: str | None = None,
+ ) -> dict[str, Any]:
+     connection = connect_pr_search_db(db_path, read_only=True)
+     try:
+         active_run = resolve_active_run(connection, repo=repo)
+         run_id = str(active_run["id"])
+         document = _require_document(connection, run_id=run_id, pr_number=pr_number)
+         author_login = str(document.get("author_login") or "").strip()
+         if not author_login:
+             raise ValueError(f"PR #{pr_number} does not have an indexed author_login.")
+         contributor = _require_contributor(connection, run_id=run_id, author_login=author_login)
+         return {
+             "repo": active_run["repo"],
+             "snapshot_id": active_run["snapshot_id"],
+             "run_id": run_id,
+             "pr": _without_json_fields(document),
+             "contributor": contributor,
+         }
+     finally:
+         connection.close()
+ 
+ 
  def get_pr_search_similar_lookup(
      db_path: Path,
      *,
@@ -801,6 +892,15 @@ def _require_feature(connection: Any, *, run_id: str, pr_number: int) -> dict[str, Any]:
      return feature
  
  
+ def _require_contributor(connection: Any, *, run_id: str, author_login: str) -> dict[str, Any]:
+     contributor = get_contributor(connection, run_id=run_id, author_login=author_login)
+     if contributor is None:
+         raise ValueError(
+             f"Contributor {author_login!r} was not found in the active indexed universe."
+         )
+     return _contributor_row(contributor)
+ 
+ 
  def _json_list(raw: Any) -> list[str]:
      if isinstance(raw, list):
          return [str(item) for item in raw]
@@ -838,6 +938,71 @@ def _without_json_fields(row: Mapping[str, Any]) -> dict[str, Any]:
      return {str(key): value for key, value in row.items() if not str(key).endswith("_json")}
  
  
+ def _document_rows(rows: Sequence[Mapping[str, Any]]) -> list[dict[str, Any]]:
+     return [_without_json_fields(row) for row in rows]
+ 
+ 
+ def _contributor_rows(
+     rows: list[Mapping[str, Any]],
+     *,
+     run_id: str,
+     repo: str,
+     snapshot_id: str,
+ ) -> list[dict[str, Any]]:
+     return [
+         {
+             "run_id": run_id,
+             "repo": repo,
+             "snapshot_id": snapshot_id,
+             "report_generated_at": row.get("report_generated_at"),
+             "window_days": row.get("window_days"),
+             "author_login": row.get("author_login"),
+             "name": row.get("name"),
+             "profile_url": row.get("profile_url"),
+             "repo_pull_requests_url": row.get("repo_pull_requests_url"),
+             "repo_issues_url": row.get("repo_issues_url"),
+             "repo_first_seen_at": row.get("repo_first_seen_at"),
+             "repo_last_seen_at": row.get("repo_last_seen_at"),
+             "repo_primary_artifact_count": row.get("repo_primary_artifact_count"),
+             "repo_artifact_count": row.get("repo_artifact_count"),
+             "snapshot_issue_count": row.get("snapshot_issue_count"),
+             "snapshot_pr_count": row.get("snapshot_pr_count"),
+             "snapshot_comment_count": row.get("snapshot_comment_count"),
+             "snapshot_review_count": row.get("snapshot_review_count"),
+             "snapshot_review_comment_count": row.get("snapshot_review_comment_count"),
+             "repo_association": row.get("repo_association"),
+             "new_to_repo": row.get("new_to_repo"),
+             "first_seen_in_snapshot": row.get("first_seen_in_snapshot"),
+             "report_reason": row.get("report_reason"),
+             "account_age_days": row.get("account_age_days"),
+             "young_account": row.get("young_account"),
+             "follow_through_score": row.get("follow_through_score"),
+             "breadth_score": row.get("breadth_score"),
+             "automation_risk_signal": row.get("automation_risk_signal"),
+             "heuristic_note": row.get("heuristic_note"),
+             "public_orgs_json": row.get("public_orgs"),
+             "visible_authored_pr_count": row.get("visible_authored_pr_count"),
+             "merged_pr_count": row.get("merged_pr_count"),
+             "closed_unmerged_pr_count": row.get("closed_unmerged_pr_count"),
+             "open_pr_count": row.get("open_pr_count"),
+             "merged_pr_rate": row.get("merged_pr_rate"),
+             "closed_unmerged_pr_rate": row.get("closed_unmerged_pr_rate"),
+             "still_open_pr_rate": row.get("still_open_pr_rate"),
+             "distinct_repos_with_authored_prs": row.get("distinct_repos_with_authored_prs"),
+             "distinct_repos_with_open_prs": row.get("distinct_repos_with_open_prs"),
+             "fetch_error": row.get("fetch_error"),
+         }
+         for row in rows
+     ]
+ 
+ 
+ def _contributor_row(row: Mapping[str, Any]) -> dict[str, Any]:
+     return {
+         **_without_json_fields(row),
+         "public_orgs": _json_list(row.get("public_orgs_json")),
+     }
+ 
+ 
  def _normalize_lookup_mode(mode: str) -> str:
      normalized = mode.strip().lower()
      if normalized not in {"auto", "indexed", "live"}:
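Together these service functions expose the contributor index over the active run. A hedged end-to-end sketch; the database path, login, and PR number are placeholders, and the printed keys follow the payloads constructed above:

from pathlib import Path

from slop_farmer.reports.pr_search_service import (
    get_pr_search_contributor,
    get_pr_search_pull_contributor,
)

db_path = Path("data/pr_search.duckdb")  # hypothetical location

# Contributor profile plus up to 20 of their indexed PRs.
profile = get_pr_search_contributor(db_path, author_login="octocat")
print(profile["pull_count"], profile["contributor"]["public_orgs"])

# Reverse lookup: from a PR number to its author's contributor row.
linked = get_pr_search_pull_contributor(db_path, pr_number=12345)
print(linked["pr"]["author_login"], linked["contributor"]["report_reason"])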
uv.lock CHANGED
@@ -4,7 +4,7 @@ requires-python = ">=3.13.5"
4
 
5
  [[package]]
6
  name = "a2a-sdk"
7
- version = "0.3.25"
8
  source = { registry = "https://pypi.org/simple" }
9
  dependencies = [
10
  { name = "google-api-core" },
@@ -13,9 +13,9 @@ dependencies = [
13
  { name = "protobuf" },
14
  { name = "pydantic" },
15
  ]
16
- sdist = { url = "https://files.pythonhosted.org/packages/55/83/3c99b276d09656cce039464509f05bf385e5600d6dc046a131bbcf686930/a2a_sdk-0.3.25.tar.gz", hash = "sha256:afda85bab8d6af0c5d15e82f326c94190f6be8a901ce562d045a338b7127242f", size = 270638, upload-time = "2026-03-10T13:08:46.417Z" }
17
  wheels = [
18
- { url = "https://files.pythonhosted.org/packages/bd/f9/6a62520b7ecb945188a6e1192275f4732ff9341cd4629bc975a6c146aeab/a2a_sdk-0.3.25-py3-none-any.whl", hash = "sha256:2fce38faea82eb0b6f9f9c2bcf761b0d78612c80ef0e599b50d566db1b2654b5", size = 149609, upload-time = "2026-03-10T13:08:44.7Z" },
19
  ]
20
 
21
  [[package]]
@@ -53,7 +53,7 @@ wheels = [
53
 
54
  [[package]]
55
  name = "aiohttp"
56
- version = "3.13.3"
57
  source = { registry = "https://pypi.org/simple" }
58
  dependencies = [
59
  { name = "aiohappyeyeballs" },
@@ -64,59 +64,59 @@ dependencies = [
64
  { name = "propcache" },
65
  { name = "yarl" },
66
  ]
67
- sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = "2026-01-03T17:33:05.204Z" }
68
- wheels = [
69
- { url = "https://files.pythonhosted.org/packages/97/8a/12ca489246ca1faaf5432844adbfce7ff2cc4997733e0af120869345643a/aiohttp-3.13.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5dff64413671b0d3e7d5918ea490bdccb97a4ad29b3f311ed423200b2203e01c", size = 734190, upload-time = "2026-01-03T17:30:45.832Z" },
70
- { url = "https://files.pythonhosted.org/packages/32/08/de43984c74ed1fca5c014808963cc83cb00d7bb06af228f132d33862ca76/aiohttp-3.13.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:87b9aab6d6ed88235aa2970294f496ff1a1f9adcd724d800e9b952395a80ffd9", size = 491783, upload-time = "2026-01-03T17:30:47.466Z" },
71
- { url = "https://files.pythonhosted.org/packages/17/f8/8dd2cf6112a5a76f81f81a5130c57ca829d101ad583ce57f889179accdda/aiohttp-3.13.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:425c126c0dc43861e22cb1c14ba4c8e45d09516d0a3ae0a3f7494b79f5f233a3", size = 490704, upload-time = "2026-01-03T17:30:49.373Z" },
72
- { url = "https://files.pythonhosted.org/packages/6d/40/a46b03ca03936f832bc7eaa47cfbb1ad012ba1be4790122ee4f4f8cba074/aiohttp-3.13.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9120f7093c2a32d9647abcaf21e6ad275b4fbec5b55969f978b1a97c7c86bf", size = 1720652, upload-time = "2026-01-03T17:30:50.974Z" },
73
- { url = "https://files.pythonhosted.org/packages/f7/7e/917fe18e3607af92657e4285498f500dca797ff8c918bd7d90b05abf6c2a/aiohttp-3.13.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:697753042d57f4bf7122cab985bf15d0cef23c770864580f5af4f52023a56bd6", size = 1692014, upload-time = "2026-01-03T17:30:52.729Z" },
74
- { url = "https://files.pythonhosted.org/packages/71/b6/cefa4cbc00d315d68973b671cf105b21a609c12b82d52e5d0c9ae61d2a09/aiohttp-3.13.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6de499a1a44e7de70735d0b39f67c8f25eb3d91eb3103be99ca0fa882cdd987d", size = 1759777, upload-time = "2026-01-03T17:30:54.537Z" },
75
- { url = "https://files.pythonhosted.org/packages/fb/e3/e06ee07b45e59e6d81498b591fc589629be1553abb2a82ce33efe2a7b068/aiohttp-3.13.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:37239e9f9a7ea9ac5bf6b92b0260b01f8a22281996da609206a84df860bc1261", size = 1861276, upload-time = "2026-01-03T17:30:56.512Z" },
76
- { url = "https://files.pythonhosted.org/packages/7c/24/75d274228acf35ceeb2850b8ce04de9dd7355ff7a0b49d607ee60c29c518/aiohttp-3.13.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f76c1e3fe7d7c8afad7ed193f89a292e1999608170dcc9751a7462a87dfd5bc0", size = 1743131, upload-time = "2026-01-03T17:30:58.256Z" },
77
- { url = "https://files.pythonhosted.org/packages/04/98/3d21dde21889b17ca2eea54fdcff21b27b93f45b7bb94ca029c31ab59dc3/aiohttp-3.13.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fc290605db2a917f6e81b0e1e0796469871f5af381ce15c604a3c5c7e51cb730", size = 1556863, upload-time = "2026-01-03T17:31:00.445Z" },
78
- { url = "https://files.pythonhosted.org/packages/9e/84/da0c3ab1192eaf64782b03971ab4055b475d0db07b17eff925e8c93b3aa5/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4021b51936308aeea0367b8f006dc999ca02bc118a0cc78c303f50a2ff6afb91", size = 1682793, upload-time = "2026-01-03T17:31:03.024Z" },
79
- { url = "https://files.pythonhosted.org/packages/ff/0f/5802ada182f575afa02cbd0ec5180d7e13a402afb7c2c03a9aa5e5d49060/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49a03727c1bba9a97d3e93c9f93ca03a57300f484b6e935463099841261195d3", size = 1716676, upload-time = "2026-01-03T17:31:04.842Z" },
80
- { url = "https://files.pythonhosted.org/packages/3f/8c/714d53bd8b5a4560667f7bbbb06b20c2382f9c7847d198370ec6526af39c/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3d9908a48eb7416dc1f4524e69f1d32e5d90e3981e4e37eb0aa1cd18f9cfa2a4", size = 1733217, upload-time = "2026-01-03T17:31:06.868Z" },
81
- { url = "https://files.pythonhosted.org/packages/7d/79/e2176f46d2e963facea939f5be2d26368ce543622be6f00a12844d3c991f/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2712039939ec963c237286113c68dbad80a82a4281543f3abf766d9d73228998", size = 1552303, upload-time = "2026-01-03T17:31:08.958Z" },
82
- { url = "https://files.pythonhosted.org/packages/ab/6a/28ed4dea1759916090587d1fe57087b03e6c784a642b85ef48217b0277ae/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7bfdc049127717581866fa4708791220970ce291c23e28ccf3922c700740fdc0", size = 1763673, upload-time = "2026-01-03T17:31:10.676Z" },
83
- { url = "https://files.pythonhosted.org/packages/e8/35/4a3daeb8b9fab49240d21c04d50732313295e4bd813a465d840236dd0ce1/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8057c98e0c8472d8846b9c79f56766bcc57e3e8ac7bfd510482332366c56c591", size = 1721120, upload-time = "2026-01-03T17:31:12.575Z" },
84
- { url = "https://files.pythonhosted.org/packages/bc/9f/d643bb3c5fb99547323e635e251c609fbbc660d983144cfebec529e09264/aiohttp-3.13.3-cp313-cp313-win32.whl", hash = "sha256:1449ceddcdbcf2e0446957863af03ebaaa03f94c090f945411b61269e2cb5daf", size = 427383, upload-time = "2026-01-03T17:31:14.382Z" },
85
- { url = "https://files.pythonhosted.org/packages/4e/f1/ab0395f8a79933577cdd996dd2f9aa6014af9535f65dddcf88204682fe62/aiohttp-3.13.3-cp313-cp313-win_amd64.whl", hash = "sha256:693781c45a4033d31d4187d2436f5ac701e7bbfe5df40d917736108c1cc7436e", size = 453899, upload-time = "2026-01-03T17:31:15.958Z" },
86
- { url = "https://files.pythonhosted.org/packages/99/36/5b6514a9f5d66f4e2597e40dea2e3db271e023eb7a5d22defe96ba560996/aiohttp-3.13.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:ea37047c6b367fd4bd632bff8077449b8fa034b69e812a18e0132a00fae6e808", size = 737238, upload-time = "2026-01-03T17:31:17.909Z" },
87
- { url = "https://files.pythonhosted.org/packages/f7/49/459327f0d5bcd8c6c9ca69e60fdeebc3622861e696490d8674a6d0cb90a6/aiohttp-3.13.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6fc0e2337d1a4c3e6acafda6a78a39d4c14caea625124817420abceed36e2415", size = 492292, upload-time = "2026-01-03T17:31:19.919Z" },
88
- { url = "https://files.pythonhosted.org/packages/e8/0b/b97660c5fd05d3495b4eb27f2d0ef18dc1dc4eff7511a9bf371397ff0264/aiohttp-3.13.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c685f2d80bb67ca8c3837823ad76196b3694b0159d232206d1e461d3d434666f", size = 493021, upload-time = "2026-01-03T17:31:21.636Z" },
89
- { url = "https://files.pythonhosted.org/packages/54/d4/438efabdf74e30aeceb890c3290bbaa449780583b1270b00661126b8aae4/aiohttp-3.13.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e377758516d262bde50c2584fc6c578af272559c409eecbdd2bae1601184d6", size = 1717263, upload-time = "2026-01-03T17:31:23.296Z" },
90
- { url = "https://files.pythonhosted.org/packages/71/f2/7bddc7fd612367d1459c5bcf598a9e8f7092d6580d98de0e057eb42697ad/aiohttp-3.13.3-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:34749271508078b261c4abb1767d42b8d0c0cc9449c73a4df494777dc55f0687", size = 1669107, upload-time = "2026-01-03T17:31:25.334Z" },
91
- { url = "https://files.pythonhosted.org/packages/00/5a/1aeaecca40e22560f97610a329e0e5efef5e0b5afdf9f857f0d93839ab2e/aiohttp-3.13.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:82611aeec80eb144416956ec85b6ca45a64d76429c1ed46ae1b5f86c6e0c9a26", size = 1760196, upload-time = "2026-01-03T17:31:27.394Z" },
92
- { url = "https://files.pythonhosted.org/packages/f8/f8/0ff6992bea7bd560fc510ea1c815f87eedd745fe035589c71ce05612a19a/aiohttp-3.13.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2fff83cfc93f18f215896e3a190e8e5cb413ce01553901aca925176e7568963a", size = 1843591, upload-time = "2026-01-03T17:31:29.238Z" },
93
- { url = "https://files.pythonhosted.org/packages/e3/d1/e30e537a15f53485b61f5be525f2157da719819e8377298502aebac45536/aiohttp-3.13.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bbe7d4cecacb439e2e2a8a1a7b935c25b812af7a5fd26503a66dadf428e79ec1", size = 1720277, upload-time = "2026-01-03T17:31:31.053Z" },
94
- { url = "https://files.pythonhosted.org/packages/84/45/23f4c451d8192f553d38d838831ebbc156907ea6e05557f39563101b7717/aiohttp-3.13.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b928f30fe49574253644b1ca44b1b8adbd903aa0da4b9054a6c20fc7f4092a25", size = 1548575, upload-time = "2026-01-03T17:31:32.87Z" },
95
- { url = "https://files.pythonhosted.org/packages/6a/ed/0a42b127a43712eda7807e7892c083eadfaf8429ca8fb619662a530a3aab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7b5e8fe4de30df199155baaf64f2fcd604f4c678ed20910db8e2c66dc4b11603", size = 1679455, upload-time = "2026-01-03T17:31:34.76Z" },
96
- { url = "https://files.pythonhosted.org/packages/2e/b5/c05f0c2b4b4fe2c9d55e73b6d3ed4fd6c9dc2684b1d81cbdf77e7fad9adb/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:8542f41a62bcc58fc7f11cf7c90e0ec324ce44950003feb70640fc2a9092c32a", size = 1687417, upload-time = "2026-01-03T17:31:36.699Z" },
97
- { url = "https://files.pythonhosted.org/packages/c9/6b/915bc5dad66aef602b9e459b5a973529304d4e89ca86999d9d75d80cbd0b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5e1d8c8b8f1d91cd08d8f4a3c2b067bfca6ec043d3ff36de0f3a715feeedf926", size = 1729968, upload-time = "2026-01-03T17:31:38.622Z" },
98
- { url = "https://files.pythonhosted.org/packages/11/3b/e84581290a9520024a08640b63d07673057aec5ca548177a82026187ba73/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:90455115e5da1c3c51ab619ac57f877da8fd6d73c05aacd125c5ae9819582aba", size = 1545690, upload-time = "2026-01-03T17:31:40.57Z" },
99
- { url = "https://files.pythonhosted.org/packages/f5/04/0c3655a566c43fd647c81b895dfe361b9f9ad6d58c19309d45cff52d6c3b/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:042e9e0bcb5fba81886c8b4fbb9a09d6b8a00245fd8d88e4d989c1f96c74164c", size = 1746390, upload-time = "2026-01-03T17:31:42.857Z" },
100
- { url = "https://files.pythonhosted.org/packages/1f/53/71165b26978f719c3419381514c9690bd5980e764a09440a10bb816ea4ab/aiohttp-3.13.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2eb752b102b12a76ca02dff751a801f028b4ffbbc478840b473597fc91a9ed43", size = 1702188, upload-time = "2026-01-03T17:31:44.984Z" },
101
- { url = "https://files.pythonhosted.org/packages/29/a7/cbe6c9e8e136314fa1980da388a59d2f35f35395948a08b6747baebb6aa6/aiohttp-3.13.3-cp314-cp314-win32.whl", hash = "sha256:b556c85915d8efaed322bf1bdae9486aa0f3f764195a0fb6ee962e5c71ef5ce1", size = 433126, upload-time = "2026-01-03T17:31:47.463Z" },
102
- { url = "https://files.pythonhosted.org/packages/de/56/982704adea7d3b16614fc5936014e9af85c0e34b58f9046655817f04306e/aiohttp-3.13.3-cp314-cp314-win_amd64.whl", hash = "sha256:9bf9f7a65e7aa20dd764151fb3d616c81088f91f8df39c3893a536e279b4b984", size = 459128, upload-time = "2026-01-03T17:31:49.2Z" },
103
- { url = "https://files.pythonhosted.org/packages/6c/2a/3c79b638a9c3d4658d345339d22070241ea341ed4e07b5ac60fb0f418003/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:05861afbbec40650d8a07ea324367cb93e9e8cc7762e04dd4405df99fa65159c", size = 769512, upload-time = "2026-01-03T17:31:51.134Z" },
104
- { url = "https://files.pythonhosted.org/packages/29/b9/3e5014d46c0ab0db8707e0ac2711ed28c4da0218c358a4e7c17bae0d8722/aiohttp-3.13.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2fc82186fadc4a8316768d61f3722c230e2c1dcab4200d52d2ebdf2482e47592", size = 506444, upload-time = "2026-01-03T17:31:52.85Z" },
105
- { url = "https://files.pythonhosted.org/packages/90/03/c1d4ef9a054e151cd7839cdc497f2638f00b93cbe8043983986630d7a80c/aiohttp-3.13.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0add0900ff220d1d5c5ebbf99ed88b0c1bbf87aa7e4262300ed1376a6b13414f", size = 510798, upload-time = "2026-01-03T17:31:54.91Z" },
106
- { url = "https://files.pythonhosted.org/packages/ea/76/8c1e5abbfe8e127c893fe7ead569148a4d5a799f7cf958d8c09f3eedf097/aiohttp-3.13.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:568f416a4072fbfae453dcf9a99194bbb8bdeab718e08ee13dfa2ba0e4bebf29", size = 1868835, upload-time = "2026-01-03T17:31:56.733Z" },
107
- { url = "https://files.pythonhosted.org/packages/8e/ac/984c5a6f74c363b01ff97adc96a3976d9c98940b8969a1881575b279ac5d/aiohttp-3.13.3-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:add1da70de90a2569c5e15249ff76a631ccacfe198375eead4aadf3b8dc849dc", size = 1720486, upload-time = "2026-01-03T17:31:58.65Z" },
108
- { url = "https://files.pythonhosted.org/packages/b2/9a/b7039c5f099c4eb632138728828b33428585031a1e658d693d41d07d89d1/aiohttp-3.13.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:10b47b7ba335d2e9b1239fa571131a87e2d8ec96b333e68b2a305e7a98b0bae2", size = 1847951, upload-time = "2026-01-03T17:32:00.989Z" },
109
- { url = "https://files.pythonhosted.org/packages/3c/02/3bec2b9a1ba3c19ff89a43a19324202b8eb187ca1e928d8bdac9bbdddebd/aiohttp-3.13.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4dce1c718e38081c8f35f323209d4c1df7d4db4bab1b5c88a6b4d12b74587", size = 1941001, upload-time = "2026-01-03T17:32:03.122Z" },
110
- { url = "https://files.pythonhosted.org/packages/37/df/d879401cedeef27ac4717f6426c8c36c3091c6e9f08a9178cc87549c537f/aiohttp-3.13.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34bac00a67a812570d4a460447e1e9e06fae622946955f939051e7cc895cfab8", size = 1797246, upload-time = "2026-01-03T17:32:05.255Z" },
111
- { url = "https://files.pythonhosted.org/packages/8d/15/be122de1f67e6953add23335c8ece6d314ab67c8bebb3f181063010795a7/aiohttp-3.13.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a19884d2ee70b06d9204b2727a7b9f983d0c684c650254679e716b0b77920632", size = 1627131, upload-time = "2026-01-03T17:32:07.607Z" },
112
- { url = "https://files.pythonhosted.org/packages/12/12/70eedcac9134cfa3219ab7af31ea56bc877395b1ac30d65b1bc4b27d0438/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ca7f2bb6ba8348a3614c7918cc4bb73268c5ac2a207576b7afea19d3d9f64", size = 1795196, upload-time = "2026-01-03T17:32:09.59Z" },
113
- { url = "https://files.pythonhosted.org/packages/32/11/b30e1b1cd1f3054af86ebe60df96989c6a414dd87e27ad16950eee420bea/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:b0d95340658b9d2f11d9697f59b3814a9d3bb4b7a7c20b131df4bcef464037c0", size = 1782841, upload-time = "2026-01-03T17:32:11.445Z" },
114
- { url = "https://files.pythonhosted.org/packages/88/0d/d98a9367b38912384a17e287850f5695c528cff0f14f791ce8ee2e4f7796/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a1e53262fd202e4b40b70c3aff944a8155059beedc8a89bba9dc1f9ef06a1b56", size = 1795193, upload-time = "2026-01-03T17:32:13.705Z" },
115
- { url = "https://files.pythonhosted.org/packages/43/a5/a2dfd1f5ff5581632c7f6a30e1744deda03808974f94f6534241ef60c751/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d60ac9663f44168038586cab2157e122e46bdef09e9368b37f2d82d354c23f72", size = 1621979, upload-time = "2026-01-03T17:32:15.965Z" },
116
- { url = "https://files.pythonhosted.org/packages/fa/f0/12973c382ae7c1cccbc4417e129c5bf54c374dfb85af70893646e1f0e749/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:90751b8eed69435bac9ff4e3d2f6b3af1f57e37ecb0fbeee59c0174c9e2d41df", size = 1822193, upload-time = "2026-01-03T17:32:18.219Z" },
117
- { url = "https://files.pythonhosted.org/packages/3c/5f/24155e30ba7f8c96918af1350eb0663e2430aad9e001c0489d89cd708ab1/aiohttp-3.13.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fc353029f176fd2b3ec6cfc71be166aba1936fe5d73dd1992ce289ca6647a9aa", size = 1769801, upload-time = "2026-01-03T17:32:20.25Z" },
118
- { url = "https://files.pythonhosted.org/packages/eb/f8/7314031ff5c10e6ece114da79b338ec17eeff3a079e53151f7e9f43c4723/aiohttp-3.13.3-cp314-cp314t-win32.whl", hash = "sha256:2e41b18a58da1e474a057b3d35248d8320029f61d70a37629535b16a0c8f3767", size = 466523, upload-time = "2026-01-03T17:32:22.215Z" },
119
- { url = "https://files.pythonhosted.org/packages/b4/63/278a98c715ae467624eafe375542d8ba9b4383a016df8fdefe0ae28382a7/aiohttp-3.13.3-cp314-cp314t-win_amd64.whl", hash = "sha256:44531a36aa2264a1860089ffd4dce7baf875ee5a6079d5fb42e261c704ef7344", size = 499694, upload-time = "2026-01-03T17:32:24.546Z" },
120
  ]
121
 
122
  [[package]]
@@ -151,7 +151,7 @@ wheels = [
151
 
152
  [[package]]
153
  name = "anthropic"
154
- version = "0.86.0"
155
  source = { registry = "https://pypi.org/simple" }
156
  dependencies = [
157
  { name = "anyio" },
@@ -163,9 +163,9 @@ dependencies = [
163
  { name = "sniffio" },
164
  { name = "typing-extensions" },
165
  ]
166
- sdist = { url = "https://files.pythonhosted.org/packages/37/7a/8b390dc47945d3169875d342847431e5f7d5fa716b2e37494d57cfc1db10/anthropic-0.86.0.tar.gz", hash = "sha256:60023a7e879aa4fbb1fed99d487fe407b2ebf6569603e5047cfe304cebdaa0e5", size = 583820, upload-time = "2026-03-18T18:43:08.017Z" }
167
  wheels = [
168
- { url = "https://files.pythonhosted.org/packages/63/5f/67db29c6e5d16c8c9c4652d3efb934d89cb750cad201539141781d8eae14/anthropic-0.86.0-py3-none-any.whl", hash = "sha256:9d2bbd339446acce98858c5627d33056efe01f70435b22b63546fe7edae0cd57", size = 469400, upload-time = "2026-03-18T18:43:06.526Z" },
169
  ]
170
 
171
  [package.optional-dependencies]
@@ -539,15 +539,15 @@ wheels = [
539
 
540
  [[package]]
541
  name = "email-validator"
542
- version = "2.3.0"
543
  source = { registry = "https://pypi.org/simple" }
544
  dependencies = [
545
  { name = "dnspython" },
546
  { name = "idna" },
547
  ]
548
- sdist = { url = "https://files.pythonhosted.org/packages/f5/22/900cb125c76b7aaa450ce02fd727f452243f2e91a61af068b40adba60ea9/email_validator-2.3.0.tar.gz", hash = "sha256:9fc05c37f2f6cf439ff414f8fc46d917929974a82244c20eb10231ba60c54426", size = 51238, upload-time = "2025-08-26T13:09:06.831Z" }
549
  wheels = [
550
- { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" },
551
  ]
552
 
553
  [[package]]
@@ -561,7 +561,7 @@ wheels = [
561
 
562
  [[package]]
563
  name = "fast-agent-mcp"
564
- version = "0.6.16"
565
  source = { registry = "https://pypi.org/simple" }
566
  dependencies = [
567
  { name = "a2a-sdk" },
@@ -598,14 +598,14 @@ dependencies = [
598
  { name = "uvloop", marker = "sys_platform != 'win32'" },
599
  { name = "watchfiles" },
600
  ]
601
- sdist = { url = "https://files.pythonhosted.org/packages/77/5d/9b09206f6eda31ab116f9c6f3936e17ddd2465879ce07d894ae01123945b/fast_agent_mcp-0.6.16.tar.gz", hash = "sha256:7d9f9cb1da85331fe68ff3b40dd0afea0fc4a784a400e4ba8efcd64f9203e19d", size = 2072918, upload-time = "2026-04-13T22:18:24.138Z" }
602
  wheels = [
603
- { url = "https://files.pythonhosted.org/packages/7d/e2/ce7ca28d3577968e9ba18635f76c9692e48dc2c5d78a599824b55761aed8/fast_agent_mcp-0.6.16-py3-none-any.whl", hash = "sha256:aec994c14bea9866ed3e806bc04d2f1fdf31ca415c5bbd771e3c3ded817a4742", size = 1552277, upload-time = "2026-04-13T22:18:25.741Z" },
604
  ]
605
 
606
  [[package]]
607
  name = "fastapi"
608
- version = "0.135.2"
609
  source = { registry = "https://pypi.org/simple" }
610
  dependencies = [
611
  { name = "annotated-doc" },
@@ -614,14 +614,14 @@ dependencies = [
614
  { name = "typing-extensions" },
615
  { name = "typing-inspection" },
616
  ]
617
- sdist = { url = "https://files.pythonhosted.org/packages/c4/73/5903c4b13beae98618d64eb9870c3fac4f605523dd0312ca5c80dadbd5b9/fastapi-0.135.2.tar.gz", hash = "sha256:88a832095359755527b7f63bb4c6bc9edb8329a026189eed83d6c1afcf419d56", size = 395833, upload-time = "2026-03-23T14:12:41.697Z" }
618
  wheels = [
619
- { url = "https://files.pythonhosted.org/packages/8f/ea/18f6d0457f9efb2fc6fa594857f92810cadb03024975726db6546b3d6fcf/fastapi-0.135.2-py3-none-any.whl", hash = "sha256:0af0447d541867e8db2a6a25c23a8c4bd80e2394ac5529bd87501bbb9e240ca5", size = 117407, upload-time = "2026-03-23T14:12:43.284Z" },
620
  ]
621
 
622
  [[package]]
623
  name = "fastmcp"
624
- version = "3.2.0"
625
  source = { registry = "https://pypi.org/simple" }
626
  dependencies = [
627
  { name = "authlib" },
@@ -646,9 +646,9 @@ dependencies = [
646
  { name = "watchfiles" },
647
  { name = "websockets" },
648
  ]
649
- sdist = { url = "https://files.pythonhosted.org/packages/d0/32/4f1b2cfd7b50db89114949f90158b1dcc2c92a1917b9f57c0ff24e47a2f4/fastmcp-3.2.0.tar.gz", hash = "sha256:d4830b8ffc3592d3d9c76dc0f398904cf41f04910e41a0de38cc1004e0903bef", size = 26318581, upload-time = "2026-03-30T20:25:37.692Z" }
650
  wheels = [
651
- { url = "https://files.pythonhosted.org/packages/4f/67/684fa2d2de1e7504549d4ca457b4f854ccec3cd3be03bd86b33b599fbf58/fastmcp-3.2.0-py3-none-any.whl", hash = "sha256:e71aba3df16f86f546a4a9e513261d3233bcc92bef0dfa647bac3fa33623f681", size = 705550, upload-time = "2026-03-30T20:25:35.499Z" },
652
  ]
653
 
654
  [[package]]
@@ -778,7 +778,7 @@ requests = [
778
 
779
  [[package]]
780
  name = "google-genai"
781
- version = "1.68.0"
782
  source = { registry = "https://pypi.org/simple" }
783
  dependencies = [
784
  { name = "anyio" },
@@ -792,9 +792,9 @@ dependencies = [
792
  { name = "typing-extensions" },
793
  { name = "websockets" },
794
  ]
795
- sdist = { url = "https://files.pythonhosted.org/packages/9c/2c/f059982dbcb658cc535c81bbcbe7e2c040d675f4b563b03cdb01018a4bc3/google_genai-1.68.0.tar.gz", hash = "sha256:ac30c0b8bc630f9372993a97e4a11dae0e36f2e10d7c55eacdca95a9fa14ca96", size = 511285, upload-time = "2026-03-18T01:03:18.243Z" }
796
  wheels = [
797
- { url = "https://files.pythonhosted.org/packages/84/de/7d3ee9c94b74c3578ea4f88d45e8de9405902f857932334d81e89bce3dfa/google_genai-1.68.0-py3-none-any.whl", hash = "sha256:a1bc9919c0e2ea2907d1e319b65471d3d6d58c54822039a249fe1323e4178d15", size = 750912, upload-time = "2026-03-18T01:03:15.983Z" },
798
  ]
799
 
800
  [[package]]
@@ -1082,7 +1082,7 @@ wheels = [
1082
 
1083
  [[package]]
1084
  name = "jsonschema"
1085
- version = "4.26.0"
1086
  source = { registry = "https://pypi.org/simple" }
1087
  dependencies = [
1088
  { name = "attrs" },
@@ -1090,9 +1090,9 @@ dependencies = [
1090
  { name = "referencing" },
1091
  { name = "rpds-py" },
1092
  ]
1093
- sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" }
1094
  wheels = [
1095
- { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" },
1096
  ]
1097
 
1098
  [[package]]
@@ -1870,7 +1870,7 @@ wheels = [
1870
 
1871
  [[package]]
1872
  name = "pydantic"
1873
- version = "2.12.5"
1874
  source = { registry = "https://pypi.org/simple" }
1875
  dependencies = [
1876
  { name = "annotated-types" },
@@ -1878,9 +1878,9 @@ dependencies = [
1878
  { name = "typing-extensions" },
1879
  { name = "typing-inspection" },
1880
  ]
1881
- sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" }
1882
  wheels = [
1883
- { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" },
1884
  ]
1885
 
1886
  [package.optional-dependencies]
@@ -1890,69 +1890,72 @@ email = [
1890
 
1891
  [[package]]
1892
  name = "pydantic-core"
1893
- version = "2.41.5"
1894
  source = { registry = "https://pypi.org/simple" }
1895
  dependencies = [
1896
  { name = "typing-extensions" },
1897
  ]
1898
- sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" }
1899
- wheels = [
1900
- { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" },
1901
- { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" },
1902
- { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" },
1903
- { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" },
1904
- { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" },
1905
- { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" },
1906
- { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" },
1907
- { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" },
1908
- { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" },
1909
- { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" },
1910
- { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" },
1911
- { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" },
1912
- { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" },
1913
- { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" },
1914
- { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" },
1915
- { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" },
1916
- { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" },
1917
- { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" },
1918
- { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" },
1919
- { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" },
1920
- { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" },
1921
- { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" },
1922
- { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" },
1923
- { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" },
1924
- { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" },
1925
- { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" },
1926
- { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" },
1927
- { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" },
1928
- { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" },
1929
- { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" },
1930
- { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" },
1931
- { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" },
1932
- { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" },
1933
- { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" },
1934
- { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" },
1935
- { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" },
1936
- { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" },
1937
- { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" },
1938
- { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" },
1939
- { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" },
1940
- { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" },
1941
- { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" },
 
 
 
1942
  ]
1943
 
1944
  [[package]]
1945
  name = "pydantic-settings"
1946
- version = "2.13.1"
1947
  source = { registry = "https://pypi.org/simple" }
1948
  dependencies = [
1949
  { name = "pydantic" },
1950
  { name = "python-dotenv" },
1951
  { name = "typing-inspection" },
1952
  ]
1953
- sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" }
1954
  wheels = [
1955
- { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" },
1956
  ]
1957
 
1958
  [[package]]
@@ -1993,12 +1996,9 @@ crypto = [
1993
 
1994
  [[package]]
1995
  name = "pyperclip"
1996
- version = "1.11.0"
1997
  source = { registry = "https://pypi.org/simple" }
1998
- sdist = { url = "https://files.pythonhosted.org/packages/e8/52/d87eba7cb129b81563019d1679026e7a112ef76855d6159d24754dbd2a51/pyperclip-1.11.0.tar.gz", hash = "sha256:244035963e4428530d9e3a6101a1ef97209c6825edab1567beac148ccc1db1b6", size = 12185, upload-time = "2025-09-26T14:40:37.245Z" }
1999
- wheels = [
2000
- { url = "https://files.pythonhosted.org/packages/df/80/fc9d01d5ed37ba4c42ca2b55b4339ae6e200b456be3a1aaddf4a9fa99b8c/pyperclip-1.11.0-py3-none-any.whl", hash = "sha256:299403e9ff44581cb9ba2ffeed69c7aa96a008622ad0c46cb575ca75b5b84273", size = 11063, upload-time = "2025-09-26T14:40:36.069Z" },
2001
- ]
2002
 
2003
  [[package]]
2004
  name = "pytest"
@@ -2366,7 +2366,7 @@ wheels = [
2366
 
2367
  [[package]]
2368
  name = "slop-farmer"
2369
- version = "0.1.0"
2370
  source = { editable = "." }
2371
  dependencies = [
2372
  { name = "duckdb" },
@@ -2394,7 +2394,7 @@ llm = [
2394
  [package.metadata]
2395
  requires-dist = [
2396
  { name = "duckdb", specifier = ">=1.2.2" },
2397
- { name = "fast-agent-mcp", specifier = ">=0.6.16" },
2398
  { name = "fast-agent-mcp", marker = "python_full_version >= '3.13.5' and extra == 'llm'", specifier = ">=0.6.16" },
2399
  { name = "fastapi", specifier = ">=0.115.0" },
2400
  { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" },
 
4
 
5
  [[package]]
6
  name = "a2a-sdk"
7
+ version = "0.3.26"
8
  source = { registry = "https://pypi.org/simple" }
9
  dependencies = [
10
  { name = "google-api-core" },
 
13
  { name = "protobuf" },
14
  { name = "pydantic" },
15
  ]
16
+ sdist = { url = "https://files.pythonhosted.org/packages/be/97/a6840e01795b182ce751ca165430d46459927cde9bfab838087cbb24aef7/a2a_sdk-0.3.26.tar.gz", hash = "sha256:44068e2d037afbb07ab899267439e9bc7eaa7ac2af94f1e8b239933c993ad52d", size = 274598, upload-time = "2026-04-09T15:21:13.902Z" }
17
  wheels = [
18
+ { url = "https://files.pythonhosted.org/packages/dd/d5/51f4ee1bf3b736add42a542d3c8a3fd3fa85f3d36c17972127defc46c26f/a2a_sdk-0.3.26-py3-none-any.whl", hash = "sha256:754e0573f6d33b225c1d8d51f640efa69cbbed7bdfb06ce9c3540ea9f58d4a91", size = 151016, upload-time = "2026-04-09T15:21:12.35Z" },
19
  ]
20
 
21
  [[package]]
 
53
 
54
  [[package]]
55
  name = "aiohttp"
56
+ version = "3.13.5"
57
  source = { registry = "https://pypi.org/simple" }
58
  dependencies = [
59
  { name = "aiohappyeyeballs" },
 
64
  { name = "propcache" },
65
  { name = "yarl" },
66
  ]
67
+ sdist = { url = "https://files.pythonhosted.org/packages/77/9a/152096d4808df8e4268befa55fba462f440f14beab85e8ad9bf990516918/aiohttp-3.13.5.tar.gz", hash = "sha256:9d98cc980ecc96be6eb4c1994ce35d28d8b1f5e5208a23b421187d1209dbb7d1", size = 7858271, upload-time = "2026-03-31T22:01:03.343Z" }
68
+ wheels = [
69
+ { url = "https://files.pythonhosted.org/packages/78/e9/d76bf503005709e390122d34e15256b88f7008e246c4bdbe915cd4f1adce/aiohttp-3.13.5-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5029cc80718bbd545123cd8fe5d15025eccaaaace5d0eeec6bd556ad6163d61", size = 742930, upload-time = "2026-03-31T21:58:13.155Z" },
70
+ { url = "https://files.pythonhosted.org/packages/57/00/4b7b70223deaebd9bb85984d01a764b0d7bd6526fcdc73cca83bcbe7243e/aiohttp-3.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4bb6bf5811620003614076bdc807ef3b5e38244f9d25ca5fe888eaccea2a9832", size = 496927, upload-time = "2026-03-31T21:58:15.073Z" },
71
+ { url = "https://files.pythonhosted.org/packages/9c/f5/0fb20fb49f8efdcdce6cd8127604ad2c503e754a8f139f5e02b01626523f/aiohttp-3.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a84792f8631bf5a94e52d9cc881c0b824ab42717165a5579c760b830d9392ac9", size = 497141, upload-time = "2026-03-31T21:58:17.009Z" },
72
+ { url = "https://files.pythonhosted.org/packages/3b/86/b7c870053e36a94e8951b803cb5b909bfbc9b90ca941527f5fcafbf6b0fa/aiohttp-3.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57653eac22c6a4c13eb22ecf4d673d64a12f266e72785ab1c8b8e5940d0e8090", size = 1732476, upload-time = "2026-03-31T21:58:18.925Z" },
73
+ { url = "https://files.pythonhosted.org/packages/b5/e5/4e161f84f98d80c03a238671b4136e6530453d65262867d989bbe78244d0/aiohttp-3.13.5-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5e5f7debc7a57af53fdf5c5009f9391d9f4c12867049d509bf7bb164a6e295b", size = 1706507, upload-time = "2026-03-31T21:58:21.094Z" },
74
+ { url = "https://files.pythonhosted.org/packages/d4/56/ea11a9f01518bd5a2a2fcee869d248c4b8a0cfa0bb13401574fa31adf4d4/aiohttp-3.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c719f65bebcdf6716f10e9eff80d27567f7892d8988c06de12bbbd39307c6e3a", size = 1773465, upload-time = "2026-03-31T21:58:23.159Z" },
75
+ { url = "https://files.pythonhosted.org/packages/eb/40/333ca27fb74b0383f17c90570c748f7582501507307350a79d9f9f3c6eb1/aiohttp-3.13.5-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d97f93fdae594d886c5a866636397e2bcab146fd7a132fd6bb9ce182224452f8", size = 1873523, upload-time = "2026-03-31T21:58:25.59Z" },
76
+ { url = "https://files.pythonhosted.org/packages/f0/d2/e2f77eef1acb7111405433c707dc735e63f67a56e176e72e9e7a2cd3f493/aiohttp-3.13.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3df334e39d4c2f899a914f1dba283c1aadc311790733f705182998c6f7cae665", size = 1754113, upload-time = "2026-03-31T21:58:27.624Z" },
77
+ { url = "https://files.pythonhosted.org/packages/fb/56/3f653d7f53c89669301ec9e42c95233e2a0c0a6dd051269e6e678db4fdb0/aiohttp-3.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fe6970addfea9e5e081401bcbadf865d2b6da045472f58af08427e108d618540", size = 1562351, upload-time = "2026-03-31T21:58:29.918Z" },
78
+ { url = "https://files.pythonhosted.org/packages/ec/a6/9b3e91eb8ae791cce4ee736da02211c85c6f835f1bdfac0594a8a3b7018c/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7becdf835feff2f4f335d7477f121af787e3504b48b449ff737afb35869ba7bb", size = 1693205, upload-time = "2026-03-31T21:58:32.214Z" },
79
+ { url = "https://files.pythonhosted.org/packages/98/fc/bfb437a99a2fcebd6b6eaec609571954de2ed424f01c352f4b5504371dd3/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:676e5651705ad5d8a70aeb8eb6936c436d8ebbd56e63436cb7dd9bb36d2a9a46", size = 1730618, upload-time = "2026-03-31T21:58:34.728Z" },
80
+ { url = "https://files.pythonhosted.org/packages/e4/b6/c8534862126191a034f68153194c389addc285a0f1347d85096d349bbc15/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9b16c653d38eb1a611cc898c41e76859ca27f119d25b53c12875fd0474ae31a8", size = 1745185, upload-time = "2026-03-31T21:58:36.909Z" },
81
+ { url = "https://files.pythonhosted.org/packages/0b/93/4ca8ee2ef5236e2707e0fd5fecb10ce214aee1ff4ab307af9c558bda3b37/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:999802d5fa0389f58decd24b537c54aa63c01c3219ce17d1214cbda3c2b22d2d", size = 1557311, upload-time = "2026-03-31T21:58:39.38Z" },
82
+ { url = "https://files.pythonhosted.org/packages/57/ae/76177b15f18c5f5d094f19901d284025db28eccc5ae374d1d254181d33f4/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ec707059ee75732b1ba130ed5f9580fe10ff75180c812bc267ded039db5128c6", size = 1773147, upload-time = "2026-03-31T21:58:41.476Z" },
83
+ { url = "https://files.pythonhosted.org/packages/01/a4/62f05a0a98d88af59d93b7fcac564e5f18f513cb7471696ac286db970d6a/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2d6d44a5b48132053c2f6cd5c8cb14bc67e99a63594e336b0f2af81e94d5530c", size = 1730356, upload-time = "2026-03-31T21:58:44.049Z" },
84
+ { url = "https://files.pythonhosted.org/packages/e4/85/fc8601f59dfa8c9523808281f2da571f8b4699685f9809a228adcc90838d/aiohttp-3.13.5-cp313-cp313-win32.whl", hash = "sha256:329f292ed14d38a6c4c435e465f48bebb47479fd676a0411936cc371643225cc", size = 432637, upload-time = "2026-03-31T21:58:46.167Z" },
85
+ { url = "https://files.pythonhosted.org/packages/c0/1b/ac685a8882896acf0f6b31d689e3792199cfe7aba37969fa91da63a7fa27/aiohttp-3.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:69f571de7500e0557801c0b51f4780482c0ec5fe2ac851af5a92cfce1af1cb83", size = 458896, upload-time = "2026-03-31T21:58:48.119Z" },
86
+ { url = "https://files.pythonhosted.org/packages/5d/ce/46572759afc859e867a5bc8ec3487315869013f59281ce61764f76d879de/aiohttp-3.13.5-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:eb4639f32fd4a9904ab8fb45bf3383ba71137f3d9d4ba25b3b3f3109977c5b8c", size = 745721, upload-time = "2026-03-31T21:58:50.229Z" },
87
+ { url = "https://files.pythonhosted.org/packages/13/fe/8a2efd7626dbe6049b2ef8ace18ffda8a4dfcbe1bcff3ac30c0c7575c20b/aiohttp-3.13.5-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:7e5dc4311bd5ac493886c63cbf76ab579dbe4641268e7c74e48e774c74b6f2be", size = 497663, upload-time = "2026-03-31T21:58:52.232Z" },
88
+ { url = "https://files.pythonhosted.org/packages/9b/91/cc8cc78a111826c54743d88651e1687008133c37e5ee615fee9b57990fac/aiohttp-3.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:756c3c304d394977519824449600adaf2be0ccee76d206ee339c5e76b70ded25", size = 499094, upload-time = "2026-03-31T21:58:54.566Z" },
89
+ { url = "https://files.pythonhosted.org/packages/0a/33/a8362cb15cf16a3af7e86ed11962d5cd7d59b449202dc576cdc731310bde/aiohttp-3.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecc26751323224cf8186efcf7fbcbc30f4e1d8c7970659daf25ad995e4032a56", size = 1726701, upload-time = "2026-03-31T21:58:56.864Z" },
90
+ { url = "https://files.pythonhosted.org/packages/45/0c/c091ac5c3a17114bd76cbf85d674650969ddf93387876cf67f754204bd77/aiohttp-3.13.5-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10a75acfcf794edf9d8db50e5a7ec5fc818b2a8d3f591ce93bc7b1210df016d2", size = 1683360, upload-time = "2026-03-31T21:58:59.072Z" },
91
+ { url = "https://files.pythonhosted.org/packages/23/73/bcee1c2b79bc275e964d1446c55c54441a461938e70267c86afaae6fba27/aiohttp-3.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0f7a18f258d124cd678c5fe072fe4432a4d5232b0657fca7c1847f599233c83a", size = 1773023, upload-time = "2026-03-31T21:59:01.776Z" },
92
+ { url = "https://files.pythonhosted.org/packages/c7/ef/720e639df03004fee2d869f771799d8c23046dec47d5b81e396c7cda583a/aiohttp-3.13.5-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:df6104c009713d3a89621096f3e3e88cc323fd269dbd7c20afe18535094320be", size = 1853795, upload-time = "2026-03-31T21:59:04.568Z" },
93
+ { url = "https://files.pythonhosted.org/packages/bd/c9/989f4034fb46841208de7aeeac2c6d8300745ab4f28c42f629ba77c2d916/aiohttp-3.13.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:241a94f7de7c0c3b616627aaad530fe2cb620084a8b144d3be7b6ecfe95bae3b", size = 1730405, upload-time = "2026-03-31T21:59:07.221Z" },
94
+ { url = "https://files.pythonhosted.org/packages/ce/75/ee1fd286ca7dc599d824b5651dad7b3be7ff8d9a7e7b3fe9820d9180f7db/aiohttp-3.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c974fb66180e58709b6fc402846f13791240d180b74de81d23913abe48e96d94", size = 1558082, upload-time = "2026-03-31T21:59:09.484Z" },
95
+ { url = "https://files.pythonhosted.org/packages/c3/20/1e9e6650dfc436340116b7aa89ff8cb2bbdf0abc11dfaceaad8f74273a10/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6e27ea05d184afac78aabbac667450c75e54e35f62238d44463131bd3f96753d", size = 1692346, upload-time = "2026-03-31T21:59:12.068Z" },
96
+ { url = "https://files.pythonhosted.org/packages/d8/40/8ebc6658d48ea630ac7903912fe0dd4e262f0e16825aa4c833c56c9f1f56/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a79a6d399cef33a11b6f004c67bb07741d91f2be01b8d712d52c75711b1e07c7", size = 1698891, upload-time = "2026-03-31T21:59:14.552Z" },
97
+ { url = "https://files.pythonhosted.org/packages/d8/78/ea0ae5ec8ba7a5c10bdd6e318f1ba5e76fcde17db8275188772afc7917a4/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c632ce9c0b534fbe25b52c974515ed674937c5b99f549a92127c85f771a78772", size = 1742113, upload-time = "2026-03-31T21:59:17.068Z" },
98
+ { url = "https://files.pythonhosted.org/packages/8a/66/9d308ed71e3f2491be1acb8769d96c6f0c47d92099f3bc9119cada27b357/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:fceedde51fbd67ee2bcc8c0b33d0126cc8b51ef3bbde2f86662bd6d5a6f10ec5", size = 1553088, upload-time = "2026-03-31T21:59:19.541Z" },
99
+ { url = "https://files.pythonhosted.org/packages/da/a6/6cc25ed8dfc6e00c90f5c6d126a98e2cf28957ad06fa1036bd34b6f24a2c/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f92995dfec9420bb69ae629abf422e516923ba79ba4403bc750d94fb4a6c68c1", size = 1757976, upload-time = "2026-03-31T21:59:22.311Z" },
100
+ { url = "https://files.pythonhosted.org/packages/c1/2b/cce5b0ffe0de99c83e5e36d8f828e4161e415660a9f3e58339d07cce3006/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20ae0ff08b1f2c8788d6fb85afcb798654ae6ba0b747575f8562de738078457b", size = 1712444, upload-time = "2026-03-31T21:59:24.635Z" },
101
+ { url = "https://files.pythonhosted.org/packages/6c/cf/9e1795b4160c58d29421eafd1a69c6ce351e2f7c8d3c6b7e4ca44aea1a5b/aiohttp-3.13.5-cp314-cp314-win32.whl", hash = "sha256:b20df693de16f42b2472a9c485e1c948ee55524786a0a34345511afdd22246f3", size = 438128, upload-time = "2026-03-31T21:59:27.291Z" },
102
+ { url = "https://files.pythonhosted.org/packages/22/4d/eaedff67fc805aeba4ba746aec891b4b24cebb1a7d078084b6300f79d063/aiohttp-3.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:f85c6f327bf0b8c29da7d93b1cabb6363fb5e4e160a32fa241ed2dce21b73162", size = 464029, upload-time = "2026-03-31T21:59:29.429Z" },
103
+ { url = "https://files.pythonhosted.org/packages/79/11/c27d9332ee20d68dd164dc12a6ecdef2e2e35ecc97ed6cf0d2442844624b/aiohttp-3.13.5-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:1efb06900858bb618ff5cee184ae2de5828896c448403d51fb633f09e109be0a", size = 778758, upload-time = "2026-03-31T21:59:31.547Z" },
104
+ { url = "https://files.pythonhosted.org/packages/04/fb/377aead2e0a3ba5f09b7624f702a964bdf4f08b5b6728a9799830c80041e/aiohttp-3.13.5-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:fee86b7c4bd29bdaf0d53d14739b08a106fdda809ca5fe032a15f52fae5fe254", size = 512883, upload-time = "2026-03-31T21:59:34.098Z" },
105
+ { url = "https://files.pythonhosted.org/packages/bb/a6/aa109a33671f7a5d3bd78b46da9d852797c5e665bfda7d6b373f56bff2ec/aiohttp-3.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:20058e23909b9e65f9da62b396b77dfa95965cbe840f8def6e572538b1d32e36", size = 516668, upload-time = "2026-03-31T21:59:36.497Z" },
106
+ { url = "https://files.pythonhosted.org/packages/79/b3/ca078f9f2fa9563c36fb8ef89053ea2bb146d6f792c5104574d49d8acb63/aiohttp-3.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cf20a8d6868cb15a73cab329ffc07291ba8c22b1b88176026106ae39aa6df0f", size = 1883461, upload-time = "2026-03-31T21:59:38.723Z" },
107
+ { url = "https://files.pythonhosted.org/packages/b7/e3/a7ad633ca1ca497b852233a3cce6906a56c3225fb6d9217b5e5e60b7419d/aiohttp-3.13.5-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:330f5da04c987f1d5bdb8ae189137c77139f36bd1cb23779ca1a354a4b027800", size = 1747661, upload-time = "2026-03-31T21:59:41.187Z" },
108
+ { url = "https://files.pythonhosted.org/packages/33/b9/cd6fe579bed34a906d3d783fe60f2fa297ef55b27bb4538438ee49d4dc41/aiohttp-3.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6f1cbf0c7926d315c3c26c2da41fd2b5d2fe01ac0e157b78caefc51a782196cf", size = 1863800, upload-time = "2026-03-31T21:59:43.84Z" },
109
+ { url = "https://files.pythonhosted.org/packages/c0/3f/2c1e2f5144cefa889c8afd5cf431994c32f3b29da9961698ff4e3811b79a/aiohttp-3.13.5-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:53fc049ed6390d05423ba33103ded7281fe897cf97878f369a527070bd95795b", size = 1958382, upload-time = "2026-03-31T21:59:46.187Z" },
110
+ { url = "https://files.pythonhosted.org/packages/66/1d/f31ec3f1013723b3babe3609e7f119c2c2fb6ef33da90061a705ef3e1bc8/aiohttp-3.13.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:898703aa2667e3c5ca4c54ca36cd73f58b7a38ef87a5606414799ebce4d3fd3a", size = 1803724, upload-time = "2026-03-31T21:59:48.656Z" },
111
+ { url = "https://files.pythonhosted.org/packages/0e/b4/57712dfc6f1542f067daa81eb61da282fab3e6f1966fca25db06c4fc62d5/aiohttp-3.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0494a01ca9584eea1e5fbd6d748e61ecff218c51b576ee1999c23db7066417d8", size = 1640027, upload-time = "2026-03-31T21:59:51.284Z" },
112
+ { url = "https://files.pythonhosted.org/packages/25/3c/734c878fb43ec083d8e31bf029daae1beafeae582d1b35da234739e82ee7/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6cf81fe010b8c17b09495cbd15c1d35afbc8fb405c0c9cf4738e5ae3af1d65be", size = 1806644, upload-time = "2026-03-31T21:59:53.753Z" },
113
+ { url = "https://files.pythonhosted.org/packages/20/a5/f671e5cbec1c21d044ff3078223f949748f3a7f86b14e34a365d74a5d21f/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:c564dd5f09ddc9d8f2c2d0a301cd30a79a2cc1b46dd1a73bef8f0038863d016b", size = 1791630, upload-time = "2026-03-31T21:59:56.239Z" },
114
+ { url = "https://files.pythonhosted.org/packages/0b/63/fb8d0ad63a0b8a99be97deac8c04dacf0785721c158bdf23d679a87aa99e/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:2994be9f6e51046c4f864598fd9abeb4fba6e88f0b2152422c9666dcd4aea9c6", size = 1809403, upload-time = "2026-03-31T21:59:59.103Z" },
115
+ { url = "https://files.pythonhosted.org/packages/59/0c/bfed7f30662fcf12206481c2aac57dedee43fe1c49275e85b3a1e1742294/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:157826e2fa245d2ef46c83ea8a5faf77ca19355d278d425c29fda0beb3318037", size = 1634924, upload-time = "2026-03-31T22:00:02.116Z" },
116
+ { url = "https://files.pythonhosted.org/packages/17/d6/fd518d668a09fd5a3319ae5e984d4d80b9a4b3df4e21c52f02251ef5a32e/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:a8aca50daa9493e9e13c0f566201a9006f080e7c50e5e90d0b06f53146a54500", size = 1836119, upload-time = "2026-03-31T22:00:04.756Z" },
117
+ { url = "https://files.pythonhosted.org/packages/78/b7/15fb7a9d52e112a25b621c67b69c167805cb1f2ab8f1708a5c490d1b52fe/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3b13560160d07e047a93f23aaa30718606493036253d5430887514715b67c9d9", size = 1772072, upload-time = "2026-03-31T22:00:07.494Z" },
118
+ { url = "https://files.pythonhosted.org/packages/7e/df/57ba7f0c4a553fc2bd8b6321df236870ec6fd64a2a473a8a13d4f733214e/aiohttp-3.13.5-cp314-cp314t-win32.whl", hash = "sha256:9a0f4474b6ea6818b41f82172d799e4b3d29e22c2c520ce4357856fced9af2f8", size = 471819, upload-time = "2026-03-31T22:00:10.277Z" },
119
+ { url = "https://files.pythonhosted.org/packages/62/29/2f8418269e46454a26171bfdd6a055d74febf32234e474930f2f60a17145/aiohttp-3.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:18a2f6c1182c51baa1d28d68fea51513cb2a76612f038853c0ad3c145423d3d9", size = 505441, upload-time = "2026-03-31T22:00:12.791Z" },
120
  ]
121
 
122
  [[package]]
 
151
 
152
  [[package]]
153
  name = "anthropic"
154
+ version = "0.96.0"
155
  source = { registry = "https://pypi.org/simple" }
156
  dependencies = [
157
  { name = "anyio" },
 
163
  { name = "sniffio" },
164
  { name = "typing-extensions" },
165
  ]
166
+ sdist = { url = "https://files.pythonhosted.org/packages/b9/7e/672f533dee813028d2c699bfd2a7f52c9118d7353680d9aa44b9e23f717f/anthropic-0.96.0.tar.gz", hash = "sha256:9de947b737f39452f68aa520f1c2239d44119c9b73b0fb6d4e6ca80f00279ee6", size = 658210, upload-time = "2026-04-16T14:28:02.846Z" }
167
  wheels = [
168
+ { url = "https://files.pythonhosted.org/packages/48/5a/72f33204064b6e87601a71a6baf8d855769f8a0c1eaae8d06a1094872371/anthropic-0.96.0-py3-none-any.whl", hash = "sha256:9a6e335a354602a521cd9e777e92bfd46ba6e115bf9bbfe6135311e8fb2015b2", size = 635930, upload-time = "2026-04-16T14:28:01.436Z" },
169
  ]
170
 
171
  [package.optional-dependencies]
 
539
 
540
  [[package]]
541
  name = "email-validator"
542
+ version = "2.2.0"
543
  source = { registry = "https://pypi.org/simple" }
544
  dependencies = [
545
  { name = "dnspython" },
546
  { name = "idna" },
547
  ]
548
+ sdist = { url = "https://files.pythonhosted.org/packages/48/ce/13508a1ec3f8bb981ae4ca79ea40384becc868bfae97fd1c942bb3a001b1/email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7", size = 48967, upload-time = "2024-06-20T11:30:30.034Z" }
549
  wheels = [
550
+ { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521, upload-time = "2024-06-20T11:30:28.248Z" },
551
  ]
552
 
553
  [[package]]
 
561
 
562
  [[package]]
563
  name = "fast-agent-mcp"
564
+ version = "0.6.17"
565
  source = { registry = "https://pypi.org/simple" }
566
  dependencies = [
567
  { name = "a2a-sdk" },
 
598
  { name = "uvloop", marker = "sys_platform != 'win32'" },
599
  { name = "watchfiles" },
600
  ]
601
+ sdist = { url = "https://files.pythonhosted.org/packages/8c/a1/b6b1045345d38b342da3def7723a2dc6a44faff9c01fee6d81afbd272d62/fast_agent_mcp-0.6.17.tar.gz", hash = "sha256:a920113d47ef2ab82be1bd63b77d3bf78f8f862a5a6e91f1fd0aa931850fb25f", size = 2091401, upload-time = "2026-04-16T21:48:43.334Z" }
602
  wheels = [
603
+ { url = "https://files.pythonhosted.org/packages/b4/ef/47e05d6fa95e04ed8ad60afac3ae29d8205894fb220ffde193bd33578f3a/fast_agent_mcp-0.6.17-py3-none-any.whl", hash = "sha256:a23c5a5ed8924e38809dabd31f994e5cc81b8c084e84632bb1eb246b257c4752", size = 1573794, upload-time = "2026-04-16T21:48:38.999Z" },
604
  ]
605
 
606
  [[package]]
607
  name = "fastapi"
608
+ version = "0.136.0"
609
  source = { registry = "https://pypi.org/simple" }
610
  dependencies = [
611
  { name = "annotated-doc" },
 
614
  { name = "typing-extensions" },
615
  { name = "typing-inspection" },
616
  ]
617
+ sdist = { url = "https://files.pythonhosted.org/packages/4e/d9/e66315807e41e69e7f6a1b42a162dada2f249c5f06ad3f1a95f84ab336ef/fastapi-0.136.0.tar.gz", hash = "sha256:cf08e067cc66e106e102d9ba659463abfac245200752f8a5b7b1e813de4ff73e", size = 396607, upload-time = "2026-04-16T11:47:13.623Z" }
618
  wheels = [
619
+ { url = "https://files.pythonhosted.org/packages/26/a3/0bd5f0cdb0bbc92650e8dc457e9250358411ee5d1b65e42b6632387daf81/fastapi-0.136.0-py3-none-any.whl", hash = "sha256:8793d44ec7378e2be07f8a013cf7f7aa47d6327d0dfe9804862688ec4541a6b4", size = 117556, upload-time = "2026-04-16T11:47:11.922Z" },
620
  ]
621
 
622
  [[package]]
623
  name = "fastmcp"
624
+ version = "3.2.3"
625
  source = { registry = "https://pypi.org/simple" }
626
  dependencies = [
627
  { name = "authlib" },
 
646
  { name = "watchfiles" },
647
  { name = "websockets" },
648
  ]
649
+ sdist = { url = "https://files.pythonhosted.org/packages/b9/42/7eed0a38e3b7a386805fecacf8a5a9353a2b3040395ef9e30e585d8549ac/fastmcp-3.2.3.tar.gz", hash = "sha256:4f02ae8b00227285a0cf6544dea1db29b022c8cdd8d3dfdec7118540210ae60a", size = 26328743, upload-time = "2026-04-09T22:05:03.402Z" }
650
  wheels = [
651
+ { url = "https://files.pythonhosted.org/packages/f5/48/84b6dcba793178a44b9d99b4def6cd62f870dcfc5bb7b9153ac390135812/fastmcp-3.2.3-py3-none-any.whl", hash = "sha256:cc50af6eed1f62ed8b6ebf4987286d8d1d006f08d5bec739d5c7fb76160e0911", size = 707260, upload-time = "2026-04-09T22:05:01.225Z" },
652
  ]
653
 
654
  [[package]]
 
778
 
779
  [[package]]
780
  name = "google-genai"
781
+ version = "1.66.0"
782
  source = { registry = "https://pypi.org/simple" }
783
  dependencies = [
784
  { name = "anyio" },
 
792
  { name = "typing-extensions" },
793
  { name = "websockets" },
794
  ]
795
+ sdist = { url = "https://files.pythonhosted.org/packages/9b/ba/0b343b0770d4710ad2979fd9301d7caa56c940174d5361ed4a7cc4979241/google_genai-1.66.0.tar.gz", hash = "sha256:ffc01647b65046bca6387320057aa51db0ad64bcc72c8e3e914062acfa5f7c49", size = 504386, upload-time = "2026-03-04T22:15:28.156Z" }
796
  wheels = [
797
+ { url = "https://files.pythonhosted.org/packages/d1/dd/403949d922d4e261b08b64aaa132af4e456c3b15c8e2a2d9e6ef693f66e2/google_genai-1.66.0-py3-none-any.whl", hash = "sha256:7f127a39cf695277104ce4091bb26e417c59bb46e952ff3699c3a982d9c474ee", size = 732174, upload-time = "2026-03-04T22:15:26.63Z" },
798
  ]
799
 
800
  [[package]]
 
1082
 
1083
  [[package]]
1084
  name = "jsonschema"
1085
+ version = "4.25.1"
1086
  source = { registry = "https://pypi.org/simple" }
1087
  dependencies = [
1088
  { name = "attrs" },
 
1090
  { name = "referencing" },
1091
  { name = "rpds-py" },
1092
  ]
1093
+ sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" }
1094
  wheels = [
1095
+ { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" },
1096
  ]
1097
 
1098
  [[package]]
 
1870
 
1871
  [[package]]
1872
  name = "pydantic"
1873
+ version = "2.13.1"
1874
  source = { registry = "https://pypi.org/simple" }
1875
  dependencies = [
1876
  { name = "annotated-types" },
 
1878
  { name = "typing-extensions" },
1879
  { name = "typing-inspection" },
1880
  ]
1881
+ sdist = { url = "https://files.pythonhosted.org/packages/f3/6b/1353beb3d1cd5cf61cdec5b6f87a9872399de3bc5cae0b7ce07ff4de2ab0/pydantic-2.13.1.tar.gz", hash = "sha256:a0f829b279ddd1e39291133fe2539d2aa46cc6b150c1706a270ff0879e3774d2", size = 843746, upload-time = "2026-04-15T14:57:19.398Z" }
1882
  wheels = [
1883
+ { url = "https://files.pythonhosted.org/packages/81/5a/2225f4c176dbfed0d809e848b50ef08f70e61daa667b7fa14b0d311ae44d/pydantic-2.13.1-py3-none-any.whl", hash = "sha256:9557ecc2806faaf6037f85b1fbd963d01e30511c48085f0d573650fdeaad378a", size = 471917, upload-time = "2026-04-15T14:57:17.277Z" },
1884
  ]
1885
 
1886
  [package.optional-dependencies]
 
1890
 
1891
  [[package]]
1892
  name = "pydantic-core"
1893
+ version = "2.46.1"
1894
  source = { registry = "https://pypi.org/simple" }
1895
  dependencies = [
1896
  { name = "typing-extensions" },
1897
  ]
1898
+ sdist = { url = "https://files.pythonhosted.org/packages/a1/93/f97a86a7eb28faa1d038af2fd5d6166418b4433659108a4c311b57128b2d/pydantic_core-2.46.1.tar.gz", hash = "sha256:d408153772d9f298098fb5d620f045bdf0f017af0d5cb6e309ef8c205540caa4", size = 471230, upload-time = "2026-04-15T14:49:34.52Z" }
1899
+ wheels = [
1900
+ { url = "https://files.pythonhosted.org/packages/ff/d2/bda39bad2f426cb5078e6ad28076614d3926704196efe0d7a2a19a99025d/pydantic_core-2.46.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:cdc8a5762a9c4b9d86e204d555444e3227507c92daba06259ee66595834de47a", size = 2119092, upload-time = "2026-04-15T14:49:50.392Z" },
1901
+ { url = "https://files.pythonhosted.org/packages/ee/f3/69631e64d69cb3481494b2bddefe0ddd07771209f74e9106d066f9138c2a/pydantic_core-2.46.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ba381dfe9c85692c566ecb60fa5a77a697a2a8eebe274ec5e4d6ec15fafad799", size = 1951400, upload-time = "2026-04-15T14:51:06.588Z" },
1902
+ { url = "https://files.pythonhosted.org/packages/53/1c/21cb3db6ae997df31be8e91f213081f72ffa641cb45c89b8a1986832b1f9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1593d8de98207466dc070118322fef68307a0cc6a5625e7b386f6fdae57f9ab6", size = 1976864, upload-time = "2026-04-15T14:50:54.804Z" },
1903
+ { url = "https://files.pythonhosted.org/packages/91/9c/05c819f734318ce5a6ca24da300d93696c105af4adb90494ee571303afd8/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8262c74a1af5b0fdf795f5537f7145785a63f9fbf9e15405f547440c30017ed8", size = 2066669, upload-time = "2026-04-15T14:51:42.346Z" },
1904
+ { url = "https://files.pythonhosted.org/packages/cb/23/fadddf1c7f2f517f58731aea9b35c914e6005250f08dac9b8e53904cdbaa/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b88949a24182e83fbbb3f7ca9b7858d0d37b735700ea91081434b7d37b3b444", size = 2238737, upload-time = "2026-04-15T14:50:45.558Z" },
1905
+ { url = "https://files.pythonhosted.org/packages/23/07/0cd4f95cb0359c8b1ec71e89c3777e7932c8dfeb9cd54740289f310aaead/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8f3708cd55537aeaf3fd0ea55df0d68d0da51dcb07cbc8508745b34acc4c6e0", size = 2316258, upload-time = "2026-04-15T14:51:08.471Z" },
1906
+ { url = "https://files.pythonhosted.org/packages/0c/40/6fc24c3766a19c222a0d60d652b78f0283339d4cd4c173fab06b7ee76571/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f79292435fff1d4f0c18d9cfaf214025cc88e4f5104bfaed53f173621da1c743", size = 2097474, upload-time = "2026-04-15T14:49:56.543Z" },
1907
+ { url = "https://files.pythonhosted.org/packages/4b/af/f39795d1ce549e35d0841382b9c616ae211caffb88863147369a8d74fba9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:a2e607aeb59cf4575bb364470288db3b9a1f0e7415d053a322e3e154c1a0802e", size = 2168383, upload-time = "2026-04-15T14:51:29.269Z" },
1908
+ { url = "https://files.pythonhosted.org/packages/e6/32/0d563f74582795779df6cc270c3fc220f49f4daf7860d74a5a6cda8491ff/pydantic_core-2.46.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec5ca190b75878a9f6ae1fc8f5eb678497934475aef3d93204c9fa01e97370b6", size = 2186182, upload-time = "2026-04-15T14:50:19.097Z" },
1909
+ { url = "https://files.pythonhosted.org/packages/5c/07/1c10d5ce312fc4cf86d1e50bdcdbb8ef248409597b099cab1b4bb3a093f7/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:1f80535259dcdd517d7b8ca588d5ca24b4f337228e583bebedf7a3adcdf5f721", size = 2187859, upload-time = "2026-04-15T14:49:22.974Z" },
1910
+ { url = "https://files.pythonhosted.org/packages/92/01/e1f62d4cb39f0913dbf5c95b9b119ef30ddba9493dff8c2b012f0cdd67dc/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:24820b3c82c43df61eca30147e42853e6c127d8b868afdc0c162df829e011eb4", size = 2338372, upload-time = "2026-04-15T14:49:53.316Z" },
1911
+ { url = "https://files.pythonhosted.org/packages/44/ed/218dfeea6127fb1781a6ceca241ec6edf00e8a8933ff331af2215975a534/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f12794b1dd8ac9fb66619e0b3a0427189f5d5638e55a3de1385121a9b7bf9b39", size = 2384039, upload-time = "2026-04-15T14:53:04.929Z" },
1912
+ { url = "https://files.pythonhosted.org/packages/6c/1e/011e763cd059238249fbd5780e0f8d0b04b47f86c8925e22784f3e5fc977/pydantic_core-2.46.1-cp313-cp313-win32.whl", hash = "sha256:9bc09aed935cdf50f09e908923f9efbcca54e9244bd14a5a0e2a6c8d2c21b4e9", size = 1977943, upload-time = "2026-04-15T14:52:17.969Z" },
1913
+ { url = "https://files.pythonhosted.org/packages/8c/06/b559a490d3ed106e9b1777b8d5c8112dd8d31716243cd662616f66c1f8ea/pydantic_core-2.46.1-cp313-cp313-win_amd64.whl", hash = "sha256:fac2d6c8615b8b42bee14677861ba09d56ee076ba4a65cfb9c3c3d0cc89042f2", size = 2068729, upload-time = "2026-04-15T14:53:07.288Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/52/32a198946e2e19508532aa9da02a61419eb15bd2d96bab57f810f2713e31/pydantic_core-2.46.1-cp313-cp313-win_arm64.whl", hash = "sha256:f978329f12ace9f3cb814a5e44d98bbeced2e36f633132bafa06d2d71332e33e", size = 2029550, upload-time = "2026-04-15T14:52:22.707Z" },
+ { url = "https://files.pythonhosted.org/packages/bd/2b/6793fe89ab66cb2d3d6e5768044eab80bba1d0fae8fd904d0a1574712e17/pydantic_core-2.46.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:9917cb61effac7ec0f448ef491ec7584526d2193be84ff981e85cbf18b68c42a", size = 2118110, upload-time = "2026-04-15T14:50:52.947Z" },
+ { url = "https://files.pythonhosted.org/packages/d2/87/e9a905ddfcc2fd7bd862b340c02be6ab1f827922822d425513635d0ac774/pydantic_core-2.46.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0e749679ca9f8a9d0bff95fb7f6b57bb53f2207fa42ffcc1ec86de7e0029ab89", size = 1948645, upload-time = "2026-04-15T14:51:55.577Z" },
+ { url = "https://files.pythonhosted.org/packages/15/23/26e67f86ed62ac9d6f7f3091ee5220bf14b5ac36fb811851d601365ef896/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2ecacee70941e233a2dad23f7796a06f86cc10cc2fbd1c97c7dd5b5a79ffa4f", size = 1977576, upload-time = "2026-04-15T14:49:37.58Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/78/813c13c0de323d4de54ee2e6fdd69a0271c09ac8dd65a8a000931aa487a5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:647d0a2475b8ed471962eed92fa69145b864942f9c6daa10f95ac70676637ae7", size = 2060358, upload-time = "2026-04-15T14:51:40.087Z" },
+ { url = "https://files.pythonhosted.org/packages/09/5e/4caf2a15149271fbd2b4d968899a450853c800b85152abcf54b11531417f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac9cde61965b0697fce6e6cc372df9e1ad93734828aac36e9c1c42a22ad02897", size = 2235980, upload-time = "2026-04-15T14:50:34.535Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/c1/a2cdabb5da6f5cb63a3558bcafffc20f790fa14ccffbefbfb1370fadc93f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a2eb0864085f8b641fb3f54a2fb35c58aff24b175b80bc8a945050fcde03204", size = 2316800, upload-time = "2026-04-15T14:52:46.999Z" },
+ { url = "https://files.pythonhosted.org/packages/76/fd/19d711e4e9331f9d77f222bffc202bf30ea0d74f6419046376bb82f244c8/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b83ce9fede4bc4fb649281d9857f06d30198b8f70168f18b987518d713111572", size = 2101762, upload-time = "2026-04-15T14:49:24.278Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/64/ce95625448e1a4e219390a2923fd594f3fa368599c6b42ac71a5df7238c9/pydantic_core-2.46.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:cb33192753c60f269d2f4a1db8253c95b0df6e04f2989631a8cc1b0f4f6e2e92", size = 2167737, upload-time = "2026-04-15T14:50:41.637Z" },
+ { url = "https://files.pythonhosted.org/packages/ad/31/413572d03ca3e73b408f00f54418b91a8be6401451bc791eaeff210328e5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:96611d51f953f87e1ae97637c01ee596a08b7f494ea00a5afb67ea6547b9f53b", size = 2185658, upload-time = "2026-04-15T14:51:46.799Z" },
+ { url = "https://files.pythonhosted.org/packages/36/09/e4f581353bdf3f0c7de8a8b27afd14fc761da29d78146376315a6fedc487/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9b176fa55f9107db5e6c86099aa5bfd934f1d3ba6a8b43f714ddeebaed3f42b7", size = 2184154, upload-time = "2026-04-15T14:52:49.629Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/a4/d0d52849933f5a4bf1ad9d8da612792f96469b37e286a269e3ee9c60bbb1/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:79a59f63a4ce4f3330e27e6f3ce281dd1099453b637350e97d7cf24c207cd120", size = 2332379, upload-time = "2026-04-15T14:49:55.009Z" },
+ { url = "https://files.pythonhosted.org/packages/30/93/25bfb08fdbef419f73290e573899ce938a327628c34e8f3a4bafeea30126/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:f200fce071808a385a314b7343f5e3688d7c45746be3d64dc71ee2d3e2a13268", size = 2377964, upload-time = "2026-04-15T14:51:59.649Z" },
+ { url = "https://files.pythonhosted.org/packages/15/36/b777766ff83fef1cf97473d64764cd44f38e0d8c269ed06faace9ae17666/pydantic_core-2.46.1-cp314-cp314-win32.whl", hash = "sha256:3a07eccc0559fb9acc26d55b16bf8ebecd7f237c74a9e2c5741367db4e6d8aff", size = 1976450, upload-time = "2026-04-15T14:51:57.665Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/4b/4cd19d2437acfc18ca166db5a2067040334991eb862c4ecf2db098c91fbf/pydantic_core-2.46.1-cp314-cp314-win_amd64.whl", hash = "sha256:1706d270309ac7d071ffe393988c471363705feb3d009186e55d17786ada9622", size = 2067750, upload-time = "2026-04-15T14:49:38.941Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/a0/490751c0ef8f5b27aae81731859aed1508e72c1a9b5774c6034269db773b/pydantic_core-2.46.1-cp314-cp314-win_arm64.whl", hash = "sha256:22d4e7457ade8af06528012f382bc994a97cc2ce6e119305a70b3deff1e409d6", size = 2021109, upload-time = "2026-04-15T14:50:27.728Z" },
+ { url = "https://files.pythonhosted.org/packages/36/3a/2a018968245fffd25d5f1972714121ad309ff2de19d80019ad93494844f9/pydantic_core-2.46.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:607ff9db0b7e2012e7eef78465e69f9a0d7d1c3e7c6a84cf0c4011db0fcc3feb", size = 2111548, upload-time = "2026-04-15T14:52:08.273Z" },
+ { url = "https://files.pythonhosted.org/packages/77/5b/4103b6192213217e874e764e5467d2ff10d8873c1147d01fa432ac281880/pydantic_core-2.46.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8cda3eacaea13bd02a1bea7e457cc9fc30b91c5a91245cef9b215140f80dd78c", size = 1926745, upload-time = "2026-04-15T14:50:03.045Z" },
+ { url = "https://files.pythonhosted.org/packages/c3/70/602a667cf4be4bec6c3334512b12ae4ea79ce9bfe41dc51be1fd34434453/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9493279cdc7997fe19e5ed9b41f30cbc3806bd4722adb402fedb6f6d41bd72a", size = 1965922, upload-time = "2026-04-15T14:51:12.555Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/24/06a89ce5323e755b7d2812189f9706b87aaebe49b34d247b380502f7992c/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3644e5e10059999202355b6c6616e624909e23773717d8f76deb8a6e2a72328c", size = 2043221, upload-time = "2026-04-15T14:51:18.995Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/6e/b1d9ad907d9d76964903903349fd2e33c87db4b993cc44713edcad0fc488/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ad6c9de57683e26c92730991960c0c3571b8053263b042de2d3e105930b2767", size = 2243655, upload-time = "2026-04-15T14:50:10.718Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/73/787abfaad51174641abb04c8aa125322279b40ad7ce23c495f5a69f76554/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:557ebaa27c7617e7088002318c679a8ce685fa048523417cd1ca52b7f516d955", size = 2295976, upload-time = "2026-04-15T14:53:09.694Z" },
+ { url = "https://files.pythonhosted.org/packages/56/0b/b7c5a631b6d5153d4a1ea4923b139aea256dc3bd99c8e6c7b312c7733146/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cd37e39b22b796ba0298fe81e9421dd7b65f97acfbb0fb19b33ffdda7b9a7b4", size = 2103439, upload-time = "2026-04-15T14:50:08.32Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/3f/952ee470df69e5674cdec1cbde22331adf643b5cc2ff79f4292d80146ee4/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:6689443b59714992e67d62505cdd2f952d6cf1c14cc9fd9aeec6719befc6f23b", size = 2132871, upload-time = "2026-04-15T14:50:24.445Z" },
+ { url = "https://files.pythonhosted.org/packages/e3/8b/1dea3b1e683c60c77a60f710215f90f486755962aa8939dbcb7c0f975ac3/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f32c41ca1e3456b5dd691827b7c1433c12d5f0058cc186afbb3615bc07d97b8", size = 2168658, upload-time = "2026-04-15T14:52:24.897Z" },
+ { url = "https://files.pythonhosted.org/packages/67/97/32ae283810910d274d5ba9f48f856f5f2f612410b78b249f302d297816f5/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:88cd1355578852db83954dc36e4f58f299646916da976147c20cf6892ba5dc43", size = 2171184, upload-time = "2026-04-15T14:52:34.854Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/57/c9a855527fe56c2072070640221f53095b0b19eaf651f3c77643c9cabbe3/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:a170fefdb068279a473cc9d34848b85e61d68bfcc2668415b172c5dfc6f213bf", size = 2316573, upload-time = "2026-04-15T14:52:12.871Z" },
+ { url = "https://files.pythonhosted.org/packages/37/b3/14c39ffc7399819c5448007c7bcb4e6da5669850cfb7dcbb727594290b48/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:556a63ff1006934dba4eed7ea31b58274c227e29298ec398e4275eda4b905e95", size = 2378340, upload-time = "2026-04-15T14:51:02.619Z" },
+ { url = "https://files.pythonhosted.org/packages/01/55/a37461fbb29c053ea4e62cfc5c2d56425cb5efbef8316e63f6d84ae45718/pydantic_core-2.46.1-cp314-cp314t-win32.whl", hash = "sha256:3b146d8336a995f7d7da6d36e4a779b7e7dff2719ac00a1eb8bd3ded00bec87b", size = 1960843, upload-time = "2026-04-15T14:52:06.103Z" },
+ { url = "https://files.pythonhosted.org/packages/22/d7/97e1221197d17a27f768363f87ec061519eeeed15bbd315d2e9d1429ff03/pydantic_core-2.46.1-cp314-cp314t-win_amd64.whl", hash = "sha256:f1bc856c958e6fe9ec071e210afe6feb695f2e2e81fd8d2b102f558d364c4c17", size = 2048696, upload-time = "2026-04-15T14:52:52.154Z" },
+ { url = "https://files.pythonhosted.org/packages/19/d5/4eac95255c7d35094b46a32ec1e4d80eac94729c694726ee1d69948bd5f0/pydantic_core-2.46.1-cp314-cp314t-win_arm64.whl", hash = "sha256:21a5bfd8a1aa4de60494cdf66b0c912b1495f26a8899896040021fbd6038d989", size = 2022343, upload-time = "2026-04-15T14:49:49.036Z" },
  ]
 
  [[package]]
  name = "pydantic-settings"
+ version = "2.13.0"
  source = { registry = "https://pypi.org/simple" }
  dependencies = [
  { name = "pydantic" },
  { name = "python-dotenv" },
  { name = "typing-inspection" },
  ]
+ sdist = { url = "https://files.pythonhosted.org/packages/96/a1/ae859ffac5a3338a66b74c5e29e244fd3a3cc483c89feaf9f56c39898d75/pydantic_settings-2.13.0.tar.gz", hash = "sha256:95d875514610e8595672800a5c40b073e99e4aae467fa7c8f9c263061ea2e1fe", size = 222450, upload-time = "2026-02-15T12:11:23.476Z" }
  wheels = [
+ { url = "https://files.pythonhosted.org/packages/b0/1a/dd1b9d7e627486cf8e7523d09b70010e05a4bc41414f4ae6ce184cf0afb6/pydantic_settings-2.13.0-py3-none-any.whl", hash = "sha256:d67b576fff39cd086b595441bf9c75d4193ca9c0ed643b90360694d0f1240246", size = 58429, upload-time = "2026-02-15T12:11:22.133Z" },
  ]
 
  [[package]]
  name = "pyperclip"
+ version = "1.9.0"
  source = { registry = "https://pypi.org/simple" }
+ sdist = { url = "https://files.pythonhosted.org/packages/30/23/2f0a3efc4d6a32f3b63cdff36cd398d9701d26cda58e3ab97ac79fb5e60d/pyperclip-1.9.0.tar.gz", hash = "sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310", size = 20961, upload-time = "2024-06-18T20:38:48.401Z" }
 
 
 
2002
 
2003
  [[package]]
  name = "pytest"
 
2366
 
2367
  [[package]]
  name = "slop-farmer"
+ version = "0.1.1"
  source = { editable = "." }
  dependencies = [
  { name = "duckdb" },
 
  [package.metadata]
  requires-dist = [
  { name = "duckdb", specifier = ">=1.2.2" },
+ { name = "fast-agent-mcp", specifier = ">=0.6.17" },
  { name = "fast-agent-mcp", marker = "python_full_version >= '3.13.5' and extra == 'llm'", specifier = ">=0.6.16" },
  { name = "fastapi", specifier = ">=0.115.0" },
  { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" },