diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..ecae771a8c888a832bff47910ba1007783ee52f3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.13-slim + +RUN pip install --no-cache-dir uv && \ + useradd -m -u 1000 user && \ + mkdir -p /data && \ + chmod 777 /data + +USER user + +ENV HOME=/home/user \ + PATH=/home/user/.local/bin:$PATH \ + UV_LINK_MODE=copy + +WORKDIR $HOME/app + +COPY --chown=user pyproject.toml uv.lock README.md ./ +COPY --chown=user src ./src + +RUN uv sync --frozen --no-dev + +EXPOSE 7860 + +CMD ["uv", "run", "--no-dev", "uvicorn", "slop_farmer.app.pr_search_api:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/README.md b/README.md index 672780e1090f8c90cbf3a389ab515aeb3c6709dd..ee4c01ea3e8f937de6a2c7ea9a6134649d91a10d 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,43 @@ --- -title: Diffusers Pr Api -emoji: 🏆 -colorFrom: red +title: Diffusers PR API +emoji: 🔎 +colorFrom: purple colorTo: indigo sdk: docker -pinned: false +app_port: 7860 +short_description: Live API for Diffusers PR similarity search. +datasets: + - evalstate/diffusers-pr +tags: + - api + - docker + - pull-requests + - similarity + - diffusers --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# Diffusers PR API + +Machine-oriented API for PR similarity search. + +Canonical storage roles: + +- dataset repo: published latest state and canonical current analysis +- mounted bucket: mutable operational cache only +- Space disk: ephemeral runtime storage + +Defaults for this deployment: + +- repo: `huggingface/diffusers` +- live probe source: `https://ghreplica.dutiful.dev` +- dataset: `evalstate/diffusers-pr` + +CLI examples: + +```bash +pr-search status +pr-search code similar 67096 +pr-search code clusters for-pr 67096 +pr-search issues list --limit 5 +pr-search contributors list --limit 10 +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..492bc52f42b668b01feaa27796d9f49cd8290614 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,65 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "slop-farmer" +version = "0.1.1" +description = "GitHub-to-Hub data pipeline for transformers issue and PR triage research." 
+readme = "README.md" +requires-python = ">=3.13.5" +dependencies = [ + "duckdb>=1.2.2", + "pyarrow>=18.0.0", + "fastapi>=0.115.0", + "huggingface_hub>=1.11.0", + "pydantic>=2.11", + "PyYAML>=6.0.2", + "rank-bm25>=0.2.2", + "fast-agent-mcp>=0.6.17", + "uvicorn>=0.34.0", +] + +[project.optional-dependencies] +dev = [ + "httpx>=0.28.0", + "pytest>=8.3.0", + "ruff>=0.11", + "ty>=0.0.23", +] +llm = [ + "fast-agent-mcp>=0.6.16; python_full_version >= '3.13.5'", +] + +[project.scripts] +slop-farmer = "slop_farmer.app.cli:main" + +[tool.setuptools] +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +pythonpath = ["src"] +testpaths = ["tests"] + +[tool.ruff] +line-length = 100 +target-version = "py311" + +[tool.ruff.lint] +select = [ + "B", + "E", + "F", + "I", + "RUF", + "SIM", + "UP", +] +ignore = ["E501"] + +[tool.slop-farmer.dashboard-data] +output-dir = "web/public/data" +window-days = 14 diff --git a/src/slop_farmer.egg-info/PKG-INFO b/src/slop_farmer.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..b927a314ac30b99748466bc64d7a0c7badf44c75 --- /dev/null +++ b/src/slop_farmer.egg-info/PKG-INFO @@ -0,0 +1,468 @@ +Metadata-Version: 2.4 +Name: slop-farmer +Version: 0.1.1 +Summary: GitHub-to-Hub data pipeline for transformers issue and PR triage research. +Requires-Python: >=3.13.5 +Description-Content-Type: text/markdown +Requires-Dist: duckdb>=1.2.2 +Requires-Dist: pyarrow>=18.0.0 +Requires-Dist: fastapi>=0.115.0 +Requires-Dist: huggingface_hub>=1.11.0 +Requires-Dist: pydantic>=2.11 +Requires-Dist: PyYAML>=6.0.2 +Requires-Dist: rank-bm25>=0.2.2 +Requires-Dist: fast-agent-mcp>=0.6.17 +Requires-Dist: uvicorn>=0.34.0 +Provides-Extra: dev +Requires-Dist: httpx>=0.28.0; extra == "dev" +Requires-Dist: pytest>=8.3.0; extra == "dev" +Requires-Dist: ruff>=0.11; extra == "dev" +Requires-Dist: ty>=0.0.23; extra == "dev" +Provides-Extra: llm +Requires-Dist: fast-agent-mcp>=0.6.16; python_full_version >= "3.13.5" and extra == "llm" + +# slop-farmer + +Pipeline for managing PR's in high volume GitHub repositories. + +Scrapes PR, Issue and Contributor data in to a dataset, performs analysis and publishes a dashboard. + +The pipeline stages are: + 1. Scrape - Collect data from the Github Repository + 1. Contributor Report - Look at contributors recent history. + 1. Analyze - Cluster PRs and Issues on + 1. Scope - Cluster PRs on overlapping repository areas. + 1. Dashboard Export - Export data in JSON format to populate a browsing dashboard + 1. Publish Dashboard - Build a dashboard and deploy it in a Hugging Face Space. + + + +## Scrape + +To run a scrape you need to configure: + +1. The GitHub Repository ID +1. A valid GitHub PAT with API access. + +`uv run slop-farmer scrape --repo huggingface/diffusers --output-dir runs/diffusers/data` + +## Contributor Report + +This scans the dataset for Contributors and provides a short profile of their recent public commit history and merged PR rate. + +## Analyze + +Cluster PRs and Issue Content. Choice of deterministic or LLM supplemented algorithm. + +When `ranking_backend=hybrid`, analysis writes reusable LLM review cache entries under +`/analysis-state/`. If you enable YAML config setting +`analysis.cached_analysis: true`, `analyze` will automatically copy `analysis-state/` +forward from the previous snapshot when the new snapshot does not already have it, then +log a cache-hit summary for the run. 
This is useful for incremental scrapes where many +review units are unchanged and can safely reuse cached hybrid decisions. + +## Scope + +Cluster PRs by touched repository areas. + +## Dashboard Export / Publish + +Export the report, and publish a dashboard. + + + +## Quickstart + +```bash +uv run slop-farmer scrape \ + --repo huggingface/transformers \ + --output-dir data \ + --max-issues 200 \ + --max-prs 50 +``` + +To publish a snapshot to the Hub: + +```bash +uv run slop-farmer scrape \ + --repo huggingface/transformers \ + --output-dir data \ + --hf-repo-id burtenshaw/transformers-pr-slop-dataset \ + --publish +``` + +When `--publish` is used, `slop-farmer` now also generates and uploads new contributor reviewer artifacts by default: + +- `new_contributors.parquet` +- `new-contributors-report.json` +- `new-contributors-report.md` + +Use `--no-new-contributor-report` to skip them. + +## Nightly incremental runs + +The scraper now stores a local watermark at `data/state/watermark.json` and resumes from it by default when `--since` is not provided. + +```bash +uv run slop-farmer scrape \ + --repo huggingface/transformers \ + --output-dir data \ + --fetch-timeline +``` + +On the first run, this creates a full snapshot. On later runs against the same `--output-dir`, it uses the last successful watermark, fetches only changed records, merges them into the previous snapshot locally, and writes a new full latest snapshot. + +To ignore the watermark and force a fresh full run: + +```bash +uv run slop-farmer scrape \ + --repo huggingface/transformers \ + --output-dir data \ + --no-resume +``` + +Authentication defaults: + +- GitHub: `GITHUB_TOKEN`, then `gh auth token` +- Hugging Face: `HF_TOKEN`, otherwise existing `hf auth` login + +## Canonical dataset upkeep + +`dataset_id` is the canonical latest dataset repo. + +Use the remote-first writer: + +```bash +uv run slop-farmer --config configs/transformers.yaml refresh-dataset +``` + +Or submit the generic HF Job wrapper: + +```bash +scripts/submit_dataset_job.sh +``` + +By default this creates a scheduled HF Job that: + +- reads `CONFIG_PATH` (defaults to `configs/transformers.yaml`) +- refreshes `dataset_id` incrementally against the current Hub dataset state +- regenerates the new contributor report +- uploads the updated snapshot back to the dataset repo + +Useful overrides: + +```bash +# fire once immediately instead of creating a schedule +MODE=run scripts/submit_dataset_job.sh + +# change the cron schedule +SCHEDULE="0 */6 * * *" scripts/submit_dataset_job.sh + +# optionally mount a writable HF bucket for temp files +SCRATCH_BUCKET=evalstate/slop-farmer-scratch \ + scripts/submit_dataset_job.sh +``` + +Buckets are best treated here as optional scratch space via `TMPDIR`, not as the canonical +published dataset. The repo's local analysis and PR-scope tooling already knows how to +materialize versioned Hub **dataset repos**; it does not currently read HF buckets directly. + +Compatibility wrappers remain available: + +- `scripts/submit_transformers_dataset_job.sh` +- `scripts/submit_openclaw_dataset_job.sh` + +For the current storage model and recommended modes, see +[`docs/data-architecture.md`](docs/data-architecture.md). 
+ +## Analyze a Hub dataset + +You can analyze the published Hugging Face dataset directly without scraping GitHub again: + +```bash +uv run slop-farmer analyze \ + --snapshot-dir eval_data/snapshots/gh-live-latest-1000x1000 \ + --ranking-backend hybrid \ + --model "gpt-5-mini?reasoning=low" \ + --output /tmp/gh-live-latest-1000x1000-hybrid.json +``` + +This materializes the dataset-viewer parquet export into a local snapshot cache under `eval_data/snapshots/` and writes `analysis-report.json` next to it. + +Repo-local defaults for `analyze` can be stored in `pyproject.toml` under `[tool.slop-farmer.analyze]`. This repo currently defaults to: + +- `dashboard-data.output-dir = "web/public/data"` + +For repo-specific remote-first analysis, prefer a YAML config with `dataset_id`, e.g.: + +```bash +uv run slop-farmer --config configs/openclaw.yaml analyze +``` + +## Cluster open PRs by code scope + +You can also build holistic PR scope clusters from an existing snapshot: + +```bash +uv run slop-farmer pr-scope \ + --snapshot-dir data/snapshots/20260324T150154Z +``` + +By default this writes `pr-scope-clusters.json` next to the snapshot. + +## Merge duplicate PR clusters + +List only the duplicate PR clusters that pass the mergeability gate: + +```bash +uv run slop-farmer duplicate-prs list \ + --report eval_data/snapshots/gh-live-latest-1000x1000/analysis-report-hybrid.json +``` + +Then synthesize and publish one minimal upstream PR from the top-ranked mergeable cluster: + +```bash +uv run slop-farmer duplicate-prs merge \ + --report eval_data/snapshots/gh-live-latest-1000x1000/analysis-report-hybrid.json \ + --repo-dir /path/to/transformers +``` + +If your local checkout uses a fork as `origin`, point the merge flow at the upstream remote explicitly and relax the file policy when needed: + +```bash +uv run slop-farmer duplicate-prs merge \ + --report eval_data/snapshots/gh-live-latest-1000x1000/analysis-report-hybrid.json \ + --repo-dir /path/to/transformers \ + --upstream-repo huggingface/transformers \ + --upstream-remote upstream \ + --fork-repo YOURNAME/transformers-minimal \ + --fork-remote origin \ + --file-policy allow-docs +``` + +## Import a historical HF checkpoint as a clean local snapshot + +If an older dataset keeps its richest data under `_checkpoints//`, +you can promote one of those checkpoints into a normal local snapshot: + +```bash +uv run slop-farmer import-hf-checkpoint \ + --source-repo-id burtenshaw/transformers-pr-slop-dataset \ + --output-dir eval_data +``` + +By default this selects the latest viable checkpoint, writes a clean snapshot +under `eval_data/snapshots/`, and regenerates `links.parquet`, +`issue_comments.parquet`, and `pr_comments.parquet`. + +## Render markdown from an analysis JSON + +You can turn an existing analysis report into a human-readable markdown file without rerunning clustering: + +```bash +uv run slop-farmer markdown-report \ + --input eval_data/snapshots/hf-latest-100x100/analysis-report-hybrid.json +``` + +By default this writes `analysis-report-hybrid.md` next to the JSON and uses the JSON parent directory as the snapshot source for issue and PR titles, links, and latest-activity ordering. 
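A hedged example that overrides both the markdown output path and the snapshot source; the paths are placeholders, and the flags match the `markdown-report` parser in `src/slop_farmer/app/cli.py`:

```bash
# Paths are illustrative; --output and --snapshot-dir are optional overrides.
uv run slop-farmer markdown-report \
  --input eval_data/snapshots/hf-latest-100x100/analysis-report-hybrid.json \
  --output eval_data/snapshots/hf-latest-100x100/analysis-report-hybrid.md \
  --snapshot-dir eval_data/snapshots/hf-latest-100x100
```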
+ +## Render a new contributor report + +You can also render a reviewer-facing markdown report for contributors who are still new to the repo snapshot: + +```bash +uv run slop-farmer new-contributor-report \ + --snapshot-dir data/snapshots/20260324T000000Z +``` + +By default this writes: + +- `new_contributors.parquet` +- `new-contributors-report.md` +- `new-contributors-report.json` + +next to the snapshot, including GitHub profile links, repo issue/PR search links, and example authored artifacts. + +## Full end-to-end workflow + +You can run scrape + publish + analyze + markdown + dashboard export in one command: + +```bash +uv run slop-farmer full-pipeline \ + --repo huggingface/transformers \ + --dataset YOURNAME/transformers-pr-slop-dataset \ + --model "gpt-5-mini?reasoning=low" +``` + +This writes outputs under a repo-anchored workspace directory, for example: + +- `runs/transformers/data/` +- `runs/transformers/web/public/data/` + +Optional age caps are based on `created_at`: + +```bash + --issue-max-age-days 30 \ + --pr-max-age-days 14 +``` + +## Validation checks + +Before committing or wiring new package moves into automation, run: + +```bash +uv run python scripts/enforce_packaging.py +uv run --extra dev ruff format --check src tests scripts jobs +uv run --extra dev ruff check src tests scripts jobs +uv run --extra dev ty check src tests scripts jobs +uv run --extra dev pytest -q +``` + +`scripts/enforce_packaging.py` verifies the coarse package boundaries: + +- `data` must not import `app` +- `data` must not import `reports` +- `reports` must not import `app` + +## YAML config-driven runs + +You can keep repo-specific pipeline defaults in a YAML file and apply them to all +commands with `--config`. + +Example: `configs/diffusers.yaml` + +```yaml +repo: huggingface/diffusers +workspace: runs/diffusers +dataset_id: evalstate/diffusers-pr + +pull-requests: + template_cleanup: + mode: merge_defaults + line_patterns: + - '^d(?:o not merge|ontmerge)\.?$' + cluster_suppression_rules: + - id: diffusers_post_release + title_patterns: + - '\bpost[- ]release\b' + +dashboard: + space_id: evalstate/diffusers-dashboard + title: Diffusers Dashboard + window_days: 60 + contributor_window_days: 60 + contributor_max_authors: 0 + +analysis: + model: gpt-5.4-mini + ranking_backend: hybrid + cached_analysis: true + +scrape: + fetch-timeline: true +``` + +Then commands stay aligned without repeating repo/workspace/window settings: + +```bash +uv run slop-farmer --config configs/diffusers.yaml refresh-dataset +uv run slop-farmer --config configs/diffusers.yaml analyze +uv run slop-farmer --config configs/diffusers.yaml pr-scope +uv run slop-farmer --config configs/diffusers.yaml pr-search refresh +uv run slop-farmer --config configs/diffusers.yaml new-contributor-report +uv run slop-farmer --config configs/diffusers.yaml dashboard-data +uv run slop-farmer --config configs/diffusers.yaml deploy-dashboard --refresh-contributors +uv run slop-farmer --config configs/diffusers.yaml dataset-status +``` + +Those reader commands default to `dataset_id` when configured. Pass `--snapshot-dir` to force +an explicit local snapshot instead. + +If you run `analyze` before `publish-snapshot`, the uploaded snapshot will also include +`analysis-state/`, which makes the hybrid cache portable across machines and reusable in +later snapshots when `analysis.cached_analysis: true` is enabled. 
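To pin a reader command to one specific local snapshot while keeping the other YAML defaults, a sketch (the snapshot path is a placeholder):

```bash
# Force an explicit local snapshot instead of the configured dataset_id.
uv run slop-farmer --config configs/diffusers.yaml analyze \
  --snapshot-dir runs/diffusers/data/snapshots/20260324T150154Z
```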
+ +## Export static dashboard data + +You can export a slim JSON bundle for the React dashboard: + +```bash +uv run slop-farmer dashboard-data \ + --snapshot-dir data/snapshots/20260324T150154Z \ + --output-dir web/public/data \ + --window-days 14 +``` + +This writes: + +- `summary.json` +- `clusters.json` +- `prs.json` +- `contributors.json` + +The dashboard is intentionally summary-first and links out to GitHub for deep detail. + +## Deploy a dashboard to a Hugging Face Space + +Use the generic deploy script: + +```bash +SPACE_ID=evalstate/openclaw-pr-report \ +PIPELINE_DATA_DIR=runs/openclaw/data \ +SNAPSHOT_DIR=runs/openclaw/data/snapshots/20260324T233649Z \ +SPACE_TITLE="OpenClaw PR Report" \ +DATASET_ID=evalstate/openclaw-pr \ +scripts/deploy_dashboard_space.sh +``` + +Repo-specific wrappers are also available: + +- `scripts/deploy_transformers_dashboard_space.sh` +- `scripts/deploy_openclaw_dashboard_space.sh` + +Or use the CLI wrapper with a YAML config: + +```bash +uv run slop-farmer --config configs/diffusers.yaml deploy-dashboard --refresh-contributors +``` + +## Deploy the PR similarity API to a Hugging Face Docker Space + +The repo includes the FastAPI service for the read-oriented PR similarity surface. +The standalone `pr-search` client now lives in the downstream `pr-search-cli` +package. + +Deploy the OpenClaw API Space with: + +```bash +scripts/update_openclaw_pr_search_api.sh +``` + +Or use the generic deploy script directly: + +```bash +SPACE_ID=evalstate/openclaw-pr-api \ +SPACE_TITLE="OpenClaw PR API" \ +DEFAULT_REPO=openclaw/openclaw \ +GHR_BASE_URL=https://ghreplica.dutiful.dev \ +HF_REPO_ID=evalstate/openclaw-pr \ +BUCKET_ID=evalstate/openclaw-pr-api-data \ +scripts/deploy_pr_search_space.sh +``` + +This deploy flow: + +- creates or updates a Docker Space +- uploads a minimal app bundle with a generated Space `README.md` +- sets runtime variables for the API +- mounts the configured HF bucket at `/data` + +After the Space is live, you can query it either through the in-repo admin CLI: + +```bash +uv run slop-farmer pr-search status --repo openclaw/openclaw +uv run slop-farmer pr-search similar 67096 --repo openclaw/openclaw +``` + +Or through the downstream `pr-search-cli` package, which owns the standalone +`pr-search` executable. 
diff --git a/src/slop_farmer.egg-info/SOURCES.txt b/src/slop_farmer.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf3dfb95c8870251fcd53cfee22b06d214c47875 --- /dev/null +++ b/src/slop_farmer.egg-info/SOURCES.txt @@ -0,0 +1,74 @@ +README.md +pyproject.toml +src/slop_farmer/__init__.py +src/slop_farmer/app_config.py +src/slop_farmer/config.py +src/slop_farmer.egg-info/PKG-INFO +src/slop_farmer.egg-info/SOURCES.txt +src/slop_farmer.egg-info/dependency_links.txt +src/slop_farmer.egg-info/entry_points.txt +src/slop_farmer.egg-info/requires.txt +src/slop_farmer.egg-info/top_level.txt +src/slop_farmer/app/__init__.py +src/slop_farmer/app/cli.py +src/slop_farmer/app/dataset_refresh.py +src/slop_farmer/app/dataset_status.py +src/slop_farmer/app/deploy.py +src/slop_farmer/app/duplicate_prs.py +src/slop_farmer/app/hf_checkpoint_import.py +src/slop_farmer/app/pipeline.py +src/slop_farmer/app/pr_search.py +src/slop_farmer/app/pr_search_api.py +src/slop_farmer/app/publish.py +src/slop_farmer/app/snapshot_state.py +src/slop_farmer/app/workflow.py +src/slop_farmer/data/__init__.py +src/slop_farmer/data/dataset_card.py +src/slop_farmer/data/ghreplica_api.py +src/slop_farmer/data/github_api.py +src/slop_farmer/data/hf_dataset_repo.py +src/slop_farmer/data/http.py +src/slop_farmer/data/links.py +src/slop_farmer/data/normalize.py +src/slop_farmer/data/parquet_io.py +src/slop_farmer/data/search_duckdb.py +src/slop_farmer/data/snapshot_materialize.py +src/slop_farmer/data/snapshot_paths.py +src/slop_farmer/data/snapshot_source.py +src/slop_farmer/reports/__init__.py +src/slop_farmer/reports/analysis.py +src/slop_farmer/reports/analysis_cache.py +src/slop_farmer/reports/analysis_service.py +src/slop_farmer/reports/canonical_duplicate_pr.py +src/slop_farmer/reports/dashboard.py +src/slop_farmer/reports/duplicate_prs.py +src/slop_farmer/reports/new_contributor_report.py +src/slop_farmer/reports/pr_heuristics.py +src/slop_farmer/reports/pr_scope.py +src/slop_farmer/reports/pr_search_scope.py +src/slop_farmer/reports/pr_search_service.py +src/slop_farmer/reports/read_views.py +src/slop_farmer/reports/user_activity.py +tests/test_analysis.py +tests/test_analysis_cache.py +tests/test_canonical_duplicate_pr.py +tests/test_cli.py +tests/test_config.py +tests/test_dashboard.py +tests/test_dataset_status.py +tests/test_farmer_setup_assets.py +tests/test_ghreplica_api.py +tests/test_github_api.py +tests/test_hf_checkpoint_import.py +tests/test_http.py +tests/test_links.py +tests/test_new_contributor_report.py +tests/test_normalize.py +tests/test_pipeline_checkpoint_resume.py +tests/test_pr_scope.py +tests/test_pr_search.py +tests/test_pr_search_api.py +tests/test_publish.py +tests/test_snapshot_state.py +tests/test_update_transformers_dataset.py +tests/test_viewer_layout.py \ No newline at end of file diff --git a/src/slop_farmer.egg-info/dependency_links.txt b/src/slop_farmer.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/src/slop_farmer.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/src/slop_farmer.egg-info/entry_points.txt b/src/slop_farmer.egg-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..6d5d28c94c30bbdc5655823211236cb050af52a0 --- /dev/null +++ b/src/slop_farmer.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +slop-farmer = slop_farmer.app.cli:main diff --git 
a/src/slop_farmer.egg-info/requires.txt b/src/slop_farmer.egg-info/requires.txt new file mode 100644 index 0000000000000000000000000000000000000000..bd2e78babb7d2815ef1a798508bb3f24b6722920 --- /dev/null +++ b/src/slop_farmer.egg-info/requires.txt @@ -0,0 +1,20 @@ +duckdb>=1.2.2 +pyarrow>=18.0.0 +fastapi>=0.115.0 +huggingface_hub>=1.11.0 +pydantic>=2.11 +PyYAML>=6.0.2 +rank-bm25>=0.2.2 +fast-agent-mcp>=0.6.17 +uvicorn>=0.34.0 + +[dev] +httpx>=0.28.0 +pytest>=8.3.0 +ruff>=0.11 +ty>=0.0.23 + +[llm] + +[llm:python_full_version >= "3.13.5"] +fast-agent-mcp>=0.6.16 diff --git a/src/slop_farmer.egg-info/top_level.txt b/src/slop_farmer.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..711a4776b54ee2e8222b955a1d7c68dd98c6633c --- /dev/null +++ b/src/slop_farmer.egg-info/top_level.txt @@ -0,0 +1 @@ +slop_farmer diff --git a/src/slop_farmer/__init__.py b/src/slop_farmer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fd9a4ec046875dae286a647a080d8538704762e1 --- /dev/null +++ b/src/slop_farmer/__init__.py @@ -0,0 +1,3 @@ +__all__ = ["__version__"] + +__version__ = "0.1.1" diff --git a/src/slop_farmer/app/__init__.py b/src/slop_farmer/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/slop_farmer/app/cli.py b/src/slop_farmer/app/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb6976198a08904f970621891027f3e53d11643 --- /dev/null +++ b/src/slop_farmer/app/cli.py @@ -0,0 +1,1620 @@ +from __future__ import annotations + +import argparse +import json +import sys +from collections.abc import Callable +from pathlib import Path +from typing import Any + +from slop_farmer.app.duplicate_prs import DEFAULT_FILE_POLICY, FILE_POLICY_CHOICES +from slop_farmer.app_config import command_defaults, extract_cli_config_path +from slop_farmer.config import ( + AnalysisOptions, + CheckpointImportOptions, + DashboardDataOptions, + DatasetRefreshOptions, + DatasetStatusOptions, + DeployDashboardOptions, + MarkdownReportOptions, + NewContributorReportOptions, + PipelineOptions, + PrScopeOptions, + PrSearchRefreshOptions, + PublishAnalysisArtifactsOptions, + RepoRef, + SaveCacheOptions, + SnapshotAdoptOptions, +) +from slop_farmer.reports.duplicate_prs import DEFAULT_DUPLICATE_PR_MODEL + +CommandHandler = Callable[[argparse.Namespace, Path | None], None] + + +def _int_at_least(minimum: int) -> Callable[[str], int]: + def parse(raw: str) -> int: + value = int(raw) + if value < minimum: + raise argparse.ArgumentTypeError(f"expected integer >= {minimum}") + return value + + return parse + + +def build_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser: + defaults = _load_parser_defaults(config_path) + + parser = argparse.ArgumentParser(prog="slop-farmer") + parser.add_argument( + "--config", + type=Path, + help="YAML config file with shared repo/workspace/dashboard defaults.", + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + _add_scrape_parser(subparsers, defaults["scrape"]) + _add_refresh_dataset_parser(subparsers, defaults["refresh-dataset"]) + _add_analyze_parser(subparsers, defaults["analyze"]) + _add_pr_scope_parser(subparsers, defaults["pr-scope"]) + _add_checkpoint_import_parser(subparsers, defaults["import-hf-checkpoint"]) + _add_adopt_snapshot_parser(subparsers, defaults["adopt-snapshot"]) + _add_markdown_report_parser(subparsers) + _add_duplicate_prs_parser(subparsers) + 
_add_pr_search_parser(subparsers, defaults["pr-search"]) + _add_new_contributor_report_parser(subparsers, defaults["new-contributor-report"]) + _add_dashboard_data_parser(subparsers, defaults["dashboard-data"]) + _add_publish_analysis_artifacts_parser(subparsers, defaults["publish-analysis-artifacts"]) + _add_save_cache_parser(subparsers, defaults["save-cache"]) + _add_deploy_dashboard_parser(subparsers, defaults["deploy-dashboard"]) + _add_dataset_status_parser(subparsers, defaults["dataset-status"]) + return parser + + +def _load_parser_defaults(config_path: Path | None) -> dict[str, dict[str, Any]]: + commands = ( + "scrape", + "refresh-dataset", + "analyze", + "import-hf-checkpoint", + "pr-scope", + "pr-search", + "adopt-snapshot", + "new-contributor-report", + "dashboard-data", + "publish-analysis-artifacts", + "save-cache", + "deploy-dashboard", + "dataset-status", + ) + return {command: command_defaults(command, config_path=config_path) for command in commands} + + +# Parser builders + + +def _add_scrape_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + scrape = subparsers.add_parser("scrape", help="Scrape GitHub and write a snapshot dataset.") + scrape.add_argument( + "--repo", + default=defaults.get("repo", "huggingface/transformers"), + help="GitHub repository in owner/name form.", + ) + scrape.add_argument("--output-dir", type=Path, default=Path(defaults.get("output-dir", "data"))) + scrape.add_argument("--since", help="Incremental sync lower bound in ISO 8601 format.") + scrape.add_argument( + "--resume", + dest="resume", + action="store_true", + default=True, + help="Resume from the last successful local watermark when --since is not provided.", + ) + scrape.add_argument( + "--no-resume", + dest="resume", + action="store_false", + help="Ignore local watermark state and run from scratch unless --since is set.", + ) + scrape.add_argument( + "--http-timeout", type=int, default=180, help="Per-request timeout in seconds." + ) + scrape.add_argument( + "--http-max-retries", type=int, default=5, help="Retries for transient network failures." + ) + scrape.add_argument( + "--max-issues", type=int, default=None, help="Limit total issue endpoint items read." + ) + scrape.add_argument( + "--max-prs", type=int, default=None, help="Limit pull requests to hydrate in detail." + ) + scrape.add_argument( + "--issue-max-age-days", + type=int, + default=defaults.get("issue-max-age-days"), + help="Optional created_at age cap for issues included in the snapshot.", + ) + scrape.add_argument( + "--pr-max-age-days", + type=int, + default=defaults.get("pr-max-age-days"), + help="Optional created_at age cap for pull requests included in the snapshot.", + ) + scrape.add_argument( + "--max-issue-comments", type=int, default=None, help="Limit issue comment rows." + ) + scrape.add_argument( + "--max-reviews-per-pr", type=int, default=None, help="Limit review rows per PR." 
+ ) + scrape.add_argument( + "--max-review-comments-per-pr", + type=int, + default=None, + help="Limit inline review comment rows per PR.", + ) + scrape.add_argument( + "--fetch-timeline", + action="store_true", + default=bool(defaults.get("fetch-timeline", False)), + help="Fetch issue timeline events for linkage rows.", + ) + scrape.add_argument( + "--new-contributor-report", + dest="new_contributor_report", + action="store_true", + default=defaults.get("new-contributor-report"), + help="Generate new contributor dataset/report artifacts for the local snapshot.", + ) + scrape.add_argument( + "--no-new-contributor-report", + dest="new_contributor_report", + action="store_false", + help="Skip new contributor dataset/report generation.", + ) + scrape.add_argument( + "--new-contributor-window-days", + type=int, + default=int(defaults.get("new-contributor-window-days", 42)), + help="Recent public activity window for contributor enrichment.", + ) + scrape.add_argument( + "--new-contributor-max-authors", + type=int, + default=int(defaults.get("new-contributor-max-authors", 25)), + help="Maximum number of contributors to include in the new contributor report. Use 0 for no cap.", + ) + + +def _add_refresh_dataset_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + refresh = subparsers.add_parser( + "refresh-dataset", + help="Refresh the canonical Hugging Face dataset repo from remote watermark state.", + ) + refresh.add_argument( + "--repo", + default=defaults.get("repo", "huggingface/transformers"), + help="GitHub repository in owner/name form.", + ) + refresh.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + required=defaults.get("hf-repo-id") is None, + help="Canonical Hugging Face dataset repo id to refresh.", + ) + refresh.add_argument("--max-issues", type=int, default=defaults.get("max-issues")) + refresh.add_argument("--max-prs", type=int, default=defaults.get("max-prs")) + refresh.add_argument( + "--max-issue-comments", type=int, default=defaults.get("max-issue-comments") + ) + refresh.add_argument( + "--max-reviews-per-pr", type=int, default=defaults.get("max-reviews-per-pr") + ) + refresh.add_argument( + "--max-review-comments-per-pr", + type=int, + default=defaults.get("max-review-comments-per-pr"), + ) + refresh.add_argument( + "--fetch-timeline", + action="store_true", + default=bool(defaults.get("fetch-timeline", False)), + ) + refresh.add_argument( + "--new-contributor-report", + dest="new_contributor_report", + action="store_true", + default=bool(defaults.get("new-contributor-report", True)), + ) + refresh.add_argument( + "--no-new-contributor-report", + dest="new_contributor_report", + action="store_false", + ) + refresh.add_argument( + "--new-contributor-window-days", + type=int, + default=int(defaults.get("new-contributor-window-days", 42)), + ) + refresh.add_argument( + "--new-contributor-max-authors", + type=int, + default=int(defaults.get("new-contributor-max-authors", 25)), + ) + refresh.add_argument("--http-timeout", type=int, default=300) + refresh.add_argument("--http-max-retries", type=int, default=8) + refresh.add_argument("--checkpoint-every-comments", type=int, default=1000) + refresh.add_argument("--checkpoint-every-prs", type=int, default=25) + refresh.add_argument( + "--private-hf-repo", + dest="private_hf_repo", + action="store_true", + default=bool(defaults.get("private-hf-repo", False)), + help="Create the target dataset repo as private if needed.", + ) + refresh.add_argument( + "--private", + dest="private_hf_repo", + 
action="store_true", + help=argparse.SUPPRESS, + ) + + +def _add_analyze_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + analyze = subparsers.add_parser( + "analyze", + help="Analyze a snapshot and write a local JSON report. Canonical publication is separate.", + ) + analyze.add_argument( + "--snapshot-dir", + type=Path, + help="Snapshot directory to analyze. Defaults to the latest local snapshot.", + ) + analyze.add_argument( + "--output-dir", type=Path, default=Path(defaults.get("output-dir", "data")) + ) + analyze.add_argument("--output", type=Path, help="Output path for the analysis JSON.") + analyze.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + help="Analyze a canonical Hugging Face dataset repo by materializing a self-consistent published snapshot locally.", + ) + analyze.add_argument( + "--hf-revision", + default=defaults.get("hf-revision"), + help="Optional Hub revision for metadata and README download.", + ) + analyze.add_argument( + "--hf-materialize-dir", + type=Path, + default=Path(defaults["hf-materialize-dir"]) + if defaults.get("hf-materialize-dir") + else None, + help="Optional local directory used when materializing an HF dataset snapshot.", + ) + analyze.add_argument( + "--ranking-backend", + choices=("hybrid", "deterministic"), + default=defaults.get("ranking-backend", "hybrid"), + help="Whether to use deterministic-only ranking or optional fast-agent enrichment.", + ) + analyze.add_argument( + "--model", + default=defaults.get("model", "gpt-5.4-mini?service_tier=flex"), + help="Model string used by fast-agent when enabled.", + ) + analyze.add_argument( + "--max-clusters", + type=int, + default=int(defaults.get("max-clusters", 10)), + help="Maximum number of meta clusters to include in the report.", + ) + analyze.add_argument( + "--hybrid-llm-concurrency", + type=_int_at_least(1), + default=int(defaults.get("hybrid-llm-concurrency", 1)), + help=( + "Maximum number of hybrid LLM review units to run at once. " + "Use 1 to minimize provider pressure." + ), + ) + analyze.add_argument( + "--open-prs-only", + action="store_true", + default=bool(defaults.get("open-prs-only", False)), + help="Restrict PR analysis/clustering to open PRs only. Draft PRs are still included.", + ) + + +def _add_pr_scope_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + pr_scope = subparsers.add_parser( + "pr-scope", help="Cluster open PRs by holistic file/scope overlap." + ) + pr_scope.add_argument( + "--snapshot-dir", + type=Path, + help="Snapshot directory to analyze. Defaults to the latest local snapshot.", + ) + pr_scope.add_argument( + "--output-dir", type=Path, default=Path(defaults.get("output-dir", "data")) + ) + pr_scope.add_argument( + "--output", + type=Path, + help="Output path for the PR scope JSON. 
Defaults next to the snapshot.", + ) + pr_scope.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + help="Analyze a Hugging Face dataset repo by materializing its parquet export locally.", + ) + pr_scope.add_argument( + "--hf-revision", + default=defaults.get("hf-revision"), + help="Optional Hub revision for metadata and README download.", + ) + pr_scope.add_argument( + "--hf-materialize-dir", + type=Path, + default=Path(defaults["hf-materialize-dir"]) + if defaults.get("hf-materialize-dir") + else None, + help="Optional local directory used when materializing an HF dataset snapshot.", + ) + + +def _add_checkpoint_import_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + checkpoint_import = subparsers.add_parser( + "import-hf-checkpoint", + help="Import a checkpoint snapshot from an HF dataset repo into a clean local snapshot.", + ) + checkpoint_import.add_argument( + "--source-repo-id", + default=defaults.get("source-repo-id", "burtenshaw/transformers-pr-slop-dataset"), + help="Source Hugging Face dataset repo id containing checkpoint folders.", + ) + checkpoint_import.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "eval_data")), + help="Local root directory where the imported snapshot should be written.", + ) + checkpoint_import.add_argument( + "--checkpoint-id", + help="Optional checkpoint snapshot id. Defaults to the latest viable checkpoint.", + ) + checkpoint_import.add_argument( + "--checkpoint-root", + choices=("checkpoints", "_checkpoints"), + help="Optional checkpoint root directory. Defaults to auto-detect.", + ) + checkpoint_import.add_argument( + "--publish-repo-id", + help="Optional HF dataset repo id to publish the imported clean snapshot to.", + ) + checkpoint_import.add_argument( + "--private-hf-repo", + action="store_true", + help="Create the publish target as private when --publish-repo-id is used.", + ) + checkpoint_import.add_argument( + "--force", + action="store_true", + help="Overwrite an existing imported snapshot directory if present.", + ) + + +def _add_adopt_snapshot_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + adopt_snapshot = subparsers.add_parser( + "adopt-snapshot", + help="Mark an existing snapshot as the current pipeline base so the next scrape resumes from it.", + ) + adopt_snapshot.add_argument( + "--snapshot-dir", type=Path, required=True, help="Existing local snapshot directory." + ) + adopt_snapshot.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + help="Pipeline workspace root where state/ and snapshots/latest.json should be written.", + ) + adopt_snapshot.add_argument( + "--next-since", + help="Optional explicit watermark timestamp. Defaults to snapshot watermark.next_since, crawl_started_at, or extracted_at.", + ) + + +def _add_markdown_report_parser(subparsers: Any) -> None: + markdown = subparsers.add_parser( + "markdown-report", help="Render a markdown report from an analysis JSON file." + ) + markdown.add_argument( + "--input", type=Path, required=True, help="Path to an existing analysis JSON report." + ) + markdown.add_argument( + "--output", + type=Path, + help="Output path for the markdown report. Defaults next to the input JSON.", + ) + markdown.add_argument( + "--snapshot-dir", + type=Path, + help="Optional snapshot directory containing issues.parquet and pull_requests.parquet. 
Defaults to the input JSON parent directory.", + ) + + +def _add_duplicate_prs_parser(subparsers: Any) -> None: + duplicate_prs = subparsers.add_parser( + "duplicate-prs", + help="List or merge mergeable duplicate PR clusters from hybrid-enriched analysis.", + ) + duplicate_prs_subparsers = duplicate_prs.add_subparsers( + dest="duplicate_prs_command", required=True + ) + + duplicate_list = duplicate_prs_subparsers.add_parser( + "list", + help="List mergeable duplicate PR clusters from a hybrid-enriched analysis report.", + ) + duplicate_list_source = duplicate_list.add_mutually_exclusive_group(required=True) + duplicate_list_source.add_argument( + "--report", type=Path, help="Path to an analysis JSON report." + ) + duplicate_list_source.add_argument( + "--snapshot-dir", type=Path, help="Snapshot directory to analyze." + ) + duplicate_list.add_argument( + "--limit", type=int, default=10, help="Maximum number of mergeable clusters to print." + ) + duplicate_list.add_argument( + "--model", + default=DEFAULT_DUPLICATE_PR_MODEL, + help="Model string used for hybrid analysis and duplicate-PR mergeability gating.", + ) + + duplicate_merge = duplicate_prs_subparsers.add_parser( + "merge", + help="Use Codex to synthesize and publish a minimal upstream PR for a mergeable duplicate cluster.", + ) + duplicate_merge_source = duplicate_merge.add_mutually_exclusive_group(required=True) + duplicate_merge_source.add_argument( + "--report", type=Path, help="Path to an analysis JSON report." + ) + duplicate_merge_source.add_argument( + "--snapshot-dir", type=Path, help="Snapshot directory to analyze." + ) + duplicate_merge.add_argument( + "--repo-dir", + type=Path, + required=True, + help="Local upstream repository checkout used for the synthesis worktree.", + ) + duplicate_merge.add_argument( + "--upstream-repo", + help="Optional owner/name override for the upstream target repository.", + ) + duplicate_merge.add_argument( + "--upstream-remote", + default="origin", + help="Remote in --repo-dir that points at the upstream repository. Defaults to origin.", + ) + duplicate_merge.add_argument( + "--fork-remote", + default="fork", + help="Remote in the synthesis worktree used for pushing the branch. Defaults to fork.", + ) + duplicate_merge.add_argument("--cluster-id", help="Optional cluster override.") + duplicate_merge.add_argument( + "--fork-repo", + help="Optional owner/name override for the fork push target. Overrides --fork-owner when both are set.", + ) + duplicate_merge.add_argument( + "--fork-owner", + help="Optional GitHub fork owner override. 
Defaults to the authenticated user.", + ) + duplicate_merge.add_argument( + "--file-policy", + choices=FILE_POLICY_CHOICES, + default=DEFAULT_FILE_POLICY, + help="Changed-file policy enforced on the synthesized branch.", + ) + duplicate_merge.add_argument( + "--model", + default=DEFAULT_DUPLICATE_PR_MODEL, + help="Model string used for hybrid analysis, mergeability gating, and Codex synthesis.", + ) + + +def _add_pr_search_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + pr_search = subparsers.add_parser( + "pr-search", + help="Refresh and query the DuckDB-backed PR code-similarity index.", + ) + pr_search_subparsers = pr_search.add_subparsers(dest="pr_search_command", required=True) + + refresh = pr_search_subparsers.add_parser( + "refresh", + help="Refresh the PR code-similarity index from a local snapshot or HF dataset repo.", + ) + refresh_source = refresh.add_mutually_exclusive_group() + refresh_source.add_argument( + "--snapshot-dir", + type=Path, + help="Snapshot directory to index. Defaults to the latest local snapshot.", + ) + refresh_source.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + help="Hugging Face dataset repo id to materialize before indexing.", + ) + refresh.add_argument( + "--hf-revision", + default=defaults.get("hf-revision"), + help="Optional Hub revision for metadata and README download.", + ) + refresh.add_argument( + "--hf-materialize-dir", + type=Path, + default=Path(defaults["hf-materialize-dir"]) + if defaults.get("hf-materialize-dir") + else None, + help="Optional local directory used when materializing an HF dataset snapshot.", + ) + refresh.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + help="Workspace root used for latest snapshot resolution and default DB placement.", + ) + refresh.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. Defaults to /state/pr-search.duckdb.", + ) + refresh.add_argument("--limit-prs", type=int, help="Optional cap on indexed PRs.") + refresh.add_argument( + "--include-drafts", + action="store_true", + default=bool(defaults.get("include-drafts", False)), + help="Include draft PRs in the indexed universe.", + ) + refresh.add_argument( + "--include-closed", + action="store_true", + default=bool(defaults.get("include-closed", False)), + help="Include closed PRs in the indexed universe.", + ) + refresh.add_argument( + "--replace-active", + dest="replace_active", + action="store_true", + default=True, + help="Activate the new run on success. Enabled by default.", + ) + refresh.add_argument( + "--no-replace-active", + dest="replace_active", + action="store_false", + help="Write the new run without switching the active run pointer.", + ) + + similar = pr_search_subparsers.add_parser( + "similar", help="Show similar PRs for one indexed pull request." + ) + similar.add_argument("pr_number", type=int, help="Pull request number to query.") + similar.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. 
Defaults to /state/pr-search.duckdb.", + ) + similar.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + ) + similar.add_argument("--repo", help="Optional repo override when the DB holds multiple repos.") + similar.add_argument("--limit", type=int, default=10, help="Maximum number of rows to show.") + similar.add_argument("--json", action="store_true", help="Emit machine-readable JSON.") + + probe_github = pr_search_subparsers.add_parser( + "probe-github", + help="Fetch one live GitHub PR and compare it against the active indexed scope features.", + ) + probe_github.add_argument("pr_number", type=int, help="Pull request number to probe.") + probe_github.add_argument( + "--repo", + help="GitHub repository in owner/name form. Defaults to the active repo in the DB.", + ) + probe_github.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. Defaults to /state/pr-search.duckdb.", + ) + probe_github.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + ) + probe_github.add_argument( + "--limit", + type=int, + default=10, + help="Maximum number of similar PR rows to show.", + ) + probe_github.add_argument("--json", action="store_true", help="Emit machine-readable JSON.") + + candidate_clusters = pr_search_subparsers.add_parser( + "candidate-clusters", + help="Show candidate scope clusters for one indexed pull request.", + ) + candidate_clusters.add_argument("pr_number", type=int, help="Pull request number to query.") + candidate_clusters.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. Defaults to /state/pr-search.duckdb.", + ) + candidate_clusters.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + ) + candidate_clusters.add_argument( + "--repo", help="Optional repo override when the DB holds multiple repos." + ) + candidate_clusters.add_argument( + "--limit", type=int, default=5, help="Maximum number of rows to show." + ) + candidate_clusters.add_argument("--json", action="store_true", help="Emit JSON.") + + cluster = pr_search_subparsers.add_parser("cluster", help="Inspect one scope cluster.") + cluster_subparsers = cluster.add_subparsers(dest="pr_search_cluster_command", required=True) + cluster_show = cluster_subparsers.add_parser("show", help="Show cluster details.") + cluster_show.add_argument("cluster_id", help="Cluster identifier.") + cluster_show.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. Defaults to /state/pr-search.duckdb.", + ) + cluster_show.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + ) + cluster_show.add_argument("--repo", help="Optional repo override.") + cluster_show.add_argument("--json", action="store_true", help="Emit JSON.") + + explain_pair = pr_search_subparsers.add_parser( + "explain-pair", + help="Explain one PR pair, falling back to on-demand scoring when needed.", + ) + explain_pair.add_argument("left_pr_number", type=int) + explain_pair.add_argument("right_pr_number", type=int) + explain_pair.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. 
Defaults to /state/pr-search.duckdb.", + ) + explain_pair.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + ) + explain_pair.add_argument("--repo", help="Optional repo override.") + explain_pair.add_argument("--json", action="store_true", help="Emit JSON.") + + status = pr_search_subparsers.add_parser("status", help="Show the active PR search run.") + status.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. Defaults to /state/pr-search.duckdb.", + ) + status.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + ) + status.add_argument("--repo", help="Optional repo override.") + status.add_argument("--json", action="store_true", help="Emit JSON.") + + contributor = pr_search_subparsers.add_parser( + "contributor", help="Show indexed contributor summary for one author login." + ) + contributor.add_argument("login", help="GitHub author login to query.") + contributor.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. Defaults to /state/pr-search.duckdb.", + ) + contributor.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + ) + contributor.add_argument("--repo", help="Optional repo override.") + contributor.add_argument("--json", action="store_true", help="Emit JSON.") + + contributor_prs = pr_search_subparsers.add_parser( + "contributor-prs", help="List indexed PRs for one contributor login." + ) + contributor_prs.add_argument("login", help="GitHub author login to query.") + contributor_prs.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. Defaults to /state/pr-search.duckdb.", + ) + contributor_prs.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + ) + contributor_prs.add_argument("--repo", help="Optional repo override.") + contributor_prs.add_argument("--limit", type=int, default=20, help="Maximum rows to show.") + contributor_prs.add_argument("--json", action="store_true", help="Emit JSON.") + + pr_contributor = pr_search_subparsers.add_parser( + "pr-contributor", help="Show contributor summary for the author of one indexed PR." + ) + pr_contributor.add_argument("pr_number", type=int, help="Pull request number to query.") + pr_contributor.add_argument( + "--db", + type=Path, + default=Path(defaults["db"]) if defaults.get("db") else None, + help="DuckDB file path. Defaults to /state/pr-search.duckdb.", + ) + pr_contributor.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + ) + pr_contributor.add_argument("--repo", help="Optional repo override.") + pr_contributor.add_argument("--json", action="store_true", help="Emit JSON.") + + +def _add_new_contributor_report_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + new_contributor = subparsers.add_parser( + "new-contributor-report", + help="Render a markdown report for newly observed contributors in a snapshot.", + ) + new_contributor.add_argument( + "--snapshot-dir", + type=Path, + help="Snapshot directory to inspect. Defaults to the latest local snapshot.", + ) + new_contributor.add_argument( + "--output-dir", type=Path, default=Path(defaults.get("output-dir", "data")) + ) + new_contributor.add_argument( + "--output", + type=Path, + help="Output path for the markdown report. 
Defaults next to the snapshot.", + ) + new_contributor.add_argument( + "--json-output", type=Path, help="Optional JSON output path. Defaults next to the snapshot." + ) + new_contributor.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + help="Analyze a Hugging Face dataset repo by materializing its parquet export locally.", + ) + new_contributor.add_argument( + "--hf-revision", + default=defaults.get("hf-revision"), + help="Optional Hub revision for metadata and README download.", + ) + new_contributor.add_argument( + "--hf-materialize-dir", + type=Path, + default=Path(defaults["hf-materialize-dir"]) + if defaults.get("hf-materialize-dir") + else None, + help="Optional local directory used when materializing an HF dataset snapshot.", + ) + new_contributor.add_argument( + "--window-days", + type=int, + default=int(defaults.get("window-days", 42)), + help="Recent public activity window for contributor enrichment.", + ) + new_contributor.add_argument( + "--max-authors", + type=int, + default=int(defaults.get("max-authors", 25)), + help="Maximum number of contributors to include. Use 0 for no cap.", + ) + + +def _add_dashboard_data_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + dashboard = subparsers.add_parser( + "dashboard-data", help="Export frontend-ready JSON for the static dashboard." + ) + dashboard.add_argument( + "--snapshot-dir", + type=Path, + help="Snapshot directory to export. Defaults to the latest local snapshot.", + ) + dashboard.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "web/public/data")), + ) + dashboard.add_argument( + "--analysis-input", + type=Path, + help="Optional analysis report JSON override. Defaults to canonical published current analysis when available, otherwise falls back to snapshot-local analysis files.", + ) + dashboard.add_argument( + "--contributors-input", + type=Path, + help="Optional contributor report JSON override. Defaults to the materialized snapshot's new-contributors-report.json.", + ) + dashboard.add_argument( + "--pr-scope-input", + type=Path, + help="Optional PR scope cluster JSON override. 
Defaults to the materialized snapshot's pr-scope-clusters.json.", + ) + dashboard.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + help="Materialize the canonical Hugging Face dataset repo instead of using the latest local snapshot.", + ) + dashboard.add_argument( + "--hf-revision", + default=defaults.get("hf-revision"), + help="Optional Hub revision for metadata and README download.", + ) + dashboard.add_argument( + "--hf-materialize-dir", + type=Path, + default=Path(defaults["hf-materialize-dir"]) + if defaults.get("hf-materialize-dir") + else None, + help="Optional local directory used when materializing an HF dataset snapshot.", + ) + dashboard.add_argument( + "--window-days", + type=int, + default=int(defaults.get("window-days", 14)), + help="Recent PR window to expose in the dashboard.", + ) + + +def _add_publish_analysis_artifacts_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + publish_analysis = subparsers.add_parser( + "publish-analysis-artifacts", + help="Publish archived and optional canonical hybrid analysis artifacts to a dataset repo.", + ) + publish_analysis.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + help="Pipeline workspace root containing snapshots/latest.json.", + ) + publish_analysis.add_argument( + "--snapshot-dir", + type=Path, + help="Optional explicit snapshot directory containing analysis-report-hybrid.json.", + ) + publish_analysis.add_argument( + "--analysis-input", + type=Path, + help="Optional explicit hybrid analysis report JSON to publish instead of snapshot-dir discovery.", + ) + publish_analysis.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + required=defaults.get("hf-repo-id") is None, + help="Target Hugging Face dataset repo id.", + ) + publish_analysis.add_argument("--analysis-id", required=True, help="Immutable analysis run id.") + publish_analysis.add_argument( + "--canonical", + action="store_true", + default=bool(defaults.get("canonical", False)), + help="Also update the stable analysis/current canonical alias.", + ) + publish_analysis.add_argument( + "--save-cache", + action="store_true", + default=bool(defaults.get("save-cache", False)), + help="Also upload snapshot-local analysis-state/ as mutable operational cache at repo-root analysis-state/.", + ) + publish_analysis.add_argument( + "--private-hf-repo", + action="store_true", + default=bool(defaults.get("private-hf-repo", False)), + help="Create the target dataset repo as private if needed.", + ) + + +def _add_save_cache_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + save_cache = subparsers.add_parser( + "save-cache", + help="Upload snapshot-local analysis-state/ as mutable operational cache to a dataset repo.", + ) + save_cache.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + help="Pipeline workspace root containing snapshots/latest.json.", + ) + save_cache.add_argument( + "--snapshot-dir", + type=Path, + help="Optional explicit snapshot directory containing analysis-state/.", + ) + save_cache.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + required=defaults.get("hf-repo-id") is None, + help="Target Hugging Face dataset repo id.", + ) + save_cache.add_argument( + "--private-hf-repo", + action="store_true", + default=bool(defaults.get("private-hf-repo", False)), + help="Create the target dataset repo as private if needed.", + ) + + +def _add_deploy_dashboard_parser(subparsers: Any, defaults: 
dict[str, Any]) -> None: + deploy_dashboard = subparsers.add_parser( + "deploy-dashboard", + help="Build and publish the static dashboard to a Hugging Face Space from a materialized dataset view.", + ) + deploy_dashboard.add_argument( + "--pipeline-data-dir", + type=Path, + default=Path(defaults.get("pipeline-data-dir", "data")), + ) + deploy_dashboard.add_argument( + "--web-dir", type=Path, default=Path(defaults.get("web-dir", "web")) + ) + deploy_dashboard.add_argument( + "--snapshot-dir", + type=Path, + help="Optional snapshot directory to publish. Defaults to the latest snapshot in --pipeline-data-dir.", + ) + deploy_dashboard.add_argument( + "--analysis-input", + type=Path, + help="Optional analysis report JSON override. Omit to prefer canonical published current analysis when available.", + ) + deploy_dashboard.add_argument( + "--contributors-input", + type=Path, + help="Optional contributor report JSON override.", + ) + deploy_dashboard.add_argument( + "--pr-scope-input", + type=Path, + help="Optional PR scope cluster JSON override.", + ) + deploy_dashboard.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + help="Materialize the canonical Hugging Face dataset repo instead of using the latest local snapshot.", + ) + deploy_dashboard.add_argument( + "--hf-revision", + default=defaults.get("hf-revision"), + help="Optional Hub revision for metadata and README download.", + ) + deploy_dashboard.add_argument( + "--hf-materialize-dir", + type=Path, + default=Path(defaults["hf-materialize-dir"]) + if defaults.get("hf-materialize-dir") + else None, + help="Optional local directory used when materializing an HF dataset snapshot.", + ) + deploy_dashboard.add_argument( + "--refresh-contributors", + action="store_true", + default=bool(defaults.get("refresh-contributors", False)), + ) + deploy_dashboard.add_argument( + "--dashboard-window-days", + type=int, + default=int(defaults.get("dashboard-window-days", 14)), + ) + deploy_dashboard.add_argument( + "--contributor-window-days", + type=int, + default=int( + defaults.get("contributor-window-days", defaults.get("dashboard-window-days", 14)) + ), + ) + deploy_dashboard.add_argument( + "--contributor-max-authors", + type=int, + default=int(defaults.get("contributor-max-authors", 0)), + ) + deploy_dashboard.add_argument( + "--private-space", + action="store_true", + default=bool(defaults.get("private-space", False)), + ) + deploy_dashboard.add_argument( + "--commit-message", + default=defaults.get("commit-message", "Deploy dashboard"), + ) + deploy_dashboard.add_argument( + "--space-id", + default=defaults.get("space-id"), + help="Hugging Face Space repo id.", + ) + deploy_dashboard.add_argument("--space-title", default=defaults.get("space-title")) + deploy_dashboard.add_argument("--space-emoji", default=defaults.get("space-emoji", "📊")) + deploy_dashboard.add_argument( + "--space-color-from", default=defaults.get("space-color-from", "indigo") + ) + deploy_dashboard.add_argument( + "--space-color-to", default=defaults.get("space-color-to", "blue") + ) + deploy_dashboard.add_argument( + "--space-short-description", + default=defaults.get( + "space-short-description", "Static dashboard for the slop-farmer PR analysis pipeline." 
+ ), + ) + deploy_dashboard.add_argument("--dataset-id", default=defaults.get("dataset-id")) + deploy_dashboard.add_argument( + "--space-tags", default=defaults.get("space-tags", "dashboard,static") + ) + + +def _add_dataset_status_parser(subparsers: Any, defaults: dict[str, Any]) -> None: + dataset_status = subparsers.add_parser( + "dataset-status", + help="Inspect canonical dataset freshness and the local latest pointer.", + ) + dataset_status.add_argument("--repo", default=defaults.get("repo")) + dataset_status.add_argument( + "--output-dir", + type=Path, + default=Path(defaults.get("output-dir", "data")), + help="Local workspace root containing snapshots/latest.json.", + ) + dataset_status.add_argument( + "--hf-repo-id", + default=defaults.get("hf-repo-id"), + help="Canonical Hugging Face dataset repo id to inspect.", + ) + dataset_status.add_argument( + "--hf-revision", + default=defaults.get("hf-revision"), + help="Optional Hub revision for metadata and README download.", + ) + dataset_status.add_argument("--json", action="store_true", help="Emit machine-readable JSON.") + + +# Dispatch helpers + + +def _explicit_flag_present(flag: str) -> bool: + return any(arg == flag or arg.startswith(f"{flag}=") for arg in sys.argv[1:]) + + +def _resolve_hf_inputs(args: argparse.Namespace) -> tuple[str | None, str | None, Path | None]: + hf_repo_id = args.hf_repo_id + hf_revision = args.hf_revision + hf_materialize_dir = args.hf_materialize_dir + if args.snapshot_dir is not None and not _explicit_flag_present("--hf-repo-id"): + hf_repo_id = None + hf_revision = None + hf_materialize_dir = None + return hf_repo_id, hf_revision, hf_materialize_dir + + +def _run_scrape(args: argparse.Namespace, config_path: Path | None) -> None: + from slop_farmer.app.pipeline import run_pipeline + + new_contributor_report = bool(args.new_contributor_report) + options = PipelineOptions( + repo=RepoRef.parse(args.repo), + output_dir=args.output_dir, + since=args.since, + resume=args.resume, + http_timeout=args.http_timeout, + http_max_retries=args.http_max_retries, + max_issues=args.max_issues, + max_prs=args.max_prs, + max_issue_comments=args.max_issue_comments, + max_reviews_per_pr=args.max_reviews_per_pr, + max_review_comments_per_pr=args.max_review_comments_per_pr, + fetch_timeline=args.fetch_timeline, + new_contributor_report=new_contributor_report, + new_contributor_window_days=args.new_contributor_window_days, + new_contributor_max_authors=args.new_contributor_max_authors, + issue_max_age_days=args.issue_max_age_days, + pr_max_age_days=args.pr_max_age_days, + ) + print(run_pipeline(options)) + + +def _run_refresh_dataset(args: argparse.Namespace, config_path: Path | None) -> None: + from slop_farmer.app.dataset_refresh import run_dataset_refresh + + refresh_defaults = command_defaults("refresh-dataset", config_path=config_path) + result = run_dataset_refresh( + DatasetRefreshOptions( + repo=RepoRef.parse(args.repo), + hf_repo_id=args.hf_repo_id, + private_hf_repo=args.private_hf_repo, + max_issues=args.max_issues, + max_prs=args.max_prs, + max_issue_comments=args.max_issue_comments, + max_reviews_per_pr=args.max_reviews_per_pr, + max_review_comments_per_pr=args.max_review_comments_per_pr, + fetch_timeline=args.fetch_timeline, + new_contributor_report=args.new_contributor_report, + new_contributor_window_days=args.new_contributor_window_days, + new_contributor_max_authors=args.new_contributor_max_authors, + http_timeout=args.http_timeout, + http_max_retries=args.http_max_retries, + 
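`_resolve_hf_inputs` above encodes a precedence rule: when the caller pins a local `--snapshot-dir` and does not explicitly repeat `--hf-repo-id` on the command line, the configured Hub repo (and revision and materialize dir) is dropped so the local snapshot wins. A simplified sketch of that rule, assuming an injectable `argv` and returning only the repo id rather than the full three-part tuple:

```python
import sys
from pathlib import Path

def explicit_flag_present(flag: str, argv: list[str]) -> bool:
    # A flag counts as explicit whether passed as "--hf-repo-id value" or "--hf-repo-id=value".
    return any(arg == flag or arg.startswith(f"{flag}=") for arg in argv)

def resolve_hf_repo_id(
    snapshot_dir: Path | None, hf_repo_id: str | None, argv: list[str] | None = None
) -> str | None:
    # Pinned local snapshot without an explicit --hf-repo-id: ignore the configured Hub repo.
    argv = sys.argv[1:] if argv is None else argv
    if snapshot_dir is not None and not explicit_flag_present("--hf-repo-id", argv):
        return None
    return hf_repo_id

assert resolve_hf_repo_id(Path("data/snapshots/x"), "org/dataset", ["--snapshot-dir", "data/snapshots/x"]) is None
assert resolve_hf_repo_id(
    Path("data/snapshots/x"), "org/dataset", ["--snapshot-dir", "data/snapshots/x", "--hf-repo-id=org/dataset"]
) == "org/dataset"
```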
checkpoint_every_comments=args.checkpoint_every_comments, + checkpoint_every_prs=args.checkpoint_every_prs, + cluster_suppression_rules=tuple(refresh_defaults.get("cluster-suppression-rules", ())), + ) + ) + print(json.dumps(result, indent=2)) + + +def _run_analyze(args: argparse.Namespace, config_path: Path | None) -> None: + from slop_farmer.reports.analysis import run_analysis + + analyze_defaults = command_defaults("analyze", config_path=config_path) + hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args) + options = AnalysisOptions( + snapshot_dir=args.snapshot_dir, + output_dir=args.output_dir, + output=args.output, + hf_repo_id=hf_repo_id, + hf_revision=hf_revision, + hf_materialize_dir=hf_materialize_dir, + ranking_backend=args.ranking_backend, + model=args.model, + max_clusters=args.max_clusters, + hybrid_llm_concurrency=args.hybrid_llm_concurrency, + open_prs_only=args.open_prs_only, + cached_analysis=bool(analyze_defaults.get("cached_analysis", False)), + pr_template_cleanup_mode=str( + analyze_defaults.get("pr-template-cleanup-mode", "merge_defaults") + ), + pr_template_strip_html_comments=bool( + analyze_defaults.get("pr-template-strip-html-comments", True) + ), + pr_template_trim_closing_reference_prefix=bool( + analyze_defaults.get("pr-template-trim-closing-reference-prefix", True) + ), + pr_template_section_patterns=tuple( + analyze_defaults.get("pr-template-section-patterns", ()) + ), + pr_template_line_patterns=tuple(analyze_defaults.get("pr-template-line-patterns", ())), + cluster_suppression_rules=tuple(analyze_defaults.get("cluster-suppression-rules", ())), + ) + print(run_analysis(options)) + + +def _run_markdown_report(args: argparse.Namespace, config_path: Path | None) -> None: + del config_path + from slop_farmer.reports.analysis import render_markdown_report + + print( + render_markdown_report( + MarkdownReportOptions( + input=args.input, + output=args.output, + snapshot_dir=args.snapshot_dir, + ) + ) + ) + + +def _run_duplicate_prs(args: argparse.Namespace, config_path: Path | None) -> None: + del config_path + from slop_farmer.app.duplicate_prs import run_duplicate_pr_merge + from slop_farmer.reports.duplicate_prs import list_mergeable_duplicate_pr_clusters + + if args.duplicate_prs_command == "list": + clusters = list_mergeable_duplicate_pr_clusters( + report_path=args.report, + snapshot_dir=args.snapshot_dir, + limit=args.limit, + model=args.model, + ) + print(json.dumps(clusters, indent=2)) + return + + result = run_duplicate_pr_merge( + report_path=args.report, + snapshot_dir=args.snapshot_dir, + repo_dir=args.repo_dir, + upstream_repo=args.upstream_repo, + upstream_remote=args.upstream_remote, + fork_remote=args.fork_remote, + cluster_id=args.cluster_id, + fork_repo=args.fork_repo, + fork_owner=args.fork_owner, + file_policy=args.file_policy, + model=args.model, + ) + print(json.dumps(result, indent=2)) + + +def _run_pr_scope(args: argparse.Namespace, config_path: Path | None) -> None: + from slop_farmer.reports.pr_scope import run_pr_scope_report + + pr_scope_defaults = command_defaults("pr-scope", config_path=config_path) + hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args) + print( + run_pr_scope_report( + PrScopeOptions( + snapshot_dir=args.snapshot_dir, + output_dir=args.output_dir, + output=args.output, + hf_repo_id=hf_repo_id, + hf_revision=hf_revision, + hf_materialize_dir=hf_materialize_dir, + cluster_suppression_rules=tuple( + pr_scope_defaults.get("cluster-suppression-rules", ()) + ), + ) + ) + ) + + +def 
_run_pr_search(args: argparse.Namespace, config_path: Path | None) -> None: + from slop_farmer.app.pr_search import ( + explain_pr_search_pair, + format_pr_search_candidate_clusters, + format_pr_search_cluster, + format_pr_search_contributor, + format_pr_search_contributor_pulls, + format_pr_search_pair, + format_pr_search_probe, + format_pr_search_pull_contributor, + format_pr_search_similar, + format_pr_search_status, + get_pr_search_candidate_clusters, + get_pr_search_cluster, + get_pr_search_contributor, + get_pr_search_contributor_pulls, + get_pr_search_pull_contributor, + get_pr_search_similar, + get_pr_search_status, + probe_pr_search_github, + resolve_pr_search_db_path, + run_pr_search_refresh, + ) + + pr_search_defaults = command_defaults("pr-search", config_path=config_path) + if args.pr_search_command == "refresh": + hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args) + result = run_pr_search_refresh( + PrSearchRefreshOptions( + snapshot_dir=args.snapshot_dir, + output_dir=args.output_dir, + db=args.db, + hf_repo_id=hf_repo_id, + hf_revision=hf_revision, + hf_materialize_dir=hf_materialize_dir, + include_drafts=args.include_drafts, + include_closed=args.include_closed, + limit_prs=args.limit_prs, + replace_active=args.replace_active, + cluster_suppression_rules=tuple( + pr_search_defaults.get("cluster-suppression-rules", ()) + ), + ) + ) + print(json.dumps(result, indent=2)) + return + + db_path = resolve_pr_search_db_path(args.db, output_dir=args.output_dir) + if args.pr_search_command == "similar": + result = get_pr_search_similar( + db_path, + pr_number=args.pr_number, + repo=args.repo, + limit=args.limit, + ) + print(json.dumps(result, indent=2) if args.json else format_pr_search_similar(result)) + return + + if args.pr_search_command == "probe-github": + result = probe_pr_search_github( + db_path, + pr_number=args.pr_number, + repo=args.repo, + limit=args.limit, + ) + print(json.dumps(result, indent=2) if args.json else format_pr_search_probe(result)) + return + + if args.pr_search_command == "candidate-clusters": + result = get_pr_search_candidate_clusters( + db_path, + pr_number=args.pr_number, + repo=args.repo, + limit=args.limit, + ) + print( + json.dumps(result, indent=2) + if args.json + else format_pr_search_candidate_clusters(result) + ) + return + + if args.pr_search_command == "cluster": + if args.pr_search_cluster_command != "show": + raise ValueError( + f"Unsupported pr-search cluster command: {args.pr_search_cluster_command}" + ) + result = get_pr_search_cluster( + db_path, + cluster_id=args.cluster_id, + repo=args.repo, + ) + print(json.dumps(result, indent=2) if args.json else format_pr_search_cluster(result)) + return + + if args.pr_search_command == "explain-pair": + result = explain_pr_search_pair( + db_path, + left_pr_number=args.left_pr_number, + right_pr_number=args.right_pr_number, + repo=args.repo, + ) + print(json.dumps(result, indent=2) if args.json else format_pr_search_pair(result)) + return + + if args.pr_search_command == "status": + result = get_pr_search_status(db_path, repo=args.repo) + print(json.dumps(result, indent=2) if args.json else format_pr_search_status(result)) + return + + if args.pr_search_command == "contributor": + result = get_pr_search_contributor(db_path, author_login=args.login, repo=args.repo) + print(json.dumps(result, indent=2) if args.json else format_pr_search_contributor(result)) + return + + if args.pr_search_command == "contributor-prs": + result = get_pr_search_contributor_pulls( + db_path, + 
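Each `pr-search` subcommand above follows the same output idiom: emit `json.dumps(result, indent=2)` for `--json` callers and a human-readable formatter otherwise. A tiny helper along these lines could factor out the repetition; `emit` and the sample payload are hypothetical, not part of the module:

```python
import json
from typing import Any, Callable

def emit(result: Any, *, as_json: bool, formatter: Callable[[Any], str]) -> None:
    # Machine callers get stable JSON; interactive callers get the human formatter.
    print(json.dumps(result, indent=2) if as_json else formatter(result))

status = {"repo": "huggingface/diffusers", "prs_indexed": 1234}
emit(status, as_json=False, formatter=lambda s: f"{s['repo']}: {s['prs_indexed']} PRs indexed")
emit(status, as_json=True, formatter=str)
```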
author_login=args.login, + repo=args.repo, + limit=args.limit, + ) + print( + json.dumps(result, indent=2) + if args.json + else format_pr_search_contributor_pulls(result) + ) + return + + if args.pr_search_command == "pr-contributor": + result = get_pr_search_pull_contributor( + db_path, + pr_number=args.pr_number, + repo=args.repo, + ) + print( + json.dumps(result, indent=2) if args.json else format_pr_search_pull_contributor(result) + ) + return + + raise ValueError(f"Unsupported pr-search command: {args.pr_search_command}") + + +def _run_import_hf_checkpoint(args: argparse.Namespace, config_path: Path | None) -> None: + del config_path + from slop_farmer.app.hf_checkpoint_import import import_hf_checkpoint + + print( + import_hf_checkpoint( + CheckpointImportOptions( + source_repo_id=args.source_repo_id, + output_dir=args.output_dir, + checkpoint_id=args.checkpoint_id, + checkpoint_root=args.checkpoint_root, + publish_repo_id=args.publish_repo_id, + private_hf_repo=args.private_hf_repo, + force=args.force, + ) + ) + ) + + +def _run_adopt_snapshot(args: argparse.Namespace, config_path: Path | None) -> None: + del config_path + from slop_farmer.app.snapshot_state import adopt_snapshot_for_pipeline + + print( + adopt_snapshot_for_pipeline( + SnapshotAdoptOptions( + snapshot_dir=args.snapshot_dir, + output_dir=args.output_dir, + next_since=args.next_since, + ) + ) + ) + + +def _run_new_contributor_report(args: argparse.Namespace, config_path: Path | None) -> None: + del config_path + from slop_farmer.reports.new_contributor_report import run_new_contributor_report + + hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args) + print( + run_new_contributor_report( + NewContributorReportOptions( + snapshot_dir=args.snapshot_dir, + output_dir=args.output_dir, + output=args.output, + json_output=args.json_output, + hf_repo_id=hf_repo_id, + hf_revision=hf_revision, + hf_materialize_dir=hf_materialize_dir, + window_days=args.window_days, + max_authors=args.max_authors, + ) + ) + ) + + +def _run_dashboard_data(args: argparse.Namespace, config_path: Path | None) -> None: + from slop_farmer.reports.dashboard import run_dashboard_data + + dashboard_defaults = command_defaults("dashboard-data", config_path=config_path) + hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args) + print( + run_dashboard_data( + DashboardDataOptions( + snapshot_dir=args.snapshot_dir, + output_dir=args.output_dir, + analysis_input=args.analysis_input, + contributors_input=args.contributors_input, + pr_scope_input=args.pr_scope_input, + hf_repo_id=hf_repo_id, + hf_revision=hf_revision, + hf_materialize_dir=hf_materialize_dir, + window_days=args.window_days, + snapshot_root=( + Path(dashboard_defaults["snapshot-root"]) + if dashboard_defaults.get("snapshot-root") + else None + ), + ) + ) + ) + + +def _run_deploy_dashboard(args: argparse.Namespace, config_path: Path | None) -> None: + del config_path + from slop_farmer.app.deploy import run_deploy_dashboard + + hf_repo_id, hf_revision, hf_materialize_dir = _resolve_hf_inputs(args) + run_deploy_dashboard( + DeployDashboardOptions( + pipeline_data_dir=args.pipeline_data_dir, + web_dir=args.web_dir, + snapshot_dir=args.snapshot_dir, + analysis_input=args.analysis_input, + contributors_input=args.contributors_input, + pr_scope_input=args.pr_scope_input, + hf_repo_id=hf_repo_id, + hf_revision=hf_revision, + hf_materialize_dir=hf_materialize_dir, + refresh_contributors=args.refresh_contributors, + dashboard_window_days=args.dashboard_window_days, + 
contributor_window_days=args.contributor_window_days, + contributor_max_authors=args.contributor_max_authors, + private_space=args.private_space, + commit_message=args.commit_message, + space_id=args.space_id, + space_title=args.space_title, + space_emoji=args.space_emoji, + space_color_from=args.space_color_from, + space_color_to=args.space_color_to, + space_short_description=args.space_short_description, + dataset_id=args.dataset_id, + space_tags=args.space_tags, + ) + ) + + +def _run_dataset_status(args: argparse.Namespace, config_path: Path | None) -> None: + del config_path + from slop_farmer.app.dataset_status import format_dataset_status, get_dataset_status + + result = get_dataset_status( + DatasetStatusOptions( + repo=args.repo, + output_dir=args.output_dir, + hf_repo_id=args.hf_repo_id, + hf_revision=args.hf_revision, + json_output=args.json, + ) + ) + print(json.dumps(result, indent=2) if args.json else format_dataset_status(result)) + + +def _run_publish_analysis_artifacts(args: argparse.Namespace, config_path: Path | None) -> None: + del config_path + from slop_farmer.app.publish_analysis import run_publish_analysis_artifacts + + print( + json.dumps( + run_publish_analysis_artifacts( + PublishAnalysisArtifactsOptions( + output_dir=args.output_dir, + snapshot_dir=args.snapshot_dir, + analysis_input=args.analysis_input, + hf_repo_id=args.hf_repo_id, + analysis_id=args.analysis_id, + canonical=args.canonical, + save_cache=args.save_cache, + private_hf_repo=args.private_hf_repo, + ) + ), + indent=2, + ) + ) + + +def _run_save_cache(args: argparse.Namespace, config_path: Path | None) -> None: + del config_path + from slop_farmer.app.save_cache import run_save_cache + + print( + json.dumps( + run_save_cache( + SaveCacheOptions( + output_dir=args.output_dir, + snapshot_dir=args.snapshot_dir, + hf_repo_id=args.hf_repo_id, + private_hf_repo=args.private_hf_repo, + ) + ), + indent=2, + ) + ) + + +def main() -> None: + config_path = extract_cli_config_path() + parser = build_parser(config_path=config_path) + args = parser.parse_args() + + handlers: dict[str, CommandHandler] = { + "scrape": _run_scrape, + "refresh-dataset": _run_refresh_dataset, + "analyze": _run_analyze, + "markdown-report": _run_markdown_report, + "duplicate-prs": _run_duplicate_prs, + "pr-scope": _run_pr_scope, + "pr-search": _run_pr_search, + "import-hf-checkpoint": _run_import_hf_checkpoint, + "adopt-snapshot": _run_adopt_snapshot, + "new-contributor-report": _run_new_contributor_report, + "dashboard-data": _run_dashboard_data, + "deploy-dashboard": _run_deploy_dashboard, + "dataset-status": _run_dataset_status, + "publish-analysis-artifacts": _run_publish_analysis_artifacts, + "save-cache": _run_save_cache, + } + handler = handlers.get(args.command) + if handler is None: + parser.error(f"Unknown command: {args.command}") + handler(args, config_path) + + +if __name__ == "__main__": + main() diff --git a/src/slop_farmer/app/dataset_refresh.py b/src/slop_farmer/app/dataset_refresh.py new file mode 100644 index 0000000000000000000000000000000000000000..d0373075e60ef54beb529d601c470ab2f6e6958b --- /dev/null +++ b/src/slop_farmer/app/dataset_refresh.py @@ -0,0 +1,1049 @@ +from __future__ import annotations + +import argparse +import json +import os +import shutil +import tempfile +import time +from collections import defaultdict +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from huggingface_hub import HfApi + +from slop_farmer.app_config import command_defaults, 
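`main()` above dispatches through a plain `dict[str, CommandHandler]` rather than a chain of `if` branches, which keeps adding a command down to registering one parser and one handler entry. A self-contained sketch of the same shape with a made-up `status` command:

```python
import argparse
from typing import Callable

def run_status(args: argparse.Namespace) -> None:
    print(f"status for {args.repo}")

HANDLERS: dict[str, Callable[[argparse.Namespace], None]] = {"status": run_status}

parser = argparse.ArgumentParser(prog="demo")
subparsers = parser.add_subparsers(dest="command", required=True)
subparsers.add_parser("status").add_argument("--repo", default="huggingface/diffusers")

args = parser.parse_args(["status"])
handler = HANDLERS.get(args.command)
if handler is None:
    parser.error(f"Unknown command: {args.command}")
handler(args)
```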
extract_cli_config_path +from slop_farmer.config import ( + DatasetRefreshOptions, + NewContributorReportOptions, + PrScopeOptions, + RepoRef, + resolve_github_token, +) +from slop_farmer.data.dataset_card import build_hf_dataset_card +from slop_farmer.data.github_api import GitHubClient +from slop_farmer.data.hf_dataset_repo import ( + list_remote_paths, + load_remote_file, + load_remote_json_file, + stable_snapshot_candidates, +) +from slop_farmer.data.links import build_pr_duplicate_candidate_rows, build_text_link_rows +from slop_farmer.data.normalize import ( + issue_url_to_number, + normalize_comment, + normalize_issue, + normalize_pr_diff, + normalize_pr_file, + normalize_pull_request, + normalize_review, + normalize_review_comment, + normalize_timeline_event, +) +from slop_farmer.data.parquet_io import ( + SCHEMAS, + read_parquet_rows, + write_json, + write_parquet, + write_text, +) +from slop_farmer.reports.new_contributor_report import run_new_contributor_report +from slop_farmer.reports.pr_scope import run_pr_scope_report + +PRIMARY_KEYS: dict[str, tuple[str, ...]] = { + "issues": ("github_id",), + "pull_requests": ("github_id",), + "comments": ("github_id",), + "reviews": ("github_id",), + "review_comments": ("github_id",), + "pr_files": ("repo", "pull_request_number", "filename"), + "pr_diffs": ("repo", "pull_request_number"), + "links": ( + "repo", + "source_type", + "source_number", + "source_github_id", + "target_owner", + "target_repo", + "target_number", + "link_type", + "link_origin", + ), + "events": ( + "repo", + "parent_kind", + "parent_number", + "event", + "created_at", + "actor_login", + "source_issue_number", + "source_issue_url", + "commit_id", + "label_name", + ), +} +CHECKPOINT_PREFIXES = ("_checkpoints", "checkpoints") + + +def log(message: str) -> None: + stamp = datetime.now(tz=UTC).strftime("%H:%M:%SZ") + print(f"[{stamp}] {message}", flush=True) + + +def iso_now() -> str: + return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def snapshot_id() -> str: + return datetime.now(tz=UTC).strftime("%Y%m%dT%H%M%SZ") + + +def row_key(row: dict[str, Any], fields: tuple[str, ...]) -> str: + return json.dumps([row.get(field) for field in fields], default=str) + + +def merge_rows( + table_name: str, + previous_rows: list[dict[str, Any]], + delta_rows: list[dict[str, Any]], +) -> list[dict[str, Any]]: + if table_name == "pr_files": + refreshed_prs = { + (row.get("repo"), row.get("pull_request_number")) + for row in delta_rows + if row.get("pull_request_number") is not None + } + previous_rows = [ + row + for row in previous_rows + if (row.get("repo"), row.get("pull_request_number")) not in refreshed_prs + ] + merged: dict[str, dict[str, Any]] = {} + for row in previous_rows: + merged[row_key(row, PRIMARY_KEYS[table_name])] = row + for row in delta_rows: + merged[row_key(row, PRIMARY_KEYS[table_name])] = row + return list(merged.values()) + + +def checkpoint_dirs(remote_paths: set[str]) -> list[tuple[str, str]]: + by_snapshot_id: dict[str, str] = {} + for path in remote_paths: + parts = path.split("/") + if len(parts) < 3 or parts[0] not in CHECKPOINT_PREFIXES: + continue + snapshot_key = parts[1] + prefix = parts[0] + current = by_snapshot_id.get(snapshot_key) + if current is None or current.startswith("checkpoints/"): + by_snapshot_id[snapshot_key] = f"{prefix}/{snapshot_key}" + return [(sid, by_snapshot_id[sid]) for sid in sorted(by_snapshot_id)] + + +def copy_remote_file_from_candidates( + api: HfApi, + repo_id: str, + local_dir: 
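`merge_rows` and `row_key` above implement a last-write-wins upsert keyed on `PRIMARY_KEYS`: delta rows overwrite previous rows that share a primary key. A trimmed sketch of that merge; the `pr_files` special case, which first drops all previous file rows for refreshed PRs, is omitted here:

```python
import json
from typing import Any

PRIMARY_KEYS = {"pull_requests": ("github_id",)}  # trimmed key map for the example

def row_key(row: dict[str, Any], fields: tuple[str, ...]) -> str:
    # Encode the key tuple as JSON so mixed-type key values still compare stably.
    return json.dumps([row.get(field) for field in fields], default=str)

def merge_rows(table: str, previous: list[dict[str, Any]], delta: list[dict[str, Any]]) -> list[dict[str, Any]]:
    # Last-write-wins: delta rows replace previous rows with the same primary key.
    merged = {row_key(row, PRIMARY_KEYS[table]): row for row in previous}
    merged.update({row_key(row, PRIMARY_KEYS[table]): row for row in delta})
    return list(merged.values())

previous = [{"github_id": 1, "state": "open"}, {"github_id": 2, "state": "open"}]
delta = [{"github_id": 2, "state": "closed"}]
assert {row["state"] for row in merge_rows("pull_requests", previous, delta)} == {"open", "closed"}
```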
Path, + destination: Path, + candidate_paths: list[str], +) -> bool: + for candidate in candidate_paths: + downloaded = load_remote_file(api, repo_id, candidate, local_dir) + if downloaded is None: + continue + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(downloaded, destination) + return True + return False + + +def materialize_previous_snapshot_dir( + *, + api: Any, + repo_id: str, + previous_root: Path, + stable_snapshot_id: str | None, + latest_pointer: dict[str, Any] | None, + previous_tables: dict[str, list[dict[str, Any]]], +) -> Path | None: + if not stable_snapshot_id: + return None + snapshot_dir = (previous_root / "materialized-snapshots" / stable_snapshot_id).resolve() + snapshot_dir.mkdir(parents=True, exist_ok=True) + for table_name, rows in previous_tables.items(): + write_parquet(rows, snapshot_dir / f"{table_name}.parquet", table_name) + for artifact_name in ( + "manifest.json", + "new_contributors.parquet", + "new-contributors-report.json", + "new-contributors-report.md", + ): + copy_remote_file_from_candidates( + api, + repo_id, + previous_root, + snapshot_dir / artifact_name, + stable_snapshot_candidates(latest_pointer, artifact_name), + ) + return snapshot_dir + + +def load_remote_table_from_candidates( + api: HfApi, + repo_id: str, + table_name: str, + local_dir: Path, + candidate_paths: list[str], +) -> list[dict[str, Any]]: + for candidate in candidate_paths: + downloaded = load_remote_file(api, repo_id, candidate, local_dir) + if downloaded is not None: + return read_parquet_rows(downloaded) + return [] + + +def viewer_comment_rows( + comments: list[dict[str, Any]], + pull_requests: list[dict[str, Any]], +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + pr_numbers = {int(row["number"]) for row in pull_requests if row.get("number") is not None} + issue_comments: list[dict[str, Any]] = [] + pr_comments: list[dict[str, Any]] = [] + for row in comments: + parent_number = row.get("parent_number") + parent_kind = row.get("parent_kind") + if parent_kind == "pull_request" or parent_number in pr_numbers: + pr_comments.append(row) + else: + issue_comments.append(row) + return issue_comments, pr_comments + + +def upload_delta_checkpoint( + *, + api: HfApi, + repo_id: str, + work_dir: Path, + repo_slug: str, + sid: str, + stage: str, + delta_tables: dict[str, list[dict[str, Any]]], + progress: dict[str, Any], +) -> None: + checkpoint_root = work_dir / "checkpoint_upload" + if checkpoint_root.exists(): + shutil.rmtree(checkpoint_root) + checkpoint_root.mkdir(parents=True, exist_ok=True) + + for table_name, rows in delta_tables.items(): + write_parquet(rows, checkpoint_root / f"{table_name}.parquet", table_name) + write_json( + {"repo": repo_slug, "snapshot_id": sid, **progress}, checkpoint_root / "progress.json" + ) + write_json( + {"repo": repo_slug, "snapshot_id": sid, **progress}, + checkpoint_root / "state" / "in_progress.json", + ) + api.upload_folder( + folder_path=str(checkpoint_root), + path_in_repo=f"_checkpoints/{sid}", + repo_id=repo_id, + repo_type="dataset", + commit_message=f"Checkpoint {sid} ({stage})", + ) + + +def remaining_limit(limit: int | None, used: int) -> int | None: + if limit is None: + return None + return max(limit - used, 0) + + +def _build_argument_parser(*, config_path: Path | None = None) -> argparse.ArgumentParser: + defaults = command_defaults("refresh-dataset", config_path=config_path) + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=Path, help="Optional repo config file.") + 
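`viewer_comment_rows` above partitions the unified comments table into the `issue_comments` and `pr_comments` views written later for the dataset viewer: a comment counts as a PR comment when it is tagged `pull_request` or its parent number matches a known pull request. A compact sketch of that split with toy rows:

```python
from typing import Any

def split_viewer_comments(
    comments: list[dict[str, Any]], pull_requests: list[dict[str, Any]]
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    # PR membership is decided by parent_kind, falling back to the set of known PR numbers.
    pr_numbers = {int(row["number"]) for row in pull_requests if row.get("number") is not None}
    issue_comments: list[dict[str, Any]] = []
    pr_comments: list[dict[str, Any]] = []
    for row in comments:
        if row.get("parent_kind") == "pull_request" or row.get("parent_number") in pr_numbers:
            pr_comments.append(row)
        else:
            issue_comments.append(row)
    return issue_comments, pr_comments

comments = [{"parent_kind": "issue", "parent_number": 10}, {"parent_kind": None, "parent_number": 7}]
issues, prs = split_viewer_comments(comments, [{"number": 7}])
assert len(issues) == 1 and len(prs) == 1
```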
parser.add_argument("--repo", default=defaults.get("repo", "huggingface/transformers")) + parser.add_argument("--hf-repo-id", default=defaults.get("hf-repo-id")) + parser.add_argument("--max-issues", type=int, default=defaults.get("max-issues")) + parser.add_argument("--max-prs", type=int, default=defaults.get("max-prs")) + parser.add_argument( + "--max-issue-comments", + type=int, + default=defaults.get("max-issue-comments"), + ) + parser.add_argument( + "--max-reviews-per-pr", + type=int, + default=defaults.get("max-reviews-per-pr"), + ) + parser.add_argument( + "--max-review-comments-per-pr", + type=int, + default=defaults.get("max-review-comments-per-pr"), + ) + parser.add_argument( + "--fetch-timeline", + action="store_true", + default=bool(defaults.get("fetch-timeline", False)), + ) + parser.add_argument( + "--new-contributor-report", + dest="new_contributor_report", + action="store_true", + default=bool(defaults.get("new-contributor-report", True)), + ) + parser.add_argument( + "--no-new-contributor-report", + dest="new_contributor_report", + action="store_false", + ) + parser.add_argument( + "--new-contributor-window-days", + type=int, + default=int(defaults.get("new-contributor-window-days", 42)), + ) + parser.add_argument( + "--new-contributor-max-authors", + type=int, + default=int(defaults.get("new-contributor-max-authors", 25)), + ) + parser.add_argument("--http-timeout", type=int, default=300) + parser.add_argument("--http-max-retries", type=int, default=8) + parser.add_argument("--checkpoint-every-comments", type=int, default=1000) + parser.add_argument("--checkpoint-every-prs", type=int, default=25) + parser.add_argument( + "--private-hf-repo", + dest="private_hf_repo", + action="store_true", + default=bool(defaults.get("private-hf-repo", False)), + ) + parser.add_argument("--private", dest="private_hf_repo", action="store_true") + parser.set_defaults( + cluster_suppression_rules=tuple(defaults.get("cluster-suppression-rules", ())) + ) + return parser + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + config_path = extract_cli_config_path(argv) + parser = _build_argument_parser(config_path=config_path) + args = parser.parse_args(argv) + if not args.hf_repo_id: + parser.error("--hf-repo-id is required (or set dataset_id in --config)") + return args + + +def run_dataset_refresh(options: DatasetRefreshOptions) -> dict[str, Any]: + hf_token = os.getenv("HF_TOKEN") + github_token = resolve_github_token() + if not github_token: + raise RuntimeError("GITHUB_TOKEN must be set or resolvable via gh auth/.env") + + repo_slug = options.repo.slug + owner, repo_name = options.repo.owner, options.repo.name + sid = snapshot_id() + crawl_started_at = iso_now() + extracted_at = iso_now() + + api = HfApi(token=hf_token) + api.create_repo( + repo_id=options.hf_repo_id, + repo_type="dataset", + private=options.private_hf_repo, + exist_ok=True, + ) + + with tempfile.TemporaryDirectory(prefix="slop-farmer-job-") as tmp: + root = Path(tmp) + previous_root = root / "previous" + output_root = root / "output" + previous_root.mkdir(parents=True, exist_ok=True) + output_root.mkdir(parents=True, exist_ok=True) + + remote_paths = list_remote_paths(api, options.hf_repo_id) + previous_watermark = load_remote_json_file( + api, options.hf_repo_id, "state/watermark.json", previous_root + ) + remote_manifest = load_remote_json_file( + api, options.hf_repo_id, "manifest.json", previous_root + ) + latest_pointer = ( + load_remote_json_file(api, options.hf_repo_id, 
"snapshots/latest.json", previous_root) + if "snapshots/latest.json" in remote_paths + else None + ) + stable_snapshot_id = None + if previous_watermark: + stable_snapshot_id = previous_watermark.get("last_successful_snapshot_id") + elif latest_pointer: + stable_snapshot_id = latest_pointer.get("latest_snapshot_id") + elif remote_manifest: + stable_snapshot_id = remote_manifest.get("snapshot_id") + + log(f"Starting dataset refresh for {repo_slug}") + log(f"Target dataset repo: {options.hf_repo_id}") + previous_tables = { + table_name: [] for table_name in SCHEMAS if table_name != "new_contributors" + } + for table_name in previous_tables: + previous_tables[table_name] = load_remote_table_from_candidates( + api, + options.hf_repo_id, + table_name, + previous_root, + stable_snapshot_candidates(latest_pointer, f"{table_name}.parquet"), + ) + + checkpoint_progress: dict[str, Any] | None = None + best_comment_checkpoint_progress: dict[str, Any] | None = None + for checkpoint_sid, checkpoint_dir in checkpoint_dirs(remote_paths): + if stable_snapshot_id is not None and checkpoint_sid <= str(stable_snapshot_id): + continue + progress_payload = load_remote_json_file( + api, options.hf_repo_id, f"{checkpoint_dir}/progress.json", previous_root + ) or load_remote_json_file( + api, + options.hf_repo_id, + f"{checkpoint_dir}/state/in_progress.json", + previous_root, + ) + if progress_payload is not None: + checkpoint_progress = progress_payload + if ( + progress_payload.get("effective_since") is None + and (progress_payload.get("counts") or {}).get("comments", 0) > 0 + and ( + best_comment_checkpoint_progress is None + or (progress_payload.get("counts") or {}).get("comments", 0) + > (best_comment_checkpoint_progress.get("counts") or {}).get("comments", 0) + ) + ): + best_comment_checkpoint_progress = progress_payload + for table_name in previous_tables: + checkpoint_rows = load_remote_table_from_candidates( + api, + options.hf_repo_id, + table_name, + previous_root, + [f"{checkpoint_dir}/{table_name}.parquet"], + ) + if checkpoint_rows: + previous_tables[table_name] = merge_rows( + table_name, + previous_tables[table_name], + checkpoint_rows, + ) + + effective_since = None + if checkpoint_progress and checkpoint_progress.get("effective_since") is not None: + effective_since = checkpoint_progress.get("effective_since") + log(f"Resuming from incomplete checkpoint window starting at {effective_since}") + elif previous_watermark and previous_watermark.get("next_since") is not None: + effective_since = previous_watermark.get("next_since") + log(f"Resuming from remote watermark {effective_since}") + elif ( + remote_manifest + and isinstance(remote_manifest.get("watermark"), dict) + and remote_manifest["watermark"].get("next_since") is not None + ): + effective_since = remote_manifest["watermark"].get("next_since") + log(f"Bootstrapping remote watermark from root manifest {effective_since}") + else: + log("No successful watermark found; running full snapshot") + + client = GitHubClient( + token=github_token, + timeout=options.http_timeout, + max_retries=options.http_max_retries, + log=log, + ) + previous_snapshot_dir = materialize_previous_snapshot_dir( + api=api, + repo_id=options.hf_repo_id, + previous_root=previous_root, + stable_snapshot_id=str(stable_snapshot_id) if stable_snapshot_id is not None else None, + latest_pointer=latest_pointer, + previous_tables=previous_tables, + ) + + rate_limit = client.get_json("/rate_limit") + core = (rate_limit.get("resources") or {}).get("core") or {} + limit = 
core.get("limit") + remaining = core.get("remaining") + reset_at = core.get("reset") + log(f"GitHub core rate limit: limit={limit} remaining={remaining} reset={reset_at}") + if limit is not None and int(limit) <= 60: + raise RuntimeError("GITHUB_TOKEN appears to be missing, invalid, or not being applied") + if remaining == 0 and reset_at: + sleep_for = max(int(reset_at) - int(time.time()), 1) + log(f"GitHub token exhausted before bootstrap; sleeping {sleep_for}s until reset") + time.sleep(sleep_for) + + log("Fetching changed issue and pull request stubs from GitHub") + issue_stubs = list( + client.iter_repo_issues(owner, repo_name, effective_since, options.max_issues) + ) + issues = [item for item in issue_stubs if "pull_request" not in item] + pr_stubs = [item for item in issue_stubs if "pull_request" in item] + if options.max_prs is not None: + pr_stubs = pr_stubs[: options.max_prs] + log(f"Fetched {len(issue_stubs)} changed stubs") + + issue_number_to_kind = { + item["number"]: ("pull_request" if "pull_request" in item else "issue") + for item in issue_stubs + } + issue_rows = [normalize_issue(repo_slug, item, sid, extracted_at) for item in issues] + + comment_rows: list[dict[str, Any]] = [] + next_comment_checkpoint = options.checkpoint_every_comments + reuse_checkpoint_comments = ( + stable_snapshot_id is None + and effective_since is None + and best_comment_checkpoint_progress is not None + and bool(previous_tables["comments"]) + ) + if reuse_checkpoint_comments: + log( + f"Reusing {len(previous_tables['comments'])} checkpoint comments from prior partial runs" + ) + else: + for index, item in enumerate(issue_stubs, start=1): + if not item.get("comments"): + continue + remaining_comments = remaining_limit(options.max_issue_comments, len(comment_rows)) + if remaining_comments == 0: + break + if index == 1 or index % 25 == 0: + log(f"Collecting discussion comments; {len(comment_rows)} collected so far") + for comment in client.iter_issue_comments_for_number( + owner, + repo_name, + int(item["number"]), + effective_since, + remaining_comments, + ): + parent_number = issue_url_to_number(comment.get("issue_url")) + parent_kind = issue_number_to_kind.get(parent_number, "issue_or_pr") + comment_rows.append( + normalize_comment( + repo_slug, + comment, + parent_kind, + parent_number, + sid, + extracted_at, + ) + ) + remaining_comments = remaining_limit( + options.max_issue_comments, + len(comment_rows), + ) + if ( + options.checkpoint_every_comments + and len(comment_rows) >= next_comment_checkpoint + ): + log(f"Pushing comment checkpoint to Hub at {len(comment_rows)} comments") + upload_delta_checkpoint( + api=api, + repo_id=options.hf_repo_id, + work_dir=root, + repo_slug=repo_slug, + sid=sid, + stage="comments", + delta_tables={ + "issues": issue_rows, + "pull_requests": [], + "comments": comment_rows, + "reviews": [], + "review_comments": [], + "pr_files": [], + "pr_diffs": [], + "links": [], + "events": [], + }, + progress={ + "stage": "comments", + "effective_since": effective_since, + "counts": { + "issues": len(issue_rows), + "comments": len(comment_rows), + "pull_requests": 0, + "reviews": 0, + "review_comments": 0, + "pr_files": 0, + "pr_diffs": 0, + "links": 0, + "events": 0, + }, + }, + ) + next_comment_checkpoint += options.checkpoint_every_comments + if remaining_comments == 0: + break + + pr_rows: list[dict[str, Any]] = [] + review_rows: list[dict[str, Any]] = [] + review_comment_rows: list[dict[str, Any]] = [] + pr_file_rows: list[dict[str, Any]] = [] + pr_diff_rows: 
list[dict[str, Any]] = [] + event_rows: list[dict[str, Any]] = [] + next_pr_checkpoint = options.checkpoint_every_prs + + previous_pr_rows_by_number = { + int(row["number"]): row + for row in previous_tables["pull_requests"] + if row.get("number") is not None + } + previous_review_rows_by_number: defaultdict[int, list[dict[str, Any]]] = defaultdict(list) + for row in previous_tables["reviews"]: + if row.get("pull_request_number") is not None: + previous_review_rows_by_number[int(row["pull_request_number"])].append(row) + previous_review_comment_rows_by_number: defaultdict[int, list[dict[str, Any]]] = ( + defaultdict(list) + ) + for row in previous_tables["review_comments"]: + if row.get("pull_request_number") is not None: + previous_review_comment_rows_by_number[int(row["pull_request_number"])].append(row) + previous_pr_file_rows_by_number: defaultdict[int, list[dict[str, Any]]] = defaultdict(list) + for row in previous_tables["pr_files"]: + if row.get("pull_request_number") is not None: + previous_pr_file_rows_by_number[int(row["pull_request_number"])].append(row) + previous_pr_diff_rows_by_number = { + int(row["pull_request_number"]): row + for row in previous_tables["pr_diffs"] + if row.get("pull_request_number") is not None + } + previous_pr_event_rows_by_number: defaultdict[int, list[dict[str, Any]]] = defaultdict(list) + for row in previous_tables["events"]: + if row.get("parent_kind") == "pull_request" and row.get("parent_number") is not None: + previous_pr_event_rows_by_number[int(row["parent_number"])].append(row) + + hydration_pr_stubs: list[dict[str, Any]] = [] + for pr_stub in pr_stubs: + number = int(pr_stub["number"]) + previous_pr_row = previous_pr_rows_by_number.get(number) + if previous_pr_row and previous_pr_row.get("updated_at") == pr_stub.get("updated_at"): + pr_rows.append(previous_pr_row) + review_rows.extend(previous_review_rows_by_number[number]) + review_comment_rows.extend(previous_review_comment_rows_by_number[number]) + pr_file_rows.extend(previous_pr_file_rows_by_number[number]) + if number in previous_pr_diff_rows_by_number: + pr_diff_rows.append(previous_pr_diff_rows_by_number[number]) + event_rows.extend(previous_pr_event_rows_by_number[number]) + continue + hydration_pr_stubs.append(pr_stub) + + reused_pr_count = len(pr_rows) + if reused_pr_count: + log(f"Reusing hydrated data for {reused_pr_count} pull requests from prior checkpoints") + if options.checkpoint_every_prs: + while reused_pr_count >= next_pr_checkpoint: + next_pr_checkpoint += options.checkpoint_every_prs + + total_prs = len(pr_stubs) + remaining_prs = len(hydration_pr_stubs) + for index, pr_stub in enumerate(hydration_pr_stubs, start=1): + number = int(pr_stub["number"]) + hydrated_count = reused_pr_count + index + if index == 1 or hydrated_count % 10 == 0 or index == remaining_prs: + log(f"Hydrating pull requests: {hydrated_count}/{total_prs}") + detail = client.get_pull_request(owner, repo_name, number) + pr_rows.append(normalize_pull_request(repo_slug, pr_stub, detail, sid, extracted_at)) + for review in client.iter_pull_reviews( + owner, repo_name, number, options.max_reviews_per_pr + ): + review_rows.append(normalize_review(repo_slug, number, review, sid, extracted_at)) + for comment in client.iter_pull_review_comments( + owner, + repo_name, + number, + options.max_review_comments_per_pr, + ): + review_comment_rows.append( + normalize_review_comment(repo_slug, number, comment, sid, extracted_at) + ) + for pr_file in client.iter_pull_files(owner, repo_name, number): + pr_file_rows.append( 
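The hydration loop above avoids re-fetching unchanged pull requests: if a stub's `updated_at` matches the previously merged row, its reviews, review comments, files, diff and events are reused instead of being re-hydrated through the API. A sketch of that reuse-vs-rehydrate split with toy data:

```python
from typing import Any

def split_hydration_work(
    pr_stubs: list[dict[str, Any]],
    previous_by_number: dict[int, dict[str, Any]],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    # Unchanged updated_at: reuse the stored row; otherwise queue the PR for expensive hydration.
    reused: list[dict[str, Any]] = []
    to_hydrate: list[dict[str, Any]] = []
    for stub in pr_stubs:
        prev = previous_by_number.get(int(stub["number"]))
        if prev and prev.get("updated_at") == stub.get("updated_at"):
            reused.append(prev)
        else:
            to_hydrate.append(stub)
    return reused, to_hydrate

stubs = [{"number": 1, "updated_at": "t1"}, {"number": 2, "updated_at": "t3"}]
previous = {1: {"number": 1, "updated_at": "t1"}, 2: {"number": 2, "updated_at": "t2"}}
reused, to_hydrate = split_hydration_work(stubs, previous)
assert [r["number"] for r in reused] == [1] and [s["number"] for s in to_hydrate] == [2]
```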
+ normalize_pr_file(repo_slug, number, pr_file, sid, extracted_at) + ) + pr_diff_rows.append( + normalize_pr_diff( + repo_slug, + number, + pr_stub.get("html_url"), + pr_stub.get("url"), + client.get_pull_request_diff(owner, repo_name, number), + sid, + extracted_at, + ) + ) + if options.fetch_timeline: + for event in client.iter_issue_timeline(owner, repo_name, number): + event_rows.append( + normalize_timeline_event( + repo_slug, + number, + "pull_request", + event, + sid, + extracted_at, + ) + ) + if options.checkpoint_every_prs and len(pr_rows) >= next_pr_checkpoint: + log(f"Pushing PR checkpoint to Hub at {len(pr_rows)} hydrated PRs") + upload_delta_checkpoint( + api=api, + repo_id=options.hf_repo_id, + work_dir=root, + repo_slug=repo_slug, + sid=sid, + stage="pull_requests", + delta_tables={ + "issues": issue_rows, + "pull_requests": pr_rows, + "comments": comment_rows, + "reviews": review_rows, + "review_comments": review_comment_rows, + "pr_files": pr_file_rows, + "pr_diffs": pr_diff_rows, + "links": [], + "events": event_rows, + }, + progress={ + "stage": "pull_requests", + "effective_since": effective_since, + "counts": { + "issues": len(issue_rows), + "comments": len(comment_rows), + "pull_requests": len(pr_rows), + "reviews": len(review_rows), + "review_comments": len(review_comment_rows), + "pr_files": len(pr_file_rows), + "pr_diffs": len(pr_diff_rows), + "links": 0, + "events": len(event_rows), + }, + }, + ) + next_pr_checkpoint += options.checkpoint_every_prs + + if options.fetch_timeline: + log(f"Fetching issue timelines for {len(issues)} changed issues") + for issue in issues: + for event in client.iter_issue_timeline(owner, repo_name, int(issue["number"])): + event_rows.append( + normalize_timeline_event( + repo_slug, + int(issue["number"]), + "issue", + event, + sid, + extracted_at, + ) + ) + + link_rows: list[dict[str, Any]] = [] + for row in issue_rows: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="issue", + source_number=row["number"], + source_id=row["github_id"], + body=row["body"], + snapshot_id=sid, + extracted_at=extracted_at, + ) + ) + for row in pr_rows: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="pull_request", + source_number=row["number"], + source_id=row["github_id"], + body=row["body"], + snapshot_id=sid, + extracted_at=extracted_at, + ) + ) + for row in comment_rows or previous_tables["comments"]: + if row["parent_number"] is None: + continue + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="comment", + source_number=row["parent_number"], + source_id=row["github_id"], + body=row["body"], + snapshot_id=sid, + extracted_at=extracted_at, + ) + ) + for row in review_rows: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="review", + source_number=row["pull_request_number"], + source_id=row["github_id"], + body=row["body"], + snapshot_id=sid, + extracted_at=extracted_at, + ) + ) + for row in review_comment_rows: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="review_comment", + source_number=row["pull_request_number"], + source_id=row["github_id"], + body=row["body"], + snapshot_id=sid, + extracted_at=extracted_at, + ) + ) + link_rows.extend( + build_pr_duplicate_candidate_rows( + repo=repo_slug, + pull_requests=pr_rows, + 
link_rows=link_rows, + snapshot_id=sid, + extracted_at=extracted_at, + ) + ) + for event in event_rows: + if event.get("source_issue_number"): + link_rows.append( + { + "repo": repo_slug, + "source_type": event["parent_kind"], + "source_number": event["parent_number"], + "source_github_id": None, + "target_owner": owner, + "target_repo": repo_name, + "target_number": event["source_issue_number"], + "link_type": f"timeline:{event['event']}", + "link_origin": "timeline", + "snapshot_id": sid, + "extracted_at": extracted_at, + } + ) + + delta_tables = { + "issues": issue_rows, + "pull_requests": pr_rows, + "comments": comment_rows, + "reviews": review_rows, + "review_comments": review_comment_rows, + "pr_files": pr_file_rows, + "pr_diffs": pr_diff_rows, + "links": link_rows, + "events": event_rows, + } + if any(delta_tables.values()): + log("Pushing final delta checkpoint to Hub before merge upload") + upload_delta_checkpoint( + api=api, + repo_id=options.hf_repo_id, + work_dir=root, + repo_slug=repo_slug, + sid=sid, + stage="final-delta", + delta_tables=delta_tables, + progress={ + "stage": "final-delta", + "effective_since": effective_since, + "counts": {name: len(rows) for name, rows in delta_tables.items()}, + }, + ) + + final_tables = { + table_name: merge_rows(table_name, previous_tables[table_name], delta_rows) + for table_name, delta_rows in delta_tables.items() + } + manifest: dict[str, Any] = { + "repo": repo_slug, + "snapshot_id": sid, + "crawl_started_at": crawl_started_at, + "extracted_at": extracted_at, + "watermark": { + "effective_since": effective_since, + "next_since": crawl_started_at, + "previous_snapshot_dir": ( + str(previous_snapshot_dir) if previous_snapshot_dir is not None else None + ), + }, + "delta_counts": { + "issue_stubs": len(issue_stubs), + "issues": len(issue_rows), + "pull_requests": len(pr_rows), + "comments": len(comment_rows), + "reviews": len(review_rows), + "review_comments": len(review_comment_rows), + "pr_files": len(pr_file_rows), + "pr_diffs": len(pr_diff_rows), + "timeline_events": len(event_rows), + "links": len(link_rows), + }, + "counts": { + "issues": len(final_tables["issues"]), + "pull_requests": len(final_tables["pull_requests"]), + "comments": len(final_tables["comments"]), + "reviews": len(final_tables["reviews"]), + "review_comments": len(final_tables["review_comments"]), + "pr_files": len(final_tables["pr_files"]), + "pr_diffs": len(final_tables["pr_diffs"]), + "timeline_events": len(final_tables["events"]), + "links": len(final_tables["links"]), + }, + } + + log("Writing updated dataset files") + for table_name, rows in final_tables.items(): + write_parquet(rows, output_root / f"{table_name}.parquet", table_name) + issue_comment_rows, pr_comment_rows = viewer_comment_rows( + final_tables["comments"], + final_tables["pull_requests"], + ) + write_parquet(issue_comment_rows, output_root / "issue_comments.parquet", "comments") + write_parquet(pr_comment_rows, output_root / "pr_comments.parquet", "comments") + archived_snapshot_dir = output_root / "snapshots" / sid + archived_snapshot_dir.mkdir(parents=True, exist_ok=True) + write_json(manifest, output_root / "manifest.json") + log("Generating PR scope clusters") + pr_scope_path = run_pr_scope_report( + PrScopeOptions( + snapshot_dir=output_root, + output_dir=output_root, + output=output_root / "pr-scope-clusters.json", + hf_repo_id=None, + hf_revision=None, + hf_materialize_dir=None, + cluster_suppression_rules=options.cluster_suppression_rules, + ) + ) + shutil.copy2(pr_scope_path, 
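In the links assembly above, timeline events that reference another issue are projected into the `links` table with a `timeline:<event>` link type. A sketch of that projection, trimmed to the key fields (`source_github_id`, `snapshot_id` and `extracted_at` are left out):

```python
from typing import Any

def timeline_event_to_link(
    event: dict[str, Any], *, repo: str, owner: str, repo_name: str
) -> dict[str, Any] | None:
    # Only events carrying a cross-referenced issue number produce a link row.
    if not event.get("source_issue_number"):
        return None
    return {
        "repo": repo,
        "source_type": event["parent_kind"],
        "source_number": event["parent_number"],
        "target_owner": owner,
        "target_repo": repo_name,
        "target_number": event["source_issue_number"],
        "link_type": f"timeline:{event['event']}",
        "link_origin": "timeline",
    }

link = timeline_event_to_link(
    {"parent_kind": "pull_request", "parent_number": 42, "event": "cross-referenced", "source_issue_number": 7},
    repo="huggingface/diffusers", owner="huggingface", repo_name="diffusers",
)
assert link is not None and link["link_type"] == "timeline:cross-referenced"
```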
archived_snapshot_dir / pr_scope_path.name) + artifacts: dict[str, str] = { + "pr_scope_clusters_json": pr_scope_path.name, + "archived_pr_scope_clusters_json": f"snapshots/{sid}/{pr_scope_path.name}", + } + if options.new_contributor_report: + log("Generating new contributor dataset/report artifacts") + run_new_contributor_report( + NewContributorReportOptions( + snapshot_dir=output_root, + output_dir=output_root, + output=None, + json_output=None, + hf_repo_id=None, + hf_revision=None, + hf_materialize_dir=None, + window_days=options.new_contributor_window_days, + max_authors=options.new_contributor_max_authors, + ) + ) + manifest["counts"]["new_contributors"] = len( + read_parquet_rows(output_root / "new_contributors.parquet") + ) + artifacts.update( + { + "new_contributors_parquet": "new_contributors.parquet", + "new_contributors_json": "new-contributors-report.json", + "new_contributors_markdown": "new-contributors-report.md", + } + ) + manifest["artifacts"] = artifacts + manifest["watermark"].pop("previous_snapshot_dir", None) + write_json(manifest, output_root / "manifest.json") + write_text( + build_hf_dataset_card( + repo_slug, + sid, + include_new_contributors=options.new_contributor_report, + ), + output_root / "README.md", + ) + write_json( + { + "repo": repo_slug, + "last_successful_snapshot_id": sid, + "effective_since": effective_since, + "next_since": crawl_started_at, + "updated_at": extracted_at, + }, + output_root / "state" / "watermark.json", + ) + write_json(manifest, archived_snapshot_dir / "manifest.json") + write_json( + { + "repo": repo_slug, + "latest_snapshot_id": sid, + "snapshot_dir": f"snapshots/{sid}", + "manifest_path": "manifest.json", + "archived_manifest_path": f"snapshots/{sid}/manifest.json", + "next_since": crawl_started_at, + }, + output_root / "snapshots" / "latest.json", + ) + + log("Uploading updated dataset to the Hub") + api.upload_folder( + folder_path=str(output_root), + repo_id=options.hf_repo_id, + repo_type="dataset", + commit_message=f"Refresh {repo_name} dataset snapshot {sid}", + ) + log(f"Dataset refresh complete for {options.hf_repo_id}") + return { + "repo": repo_slug, + "dataset_id": options.hf_repo_id, + "snapshot_id": sid, + "effective_since": effective_since, + "counts": manifest["counts"], + } + + +def main(argv: list[str] | None = None) -> None: + args = parse_args(argv) + result = run_dataset_refresh( + DatasetRefreshOptions( + repo=RepoRef.parse(args.repo), + hf_repo_id=args.hf_repo_id, + private_hf_repo=args.private_hf_repo, + max_issues=args.max_issues, + max_prs=args.max_prs, + max_issue_comments=args.max_issue_comments, + max_reviews_per_pr=args.max_reviews_per_pr, + max_review_comments_per_pr=args.max_review_comments_per_pr, + fetch_timeline=args.fetch_timeline, + new_contributor_report=args.new_contributor_report, + new_contributor_window_days=args.new_contributor_window_days, + new_contributor_max_authors=args.new_contributor_max_authors, + http_timeout=args.http_timeout, + http_max_retries=args.http_max_retries, + checkpoint_every_comments=args.checkpoint_every_comments, + checkpoint_every_prs=args.checkpoint_every_prs, + cluster_suppression_rules=tuple(args.cluster_suppression_rules), + ) + ) + print(json.dumps(result, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/src/slop_farmer/app/dataset_status.py b/src/slop_farmer/app/dataset_status.py new file mode 100644 index 0000000000000000000000000000000000000000..bfe5dd27381c7adea04fa9677ca716ff28c8fbcc --- /dev/null +++ 
b/src/slop_farmer/app/dataset_status.py @@ -0,0 +1,316 @@ +from __future__ import annotations + +import tempfile +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from huggingface_hub import HfApi + +from slop_farmer.config import DatasetStatusOptions +from slop_farmer.data.hf_dataset_repo import ( + list_remote_paths, + load_remote_file, + load_remote_json_file, + stable_snapshot_candidates, +) +from slop_farmer.data.parquet_io import read_json +from slop_farmer.data.snapshot_paths import ( + CONTRIBUTOR_ARTIFACT_FILENAMES, + CURRENT_ANALYSIS_MANIFEST_PATH, + PR_SCOPE_CLUSTERS_FILENAME, + SNAPSHOTS_LATEST_PATH, + load_current_analysis_manifest, + repo_relative_path_to_local, +) + + +def _coerce_datetime(value: Any) -> datetime | None: + if not isinstance(value, str) or not value: + return None + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + + +def _age_summary(value: str | None) -> dict[str, Any]: + timestamp = _coerce_datetime(value) + if timestamp is None: + return {"seconds": None, "summary": "unknown", "staleness": "unknown"} + age_seconds = max(int((datetime.now(tz=UTC) - timestamp).total_seconds()), 0) + if age_seconds <= 6 * 3600: + staleness = "fresh" + elif age_seconds <= 24 * 3600: + staleness = "aging" + else: + staleness = "stale" + if age_seconds < 3600: + summary = f"{age_seconds // 60}m" + elif age_seconds < 24 * 3600: + summary = f"{age_seconds // 3600}h" + else: + summary = f"{age_seconds // 86400}d" + return {"seconds": age_seconds, "summary": summary, "staleness": staleness} + + +def _local_status(output_dir: Path) -> dict[str, Any] | None: + latest_path = output_dir.resolve() / "snapshots" / "latest.json" + if not latest_path.exists(): + return None + payload = read_json(latest_path) + snapshot_dir_raw = payload.get("snapshot_dir") + manifest: dict[str, Any] = {} + snapshot_dir: Path | None = None + if isinstance(snapshot_dir_raw, str) and snapshot_dir_raw: + snapshot_dir = Path(snapshot_dir_raw).resolve() + manifest_path = snapshot_dir / "manifest.json" + if manifest_path.exists(): + manifest = read_json(manifest_path) + current_analysis = _local_current_analysis(snapshot_dir) + return { + "latest_path": str(latest_path), + "latest_pointer": payload, + "snapshot_dir": snapshot_dir_raw, + "snapshot_id": manifest.get("snapshot_id") or payload.get("latest_snapshot_id"), + "current_analysis": current_analysis, + } + + +def _local_current_analysis(snapshot_dir: Path | None) -> dict[str, Any]: + if snapshot_dir is None: + return {"present": False} + manifest_path = repo_relative_path_to_local(snapshot_dir, CURRENT_ANALYSIS_MANIFEST_PATH) + if not manifest_path.exists(): + return {"present": False} + try: + manifest = load_current_analysis_manifest(manifest_path) + except ValueError as exc: + return {"present": True, "valid": False, "detail": str(exc)} + return { + "present": True, + "valid": True, + "snapshot_id": manifest["snapshot_id"], + "analysis_id": manifest["analysis_id"], + "variant": manifest["variant"], + "published_at": manifest["published_at"], + } + + +def _remote_status(repo_id: str, revision: str | None) -> dict[str, Any]: + api = HfApi() + with tempfile.TemporaryDirectory(prefix="slop-farmer-dataset-status-") as tmp: + root = Path(tmp) + remote_paths = list_remote_paths(api, repo_id, revision=revision) + latest_pointer = load_remote_json_file( + api, + repo_id, + SNAPSHOTS_LATEST_PATH, + root, + revision=revision, + ) + watermark = load_remote_json_file( + api, + 
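`_age_summary` above maps the manifest's `extracted_at` timestamp onto coarse freshness buckets: up to 6 hours counts as `fresh`, up to 24 hours as `aging`, anything older as `stale`, with a compact minutes/hours/days summary string. A stand-alone version of the same bucketing for reference:

```python
from datetime import UTC, datetime, timedelta

def age_summary(extracted_at: datetime) -> dict[str, int | str]:
    # Freshness thresholds: <=6h fresh, <=24h aging, otherwise stale.
    age_seconds = max(int((datetime.now(tz=UTC) - extracted_at).total_seconds()), 0)
    if age_seconds <= 6 * 3600:
        staleness = "fresh"
    elif age_seconds <= 24 * 3600:
        staleness = "aging"
    else:
        staleness = "stale"
    if age_seconds < 3600:
        summary = f"{age_seconds // 60}m"
    elif age_seconds < 24 * 3600:
        summary = f"{age_seconds // 3600}h"
    else:
        summary = f"{age_seconds // 86400}d"
    return {"seconds": age_seconds, "summary": summary, "staleness": staleness}

print(age_summary(datetime.now(tz=UTC) - timedelta(hours=30)))  # -> summary '1d', staleness 'stale'
```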
repo_id, + "state/watermark.json", + root, + revision=revision, + ) + manifest = None + if latest_pointer is not None: + for candidate in stable_snapshot_candidates(latest_pointer, "manifest.json"): + downloaded = load_remote_file( + api, + repo_id, + candidate, + root, + revision=revision, + ) + if downloaded is None: + continue + manifest = read_json(downloaded) + break + current_analysis = _remote_current_analysis( + api, + repo_id, + root, + revision=revision, + remote_paths=remote_paths, + latest_pointer=latest_pointer, + ) + latest_snapshot_id = ( + str(latest_pointer.get("latest_snapshot_id")) + if isinstance(latest_pointer, dict) + else None + ) + archived_run_manifests = sorted( + path + for path in remote_paths + if path.startswith("snapshots/") + and "/analysis-runs/" in path + and path.endswith("/manifest.json") + ) + current_snapshot_run_count = 0 + if latest_snapshot_id: + current_snapshot_run_count = sum( + 1 + for path in archived_run_manifests + if path.startswith(f"snapshots/{latest_snapshot_id}/analysis-runs/") + ) + extracted_at = manifest.get("extracted_at") if manifest else None + return { + "dataset_id": repo_id, + "revision": revision, + "latest_pointer": latest_pointer, + "watermark": watermark, + "manifest": manifest, + "cheap_artifacts": { + "pr_scope_clusters": _remote_has_latest_artifact( + remote_paths, + latest_pointer, + PR_SCOPE_CLUSTERS_FILENAME, + ), + "contributors": all( + _remote_has_latest_artifact(remote_paths, latest_pointer, filename) + for filename in CONTRIBUTOR_ARTIFACT_FILENAMES + ), + }, + "current_analysis": current_analysis, + "archived_analysis_runs": { + "count": len(archived_run_manifests), + "current_snapshot_count": current_snapshot_run_count, + }, + "remote_path_count": len(remote_paths), + "age": _age_summary(extracted_at), + } + + +def _remote_current_analysis( + api: HfApi, + repo_id: str, + root: Path, + *, + revision: str | None, + remote_paths: set[str], + latest_pointer: dict[str, Any] | None, +) -> dict[str, Any]: + if CURRENT_ANALYSIS_MANIFEST_PATH not in remote_paths: + return {"present": False} + downloaded = load_remote_file( + api, + repo_id, + CURRENT_ANALYSIS_MANIFEST_PATH, + root, + revision=revision, + ) + if downloaded is None: + return {"present": False} + try: + manifest = load_current_analysis_manifest(downloaded) + except ValueError as exc: + return {"present": True, "valid": False, "detail": str(exc)} + latest_snapshot_id = ( + str(latest_pointer.get("latest_snapshot_id")) if isinstance(latest_pointer, dict) else None + ) + return { + "present": True, + "valid": True, + "snapshot_id": manifest["snapshot_id"], + "analysis_id": manifest["analysis_id"], + "variant": manifest["variant"], + "published_at": manifest["published_at"], + "matches_latest_snapshot": manifest["snapshot_id"] == latest_snapshot_id, + "artifact_count": len(manifest["artifacts"]), + } + + +def _remote_has_latest_artifact( + remote_paths: set[str], + latest_pointer: dict[str, Any] | None, + filename: str, +) -> bool: + candidates = stable_snapshot_candidates(latest_pointer, filename) + return any(candidate in remote_paths for candidate in candidates) + + +def get_dataset_status(options: DatasetStatusOptions) -> dict[str, Any]: + remote = _remote_status(options.hf_repo_id, options.hf_revision) if options.hf_repo_id else None + local = _local_status(options.output_dir) + repo = options.repo + if repo is None and remote and remote.get("manifest"): + repo = remote["manifest"].get("repo") + if repo is None and local and 
isinstance(local.get("latest_pointer"), dict): + repo = local["latest_pointer"].get("repo") + return { + "repo": repo, + "dataset_id": options.hf_repo_id, + "remote": remote, + "local": local, + } + + +def format_dataset_status(status: dict[str, Any]) -> str: + remote = status.get("remote") or {} + local = status.get("local") or {} + manifest = remote.get("manifest") or {} + watermark = remote.get("watermark") or {} + latest_pointer = remote.get("latest_pointer") or {} + age = remote.get("age") or {} + current_analysis = remote.get("current_analysis") or {} + cheap_artifacts = remote.get("cheap_artifacts") or {} + archived_runs = remote.get("archived_analysis_runs") or {} + lines = [ + f"Repo: {status.get('repo') or '?'}", + f"Dataset: {status.get('dataset_id') or 'not configured'}", + ] + if remote: + lines.extend( + [ + f"Remote latest snapshot: {manifest.get('snapshot_id') or latest_pointer.get('latest_snapshot_id') or '?'}", + f"Remote extracted at: {manifest.get('extracted_at') or '?'}", + f"Remote next_since: {watermark.get('next_since') or latest_pointer.get('next_since') or '?'}", + f"PR scope artifact: {'yes' if cheap_artifacts.get('pr_scope_clusters') else 'no'}", + f"Contributor artifacts: {'yes' if cheap_artifacts.get('contributors') else 'no'}", + ] + ) + if current_analysis.get("present"): + if current_analysis.get("valid") is False: + lines.append(f"Current analysis: invalid ({current_analysis.get('detail')})") + else: + lines.append( + "Current analysis: " + f"snapshot={current_analysis.get('snapshot_id')} " + f"analysis_id={current_analysis.get('analysis_id')}" + ) + lines.append( + "Current analysis matches latest snapshot: " + f"{'yes' if current_analysis.get('matches_latest_snapshot') else 'no'}" + ) + else: + lines.append("Current analysis: none") + lines.append( + "Archived analysis runs: " + f"{archived_runs.get('count', 0)} total, {archived_runs.get('current_snapshot_count', 0)} for latest snapshot" + ) + lines.append( + f"Freshness: {age.get('summary') or 'unknown'} ({age.get('staleness') or 'unknown'})" + ) + if local: + lines.extend( + [ + f"Local latest pointer: {local.get('latest_path')}", + f"Local snapshot id: {local.get('snapshot_id') or '?'}", + ] + ) + local_current_analysis = local.get("current_analysis") or {} + if local_current_analysis.get("present"): + lines.append( + "Local current analysis: " + f"snapshot={local_current_analysis.get('snapshot_id')} " + f"analysis_id={local_current_analysis.get('analysis_id')}" + ) + else: + lines.append("Local current analysis: none") + else: + lines.append("Local latest pointer: none") + return "\n".join(lines) diff --git a/src/slop_farmer/app/deploy.py b/src/slop_farmer/app/deploy.py new file mode 100644 index 0000000000000000000000000000000000000000..fd887470f672c84772824f1a83c1279a7af01fb7 --- /dev/null +++ b/src/slop_farmer/app/deploy.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import os +import subprocess +from pathlib import Path + +from slop_farmer.config import DeployDashboardOptions +from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir + + +def run_deploy_dashboard(options: DeployDashboardOptions) -> None: + repo_root = Path(__file__).resolve().parents[3] + script_path = repo_root / "scripts" / "deploy_dashboard_space.sh" + if not script_path.exists(): + raise FileNotFoundError(f"Could not find deploy script at {script_path}") + env = os.environ.copy() + env.update( + { + "PIPELINE_DATA_DIR": str(options.pipeline_data_dir), + "WEB_DIR": str(options.web_dir), + 
"SNAPSHOT_DIR": str( + resolve_snapshot_source_dir( + snapshot_dir=options.snapshot_dir, + local_snapshots_root=options.pipeline_data_dir.resolve() / "snapshots", + hf_repo_id=options.hf_repo_id, + hf_revision=options.hf_revision, + hf_materialize_dir=options.hf_materialize_dir, + hf_output_dir=options.pipeline_data_dir, + ) + ), + "DASHBOARD_WINDOW_DAYS": str(options.dashboard_window_days), + "CONTRIBUTOR_WINDOW_DAYS": str(options.contributor_window_days), + "CONTRIBUTOR_MAX_AUTHORS": str(options.contributor_max_authors), + "COMMIT_MESSAGE": options.commit_message, + "SPACE_ID": options.space_id, + "SPACE_EMOJI": options.space_emoji, + "SPACE_COLOR_FROM": options.space_color_from, + "SPACE_COLOR_TO": options.space_color_to, + "SPACE_SHORT_DESCRIPTION": options.space_short_description, + } + ) + if options.analysis_input is not None: + env["ANALYSIS_INPUT"] = str(options.analysis_input.resolve()) + if options.contributors_input is not None: + env["CONTRIBUTORS_INPUT"] = str(options.contributors_input.resolve()) + if options.pr_scope_input is not None: + env["PR_SCOPE_INPUT"] = str(options.pr_scope_input.resolve()) + if options.refresh_contributors: + env["REFRESH_CONTRIBUTORS"] = "1" + if options.private_space: + env["PRIVATE_FLAG"] = "--private" + if options.space_title: + env["SPACE_TITLE"] = options.space_title + if options.dataset_id: + env["DATASET_ID"] = options.dataset_id + if options.space_tags: + env["SPACE_TAGS"] = options.space_tags + subprocess.run([str(script_path)], cwd=repo_root, env=env, check=True) diff --git a/src/slop_farmer/app/duplicate_prs.py b/src/slop_farmer/app/duplicate_prs.py new file mode 100644 index 0000000000000000000000000000000000000000..4fc4e3a006ef204ac2068e66ad6e93da9022454a --- /dev/null +++ b/src/slop_farmer/app/duplicate_prs.py @@ -0,0 +1,702 @@ +from __future__ import annotations + +import json +import shutil +import subprocess +import tempfile +from datetime import UTC, datetime +from pathlib import Path, PurePosixPath +from typing import Any + +from slop_farmer.config import RepoRef +from slop_farmer.data.parquet_io import read_json, write_json, write_text +from slop_farmer.reports.canonical_duplicate_pr import prepare_publish_artifacts, stage_run_bundle +from slop_farmer.reports.duplicate_prs import ( + DEFAULT_DUPLICATE_PR_MODEL, + load_duplicate_pr_bundle, + select_mergeable_duplicate_pr_cluster, +) + +# Navigation: +# - run_duplicate_pr_merge(): end-to-end orchestration entrypoint +# - validate_codex_result(): structured result checks +# - git/gh/codex helpers +# - file-policy helpers + +DEFAULT_RUNS_DIR = Path("runs/duplicate_prs") +DEFAULT_FILE_POLICY = "pure-loc" +FILE_POLICY_CHOICES = ("pure-loc", "allow-docs", "allow-any") +CODE_FILE_SUFFIXES = { + ".c", + ".cc", + ".cpp", + ".go", + ".h", + ".hpp", + ".java", + ".js", + ".jsx", + ".kt", + ".m", + ".mm", + ".php", + ".py", + ".rb", + ".rs", + ".scala", + ".sh", + ".swift", + ".ts", + ".tsx", +} +DOC_FILE_SUFFIXES = {".md", ".mdx", ".rst", ".txt"} +DOC_DIRECTORY_NAMES = {"doc", "docs"} +DOC_FILE_PREFIXES = ("changelog", "readme", "news") +TEST_DIRECTORY_NAMES = {"test", "tests"} + + +# Merge orchestration + + +def run_duplicate_pr_merge( + *, + report_path: Path | None, + snapshot_dir: Path | None, + repo_dir: Path, + cluster_id: str | None, + fork_owner: str | None, + fork_repo: str | None = None, + upstream_repo: str | None = None, + upstream_remote: str = "origin", + fork_remote: str = "fork", + file_policy: str = DEFAULT_FILE_POLICY, + model: str = DEFAULT_DUPLICATE_PR_MODEL, + 
runs_dir: Path = DEFAULT_RUNS_DIR, +) -> dict[str, Any]: + if file_policy not in FILE_POLICY_CHOICES: + raise ValueError( + f"Unsupported file policy {file_policy!r}. " + f"Expected one of: {', '.join(FILE_POLICY_CHOICES)}." + ) + bundle = load_duplicate_pr_bundle( + report_path=report_path, + snapshot_dir=snapshot_dir, + model=model, + ) + effective_upstream_repo = _normalize_repo_slug(upstream_repo or bundle.repo) + selected_cluster = select_mergeable_duplicate_pr_cluster( + bundle, + cluster_id=cluster_id, + model=model, + ) + + _require_command("git") + _require_command("gh") + _require_command("codex") + + authenticated_user = _resolve_authenticated_github_user() + fork_target = _resolve_fork_target( + upstream_repo=effective_upstream_repo, + fork_repo=fork_repo, + fork_owner=fork_owner, + authenticated_user=authenticated_user, + ) + effective_fork_owner = fork_target.owner + effective_fork_repo = fork_target.slug + + resolved_repo_dir = repo_dir.resolve() + _validate_repo_checkout( + resolved_repo_dir, + expected_repo=effective_upstream_repo, + remote_name=upstream_remote, + ) + default_branch = _resolve_default_branch(effective_upstream_repo) + + run_dir = _create_run_dir(runs_dir) + manifest = stage_run_bundle( + bundle.report_path, + run_dir, + selected_cluster=selected_cluster, + max_clusters=1, + prompt_repo=effective_upstream_repo, + prompt_default_branch=default_branch, + prompt_file_policy_instruction=_file_policy_instruction(file_policy), + ) + manifest_path = run_dir / "run-manifest.json" + + run_stamp = _utc_stamp() + branch_name = f"codex/{selected_cluster['cluster_id']}-{run_stamp}" + worktree_dir = run_dir / "worktree" + + _create_worktree( + repo_dir=resolved_repo_dir, + worktree_dir=worktree_dir, + branch_name=branch_name, + default_branch=default_branch, + upstream_remote=upstream_remote, + ) + _update_manifest( + manifest_path, + { + "upstream_repo": effective_upstream_repo, + "upstream_remote": upstream_remote, + "default_branch": default_branch, + "branch_name": branch_name, + "worktree_dir": str(worktree_dir.resolve()), + "fork_owner": effective_fork_owner, + "fork_repo": effective_fork_repo, + "fork_remote": fork_remote, + "file_policy": file_policy, + }, + ) + + artifacts = manifest["artifacts"] + result_path = Path(artifacts["result_path"]) + _run_codex_exec( + worktree_dir=worktree_dir, + run_dir=run_dir, + prompt_path=Path(artifacts["prompt_path"]), + schema_path=Path(artifacts["schema_path"]), + result_path=result_path, + ) + + result = validate_codex_result(manifest_path, result_path) + changed_paths = _validate_synthesized_branch( + worktree_dir=worktree_dir, + upstream_remote=upstream_remote, + default_branch=default_branch, + commit_message=result["commit_message"], + file_policy=file_policy, + ) + publish_metadata = prepare_publish_artifacts(manifest_path, result_path) + + fork_repo = _ensure_fork_repo( + upstream_repo=effective_upstream_repo, + fork_repo=effective_fork_repo, + authenticated_user=authenticated_user, + ) + _ensure_fork_remote(worktree_dir=worktree_dir, fork_repo=fork_repo, remote_name=fork_remote) + _push_branch(worktree_dir=worktree_dir, branch_name=branch_name, remote_name=fork_remote) + pr_url = _create_pull_request( + upstream_repo=effective_upstream_repo, + default_branch=default_branch, + fork_owner=effective_fork_owner, + branch_name=branch_name, + title=publish_metadata["pr_title"], + body_path=Path(publish_metadata["pr_body_path"]), + ) + + pr_url_path = Path(artifacts["pr_url_path"]) + write_text(pr_url.rstrip() + "\n", 
pr_url_path) + _update_manifest( + manifest_path, + { + "changed_paths": changed_paths, + "pr_url": pr_url, + }, + ) + + publish_metadata_path = Path(artifacts["publish_metadata_path"]) + publish_metadata["pr_url"] = pr_url + publish_metadata["changed_paths"] = changed_paths + write_json(publish_metadata, publish_metadata_path) + + return { + "cluster_id": selected_cluster["cluster_id"], + "repo": effective_upstream_repo, + "report_path": str(bundle.report_path), + "run_dir": str(run_dir.resolve()), + "worktree_dir": str(worktree_dir.resolve()), + "branch_name": branch_name, + "fork_repo": fork_repo, + "fork_remote": fork_remote, + "upstream_remote": upstream_remote, + "file_policy": file_policy, + "pr_url": pr_url, + "changed_paths": changed_paths, + } + + +def validate_codex_result(manifest_path: Path, result_path: Path) -> dict[str, Any]: + manifest = read_json(manifest_path.resolve()) + result = json.loads(result_path.resolve().read_text(encoding="utf-8")) + selected_cluster = manifest["selected_cluster"] + + if result.get("status") != "success": + summary = str(result.get("summary") or "").strip() + raise ValueError( + "Codex did not synthesize a valid canonical PR." + (f" {summary}" if summary else "") + ) + if result.get("cluster_id") != selected_cluster["cluster_id"]: + raise ValueError("Codex result cluster_id does not match the selected cluster.") + + expected_source_pr_numbers = _ordered_ints(selected_cluster["source_pr_numbers"]) + actual_source_pr_numbers = _normalize_result_source_pr_numbers( + expected_source_pr_numbers=expected_source_pr_numbers, + raw_source_pr_numbers=result.get("source_pr_numbers"), + ) + + tests_run = [ + str(value).strip() for value in result.get("tests_run") or [] if str(value).strip() + ] + if not tests_run: + raise ValueError("Codex result did not include any executed validation commands.") + + for field in ("commit_message", "pr_title", "summary"): + if not str(result.get(field) or "").strip(): + raise ValueError(f"Codex result did not provide a {field.replace('_', ' ')}.") + + normalized = dict(result) + normalized["source_pr_numbers"] = actual_source_pr_numbers + normalized["tests_run"] = tests_run + return normalized + + +# GitHub / git / Codex helpers + + +def _require_command(command_name: str) -> None: + if shutil.which(command_name): + return + raise RuntimeError(f"Missing required command: {command_name}") + + +def _resolve_authenticated_github_user() -> str: + try: + _run_checked(["gh", "auth", "status"]) + except RuntimeError as exc: + raise RuntimeError( + "GitHub CLI authentication is invalid. Run `gh auth login` and retry." 
+ ) from exc + login = _run_stdout(["gh", "api", "user", "--jq", ".login"]).strip() + if not login: + raise RuntimeError("Could not resolve the authenticated GitHub user from `gh api user`.") + return login + + +def _normalize_repo_slug(raw: str) -> str: + return RepoRef.parse(raw).slug + + +def _resolve_fork_target( + *, + upstream_repo: str, + fork_repo: str | None, + fork_owner: str | None, + authenticated_user: str, +) -> RepoRef: + if fork_repo is not None: + return RepoRef.parse(fork_repo.strip()) + owner = (fork_owner or authenticated_user).strip() + if not owner: + raise RuntimeError("Could not resolve the GitHub fork owner.") + upstream = RepoRef.parse(upstream_repo) + return RepoRef(owner=owner, name=upstream.name) + + +def _validate_repo_checkout(repo_dir: Path, *, expected_repo: str, remote_name: str) -> None: + if not repo_dir.exists(): + raise RuntimeError(f"Missing repo checkout: {repo_dir}") + remote_url = _run_stdout(["git", "-C", str(repo_dir), "remote", "get-url", remote_name]).strip() + actual_repo = _repo_slug_from_remote_url(remote_url) + if actual_repo != expected_repo: + raise RuntimeError( + f"`--repo-dir` remote {remote_name!r} must point at {expected_repo}, " + f"but resolves to {actual_repo or remote_url!r}." + ) + + +def _resolve_default_branch(repo: str) -> str: + default_branch = _run_stdout( + [ + "gh", + "repo", + "view", + repo, + "--json", + "defaultBranchRef", + "--jq", + ".defaultBranchRef.name", + ] + ).strip() + if not default_branch: + raise RuntimeError(f"Could not resolve the default branch for {repo}.") + return default_branch + + +def _create_run_dir(runs_dir: Path) -> Path: + base_dir = runs_dir.resolve() + base_dir.mkdir(parents=True, exist_ok=True) + return Path(tempfile.mkdtemp(prefix=f"{_utc_stamp()}.", dir=base_dir)) + + +def _create_worktree( + *, + repo_dir: Path, + worktree_dir: Path, + branch_name: str, + default_branch: str, + upstream_remote: str, +) -> None: + _run_checked(["git", "-C", str(repo_dir), "fetch", upstream_remote, default_branch]) + _run_checked( + [ + "git", + "-C", + str(repo_dir), + "worktree", + "add", + "-B", + branch_name, + str(worktree_dir), + f"{upstream_remote}/{default_branch}", + ] + ) + + +def _run_codex_exec( + *, + worktree_dir: Path, + run_dir: Path, + prompt_path: Path, + schema_path: Path, + result_path: Path, +) -> None: + prompt_text = prompt_path.read_text(encoding="utf-8") + _run_checked( + [ + "codex", + "exec", + "-C", + str(worktree_dir), + "--add-dir", + str(run_dir), + "--full-auto", + "--output-schema", + str(schema_path), + "-o", + str(result_path), + "-", + ], + input_text=prompt_text, + ) + if not result_path.exists(): + raise RuntimeError("Codex did not write a structured result.") + + +def _validate_synthesized_branch( + *, + worktree_dir: Path, + upstream_remote: str, + default_branch: str, + commit_message: str, + file_policy: str, +) -> list[str]: + ahead_count = int( + _run_stdout( + [ + "git", + "-C", + str(worktree_dir), + "rev-list", + "--count", + f"{upstream_remote}/{default_branch}..HEAD", + ] + ) + ) + if ahead_count != 1: + raise RuntimeError( + f"Synthesized branch must contain exactly one commit on top of " + f"{upstream_remote}/{default_branch}; found {ahead_count}." + ) + + head_subject = _run_stdout(["git", "-C", str(worktree_dir), "log", "-1", "--pretty=%s"]).strip() + if head_subject != commit_message: + raise RuntimeError( + f"Codex commit message {commit_message!r} does not match HEAD subject {head_subject!r}." 
+ ) + + status_output = _run_stdout(["git", "-C", str(worktree_dir), "status", "--porcelain"]) + if status_output.strip(): + raise RuntimeError("Codex left uncommitted changes in the synthesis worktree.") + + changed_paths = [ + line.strip() + for line in _run_stdout( + [ + "git", + "-C", + str(worktree_dir), + "diff", + "--name-only", + f"{upstream_remote}/{default_branch}..HEAD", + ] + ).splitlines() + if line.strip() + ] + if not changed_paths: + raise RuntimeError("The synthesized branch does not modify any files.") + + if file_policy == "pure-loc": + disallowed_paths = [path for path in changed_paths if _is_doc_path(path)] + if disallowed_paths: + raise RuntimeError( + "The synthesized branch touched non-LOC documentation paths: " + + ", ".join(disallowed_paths) + ) + unsupported_paths = [ + path for path in changed_paths if not _is_allowed_path(path, allow_docs=False) + ] + if unsupported_paths: + raise RuntimeError( + "The synthesized branch touched files outside implementation/test code paths: " + + ", ".join(unsupported_paths) + ) + elif file_policy == "allow-docs": + unsupported_paths = [ + path for path in changed_paths if not _is_allowed_path(path, allow_docs=True) + ] + if unsupported_paths: + raise RuntimeError( + "The synthesized branch touched files outside implementation/test/documentation " + "paths: " + ", ".join(unsupported_paths) + ) + elif file_policy != "allow-any": + raise RuntimeError(f"Unsupported file policy: {file_policy}") + return changed_paths + + +def _ensure_fork_repo( + *, + upstream_repo: str, + fork_repo: str, + authenticated_user: str, +) -> str: + fork_target = RepoRef.parse(fork_repo) + try: + _run_checked(["gh", "repo", "view", fork_repo, "--json", "nameWithOwner"]) + except RuntimeError: + fork_command = [ + "gh", + "repo", + "fork", + upstream_repo, + "--clone=false", + "--remote=false", + "--fork-name", + fork_target.name, + ] + if fork_target.owner != authenticated_user: + fork_command.extend(["--org", fork_target.owner]) + _run_checked(fork_command) + return fork_target.slug + + +def _ensure_fork_remote(*, worktree_dir: Path, fork_repo: str, remote_name: str) -> None: + fork_url = f"https://github.com/{fork_repo}.git" + try: + existing_url = _run_stdout( + ["git", "-C", str(worktree_dir), "remote", "get-url", remote_name] + ).strip() + except RuntimeError: + _run_checked( + [ + "git", + "-C", + str(worktree_dir), + "remote", + "add", + remote_name, + fork_url, + ] + ) + return + if existing_url != fork_url: + raise RuntimeError( + f"Existing `{remote_name}` remote points to {existing_url}, expected {fork_url}." 
+ ) + + +def _push_branch(*, worktree_dir: Path, branch_name: str, remote_name: str) -> None: + _run_checked(["git", "-C", str(worktree_dir), "push", "-u", remote_name, branch_name]) + + +def _create_pull_request( + *, + upstream_repo: str, + default_branch: str, + fork_owner: str, + branch_name: str, + title: str, + body_path: Path, +) -> str: + return _run_stdout( + [ + "gh", + "pr", + "create", + "--repo", + upstream_repo, + "--base", + default_branch, + "--head", + f"{fork_owner}:{branch_name}", + "--title", + title, + "--body-file", + str(body_path), + ] + ).strip() + + +def _repo_slug_from_remote_url(url: str) -> str: + normalized = url.strip() + if not normalized: + return "" + for prefix in ( + "https://github.com/", + "http://github.com/", + "ssh://git@github.com/", + "git://github.com/", + ): + if normalized.startswith(prefix): + normalized = normalized[len(prefix) :] + break + if normalized.startswith("git@github.com:"): + normalized = normalized.split(":", 1)[1] + normalized = normalized.rstrip("/") + if normalized.endswith(".git"): + normalized = normalized[:-4] + return normalized + + +# File-policy helpers + + +def _is_doc_path(path: str) -> bool: + pure_path = PurePosixPath(path) + lowered_parts = [part.lower() for part in pure_path.parts] + lowered_name = pure_path.name.lower() + if pure_path.suffix.lower() in DOC_FILE_SUFFIXES: + return True + if any(part in DOC_DIRECTORY_NAMES for part in lowered_parts): + return True + return lowered_name.startswith(DOC_FILE_PREFIXES) + + +def _is_allowed_path(path: str, *, allow_docs: bool) -> bool: + pure_path = PurePosixPath(path) + if _is_test_path(pure_path): + return True + if allow_docs and _is_doc_path(path): + return True + return pure_path.suffix.lower() in CODE_FILE_SUFFIXES + + +def _file_policy_instruction(file_policy: str) -> str: + if file_policy == "pure-loc": + return ( + "Do not touch README files, changelogs, markdown docs, prose-only files, " + "or commentary artifacts. Fail instead of submitting a noisy branch." + ) + if file_policy == "allow-docs": + return ( + "Documentation and markdown changes are allowed only when they are necessary " + "for the same fix. Keep them minimal and subordinate to the code patch." + ) + if file_policy == "allow-any": + return ( + "Non-code file changes are allowed when they are required for the same fix, " + "but keep the patch as small and focused as possible." 
+ ) + raise ValueError(f"Unsupported file policy: {file_policy}") + + +def _is_test_path(path: PurePosixPath) -> bool: + lowered_parts = [part.lower() for part in path.parts] + lowered_name = path.name.lower() + lowered_stem = path.stem.lower() + if any(part in TEST_DIRECTORY_NAMES for part in lowered_parts): + return True + return lowered_name.startswith("test_") or lowered_stem.endswith("_test") + + +def _run_checked( + args: list[str], + *, + input_text: str | None = None, +) -> subprocess.CompletedProcess[str]: + try: + return subprocess.run( + args, + input=input_text, + text=True, + capture_output=True, + check=True, + ) + except subprocess.CalledProcessError as exc: + detail = (exc.stderr or exc.stdout or "").strip() + message = f"Command failed: {' '.join(args)}" + if detail: + message = f"{message}: {detail}" + raise RuntimeError(message) from exc + + +def _run_stdout(args: list[str]) -> str: + return _run_checked(args).stdout + + +def _update_manifest(manifest_path: Path, updates: dict[str, Any]) -> None: + manifest = read_json(manifest_path) + manifest.update(updates) + write_json(manifest, manifest_path) + + +def _utc_stamp() -> str: + return datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ") + + +def _ordered_ints(values: Any) -> list[int]: + ordered: list[int] = [] + seen: set[int] = set() + for value in values or []: + number = _coerce_int(value) + if number is None or number in seen: + continue + ordered.append(number) + seen.add(number) + return ordered + + +def _normalize_result_source_pr_numbers( + *, + expected_source_pr_numbers: list[int], + raw_source_pr_numbers: Any, +) -> list[int]: + actual_source_pr_numbers = _ordered_ints(raw_source_pr_numbers) + if len(actual_source_pr_numbers) < 2: + raise ValueError( + "Codex result must reference at least two open source PRs from the selected cluster." + ) + + expected_source_pr_set = set(expected_source_pr_numbers) + unknown_source_pr_numbers = [ + number for number in actual_source_pr_numbers if number not in expected_source_pr_set + ] + if unknown_source_pr_numbers: + raise ValueError( + "Codex result source_pr_numbers included PRs outside the selected open PR set: " + + ", ".join(str(number) for number in unknown_source_pr_numbers) + ) + + actual_source_pr_set = set(actual_source_pr_numbers) + return [number for number in expected_source_pr_numbers if number in actual_source_pr_set] + + +def _coerce_int(value: Any) -> int | None: + if value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None diff --git a/src/slop_farmer/app/hf_checkpoint_import.py b/src/slop_farmer/app/hf_checkpoint_import.py new file mode 100644 index 0000000000000000000000000000000000000000..1eed797a4a0ee5cad8c7b7ae57d134966f684e8a --- /dev/null +++ b/src/slop_farmer/app/hf_checkpoint_import.py @@ -0,0 +1,483 @@ +"""Import a historical Hugging Face dataset checkpoint into a clean snapshot. + +This is mainly for legacy datasets that kept their richest data under +`_checkpoints//...` instead of promoting the latest full snapshot +to the dataset root. + +The importer: + +1. selects a checkpoint from a source HF dataset repo +2. downloads the checkpoint parquet files +3. rewrites small tables to the current local schema when needed +4. regenerates derived artifacts (`links`, `issue_comments`, `pr_comments`) +5. writes a normal local snapshot directory that `analyze` / `pr-scope` can use +6. 
optionally republishes that clean snapshot +""" + +from __future__ import annotations + +import re +import shutil +from collections import defaultdict +from collections.abc import Mapping +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from huggingface_hub import HfApi, hf_hub_download + +from slop_farmer.app.publish_dataset_snapshot import publish_dataset_snapshot +from slop_farmer.config import CheckpointImportOptions +from slop_farmer.data.dataset_card import build_hf_dataset_card +from slop_farmer.data.links import build_pr_duplicate_candidate_rows, build_text_link_rows +from slop_farmer.data.parquet_io import ( + SCHEMAS, + read_json, + read_parquet_rows, + write_json, + write_parquet, + write_text, +) + +CHECKPOINT_PATH_PATTERN = re.compile( + r"^(?P<root>_?checkpoints)/(?P<snapshot_id>[^/]+)/(?P<filename>[^/]+)$" +) +REQUIRED_CHECKPOINT_FILES = ( + "issues.parquet", + "pull_requests.parquet", + "comments.parquet", + "reviews.parquet", + "review_comments.parquet", + "pr_files.parquet", + "pr_diffs.parquet", + "events.parquet", +) +SMALL_TABLES = ( + "issues", + "pull_requests", + "comments", + "reviews", + "review_comments", + "events", +) +COPIED_TABLES = ( + "pr_files", + "pr_diffs", +) + +__all__ = [ + "import_hf_checkpoint", + "materialize_checkpoint_snapshot", +]
+ +def import_hf_checkpoint(options: CheckpointImportOptions) -> Path: + api = HfApi() + siblings = [ + sibling.rfilename + for sibling in api.dataset_info(repo_id=options.source_repo_id).siblings or [] + ] + checkpoint_root, checkpoint_id = _select_checkpoint( + siblings, + checkpoint_id=options.checkpoint_id, + checkpoint_root=options.checkpoint_root, + ) + checkpoint_files = { + filename: Path( + hf_hub_download( + repo_id=options.source_repo_id, + repo_type="dataset", + filename=f"{checkpoint_root}/{checkpoint_id}/{filename}", + ) + ) + for filename in REQUIRED_CHECKPOINT_FILES + } + source_manifest = _maybe_download_json(options.source_repo_id, "manifest.json") + source_progress = _maybe_download_json( + options.source_repo_id, f"{checkpoint_root}/{checkpoint_id}/progress.json" + ) + snapshot_dir = materialize_checkpoint_snapshot( + checkpoint_files, + output_root=options.output_dir, + source_repo_id=options.source_repo_id, + checkpoint_root=checkpoint_root, + checkpoint_id=checkpoint_id, + source_manifest=source_manifest, + source_progress=source_progress, + force=options.force, + ) + if options.publish_repo_id: + publish_dataset_snapshot( + snapshot_dir, options.publish_repo_id, private=options.private_hf_repo + ) + return snapshot_dir
+ +def materialize_checkpoint_snapshot( + checkpoint_files: Mapping[str, Path], + *, + output_root: Path, + source_repo_id: str, + checkpoint_root: str, + checkpoint_id: str, + source_manifest: Mapping[str, Any] | None = None, + source_progress: Mapping[str, Any] | None = None, + force: bool = False, +) -> Path: + source_manifest = dict(source_manifest or {}) + source_progress = dict(source_progress or {}) + output_root = output_root.resolve() + snapshot_dir = output_root / "snapshots" / _snapshot_dir_name(source_repo_id, checkpoint_id) + if snapshot_dir.exists(): + if not force: + raise FileExistsError(f"Snapshot already exists: {snapshot_dir}") + shutil.rmtree(snapshot_dir) + snapshot_dir.mkdir(parents=True, exist_ok=True) + + rows_by_table: dict[str, list[dict[str, Any]]] = {} + for table_name in SMALL_TABLES: + rows = read_parquet_rows(checkpoint_files[f"{table_name}.parquet"]) + rows_by_table[table_name] = _project_rows(rows, table_name) + 
write_parquet(rows_by_table[table_name], snapshot_dir / f"{table_name}.parquet", table_name) + + for table_name in COPIED_TABLES: + shutil.copy2( + checkpoint_files[f"{table_name}.parquet"], snapshot_dir / f"{table_name}.parquet" + ) + + repo_slug = _resolve_repo(rows_by_table, source_manifest, source_progress) + extracted_at = _resolve_extracted_at(rows_by_table, source_manifest) + link_rows = _derived_link_rows( + repo_slug=repo_slug, + snapshot_id=checkpoint_id, + extracted_at=extracted_at, + issues=rows_by_table["issues"], + pull_requests=rows_by_table["pull_requests"], + comments=rows_by_table["comments"], + reviews=rows_by_table["reviews"], + review_comments=rows_by_table["review_comments"], + events=rows_by_table["events"], + ) + write_parquet(link_rows, snapshot_dir / "links.parquet", "links") + + issue_comment_rows, pr_comment_rows = _viewer_comment_rows( + rows_by_table["comments"], rows_by_table["pull_requests"] + ) + write_parquet(issue_comment_rows, snapshot_dir / "issue_comments.parquet", "comments") + write_parquet(pr_comment_rows, snapshot_dir / "pr_comments.parquet", "comments") + + manifest = { + "repo": repo_slug, + "snapshot_id": checkpoint_id, + "extracted_at": extracted_at, + "imported_at": _iso_now(), + "source_type": "hf_checkpoint_import", + "source_hf_repo_id": source_repo_id, + "source_checkpoint_root": checkpoint_root, + "source_checkpoint_id": checkpoint_id, + "source_manifest": source_manifest, + "source_progress": source_progress, + "counts": { + "issues": len(rows_by_table["issues"]), + "pull_requests": len(rows_by_table["pull_requests"]), + "comments": len(rows_by_table["comments"]), + "reviews": len(rows_by_table["reviews"]), + "review_comments": len(rows_by_table["review_comments"]), + "pr_files": _parquet_row_count(snapshot_dir / "pr_files.parquet"), + "pr_diffs": _parquet_row_count(snapshot_dir / "pr_diffs.parquet"), + "timeline_events": len(rows_by_table["events"]), + "links": len(link_rows), + }, + "notes": ( + "Imported from a remote HF dataset checkpoint and regenerated locally derived link/comment views." + ), + } + write_json(manifest, snapshot_dir / "manifest.json") + write_text( + _dataset_card(repo_slug, checkpoint_id, source_repo_id, checkpoint_root), + snapshot_dir / "README.md", + ) + return snapshot_dir + + +def _select_checkpoint( + sibling_paths: list[str], + *, + checkpoint_id: str | None, + checkpoint_root: str | None, +) -> tuple[str, str]: + candidates: defaultdict[tuple[str, str], set[str]] = defaultdict(set) + for path in sibling_paths: + match = CHECKPOINT_PATH_PATTERN.match(path) + if match is None: + continue + root = match.group("root") + snapshot_id = match.group("snapshot_id") + filename = match.group("filename") + candidates[(root, snapshot_id)].add(filename) + + viable = [ + (root, snapshot_id) + for (root, snapshot_id), filenames in candidates.items() + if all(filename in filenames for filename in REQUIRED_CHECKPOINT_FILES) + ] + if checkpoint_id is not None: + requested = [ + candidate + for candidate in viable + if candidate[1] == checkpoint_id + and (checkpoint_root is None or candidate[0] == checkpoint_root) + ] + if not requested: + raise ValueError( + f"Checkpoint {checkpoint_root or '*'}:{checkpoint_id} not found with required files." 
+ ) + return sorted(requested, key=_checkpoint_sort_key)[-1] + + if checkpoint_root is not None: + viable = [candidate for candidate in viable if candidate[0] == checkpoint_root] + if not viable: + raise ValueError("No viable checkpoint directories were found in the source HF dataset.") + return sorted(viable, key=_checkpoint_sort_key)[-1] + + +def _checkpoint_sort_key(candidate: tuple[str, str]) -> tuple[str, int]: + root, snapshot_id = candidate + root_priority = 1 if root == "_checkpoints" else 0 + return snapshot_id, root_priority + + +def _maybe_download_json(repo_id: str, filename: str) -> dict[str, Any]: + try: + path = hf_hub_download(repo_id=repo_id, repo_type="dataset", filename=filename) + except Exception: + return {} + return read_json(Path(path)) + + +def _project_rows(rows: list[dict[str, Any]], table_name: str) -> list[dict[str, Any]]: + field_names = [field.name for field in SCHEMAS[table_name]] + return [{field_name: row.get(field_name) for field_name in field_names} for row in rows] + + +def _resolve_repo( + rows_by_table: Mapping[str, list[dict[str, Any]]], + source_manifest: Mapping[str, Any], + source_progress: Mapping[str, Any], +) -> str: + for table_name in ("issues", "pull_requests", "comments"): + rows = rows_by_table.get(table_name) or [] + if rows and rows[0].get("repo"): + return str(rows[0]["repo"]) + return str(source_manifest.get("repo") or source_progress.get("repo") or "") + + +def _resolve_extracted_at( + rows_by_table: Mapping[str, list[dict[str, Any]]], + source_manifest: Mapping[str, Any], +) -> str: + for table_name in ( + "issues", + "pull_requests", + "comments", + "reviews", + "review_comments", + "events", + ): + rows = rows_by_table.get(table_name) or [] + if rows and rows[0].get("extracted_at"): + return str(rows[0]["extracted_at"]) + return str(source_manifest.get("extracted_at") or _iso_now()) + + +def _derived_link_rows( + *, + repo_slug: str, + snapshot_id: str, + extracted_at: str, + issues: list[dict[str, Any]], + pull_requests: list[dict[str, Any]], + comments: list[dict[str, Any]], + reviews: list[dict[str, Any]], + review_comments: list[dict[str, Any]], + events: list[dict[str, Any]], +) -> list[dict[str, Any]]: + owner, repo_name = repo_slug.split("/", 1) + link_rows: list[dict[str, Any]] = [] + for issue_row in issues: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="issue", + source_number=int(issue_row["number"]), + source_id=issue_row.get("github_id"), + body=issue_row.get("body"), + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for pr_row in pull_requests: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="pull_request", + source_number=int(pr_row["number"]), + source_id=pr_row.get("github_id"), + body=pr_row.get("body"), + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for comment_row in comments: + parent_number = comment_row.get("parent_number") + if parent_number is None: + continue + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="comment", + source_number=int(parent_number), + source_id=comment_row.get("github_id"), + body=comment_row.get("body"), + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for review_row in reviews: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="review", + 
source_number=int(review_row["pull_request_number"]), + source_id=review_row.get("github_id"), + body=review_row.get("body"), + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for review_comment_row in review_comments: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=owner, + repo_name=repo_name, + source_type="review_comment", + source_number=int(review_comment_row["pull_request_number"]), + source_id=review_comment_row.get("github_id"), + body=review_comment_row.get("body"), + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + link_rows.extend( + build_pr_duplicate_candidate_rows( + repo=repo_slug, + pull_requests=pull_requests, + link_rows=link_rows, + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for event in events: + source_issue_number = event.get("source_issue_number") + if not source_issue_number: + continue + link_rows.append( + { + "repo": repo_slug, + "source_type": event["parent_kind"], + "source_number": event["parent_number"], + "source_github_id": None, + "target_owner": owner, + "target_repo": repo_name, + "target_number": source_issue_number, + "link_type": f"timeline:{event['event']}", + "link_origin": "timeline", + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + } + ) + return sorted( + _dedupe_link_rows(link_rows), + key=lambda row: ( + str(row.get("source_type") or ""), + int(row.get("source_number") or 0), + int(row.get("source_github_id") or 0), + str(row.get("target_owner") or ""), + str(row.get("target_repo") or ""), + int(row.get("target_number") or 0), + str(row.get("link_type") or ""), + str(row.get("link_origin") or ""), + ), + ) + + +def _dedupe_link_rows(rows: list[dict[str, Any]]) -> list[dict[str, Any]]: + deduped: dict[tuple[Any, ...], dict[str, Any]] = {} + for row in rows: + key = ( + row.get("repo"), + row.get("source_type"), + row.get("source_number"), + row.get("source_github_id"), + row.get("target_owner"), + row.get("target_repo"), + row.get("target_number"), + row.get("link_type"), + row.get("link_origin"), + ) + deduped[key] = row + return list(deduped.values()) + + +def _viewer_comment_rows( + comments: list[dict[str, Any]], + pull_requests: list[dict[str, Any]], +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + pr_numbers = {int(row["number"]) for row in pull_requests if row.get("number") is not None} + issue_comments: list[dict[str, Any]] = [] + pr_comments: list[dict[str, Any]] = [] + for row in comments: + parent_number = row.get("parent_number") + parent_kind = row.get("parent_kind") + if parent_kind == "pull_request" or parent_number in pr_numbers: + pr_comments.append(row) + else: + issue_comments.append(row) + return issue_comments, pr_comments + + +def _dataset_card( + repo_slug: str, snapshot_id: str, source_repo_id: str, checkpoint_root: str +) -> str: + return build_hf_dataset_card( + repo_slug, + snapshot_id, + notes=[ + f"source HF dataset: `{source_repo_id}`", + f"source checkpoint root: `{checkpoint_root}`", + "links were regenerated locally from text references and timeline events", + ], + ) + + +def _snapshot_dir_name(source_repo_id: str, checkpoint_id: str) -> str: + return f"hf-{source_repo_id.replace('/', '--')}-{checkpoint_id}" + + +def _iso_now() -> str: + return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def _parquet_row_count(path: Path) -> int: + import pyarrow.parquet as pq + + return pq.read_metadata(path).num_rows diff --git a/src/slop_farmer/app/pipeline.py 
b/src/slop_farmer/app/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..c2bb7268087a731b2f2dab2e3ef2d8512ab38bde --- /dev/null +++ b/src/slop_farmer/app/pipeline.py @@ -0,0 +1,1016 @@ +from __future__ import annotations + +import json +import shutil +from collections.abc import Iterable +from datetime import UTC, datetime, timedelta +from pathlib import Path +from typing import Any, Protocol + +from slop_farmer.config import NewContributorReportOptions, PipelineOptions, resolve_github_token +from slop_farmer.data.dataset_card import build_hf_dataset_card +from slop_farmer.data.github_api import GitHubClient +from slop_farmer.data.links import build_pr_duplicate_candidate_rows, build_text_link_rows +from slop_farmer.data.normalize import ( + issue_url_to_number, + normalize_comment, + normalize_issue, + normalize_pr_diff, + normalize_pr_file, + normalize_pull_request, + normalize_review, + normalize_review_comment, + normalize_timeline_event, +) +from slop_farmer.data.parquet_io import ( + read_json, + read_parquet_rows, + write_json, + write_parquet, + write_text, +) +from slop_farmer.reports.new_contributor_report import run_new_contributor_report + +# Navigation: +# - protocol + small time/log/view helpers +# - checkpoint/state helpers for resumable crawls +# - incremental merge helpers +# - run_pipeline(): fetch -> hydrate -> derive links -> merge/write -> publish + + +class GitHubClientLike(Protocol): + def iter_repo_issues( + self, owner: str, repo: str, since: str | None, limit: int | None + ) -> Iterable[dict[str, Any]]: ... + + def iter_issue_comments_for_number( + self, owner: str, repo: str, number: int, since: str | None, limit: int | None = None + ) -> Iterable[dict[str, Any]]: ... + + def get_pull_request(self, owner: str, repo: str, number: int) -> dict[str, Any]: ... + + def iter_pull_reviews( + self, owner: str, repo: str, number: int, limit: int | None = None + ) -> Iterable[dict[str, Any]]: ... + + def iter_pull_review_comments( + self, owner: str, repo: str, number: int, limit: int | None = None + ) -> Iterable[dict[str, Any]]: ... + + def iter_pull_files( + self, owner: str, repo: str, number: int, limit: int | None = None + ) -> Iterable[dict[str, Any]]: ... + + def get_pull_request_diff(self, owner: str, repo: str, number: int) -> str: ... + + def iter_issue_timeline( + self, owner: str, repo: str, number: int, limit: int | None = None + ) -> Iterable[dict[str, Any]]: ... 
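+# GitHubClientLike mirrors only the GitHubClient methods that run_pipeline calls, so tests or dry runs +# can inject a lightweight stand-in instead of talking to the GitHub API. A rough, hypothetical sketch +# (not part of this module): +#     class StubClient: +#         def iter_repo_issues(self, owner, repo, since, limit): +#             return [] +#         def get_pull_request_diff(self, owner, repo, number): +#             return "" +#         # ...remaining protocol methods returning empty iterables... +#     run_pipeline(options, client=StubClient())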
+ + +def _iso_now() -> str: + return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def _snapshot_id() -> str: + return datetime.now(tz=UTC).strftime("%Y%m%dT%H%M%SZ") + + +def _log(message: str) -> None: + stamp = datetime.now(tz=UTC).strftime("%H:%M:%SZ") + print(f"[{stamp}] {message}", flush=True) + + +def _remaining_limit(limit: int | None, used: int) -> int | None: + if limit is None: + return None + return max(limit - used, 0) + + +def _created_after_cutoff( + item: dict[str, Any], max_age_days: int | None, reference_time: datetime +) -> bool: + if max_age_days is None: + return True + created_at = item.get("created_at") + if not created_at: + return False + try: + created_dt = datetime.fromisoformat(str(created_at).replace("Z", "+00:00")) + except ValueError: + return False + return created_dt >= reference_time - timedelta(days=max_age_days) + + +def _reference_time_for_age_caps(crawl_started_at: str) -> datetime: + try: + return datetime.fromisoformat(crawl_started_at.replace("Z", "+00:00")) + except ValueError: + return datetime.now(tz=UTC) + + +def _dataset_card( + repo: str, snapshot_id: str, manifest: dict[str, Any], *, include_new_contributors: bool = False +) -> str: + notes = ["new contributor reviewer artifacts are included"] if include_new_contributors else [] + del manifest + return build_hf_dataset_card( + repo, + snapshot_id, + include_new_contributors=include_new_contributors, + notes=notes, + ) + + +def _viewer_comment_rows( + comments: list[dict[str, Any]], + pull_requests: list[dict[str, Any]], +) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + pr_numbers = {int(row["number"]) for row in pull_requests if row.get("number") is not None} + issue_comments: list[dict[str, Any]] = [] + pr_comments: list[dict[str, Any]] = [] + for row in comments: + parent_number = row.get("parent_number") + parent_kind = row.get("parent_kind") + if parent_kind == "pull_request" or parent_number in pr_numbers: + pr_comments.append(row) + else: + issue_comments.append(row) + return issue_comments, pr_comments + + +PRIMARY_KEYS: dict[str, tuple[str, ...]] = { + "issues": ("github_id",), + "pull_requests": ("github_id",), + "comments": ("github_id",), + "reviews": ("github_id",), + "review_comments": ("github_id",), + "pr_files": ("repo", "pull_request_number", "filename"), + "pr_diffs": ("repo", "pull_request_number"), + "links": ( + "repo", + "source_type", + "source_number", + "source_github_id", + "target_owner", + "target_repo", + "target_number", + "link_type", + "link_origin", + ), + "events": ( + "repo", + "parent_kind", + "parent_number", + "event", + "created_at", + "actor_login", + "source_issue_number", + "source_issue_url", + "commit_id", + "label_name", + ), +} + +CHECKPOINT_VERSION = 1 +CHECKPOINT_PR_INTERVAL = 5 +CHECKPOINT_ISSUE_TIMELINE_INTERVAL = 25 +CHECKPOINT_TABLE_NAMES = ( + "issues", + "comments", + "pull_requests", + "reviews", + "review_comments", + "pr_files", + "pr_diffs", + "events", +) + + +# Checkpoint/state helpers + + +def _state_dir(output_dir: Path) -> Path: + return output_dir / "state" + + +def _in_progress_path(output_dir: Path) -> Path: + return _state_dir(output_dir) / "in_progress.json" + + +def _watermark_path(output_dir: Path) -> Path: + return _state_dir(output_dir) / "watermark.json" + + +def _latest_snapshot_pointer_path(output_dir: Path) -> Path: + return output_dir / "snapshots" / "latest.json" + + +def _checkpoint_dir(snapshot_dir: Path) -> Path: + return snapshot_dir / "_checkpoint" + + +def 
_checkpoint_progress_path(snapshot_dir: Path) -> Path: + return _checkpoint_dir(snapshot_dir) / "progress.json" + + +def _checkpoint_table_path(snapshot_dir: Path, table_name: str) -> Path: + return _checkpoint_dir(snapshot_dir) / f"{table_name}.parquet" + + +def _checkpoint_options(options: PipelineOptions) -> dict[str, Any]: + return { + "max_issues": options.max_issues, + "max_prs": options.max_prs, + "max_issue_comments": options.max_issue_comments, + "max_reviews_per_pr": options.max_reviews_per_pr, + "max_review_comments_per_pr": options.max_review_comments_per_pr, + "issue_max_age_days": options.issue_max_age_days, + "pr_max_age_days": options.pr_max_age_days, + "fetch_timeline": options.fetch_timeline, + } + + +def _load_in_progress_checkpoint(options: PipelineOptions, repo_slug: str) -> dict[str, Any] | None: + if not options.resume or options.since: + return None + path = _in_progress_path(options.output_dir) + if not path.exists(): + return None + payload = read_json(path) + if not isinstance(payload, dict): + return None + if payload.get("version") != CHECKPOINT_VERSION or payload.get("repo") != repo_slug: + return None + if payload.get("options") != _checkpoint_options(options): + _log(f"Ignoring incompatible in-progress checkpoint: {path}") + return None + snapshot_dir_raw = payload.get("snapshot_dir") + if not isinstance(snapshot_dir_raw, str) or not snapshot_dir_raw: + return None + payload["snapshot_dir"] = Path(snapshot_dir_raw) + previous_snapshot_dir_raw = payload.get("previous_snapshot_dir") + payload["previous_snapshot_dir"] = ( + Path(previous_snapshot_dir_raw) + if isinstance(previous_snapshot_dir_raw, str) and previous_snapshot_dir_raw + else None + ) + return payload + + +def _load_checkpoint_rows(snapshot_dir: Path, table_name: str) -> list[dict[str, Any]]: + return read_parquet_rows(_checkpoint_table_path(snapshot_dir, table_name)) + + +def _write_checkpoint( + *, + options: PipelineOptions, + repo_slug: str, + snapshot_id: str, + snapshot_dir: Path, + effective_since: str | None, + crawl_started_at: str, + extracted_at: str, + previous_snapshot_dir: Path | None, + merge_with_previous: bool, + phase: str, + comments_done: bool, + issue_rows: list[dict[str, Any]], + comment_rows: list[dict[str, Any]], + pr_rows: list[dict[str, Any]], + review_rows: list[dict[str, Any]], + review_comment_rows: list[dict[str, Any]], + pr_file_rows: list[dict[str, Any]], + pr_diff_rows: list[dict[str, Any]], + timeline_rows: list[dict[str, Any]], + completed_issue_timeline_numbers: set[int], +) -> None: + checkpoint_tables = { + "issues": issue_rows, + "comments": comment_rows, + "pull_requests": pr_rows, + "reviews": review_rows, + "review_comments": review_comment_rows, + "pr_files": pr_file_rows, + "pr_diffs": pr_diff_rows, + "events": timeline_rows, + } + for table_name, rows in checkpoint_tables.items(): + write_parquet(rows, _checkpoint_table_path(snapshot_dir, table_name), table_name) + progress = { + "version": CHECKPOINT_VERSION, + "repo": repo_slug, + "snapshot_id": snapshot_id, + "snapshot_dir": str(snapshot_dir), + "effective_since": effective_since, + "crawl_started_at": crawl_started_at, + "extracted_at": extracted_at, + "phase": phase, + "comments_done": comments_done, + "merge_with_previous": merge_with_previous, + "previous_snapshot_dir": str(previous_snapshot_dir) if previous_snapshot_dir else None, + "completed_pr_numbers": sorted( + int(row["number"]) for row in pr_rows if row.get("number") is not None + ), + "completed_issue_timeline_numbers": 
sorted(completed_issue_timeline_numbers), + "options": _checkpoint_options(options), + "counts": {table_name: len(rows) for table_name, rows in checkpoint_tables.items()}, + } + write_json(progress, _checkpoint_progress_path(snapshot_dir)) + write_json(progress, _in_progress_path(options.output_dir)) + + +def _clear_checkpoint(output_dir: Path, snapshot_dir: Path) -> None: + checkpoint_state = _in_progress_path(output_dir) + if checkpoint_state.exists(): + checkpoint_state.unlink() + checkpoint_dir = _checkpoint_dir(snapshot_dir) + if checkpoint_dir.exists(): + shutil.rmtree(checkpoint_dir) + + +def _load_watermark(output_dir: Path) -> dict[str, Any] | None: + path = _watermark_path(output_dir) + if not path.exists(): + return None + return read_json(path) + + +def _load_latest_snapshot_pointer(output_dir: Path) -> dict[str, Any] | None: + path = _latest_snapshot_pointer_path(output_dir) + if not path.exists(): + return None + return read_json(path) + + +def _resolve_effective_since( + options: PipelineOptions, repo_slug: str +) -> tuple[str | None, dict[str, Any] | None]: + if options.since: + return options.since, None + if not options.resume: + return None, None + watermark = _load_watermark(options.output_dir) + if not watermark or watermark.get("repo") != repo_slug: + return None, watermark + return watermark.get("next_since"), watermark + + +def _previous_snapshot_dir(output_dir: Path, repo_slug: str) -> Path | None: + latest = _load_latest_snapshot_pointer(output_dir) + if not latest: + return None + latest_repo = latest.get("repo") + if latest_repo and latest_repo != repo_slug: + return None + snapshot_dir = latest.get("snapshot_dir") + if not snapshot_dir: + return None + path = Path(snapshot_dir) + return path if path.exists() else None + + +# Incremental merge helpers + + +def _row_key(row: dict[str, Any], key_fields: tuple[str, ...]) -> str: + return json.dumps([row.get(field) for field in key_fields], sort_keys=False, default=str) + + +def _merge_rows( + table_name: str, previous_rows: list[dict[str, Any]], delta_rows: list[dict[str, Any]] +) -> list[dict[str, Any]]: + key_fields = PRIMARY_KEYS[table_name] + if table_name == "pr_files": + refreshed_prs = { + (row.get("repo"), row.get("pull_request_number")) + for row in delta_rows + if row.get("pull_request_number") is not None + } + previous_rows = [ + row + for row in previous_rows + if (row.get("repo"), row.get("pull_request_number")) not in refreshed_prs + ] + merged: dict[str, dict[str, Any]] = {} + for row in previous_rows: + merged[_row_key(row, key_fields)] = row + for row in delta_rows: + merged[_row_key(row, key_fields)] = row + rows = list(merged.values()) + sort_fields = [ + field + for field in ("number", "pull_request_number", "parent_number", "github_id", "created_at") + if rows and field in rows[0] + ] + if sort_fields: + rows.sort( + key=lambda row: tuple( + "" if row.get(field) is None else str(row.get(field)) for field in sort_fields + ) + ) + return rows + + +def _load_previous_rows(snapshot_dir: Path | None, table_name: str) -> list[dict[str, Any]]: + if snapshot_dir is None: + return [] + return read_parquet_rows(snapshot_dir / f"{table_name}.parquet") + + +# Pipeline orchestration + + +def run_pipeline(options: PipelineOptions, client: GitHubClientLike | None = None) -> Path: + # Resume or initialize one snapshot run. 
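+ # A saved checkpoint is reused only when resume is enabled, no explicit since value is given, and its + # repo, checkpoint version, and crawl options match this run; otherwise a fresh snapshot id and + # snapshot directory are created below.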
+ repo_slug = options.repo.slug + checkpoint = _load_in_progress_checkpoint(options, repo_slug) + watermark: dict[str, Any] | None = None + if checkpoint: + effective_since = checkpoint.get("effective_since") + previous_snapshot_dir = checkpoint.get("previous_snapshot_dir") + merge_with_previous = bool(checkpoint.get("merge_with_previous")) + crawl_started_at = str(checkpoint.get("crawl_started_at") or _iso_now()) + snapshot_id = str(checkpoint.get("snapshot_id") or _snapshot_id()) + extracted_at = str(checkpoint.get("extracted_at") or _iso_now()) + snapshot_dir = checkpoint["snapshot_dir"] + else: + effective_since, watermark = _resolve_effective_since(options, repo_slug) + previous_snapshot_dir = _previous_snapshot_dir(options.output_dir, repo_slug) + merge_with_previous = previous_snapshot_dir is not None and effective_since is not None + crawl_started_at = _iso_now() + snapshot_id = _snapshot_id() + extracted_at = _iso_now() + snapshot_dir = options.output_dir / "snapshots" / snapshot_id + snapshot_dir.mkdir(parents=True, exist_ok=True) + + if client is None: + token = resolve_github_token() + client = GitHubClient( + token=token, + timeout=options.http_timeout, + max_retries=options.http_max_retries, + log=_log, + ) + + if checkpoint: + _log(f"Resuming snapshot {snapshot_id} for {repo_slug}") + _log(f"Recovered in-progress checkpoint: {_in_progress_path(options.output_dir)}") + else: + _log(f"Starting snapshot {snapshot_id} for {repo_slug}") + _log(f"Output directory: {snapshot_dir}") + if options.since: + _log(f"Using explicit since watermark: {effective_since}") + elif checkpoint: + _log(f"Resuming in-progress crawl window from {effective_since}") + elif effective_since: + source_snapshot = watermark.get("last_successful_snapshot_id") if watermark else None + _log(f"Resuming from local watermark {effective_since} from snapshot {source_snapshot}") + else: + _log("No watermark active; running full snapshot") + if merge_with_previous: + _log(f"Merging delta into previous snapshot: {previous_snapshot_dir}") + + # Load any checkpointed tables before resuming remote work. 
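+ # Each table falls back to an empty list on a fresh run; the completed PR and issue-timeline sets + # below let the hydration loops skip numbers already captured in the checkpoint.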
+ issue_rows = _load_checkpoint_rows(snapshot_dir, "issues") if checkpoint else [] + comment_rows = _load_checkpoint_rows(snapshot_dir, "comments") if checkpoint else [] + pr_rows = _load_checkpoint_rows(snapshot_dir, "pull_requests") if checkpoint else [] + review_rows = _load_checkpoint_rows(snapshot_dir, "reviews") if checkpoint else [] + review_comment_rows = ( + _load_checkpoint_rows(snapshot_dir, "review_comments") if checkpoint else [] + ) + pr_file_rows = _load_checkpoint_rows(snapshot_dir, "pr_files") if checkpoint else [] + pr_diff_rows = _load_checkpoint_rows(snapshot_dir, "pr_diffs") if checkpoint else [] + timeline_rows = _load_checkpoint_rows(snapshot_dir, "events") if checkpoint else [] + completed_issue_timeline_numbers = { + int(number) + for number in (checkpoint or {}).get("completed_issue_timeline_numbers", []) + if number is not None + } + comments_done = bool((checkpoint or {}).get("comments_done")) + if not checkpoint: + _write_checkpoint( + options=options, + repo_slug=repo_slug, + snapshot_id=snapshot_id, + snapshot_dir=snapshot_dir, + effective_since=effective_since, + crawl_started_at=crawl_started_at, + extracted_at=extracted_at, + previous_snapshot_dir=previous_snapshot_dir, + merge_with_previous=merge_with_previous, + phase="starting", + comments_done=False, + issue_rows=issue_rows, + comment_rows=comment_rows, + pr_rows=pr_rows, + review_rows=review_rows, + review_comment_rows=review_comment_rows, + pr_file_rows=pr_file_rows, + pr_diff_rows=pr_diff_rows, + timeline_rows=timeline_rows, + completed_issue_timeline_numbers=completed_issue_timeline_numbers, + ) + + # Fetch lightweight issue/PR stubs and top-level discussion comments first. + _log("Fetching issue and pull request stubs from GitHub") + issue_stubs = list( + client.iter_repo_issues( + options.repo.owner, + options.repo.name, + since=effective_since, + limit=options.max_issues, + ) + ) + reference_time = _reference_time_for_age_caps(crawl_started_at) + issues = [ + item + for item in issue_stubs + if "pull_request" not in item + and _created_after_cutoff(item, options.issue_max_age_days, reference_time) + ] + pr_stubs = [ + item + for item in issue_stubs + if "pull_request" in item + and _created_after_cutoff(item, options.pr_max_age_days, reference_time) + ] + if options.max_prs is not None: + pr_stubs = pr_stubs[: options.max_prs] + _log( + f"Fetched {len(issue_stubs)} stubs total: {len(issues)} issues and {len(pr_stubs)} pull requests selected" + ) + if options.issue_max_age_days is not None: + _log(f"Issue import age cap: last {options.issue_max_age_days} days by created_at") + if options.pr_max_age_days is not None: + _log(f"PR import age cap: last {options.pr_max_age_days} days by created_at") + + issue_number_to_kind = { + item["number"]: ("pull_request" if "pull_request" in item else "issue") + for item in issue_stubs + } + issue_rows = [normalize_issue(repo_slug, item, snapshot_id, extracted_at) for item in issues] + + if comments_done: + _log(f"Reusing {len(comment_rows)} checkpointed discussion comments") + else: + comment_rows = [] + comment_threads_seen = 0 + for item in issue_stubs: + if not item.get("comments"): + continue + remaining = _remaining_limit(options.max_issue_comments, len(comment_rows)) + if remaining == 0: + break + comment_threads_seen += 1 + if comment_threads_seen == 1 or comment_threads_seen % 25 == 0: + _log( + f"Collecting discussion comments: {len(comment_rows)} gathered so far across {comment_threads_seen} threads" + ) + for comment in 
client.iter_issue_comments_for_number( + options.repo.owner, + options.repo.name, + int(item["number"]), + since=effective_since, + limit=remaining, + ): + parent_number = issue_url_to_number(comment.get("issue_url")) + kind = issue_number_to_kind.get(parent_number, "issue_or_pr") + comment_rows.append( + normalize_comment( + repo_slug, comment, kind, parent_number, snapshot_id, extracted_at + ) + ) + remaining = _remaining_limit(options.max_issue_comments, len(comment_rows)) + if remaining == 0: + break + comments_done = True + _write_checkpoint( + options=options, + repo_slug=repo_slug, + snapshot_id=snapshot_id, + snapshot_dir=snapshot_dir, + effective_since=effective_since, + crawl_started_at=crawl_started_at, + extracted_at=extracted_at, + previous_snapshot_dir=previous_snapshot_dir, + merge_with_previous=merge_with_previous, + phase="comments_complete", + comments_done=comments_done, + issue_rows=issue_rows, + comment_rows=comment_rows, + pr_rows=pr_rows, + review_rows=review_rows, + review_comment_rows=review_comment_rows, + pr_file_rows=pr_file_rows, + pr_diff_rows=pr_diff_rows, + timeline_rows=timeline_rows, + completed_issue_timeline_numbers=completed_issue_timeline_numbers, + ) + _log(f"Collected {len(comment_rows)} discussion comments") + + # Hydrate PR-owned detail tables: reviews, review comments, files, diffs, timelines. + completed_pr_numbers = {int(row["number"]) for row in pr_rows if row.get("number") is not None} + if completed_pr_numbers: + _log(f"Reusing hydrated data for {len(completed_pr_numbers)} pull requests from checkpoint") + + total_prs = len(pr_stubs) + for pr_stub in pr_stubs: + number = int(pr_stub["number"]) + if number in completed_pr_numbers: + continue + current_pr_index = len(completed_pr_numbers) + 1 + if current_pr_index == 1 or current_pr_index % 10 == 0 or current_pr_index == total_prs: + _log(f"Hydrating pull requests: {current_pr_index}/{total_prs} (current #{number})") + pr_detail = client.get_pull_request(options.repo.owner, options.repo.name, number) + pr_rows.append( + normalize_pull_request(repo_slug, pr_stub, pr_detail, snapshot_id, extracted_at) + ) + for review in client.iter_pull_reviews( + options.repo.owner, + options.repo.name, + number, + limit=options.max_reviews_per_pr, + ): + review_rows.append( + normalize_review(repo_slug, number, review, snapshot_id, extracted_at) + ) + for review_comment in client.iter_pull_review_comments( + options.repo.owner, + options.repo.name, + number, + limit=options.max_review_comments_per_pr, + ): + review_comment_rows.append( + normalize_review_comment( + repo_slug, number, review_comment, snapshot_id, extracted_at + ) + ) + for pr_file in client.iter_pull_files(options.repo.owner, options.repo.name, number): + pr_file_rows.append( + normalize_pr_file(repo_slug, number, pr_file, snapshot_id, extracted_at) + ) + pr_diff_rows.append( + normalize_pr_diff( + repo_slug, + number, + pr_stub.get("html_url"), + pr_stub.get("url"), + client.get_pull_request_diff(options.repo.owner, options.repo.name, number), + snapshot_id, + extracted_at, + ) + ) + if options.fetch_timeline: + for event in client.iter_issue_timeline(options.repo.owner, options.repo.name, number): + timeline_rows.append( + normalize_timeline_event( + repo_slug, number, "pull_request", event, snapshot_id, extracted_at + ) + ) + completed_pr_numbers.add(number) + if ( + len(completed_pr_numbers) % CHECKPOINT_PR_INTERVAL == 0 + or len(completed_pr_numbers) == total_prs + ): + _log(f"Checkpointing pull request hydration at 
{len(completed_pr_numbers)}/{total_prs}") + _write_checkpoint( + options=options, + repo_slug=repo_slug, + snapshot_id=snapshot_id, + snapshot_dir=snapshot_dir, + effective_since=effective_since, + crawl_started_at=crawl_started_at, + extracted_at=extracted_at, + previous_snapshot_dir=previous_snapshot_dir, + merge_with_previous=merge_with_previous, + phase="hydrating_pull_requests", + comments_done=comments_done, + issue_rows=issue_rows, + comment_rows=comment_rows, + pr_rows=pr_rows, + review_rows=review_rows, + review_comment_rows=review_comment_rows, + pr_file_rows=pr_file_rows, + pr_diff_rows=pr_diff_rows, + timeline_rows=timeline_rows, + completed_issue_timeline_numbers=completed_issue_timeline_numbers, + ) + _log( + f"Hydrated {len(pr_rows)} pull requests, {len(review_rows)} reviews, " + f"{len(review_comment_rows)} review comments, {len(pr_file_rows)} PR files, " + f"and {len(pr_diff_rows)} PR diffs" + ) + + # Fetch issue timelines after PR hydration so checkpoints can resume either phase. + if options.fetch_timeline: + _log(f"Fetching timeline events for {len(issues)} issues") + if completed_issue_timeline_numbers: + _log(f"Reusing timeline checkpoints for {len(completed_issue_timeline_numbers)} issues") + for issue in issues: + number = int(issue["number"]) + if number in completed_issue_timeline_numbers: + continue + for event in client.iter_issue_timeline(options.repo.owner, options.repo.name, number): + timeline_rows.append( + normalize_timeline_event( + repo_slug, number, "issue", event, snapshot_id, extracted_at + ) + ) + completed_issue_timeline_numbers.add(number) + if len( + completed_issue_timeline_numbers + ) % CHECKPOINT_ISSUE_TIMELINE_INTERVAL == 0 or len( + completed_issue_timeline_numbers + ) == len(issues): + _log( + f"Checkpointing issue timelines at {len(completed_issue_timeline_numbers)}/{len(issues)} issues" + ) + _write_checkpoint( + options=options, + repo_slug=repo_slug, + snapshot_id=snapshot_id, + snapshot_dir=snapshot_dir, + effective_since=effective_since, + crawl_started_at=crawl_started_at, + extracted_at=extracted_at, + previous_snapshot_dir=previous_snapshot_dir, + merge_with_previous=merge_with_previous, + phase="fetching_issue_timelines", + comments_done=comments_done, + issue_rows=issue_rows, + comment_rows=comment_rows, + pr_rows=pr_rows, + review_rows=review_rows, + review_comment_rows=review_comment_rows, + pr_file_rows=pr_file_rows, + pr_diff_rows=pr_diff_rows, + timeline_rows=timeline_rows, + completed_issue_timeline_numbers=completed_issue_timeline_numbers, + ) + _log(f"Collected {len(timeline_rows)} timeline events") + + # Derive link rows, then optionally merge this delta into the previous full snapshot. 
+ _log("Building derived link rows") + link_rows: list[dict[str, Any]] = [] + for issue_row in issue_rows: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=options.repo.owner, + repo_name=options.repo.name, + source_type="issue", + source_number=issue_row["number"], + source_id=issue_row["github_id"], + body=issue_row["body"], + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for pr_row in pr_rows: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=options.repo.owner, + repo_name=options.repo.name, + source_type="pull_request", + source_number=pr_row["number"], + source_id=pr_row["github_id"], + body=pr_row["body"], + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for comment_row in comment_rows: + parent_number = comment_row.get("parent_number") + if parent_number is None: + continue + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=options.repo.owner, + repo_name=options.repo.name, + source_type="comment", + source_number=parent_number, + source_id=comment_row["github_id"], + body=comment_row["body"], + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for review_row in review_rows: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=options.repo.owner, + repo_name=options.repo.name, + source_type="review", + source_number=review_row["pull_request_number"], + source_id=review_row["github_id"], + body=review_row["body"], + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for review_comment_row in review_comment_rows: + link_rows.extend( + build_text_link_rows( + repo=repo_slug, + owner=options.repo.owner, + repo_name=options.repo.name, + source_type="review_comment", + source_number=review_comment_row["pull_request_number"], + source_id=review_comment_row["github_id"], + body=review_comment_row["body"], + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + link_rows.extend( + build_pr_duplicate_candidate_rows( + repo=repo_slug, + pull_requests=pr_rows, + link_rows=link_rows, + snapshot_id=snapshot_id, + extracted_at=extracted_at, + ) + ) + for event in timeline_rows: + if event.get("source_issue_number"): + link_rows.append( + { + "repo": repo_slug, + "source_type": event["parent_kind"], + "source_number": event["parent_number"], + "source_github_id": None, + "target_owner": options.repo.owner, + "target_repo": options.repo.name, + "target_number": event["source_issue_number"], + "link_type": f"timeline:{event['event']}", + "link_origin": "timeline", + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + } + ) + _log(f"Built {len(link_rows)} link rows") + + delta_tables = { + "issues": issue_rows, + "pull_requests": pr_rows, + "comments": comment_rows, + "reviews": review_rows, + "review_comments": review_comment_rows, + "pr_files": pr_file_rows, + "pr_diffs": pr_diff_rows, + "links": link_rows, + "events": timeline_rows, + } + final_tables = dict(delta_tables) + + if merge_with_previous: + _log("Loading previous snapshot tables for merge") + for table_name, delta_rows in delta_tables.items(): + previous_rows = _load_previous_rows(previous_snapshot_dir, table_name) + final_tables[table_name] = _merge_rows(table_name, previous_rows, delta_rows) + _log("Merged incremental delta into cumulative snapshot") + + manifest = { + "repo": repo_slug, + "snapshot_id": snapshot_id, + "crawl_started_at": crawl_started_at, + "extracted_at": extracted_at, + "watermark": { + "effective_since": effective_since, + "next_since": crawl_started_at, + 
"resume_enabled": options.resume, + "resumed_from_checkpoint": bool(checkpoint), + "merge_with_previous": merge_with_previous, + "previous_snapshot_dir": str(previous_snapshot_dir) if previous_snapshot_dir else None, + }, + "options": { + "since": options.since, + "effective_since": effective_since, + "http_timeout": options.http_timeout, + "http_max_retries": options.http_max_retries, + "max_issues": options.max_issues, + "max_prs": options.max_prs, + "max_issue_comments": options.max_issue_comments, + "max_reviews_per_pr": options.max_reviews_per_pr, + "max_review_comments_per_pr": options.max_review_comments_per_pr, + "issue_max_age_days": options.issue_max_age_days, + "pr_max_age_days": options.pr_max_age_days, + "fetch_timeline": options.fetch_timeline, + "new_contributor_report": options.new_contributor_report, + "new_contributor_window_days": options.new_contributor_window_days, + "new_contributor_max_authors": options.new_contributor_max_authors, + }, + "delta_counts": { + "issue_stubs": len(issue_stubs), + "issues": len(issue_rows), + "pull_requests": len(pr_rows), + "comments": len(comment_rows), + "reviews": len(review_rows), + "review_comments": len(review_comment_rows), + "pr_files": len(pr_file_rows), + "pr_diffs": len(pr_diff_rows), + "timeline_events": len(timeline_rows), + "links": len(link_rows), + }, + "counts": { + "issues": len(final_tables["issues"]), + "pull_requests": len(final_tables["pull_requests"]), + "comments": len(final_tables["comments"]), + "reviews": len(final_tables["reviews"]), + "review_comments": len(final_tables["review_comments"]), + "pr_files": len(final_tables["pr_files"]), + "pr_diffs": len(final_tables["pr_diffs"]), + "timeline_events": len(final_tables["events"]), + "links": len(final_tables["links"]), + }, + } + + # Write the final snapshot, derived viewer tables, manifest, and optional publish artifacts. 
+ _log("Writing Parquet snapshot files") + write_parquet(final_tables["issues"], snapshot_dir / "issues.parquet", "issues") + write_parquet( + final_tables["pull_requests"], snapshot_dir / "pull_requests.parquet", "pull_requests" + ) + write_parquet(final_tables["comments"], snapshot_dir / "comments.parquet", "comments") + issue_comment_rows, pr_comment_rows = _viewer_comment_rows( + final_tables["comments"], + final_tables["pull_requests"], + ) + write_parquet(issue_comment_rows, snapshot_dir / "issue_comments.parquet", "comments") + write_parquet(pr_comment_rows, snapshot_dir / "pr_comments.parquet", "comments") + write_parquet(final_tables["reviews"], snapshot_dir / "reviews.parquet", "reviews") + write_parquet(final_tables["pr_files"], snapshot_dir / "pr_files.parquet", "pr_files") + write_parquet(final_tables["pr_diffs"], snapshot_dir / "pr_diffs.parquet", "pr_diffs") + write_parquet( + final_tables["review_comments"], snapshot_dir / "review_comments.parquet", "review_comments" + ) + write_parquet(final_tables["links"], snapshot_dir / "links.parquet", "links") + write_parquet(final_tables["events"], snapshot_dir / "events.parquet", "events") + write_json(manifest, snapshot_dir / "manifest.json") + generated_new_contributor_report = False + if options.new_contributor_report: + _log("Generating new contributor dataset/report artifacts") + run_new_contributor_report( + NewContributorReportOptions( + snapshot_dir=snapshot_dir, + output_dir=options.output_dir, + output=None, + json_output=None, + hf_repo_id=None, + hf_revision=None, + hf_materialize_dir=None, + window_days=options.new_contributor_window_days, + max_authors=options.new_contributor_max_authors, + ) + ) + generated_new_contributor_report = True + new_contributor_rows = read_parquet_rows(snapshot_dir / "new_contributors.parquet") + manifest["counts"]["new_contributors"] = len(new_contributor_rows) + manifest["artifacts"] = { + "new_contributors_parquet": "new_contributors.parquet", + "new_contributors_json": "new-contributors-report.json", + "new_contributors_markdown": "new-contributors-report.md", + } + write_json(manifest, snapshot_dir / "manifest.json") + write_text( + _dataset_card( + repo_slug, + snapshot_id, + manifest, + include_new_contributors=generated_new_contributor_report, + ), + snapshot_dir / "README.md", + ) + _log("Wrote manifest and dataset card") + + latest = _latest_snapshot_pointer_path(options.output_dir) + write_json( + { + "repo": repo_slug, + "latest_snapshot_id": snapshot_id, + "snapshot_dir": str(snapshot_dir), + "manifest_path": str(snapshot_dir / "manifest.json"), + "next_since": crawl_started_at, + }, + latest, + ) + _log(f"Updated latest snapshot pointer: {latest}") + + watermark_payload = { + "repo": repo_slug, + "last_successful_snapshot_id": snapshot_id, + "snapshot_dir": str(snapshot_dir), + "effective_since": effective_since, + "next_since": crawl_started_at, + "updated_at": extracted_at, + } + write_json(watermark_payload, _watermark_path(options.output_dir)) + _log(f"Updated watermark state: {_watermark_path(options.output_dir)}") + + _clear_checkpoint(options.output_dir, snapshot_dir) + _log(f"Snapshot complete: {snapshot_dir}") + return snapshot_dir diff --git a/src/slop_farmer/app/pr_search.py b/src/slop_farmer/app/pr_search.py new file mode 100644 index 0000000000000000000000000000000000000000..d802957f1268b69342ff985e2031f298dbd3620a --- /dev/null +++ b/src/slop_farmer/app/pr_search.py @@ -0,0 +1,321 @@ +from __future__ import annotations + +from collections.abc import Mapping +from 
typing import Any + +from slop_farmer.reports import pr_search_service + +run_pr_search_refresh = pr_search_service.run_pr_search_refresh +get_pr_search_status = pr_search_service.get_pr_search_status +get_pr_search_similar = pr_search_service.get_pr_search_similar +get_pr_search_similar_lookup = pr_search_service.get_pr_search_similar_lookup +get_pr_search_candidate_clusters = pr_search_service.get_pr_search_candidate_clusters +get_pr_search_contributor = pr_search_service.get_pr_search_contributor +get_pr_search_contributor_pulls = pr_search_service.get_pr_search_contributor_pulls +get_pr_search_clusters = pr_search_service.get_pr_search_clusters +list_pr_search_clusters = pr_search_service.list_pr_search_clusters +get_pr_search_cluster = pr_search_service.get_pr_search_cluster +get_pr_search_pull_contributor = pr_search_service.get_pr_search_pull_contributor +explain_pr_search_pair = pr_search_service.explain_pr_search_pair +probe_pr_search_live = pr_search_service.probe_pr_search_live +probe_pr_search_github = pr_search_service.probe_pr_search_github +resolve_pr_search_db_path = pr_search_service.resolve_pr_search_db_path + + +def format_pr_search_status(result: Mapping[str, Any]) -> str: + counts = result["row_counts"] + return "\n".join( + [ + f"Repo: {result['repo']}", + f"Active run: {result['id']}", + f"Snapshot: {result['snapshot_id']}", + f"Source: {result['source_type']}", + f"Finished: {result.get('finished_at') or 'running'}", + ( + "Rows: " + f"documents={counts['documents']} " + f"contributors={counts.get('contributors', 0)} " + f"features={counts['features']} " + f"neighbors={counts['neighbors']} " + f"clusters={counts['clusters']} " + f"candidates={counts['cluster_candidates']}" + ), + ] + ) + + +def format_pr_search_similar(result: Mapping[str, Any]) -> str: + query = result.get("query") or {} + mode_used = str(query.get("mode_used") or "indexed") + source = str(query.get("source") or "active_index") + lines = [ + f"PR #{result['pr']['pr_number']}: {result['pr']['title']}", + "", + f"Active snapshot: {result['snapshot_id']}", + f"Lookup: {mode_used} via {source}", + f"Matches: {result.get('similar_count', len(result['similar_prs']))}", + "", + ] + if not result["similar_prs"]: + lines.append("No similar PRs found in the active run.") + return "\n".join(lines) + for index, row in enumerate(result["similar_prs"], start=1): + lines.append(f"{index}. PR #{row['neighbor_pr_number']} score={row['similarity']:.2f}") + lines.append( + " " + f"content={row['content_similarity']:.2f} " + f"size={row['size_similarity']:.2f} " + f"breadth={row['breadth_similarity']:.2f} " + f"concentration={row['concentration_similarity']:.2f}" + ) + if row["shared_filenames"]: + lines.append(f" shared files: {', '.join(row['shared_filenames'][:5])}") + elif row["shared_directories"]: + lines.append(f" shared directories: {', '.join(row['shared_directories'][:5])}") + if row["cluster_ids"]: + lines.append(f" cluster: {row['cluster_ids'][0]}") + return "\n".join(lines) + + +def format_pr_search_candidate_clusters(result: Mapping[str, Any]) -> str: + lines = [ + f"PR #{result['pr']['pr_number']}: candidate scope clusters", + "", + ] + if not result["candidate_clusters"]: + lines.append("No candidate clusters found in the active run.") + return "\n".join(lines) + for index, row in enumerate(result["candidate_clusters"], start=1): + lines.append( + f"{index}. 
{row['cluster_id']} score={row['candidate_score']:.2f} " + f"assigned={'yes' if row['assigned'] else 'no'}" + ) + lines.append(f" representative: PR #{row['representative_pr_number']}") + matched = row.get("matched_member_pr_numbers") or [] + if matched: + lines.append(f" matched members: {', '.join(f'#{number}' for number in matched)}") + if row.get("reason"): + lines.append(f" reason: {row['reason']}") + return "\n".join(lines) + + +def format_pr_search_clusters(result: Mapping[str, Any]) -> str: + query = result.get("query") or {} + mode_used = str(query.get("mode_used") or "indexed") + source = str(query.get("source") or "active_index") + lines = [ + f"PR #{result['pr']['pr_number']}: cluster context", + "", + f"Lookup: {mode_used} via {source}", + f"Assigned: {result.get('assigned_cluster_count', len(result.get('assigned_clusters') or []))}", + f"Candidates: {result.get('candidate_cluster_count', len(result.get('candidate_clusters') or []))}", + "", + "Assigned clusters:", + ] + assigned_clusters = result.get("assigned_clusters") or [] + if not assigned_clusters: + lines.append("- none") + else: + for cluster in assigned_clusters: + lines.append( + f"- {cluster['cluster_id']} representative=PR #{cluster['representative_pr_number']} " + f"size={cluster['cluster_size']}" + ) + if cluster.get("summary"): + lines.append(f" {cluster['summary']}") + lines.extend(["", "Candidate clusters:"]) + candidate_clusters = result.get("candidate_clusters") or [] + if not candidate_clusters: + lines.append("- none") + return "\n".join(lines) + for index, row in enumerate(candidate_clusters, start=1): + lines.append( + f"{index}. {row['cluster_id']} score={row['candidate_score']:.2f} " + f"assigned={'yes' if row['assigned'] else 'no'}" + ) + lines.append(f" representative: PR #{row['representative_pr_number']}") + matched = row.get("matched_member_pr_numbers") or [] + if matched: + lines.append(f" matched members: {', '.join(f'#{number}' for number in matched)}") + if row.get("reason"): + lines.append(f" reason: {row['reason']}") + return "\n".join(lines) + + +def format_pr_search_cluster(result: Mapping[str, Any]) -> str: + cluster = result["cluster"] + lines = [ + f"Cluster {cluster['cluster_id']}", + f"Representative PR: #{cluster['representative_pr_number']}", + f"Members: {result.get('member_count', len(result['members']))}", + f"Average similarity: {cluster['average_similarity']:.2f}", + cluster["summary"], + "", + "Members:", + ] + for member in result["members"]: + suffix = " (representative)" if member["member_role"] == "representative" else "" + title = member.get("title") or "" + lines.append(f"- PR #{member['pr_number']}{suffix}: {title}") + return "\n".join(lines) + + +def format_pr_search_cluster_list(result: Mapping[str, Any]) -> str: + lines = [ + f"Repo: {result['repo']}", + f"Active snapshot: {result['snapshot_id']}", + f"Clusters returned: {result.get('cluster_count', len(result.get('clusters') or []))}", + "", + "Clusters:", + ] + clusters = result.get("clusters") or [] + if not clusters: + lines.append("- none") + return "\n".join(lines) + for index, cluster in enumerate(clusters, start=1): + lines.append( + f"{cluster.get('rank', index)}. 
{cluster['cluster_id']} representative=PR #{cluster['representative_pr_number']} " + f"size={cluster['cluster_size']} avg={cluster['average_similarity']:.2f}" + ) + if cluster.get("representative_title"): + lines.append(f" {cluster['representative_title']}") + if cluster.get("summary"): + lines.append(f" {cluster['summary']}") + return "\n".join(lines) + + +def format_pr_search_pair(result: Mapping[str, Any]) -> str: + pair = result["pair"] + lines = [ + f"PR pair #{result['left_pr']['pr_number']} vs #{result['right_pr']['pr_number']}", + f"Materialized: {'yes' if result['materialized'] else 'no'}", + ( + "Scores: " + f"similarity={pair['similarity']:.2f} " + f"content={pair['content_similarity']:.2f} " + f"size={pair['size_similarity']:.2f} " + f"breadth={pair['breadth_similarity']:.2f} " + f"concentration={pair['concentration_similarity']:.2f}" + ), + ] + if pair["shared_filenames"]: + lines.append(f"Shared files: {', '.join(pair['shared_filenames'][:5])}") + if pair["shared_directories"]: + lines.append(f"Shared directories: {', '.join(pair['shared_directories'][:5])}") + if result["shared_cluster_ids"]: + lines.append(f"Shared clusters: {', '.join(result['shared_cluster_ids'])}") + return "\n".join(lines) + + +def format_pr_search_probe(result: Mapping[str, Any]) -> str: + lines = [ + f"GitHub probe PR #{result['probe_pr']['pr_number']}: {result['probe_pr']['title']}", + "", + f"Compared against active snapshot: {result['snapshot_id']}", + "", + "Similar PRs:", + ] + if not result["similar_prs"]: + lines.append("- none above the current similarity threshold") + else: + for index, row in enumerate(result["similar_prs"], start=1): + lines.append(f"{index}. PR #{row['neighbor_pr_number']} score={row['similarity']:.2f}") + lines.append( + " " + f"content={row['content_similarity']:.2f} " + f"size={row['size_similarity']:.2f} " + f"breadth={row['breadth_similarity']:.2f} " + f"concentration={row['concentration_similarity']:.2f}" + ) + if row["shared_filenames"]: + lines.append(f" shared files: {', '.join(row['shared_filenames'][:5])}") + elif row["shared_directories"]: + lines.append(f" shared directories: {', '.join(row['shared_directories'][:5])}") + if row["cluster_ids"]: + lines.append(f" cluster: {row['cluster_ids'][0]}") + lines.extend(["", "Candidate clusters:"]) + if not result["candidate_clusters"]: + lines.append("- none") + else: + for index, row in enumerate(result["candidate_clusters"], start=1): + lines.append( + f"{index}. 
{row['cluster_id']} score={row['candidate_score']:.2f} " + f"assigned={'yes' if row['assigned'] else 'no'}" + ) + lines.append(f" representative: PR #{row['representative_pr_number']}") + matched = row.get("matched_member_pr_numbers") or [] + if matched: + lines.append(f" matched members: {', '.join(f'#{number}' for number in matched)}") + if row.get("reason"): + lines.append(f" reason: {row['reason']}") + return "\n".join(lines) + + +def format_pr_search_contributor(result: Mapping[str, Any]) -> str: + contributor = result["contributor"] + lines = [ + f"Contributor {contributor['author_login']}", + f"Repo: {result['repo']}", + f"Snapshot: {result['snapshot_id']}", + f"Name: {contributor.get('name') or '-'}", + f"Profile: {contributor.get('profile_url') or '-'}", + f"Association: {contributor.get('repo_association') or '-'}", + f"First seen in snapshot: {'yes' if contributor.get('first_seen_in_snapshot') else 'no'}", + ( + "Scores: " + f"follow-through={contributor.get('follow_through_score') or '-'} " + f"breadth={contributor.get('breadth_score') or '-'} " + f"risk={contributor.get('automation_risk_signal') or '-'}" + ), + f"Heuristic: {contributor.get('heuristic_note') or '-'}", + f"Public orgs: {', '.join(contributor.get('public_orgs') or []) or '-'}", + "", + "Recent indexed PRs:", + ] + pulls = result.get("pulls") or [] + if not pulls: + lines.append("- none") + return "\n".join(lines) + for row in pulls: + lines.append( + f"- PR #{row['pr_number']}: {row.get('title') or ''} " + f"[state={row.get('state') or '-'} merged={'yes' if row.get('merged') else 'no'}]" + ) + return "\n".join(lines) + + +def format_pr_search_contributor_pulls(result: Mapping[str, Any]) -> str: + contributor = result["contributor"] + lines = [ + f"Contributor PRs: {contributor['author_login']}", + f"Repo: {result['repo']}", + f"Snapshot: {result['snapshot_id']}", + f"Pull requests: {result.get('pull_count', len(result.get('pulls') or []))}", + "", + ] + pulls = result.get("pulls") or [] + if not pulls: + lines.append("No indexed PRs found for that contributor.") + return "\n".join(lines) + for row in pulls: + lines.append( + f"- PR #{row['pr_number']}: {row.get('title') or ''} " + f"(updated={row.get('updated_at') or '-'}, state={row.get('state') or '-'})" + ) + return "\n".join(lines) + + +def format_pr_search_pull_contributor(result: Mapping[str, Any]) -> str: + pr = result["pr"] + contributor = result["contributor"] + return "\n".join( + [ + f"PR #{pr['pr_number']}: {pr.get('title') or ''}", + f"Author: {contributor['author_login']}", + f"Risk: {contributor.get('automation_risk_signal') or '-'}", + f"Follow-through: {contributor.get('follow_through_score') or '-'}", + f"Breadth: {contributor.get('breadth_score') or '-'}", + f"Heuristic: {contributor.get('heuristic_note') or '-'}", + f"Profile: {contributor.get('profile_url') or '-'}", + ] + ) diff --git a/src/slop_farmer/app/pr_search_api.py b/src/slop_farmer/app/pr_search_api.py new file mode 100644 index 0000000000000000000000000000000000000000..6ee5c150bdfb884995d3ee3725d5bed2ea972a08 --- /dev/null +++ b/src/slop_farmer/app/pr_search_api.py @@ -0,0 +1,766 @@ +from __future__ import annotations + +import os +from contextlib import asynccontextmanager +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Literal + +from fastapi import FastAPI, HTTPException, Request +from fastapi.responses import JSONResponse + +from slop_farmer.config import PrSearchRefreshOptions +from slop_farmer.data.ghreplica_api import 
GhReplicaProbeUnavailableError, GhrProbeClient +from slop_farmer.data.snapshot_materialize import materialize_hf_dataset_snapshot +from slop_farmer.data.snapshot_paths import ( + CURRENT_ANALYSIS_MANIFEST_PATH, + default_hf_materialize_dir, +) +from slop_farmer.reports.analysis_service import ( + get_analysis_best, + get_analysis_meta_bug, + get_analysis_status, + get_pr_analysis, + list_analysis_duplicate_prs, + list_analysis_meta_bugs, +) +from slop_farmer.reports.pr_search_service import ( + get_pr_search_cluster, + get_pr_search_clusters, + get_pr_search_contributor_pulls, + get_pr_search_pull_contributor, + get_pr_search_similar_lookup, + get_pr_search_status, + list_pr_search_clusters, + run_pr_search_refresh, +) +from slop_farmer.reports.read_views import ( + check_issue_cluster_membership, + get_contributor, + get_contributor_risk, + get_contributor_status, + get_issue_best, + get_issue_cluster, + get_issue_cluster_status, + get_issue_clusters_for_pr, + get_snapshot_surfaces, + list_contributors, + list_issue_clusters, + list_issue_duplicate_prs, +) + + +@dataclass(slots=True) +class PrSearchApiSettings: + default_repo: str | None + index_path: Path + output_dir: Path + snapshot_dir: Path | None = None + hf_repo_id: str | None = None + hf_revision: str | None = None + hf_materialize_dir: Path | None = None + ghr_base_url: str | None = None + http_timeout: int = 180 + http_max_retries: int = 5 + refresh_if_missing: bool = False + rebuild_on_start: bool = False + include_drafts: bool = False + include_closed: bool = False + similar_limit_default: int = 10 + similar_limit_max: int = 50 + candidate_limit_default: int = 5 + candidate_limit_max: int = 20 + cluster_list_limit_default: int = 50 + cluster_list_limit_max: int = 200 + issue_list_limit_default: int = 50 + issue_list_limit_max: int = 200 + contributor_list_limit_default: int = 50 + contributor_list_limit_max: int = 200 + probe_limit_default: int = 10 + probe_limit_max: int = 25 + + @classmethod + def from_env(cls) -> PrSearchApiSettings: + output_dir = Path(os.environ.get("OUTPUT_DIR", "data")).resolve() + index_path = Path( + os.environ.get("INDEX_PATH", str(output_dir / "state" / "pr-search.duckdb")) + ).resolve() + snapshot_dir = _env_path("SNAPSHOT_DIR") + hf_materialize_dir = _env_path("HF_MATERIALIZE_DIR") + return cls( + default_repo=os.environ.get("DEFAULT_REPO"), + index_path=index_path, + output_dir=output_dir, + snapshot_dir=snapshot_dir, + hf_repo_id=os.environ.get("HF_REPO_ID"), + hf_revision=os.environ.get("HF_REVISION"), + hf_materialize_dir=hf_materialize_dir, + ghr_base_url=os.environ.get("GHR_BASE_URL"), + http_timeout=_env_int("HTTP_TIMEOUT", 180), + http_max_retries=_env_int("HTTP_MAX_RETRIES", 5), + refresh_if_missing=_env_bool("REFRESH_IF_MISSING", False), + rebuild_on_start=_env_bool("REBUILD_ON_START", False), + include_drafts=_env_bool("INCLUDE_DRAFTS", False), + include_closed=_env_bool("INCLUDE_CLOSED", False), + similar_limit_default=_env_int("SIMILAR_LIMIT_DEFAULT", 10), + similar_limit_max=_env_int("SIMILAR_LIMIT_MAX", 50), + candidate_limit_default=_env_int("CANDIDATE_LIMIT_DEFAULT", 5), + candidate_limit_max=_env_int("CANDIDATE_LIMIT_MAX", 20), + cluster_list_limit_default=_env_int("CLUSTER_LIST_LIMIT_DEFAULT", 50), + cluster_list_limit_max=_env_int("CLUSTER_LIST_LIMIT_MAX", 200), + issue_list_limit_default=_env_int("ISSUE_LIST_LIMIT_DEFAULT", 50), + issue_list_limit_max=_env_int("ISSUE_LIST_LIMIT_MAX", 200), + contributor_list_limit_default=_env_int("CONTRIBUTOR_LIST_LIMIT_DEFAULT", 50), + 
contributor_list_limit_max=_env_int("CONTRIBUTOR_LIST_LIMIT_MAX", 200), + probe_limit_default=_env_int("PROBE_LIMIT_DEFAULT", 10), + probe_limit_max=_env_int("PROBE_LIMIT_MAX", 25), + ) + + +def create_app(settings: PrSearchApiSettings | None = None) -> FastAPI: + api_settings = settings or PrSearchApiSettings.from_env() + + @asynccontextmanager + async def lifespan(app: FastAPI): + app.state.settings = api_settings + app.state.ready = False + app.state.startup_error = None + try: + _bootstrap_snapshot_assets(api_settings) + _bootstrap_index(api_settings) + app.state.ready = _is_ready(api_settings) + except Exception as exc: + app.state.startup_error = str(exc) + yield + + app = FastAPI(title="slop PR search API", version="0.1.1", lifespan=lifespan) + + @app.exception_handler(ValueError) + async def handle_value_error(_request: Request, exc: ValueError) -> JSONResponse: + status_code = 404 if _looks_not_found(exc) else 400 + return JSONResponse({"detail": str(exc)}, status_code=status_code) + + @app.exception_handler(GhReplicaProbeUnavailableError) + async def handle_probe_unavailable( + _request: Request, exc: GhReplicaProbeUnavailableError + ) -> JSONResponse: + return JSONResponse({"detail": str(exc)}, status_code=exc.status_code) + + @app.get("/healthz") + async def healthz() -> dict[str, bool]: + return {"ok": True} + + @app.get("/readyz") + async def readyz(request: Request) -> JSONResponse: + settings = request.app.state.settings + error = request.app.state.startup_error + ready = request.app.state.ready and _is_ready(settings) + if ready: + return JSONResponse({"ok": True}) + detail = error or _readiness_detail(settings) + return JSONResponse({"ok": False, "detail": detail}, status_code=503) + + @app.get("/v1/repos/{owner}/{repo}/status") + async def repo_status(owner: str, repo: str, request: Request) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + status = get_pr_search_status(settings.index_path, repo=repo_slug) + issue_snapshot_dir = _surface_snapshot_dir(settings, repo_slug, surface="issues") + contributor_snapshot_dir = _surface_snapshot_dir( + settings, repo_slug, surface="contributors" + ) + return { + **status, + "surfaces": { + "issues": get_snapshot_surfaces(issue_snapshot_dir)["issues"], + "contributors": get_snapshot_surfaces(contributor_snapshot_dir)["contributors"], + }, + } + + @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/similar") + async def pr_similar( + owner: str, + repo: str, + number: int, + request: Request, + limit: int | None = None, + mode: Literal["auto", "indexed", "live"] = "auto", + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_pr_search_similar_lookup( + settings.index_path, + repo=repo_slug, + pr_number=number, + limit=_limit( + limit, default=settings.similar_limit_default, maximum=settings.similar_limit_max + ), + mode=mode, + client=_probe_client(settings), + ) + + @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/clusters") + async def pr_clusters( + owner: str, + repo: str, + number: int, + request: Request, + limit: int | None = None, + mode: Literal["auto", "indexed", "live"] = "auto", + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_pr_search_clusters( + settings.index_path, + repo=repo_slug, + pr_number=number, + limit=_limit( + limit, + default=settings.candidate_limit_default, + maximum=settings.candidate_limit_max, + ), + 
mode=mode, + client=_probe_client(settings), + ) + + @app.get("/v1/repos/{owner}/{repo}/clusters/{cluster_id}") + async def cluster_view( + owner: str, + repo: str, + cluster_id: str, + request: Request, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_pr_search_cluster(settings.index_path, repo=repo_slug, cluster_id=cluster_id) + + @app.get("/v1/repos/{owner}/{repo}/clusters") + async def cluster_list( + owner: str, + repo: str, + request: Request, + limit: int | None = None, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return list_pr_search_clusters( + settings.index_path, + repo=repo_slug, + limit=_limit( + limit, + default=settings.cluster_list_limit_default, + maximum=settings.cluster_list_limit_max, + ), + ) + + @app.get("/v1/repos/{owner}/{repo}/contributors/{login}/pulls") + async def contributor_pulls( + owner: str, + repo: str, + login: str, + request: Request, + limit: int | None = None, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_pr_search_contributor_pulls( + settings.index_path, + repo=repo_slug, + author_login=login, + limit=_limit( + limit, default=settings.similar_limit_default, maximum=settings.similar_limit_max + ), + ) + + @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/contributor") + async def pull_contributor( + owner: str, + repo: str, + number: int, + request: Request, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_pr_search_pull_contributor(settings.index_path, repo=repo_slug, pr_number=number) + + @app.get("/v1/repos/{owner}/{repo}/analysis/status") + async def analysis_status( + owner: str, + repo: str, + request: Request, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_analysis_status( + settings.index_path, + repo=repo_slug, + variant=variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + + @app.get("/v1/repos/{owner}/{repo}/pulls/{number}/analysis") + async def pr_analysis( + owner: str, + repo: str, + number: int, + request: Request, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_pr_analysis( + settings.index_path, + repo=repo_slug, + pr_number=number, + variant=variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + + @app.get("/v1/repos/{owner}/{repo}/analysis/meta-bugs") + async def analysis_meta_bugs( + owner: str, + repo: str, + request: Request, + limit: int | None = None, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return list_analysis_meta_bugs( + settings.index_path, + repo=repo_slug, + variant=variant, + limit=_limit( + limit, + default=settings.cluster_list_limit_default, + maximum=settings.cluster_list_limit_max, + ), + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + + 
@app.get("/v1/repos/{owner}/{repo}/analysis/meta-bugs/{cluster_id}") + async def analysis_meta_bug( + owner: str, + repo: str, + cluster_id: str, + request: Request, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_analysis_meta_bug( + settings.index_path, + repo=repo_slug, + cluster_id=cluster_id, + variant=variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + + @app.get("/v1/repos/{owner}/{repo}/analysis/duplicate-prs") + async def analysis_duplicate_prs( + owner: str, + repo: str, + request: Request, + limit: int | None = None, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return list_analysis_duplicate_prs( + settings.index_path, + repo=repo_slug, + variant=variant, + limit=_limit( + limit, + default=settings.cluster_list_limit_default, + maximum=settings.cluster_list_limit_max, + ), + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + + @app.get("/v1/repos/{owner}/{repo}/analysis/best") + async def analysis_best( + owner: str, + repo: str, + request: Request, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_analysis_best( + settings.index_path, + repo=repo_slug, + variant=variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + + @app.get("/v1/repos/{owner}/{repo}/issues/status") + async def issue_status( + owner: str, + repo: str, + request: Request, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_issue_cluster_status( + _surface_snapshot_dir(settings, repo_slug, surface="issues"), + variant=variant, + ) + + @app.get("/v1/repos/{owner}/{repo}/issues/clusters") + async def issue_clusters( + owner: str, + repo: str, + request: Request, + limit: int | None = None, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return list_issue_clusters( + _surface_snapshot_dir(settings, repo_slug, surface="issues"), + limit=_limit( + limit, + default=settings.issue_list_limit_default, + maximum=settings.issue_list_limit_max, + ), + variant=variant, + ) + + @app.get("/v1/repos/{owner}/{repo}/issues/clusters/{cluster_id}") + async def issue_cluster( + owner: str, + repo: str, + cluster_id: str, + request: Request, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_issue_cluster( + _surface_snapshot_dir(settings, repo_slug, surface="issues"), + cluster_id=cluster_id, + variant=variant, + ) + + @app.get("/v1/repos/{owner}/{repo}/issues/pulls/{number}") + async def issue_clusters_for_pr( + owner: str, + repo: str, + number: int, + request: Request, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + ) -> dict[str, Any]: + settings = request.app.state.settings 
+ repo_slug = _repo_slug(settings, owner, repo) + return get_issue_clusters_for_pr( + _surface_snapshot_dir(settings, repo_slug, surface="issues"), + pr_number=number, + variant=variant, + ) + + @app.get("/v1/repos/{owner}/{repo}/issues/pulls/{number}/membership") + async def issue_membership_for_pr( + owner: str, + repo: str, + number: int, + request: Request, + cluster_id: str | None = None, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return check_issue_cluster_membership( + _surface_snapshot_dir(settings, repo_slug, surface="issues"), + pr_number=number, + cluster_id=cluster_id, + variant=variant, + ) + + @app.get("/v1/repos/{owner}/{repo}/issues/duplicate-prs") + async def issue_duplicate_prs( + owner: str, + repo: str, + request: Request, + limit: int | None = None, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return list_issue_duplicate_prs( + _surface_snapshot_dir(settings, repo_slug, surface="issues"), + limit=_limit( + limit, + default=settings.issue_list_limit_default, + maximum=settings.issue_list_limit_max, + ), + variant=variant, + ) + + @app.get("/v1/repos/{owner}/{repo}/issues/best") + async def issue_best( + owner: str, + repo: str, + request: Request, + variant: Literal["auto", "hybrid", "deterministic"] = "auto", + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_issue_best( + _surface_snapshot_dir(settings, repo_slug, surface="issues"), + variant=variant, + ) + + @app.get("/v1/repos/{owner}/{repo}/contributors/status") + async def contributor_status( + owner: str, + repo: str, + request: Request, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_contributor_status( + _surface_snapshot_dir(settings, repo_slug, surface="contributors") + ) + + @app.get("/v1/repos/{owner}/{repo}/contributors") + async def contributors( + owner: str, + repo: str, + request: Request, + limit: int | None = None, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return list_contributors( + _surface_snapshot_dir(settings, repo_slug, surface="contributors"), + limit=_limit( + limit, + default=settings.contributor_list_limit_default, + maximum=settings.contributor_list_limit_max, + ), + ) + + @app.get("/v1/repos/{owner}/{repo}/contributors/{login}") + async def contributor( + owner: str, + repo: str, + login: str, + request: Request, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_contributor( + _surface_snapshot_dir(settings, repo_slug, surface="contributors"), + author_login=login, + ) + + @app.get("/v1/repos/{owner}/{repo}/contributors/{login}/risk") + async def contributor_risk( + owner: str, + repo: str, + login: str, + request: Request, + ) -> dict[str, Any]: + settings = request.app.state.settings + repo_slug = _repo_slug(settings, owner, repo) + return get_contributor_risk( + _surface_snapshot_dir(settings, repo_slug, surface="contributors"), + author_login=login, + ) + + return app + + +def _bootstrap_index(settings: PrSearchApiSettings) -> None: + settings.output_dir.mkdir(parents=True, exist_ok=True) + 
settings.index_path.parent.mkdir(parents=True, exist_ok=True) + if not _needs_refresh(settings): + return + if settings.snapshot_dir is None and settings.hf_repo_id is None: + return + run_pr_search_refresh( + PrSearchRefreshOptions( + snapshot_dir=settings.snapshot_dir, + output_dir=settings.output_dir, + db=settings.index_path, + hf_repo_id=settings.hf_repo_id, + hf_revision=settings.hf_revision, + hf_materialize_dir=settings.hf_materialize_dir, + include_drafts=settings.include_drafts, + include_closed=settings.include_closed, + ) + ) + + +def _bootstrap_snapshot_assets(settings: PrSearchApiSettings) -> None: + if settings.snapshot_dir is not None or settings.hf_repo_id is None: + return + materialize_dir = settings.hf_materialize_dir or default_hf_materialize_dir( + settings.output_dir, + settings.hf_repo_id, + settings.hf_revision, + ) + materialize_hf_dataset_snapshot( + repo_id=settings.hf_repo_id, + local_dir=materialize_dir, + revision=settings.hf_revision, + ) + + +def _needs_refresh(settings: PrSearchApiSettings) -> bool: + if settings.rebuild_on_start: + return True + if not settings.refresh_if_missing: + return False + return not _is_ready(settings) + + +def _is_ready(settings: PrSearchApiSettings) -> bool: + if not settings.index_path.exists(): + return False + try: + get_pr_search_status(settings.index_path, repo=settings.default_repo) + except Exception: + return False + return True + + +def _readiness_detail(settings: PrSearchApiSettings) -> str: + if not settings.index_path.exists(): + return f"index not found at {settings.index_path}" + try: + get_pr_search_status(settings.index_path, repo=settings.default_repo) + except Exception as exc: + return str(exc) + return "ready" + + +def _repo_slug(settings: PrSearchApiSettings, owner: str, repo: str) -> str: + repo_slug = f"{owner}/{repo}" + if settings.default_repo and repo_slug != settings.default_repo: + raise HTTPException( + status_code=400, + detail=f"repo {settings.default_repo} is the only configured repo for this deployment", + ) + return repo_slug + + +def _active_snapshot_dir(settings: PrSearchApiSettings, repo_slug: str) -> Path: + return _status_snapshot_dir(get_pr_search_status(settings.index_path, repo=repo_slug)) + + +def _surface_snapshot_dir( + settings: PrSearchApiSettings, + repo_slug: str, + *, + surface: Literal["issues", "contributors"], +) -> Path: + active_snapshot_dir = _active_snapshot_dir(settings, repo_slug) + if _surface_available(active_snapshot_dir, surface=surface): + return active_snapshot_dir + materialized_snapshot_dir = _materialized_snapshot_dir(settings) + if materialized_snapshot_dir is not None and _surface_available( + materialized_snapshot_dir, surface=surface + ): + return materialized_snapshot_dir + return active_snapshot_dir + + +def _status_snapshot_dir(status: dict[str, Any]) -> Path: + snapshot_dir = status.get("snapshot_dir") + if not snapshot_dir: + raise HTTPException(status_code=503, detail="active snapshot directory is unavailable") + return Path(str(snapshot_dir)) + + +def _materialized_snapshot_dir(settings: PrSearchApiSettings) -> Path | None: + if settings.hf_repo_id is None: + return None + return settings.hf_materialize_dir or default_hf_materialize_dir( + settings.output_dir, + settings.hf_repo_id, + settings.hf_revision, + ) + + +def _surface_available(snapshot_dir: Path, *, surface: Literal["issues", "contributors"]) -> bool: + if not snapshot_dir.exists(): + return False + if surface == "issues": + return (snapshot_dir / CURRENT_ANALYSIS_MANIFEST_PATH).exists() 
or any( + snapshot_dir.glob("analysis-report*.json") + ) + return (snapshot_dir / "new-contributors-report.json").exists() + + +def _limit(value: int | None, *, default: int, maximum: int) -> int: + limit = default if value is None else value + if limit < 1: + raise HTTPException(status_code=400, detail="limit must be at least 1") + if limit > maximum: + raise HTTPException(status_code=400, detail=f"limit must be at most {maximum}") + return limit + + +def _probe_client(settings: PrSearchApiSettings) -> Any: + if not settings.ghr_base_url: + return None + return GhrProbeClient( + base_url=settings.ghr_base_url, + timeout=settings.http_timeout, + max_retries=settings.http_max_retries, + ) + + +def _looks_not_found(exc: ValueError) -> bool: + message = str(exc).lower() + return ( + "not found" in message + or "analysis report was not found" in message + or "no analysis report was found" in message + or "published analysis" in message + or "materialized snapshot" in message + or "no active pr search run" in message + or "was not found in the active indexed universe" in message + ) + + +def _env_bool(name: str, default: bool) -> bool: + raw = os.environ.get(name) + if raw is None: + return default + return raw.strip().lower() in {"1", "true", "yes", "on"} + + +def _env_int(name: str, default: int) -> int: + raw = os.environ.get(name) + return default if raw is None else int(raw) + + +def _env_path(name: str) -> Path | None: + raw = os.environ.get(name) + return None if raw is None else Path(raw).resolve() + + +app = create_app() diff --git a/src/slop_farmer/app/publish_analysis.py b/src/slop_farmer/app/publish_analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..3566b2b66f3485d23dda1ac9e3360268bfc3a467 --- /dev/null +++ b/src/slop_farmer/app/publish_analysis.py @@ -0,0 +1,366 @@ +from __future__ import annotations + +import json +from collections.abc import Callable, Iterable +from dataclasses import dataclass +from datetime import UTC, datetime +from pathlib import Path +from typing import Any, Protocol, cast + +from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download + +from slop_farmer.app.save_cache import _save_analysis_cache_api +from slop_farmer.config import PublishAnalysisArtifactsOptions +from slop_farmer.data.parquet_io import read_json +from slop_farmer.data.snapshot_paths import ( + ANALYSIS_REPORT_FILENAME_BY_VARIANT, + HYBRID_ANALYSIS_REVIEWS_FILENAME, + ROOT_MANIFEST_FILENAME, + analysis_run_artifact_path, + analysis_run_manifest_path, + archived_snapshot_manifest_path, + build_archived_analysis_run_manifest, + build_current_analysis_manifest, + current_analysis_artifact_path, + resolve_snapshot_dir_from_output, +) + + +class HubApiLike(Protocol): + def create_repo( + self, + repo_id: str, + *, + repo_type: str, + private: bool, + exist_ok: bool, + ) -> None: ... + + def create_commit( + self, + repo_id: str, + operations: Iterable[CommitOperationAdd], + *, + commit_message: str, + repo_type: str, + ) -> Any: ... + + def upload_folder( + self, + *, + repo_id: str, + folder_path: Path, + path_in_repo: str, + repo_type: str, + commit_message: str, + ) -> None: ... 
+ + +@dataclass(frozen=True, slots=True) +class PublishableAnalysisArtifacts: + repo: str + snapshot_id: str + model: str | None + report_path: Path + reviews_path: Path | None + report_payload: dict[str, Any] + + +def run_publish_analysis_artifacts(options: PublishAnalysisArtifactsOptions) -> dict[str, Any]: + snapshot_dir = resolve_snapshot_dir_from_output(options.output_dir, options.snapshot_dir) + return publish_analysis_artifacts( + snapshot_dir=snapshot_dir, + analysis_input=options.analysis_input, + hf_repo_id=options.hf_repo_id, + analysis_id=options.analysis_id, + canonical=options.canonical, + save_cache=options.save_cache, + private=options.private_hf_repo, + ) + + +def publish_analysis_artifacts( + *, + snapshot_dir: Path, + analysis_input: Path | None, + hf_repo_id: str, + analysis_id: str, + canonical: bool, + private: bool, + save_cache: bool = False, + log: Callable[[str], None] | None = None, +) -> dict[str, Any]: + return _publish_analysis_artifacts_api( + cast("HubApiLike", HfApi()), + snapshot_dir=snapshot_dir, + analysis_input=analysis_input, + hf_repo_id=hf_repo_id, + analysis_id=analysis_id, + canonical=canonical, + private=private, + save_cache=save_cache, + log=log, + ) + + +def _publish_analysis_artifacts_api( + api: HubApiLike, + *, + snapshot_dir: Path, + analysis_input: Path | None = None, + hf_repo_id: str, + analysis_id: str, + canonical: bool, + private: bool, + save_cache: bool = False, + log: Callable[[str], None] | None = None, +) -> dict[str, Any]: + artifacts = _discover_publishable_analysis(snapshot_dir, analysis_input=analysis_input) + published_at = _iso_now() + channel = "canonical" if canonical else "comparison" + archived_manifest = build_archived_analysis_run_manifest( + repo=artifacts.repo, + snapshot_id=artifacts.snapshot_id, + analysis_id=analysis_id, + variant="hybrid", + channel=channel, + model=artifacts.model, + published_at=published_at, + include_hybrid_reviews=artifacts.reviews_path is not None, + ) + current_manifest = ( + build_current_analysis_manifest( + repo=artifacts.repo, + snapshot_id=artifacts.snapshot_id, + analysis_id=analysis_id, + variant="hybrid", + channel=channel, + model=artifacts.model, + published_at=published_at, + include_hybrid_reviews=artifacts.reviews_path is not None, + ) + if canonical + else None + ) + snapshot_manifest = _updated_snapshot_manifest( + snapshot_dir=snapshot_dir, + hf_repo_id=hf_repo_id, + snapshot_id=artifacts.snapshot_id, + analysis_id=analysis_id, + archived_manifest=archived_manifest, + canonical=canonical, + ) + operations = _commit_operations( + artifacts=artifacts, + analysis_id=analysis_id, + archived_manifest=archived_manifest, + current_manifest=current_manifest, + snapshot_manifest=snapshot_manifest, + ) + + if log: + log(f"Ensuring Hub dataset repo exists: {hf_repo_id}") + api.create_repo(hf_repo_id, repo_type="dataset", private=private, exist_ok=True) + if log: + log(f"Publishing analysis {analysis_id} for snapshot {artifacts.snapshot_id}") + api.create_commit( + hf_repo_id, + operations, + commit_message=f"Publish analysis {analysis_id} for snapshot {artifacts.snapshot_id}", + repo_type="dataset", + ) + cache_result = ( + _save_analysis_cache_api( + api, + snapshot_dir=snapshot_dir, + hf_repo_id=hf_repo_id, + private=private, + log=log, + ) + if save_cache + else None + ) + result: dict[str, Any] = { + "repo": artifacts.repo, + "dataset_id": hf_repo_id, + "snapshot_id": artifacts.snapshot_id, + "analysis_id": analysis_id, + "canonical": canonical, + "save_cache": save_cache, + 
"published_at": published_at, + "artifact_paths": [operation.path_in_repo for operation in operations], + } + if cache_result is not None: + result["cache"] = cache_result + if log: + log(f"Published analysis artifacts to {hf_repo_id}") + return result + + +def _discover_publishable_analysis( + snapshot_dir: Path, *, analysis_input: Path | None +) -> PublishableAnalysisArtifacts: + manifest_path = snapshot_dir / ROOT_MANIFEST_FILENAME + if not manifest_path.exists(): + raise FileNotFoundError(f"Snapshot manifest is missing: {manifest_path}") + manifest = read_json(manifest_path) + if not isinstance(manifest, dict): + raise ValueError(f"Snapshot manifest at {manifest_path} must contain a JSON object.") + snapshot_id = str(manifest.get("snapshot_id") or snapshot_dir.name).strip() + repo = str(manifest.get("repo") or "").strip() + if not repo: + raise ValueError(f"Snapshot manifest at {manifest_path} does not define repo.") + + report_path = ( + analysis_input.resolve() + if analysis_input is not None + else snapshot_dir / ANALYSIS_REPORT_FILENAME_BY_VARIANT["hybrid"] + ) + if not report_path.exists(): + raise FileNotFoundError(f"Hybrid analysis report is missing: {report_path}") + report_payload = read_json(report_path) + if not isinstance(report_payload, dict): + raise ValueError(f"Hybrid analysis report at {report_path} must contain a JSON object.") + report_snapshot_id = str(report_payload.get("snapshot_id") or snapshot_id).strip() + if report_snapshot_id != snapshot_id: + raise ValueError( + f"Hybrid analysis report snapshot_id {report_snapshot_id!r} does not match manifest snapshot_id {snapshot_id!r}." + ) + report_repo = str(report_payload.get("repo") or repo).strip() + if report_repo != repo: + raise ValueError( + f"Hybrid analysis report repo {report_repo!r} does not match manifest repo {repo!r}." 
+ ) + model = report_payload.get("model") + if model is not None: + model = str(model) + + reviews_path = report_path.with_name(f"{report_path.stem}.llm-reviews.json") + return PublishableAnalysisArtifacts( + repo=repo, + snapshot_id=snapshot_id, + model=model, + report_path=report_path, + reviews_path=reviews_path if reviews_path.exists() else None, + report_payload={str(key): value for key, value in report_payload.items()}, + ) + + +def _updated_snapshot_manifest( + *, + snapshot_dir: Path, + hf_repo_id: str, + snapshot_id: str, + analysis_id: str, + archived_manifest: dict[str, Any], + canonical: bool, +) -> dict[str, Any]: + manifest = _load_remote_snapshot_manifest(hf_repo_id, snapshot_id) or read_json( + snapshot_dir / ROOT_MANIFEST_FILENAME + ) + if not isinstance(manifest, dict): + raise ValueError("Archived snapshot manifest must contain a JSON object.") + updated = {str(key): value for key, value in manifest.items()} + published_analysis: dict[str, Any] | Any = updated.get("published_analysis") + if not isinstance(published_analysis, dict): + published_analysis = {"schema_version": 1, "runs": {}} + runs: dict[str, Any] | Any = published_analysis.get("runs") + if not isinstance(runs, dict): + runs = {} + runs[analysis_id] = { + "analysis_id": analysis_id, + "variant": archived_manifest["variant"], + "channel": archived_manifest["channel"], + "model": archived_manifest.get("model"), + "published_at": archived_manifest["published_at"], + "manifest_path": analysis_run_manifest_path(snapshot_id, analysis_id), + "artifacts": archived_manifest["artifacts"], + } + published_analysis["schema_version"] = 1 + published_analysis["runs"] = runs + if canonical: + published_analysis["canonical_analysis_id"] = analysis_id + updated["published_analysis"] = published_analysis + return updated + + +def _load_remote_snapshot_manifest(hf_repo_id: str, snapshot_id: str) -> dict[str, Any] | None: + try: + downloaded = hf_hub_download( + repo_id=hf_repo_id, + repo_type="dataset", + filename=archived_snapshot_manifest_path(snapshot_id), + ) + except Exception: + return None + payload = json.loads(Path(downloaded).read_text(encoding="utf-8")) + return payload if isinstance(payload, dict) else None + + +def _commit_operations( + *, + artifacts: PublishableAnalysisArtifacts, + analysis_id: str, + archived_manifest: dict[str, Any], + current_manifest: dict[str, Any] | None, + snapshot_manifest: dict[str, Any], +) -> list[CommitOperationAdd]: + report_filename = ANALYSIS_REPORT_FILENAME_BY_VARIANT["hybrid"] + operations = [ + CommitOperationAdd( + path_in_repo=analysis_run_artifact_path( + artifacts.snapshot_id, + analysis_id, + report_filename, + ), + path_or_fileobj=artifacts.report_path, + ), + CommitOperationAdd( + path_in_repo=analysis_run_manifest_path(artifacts.snapshot_id, analysis_id), + path_or_fileobj=_json_bytes(archived_manifest), + ), + CommitOperationAdd( + path_in_repo=archived_snapshot_manifest_path(artifacts.snapshot_id), + path_or_fileobj=_json_bytes(snapshot_manifest), + ), + ] + if artifacts.reviews_path is not None: + operations.append( + CommitOperationAdd( + path_in_repo=analysis_run_artifact_path( + artifacts.snapshot_id, + analysis_id, + HYBRID_ANALYSIS_REVIEWS_FILENAME, + ), + path_or_fileobj=artifacts.reviews_path, + ) + ) + if current_manifest is not None: + operations.extend( + [ + CommitOperationAdd( + path_in_repo=current_analysis_artifact_path(report_filename), + path_or_fileobj=artifacts.report_path, + ), + CommitOperationAdd( + 
path_in_repo=current_analysis_artifact_path(ROOT_MANIFEST_FILENAME), + path_or_fileobj=_json_bytes(current_manifest), + ), + ] + ) + if artifacts.reviews_path is not None: + operations.append( + CommitOperationAdd( + path_in_repo=current_analysis_artifact_path(HYBRID_ANALYSIS_REVIEWS_FILENAME), + path_or_fileobj=artifacts.reviews_path, + ) + ) + return operations + + +def _json_bytes(payload: dict[str, Any]) -> bytes: + return (json.dumps(payload, indent=2, sort_keys=True) + "\n").encode("utf-8") + + +def _iso_now() -> str: + return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") diff --git a/src/slop_farmer/app/publish_dataset_snapshot.py b/src/slop_farmer/app/publish_dataset_snapshot.py new file mode 100644 index 0000000000000000000000000000000000000000..df408db9f7fddcb4ef3221a0fbfb9eb2f887cd76 --- /dev/null +++ b/src/slop_farmer/app/publish_dataset_snapshot.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from collections.abc import Callable +from pathlib import Path +from typing import Protocol, cast + +from huggingface_hub import HfApi + + +class HubApiLike(Protocol): + def create_repo( + self, repo_id: str, *, repo_type: str, private: bool, exist_ok: bool + ) -> None: ... + + def upload_folder( + self, + *, + repo_id: str, + folder_path: Path, + path_in_repo: str, + repo_type: str, + commit_message: str, + ) -> None: ... + + +def publish_dataset_snapshot( + snapshot_dir: Path, + hf_repo_id: str, + *, + private: bool, + log: Callable[[str], None] | None = None, +) -> None: + _publish_dataset_snapshot_api( + cast("HubApiLike", HfApi()), + snapshot_dir, + hf_repo_id, + private, + log=log, + ) + + +def _publish_dataset_snapshot_api( + api: HubApiLike, + snapshot_dir: Path, + hf_repo_id: str, + private: bool, + log: Callable[[str], None] | None = None, +) -> None: + if log: + log(f"Ensuring Hub dataset repo exists: {hf_repo_id}") + api.create_repo(hf_repo_id, repo_type="dataset", private=private, exist_ok=True) + if log: + log(f"Uploading snapshot to Hub: {snapshot_dir}") + api.upload_folder( + repo_id=hf_repo_id, + folder_path=snapshot_dir, + path_in_repo=".", + repo_type="dataset", + commit_message=f"Add snapshot {snapshot_dir.name}", + ) + if log: + log(f"Upload finished: {hf_repo_id}") diff --git a/src/slop_farmer/app/save_cache.py b/src/slop_farmer/app/save_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..646ecb673c8bc4dba02ed883ecb015a21dcdf3b4 --- /dev/null +++ b/src/slop_farmer/app/save_cache.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from collections.abc import Callable +from pathlib import Path +from typing import Any, Protocol, cast + +from huggingface_hub import HfApi + +from slop_farmer.config import SaveCacheOptions +from slop_farmer.data.parquet_io import read_json +from slop_farmer.data.snapshot_paths import ROOT_MANIFEST_FILENAME, resolve_snapshot_dir_from_output + +ANALYSIS_STATE_DIRNAME = "analysis-state" + + +class HubApiLike(Protocol): + def create_repo( + self, + repo_id: str, + *, + repo_type: str, + private: bool, + exist_ok: bool, + ) -> None: ... + + def upload_folder( + self, + *, + repo_id: str, + folder_path: Path, + path_in_repo: str, + repo_type: str, + commit_message: str, + ) -> None: ... 
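
The `HubApiLike` protocol above is what lets the upload logic further down in this module be exercised without a Hub round trip. A minimal sketch of that idea follows; it is not part of this patch, and the stub class, repo IDs, and temporary snapshot layout are illustrative assumptions, while `_save_analysis_cache_api`, the `analysis-state/` directory name, and `manifest.json` come from this module:

```python
# Illustrative sketch only: drive _save_analysis_cache_api (defined below in
# slop_farmer/app/save_cache.py) with a stub that satisfies HubApiLike, so no
# Hugging Face Hub access happens. Repo IDs and file names here are placeholders.
import json
import tempfile
from pathlib import Path

from slop_farmer.app.save_cache import _save_analysis_cache_api


class RecordingHubApi:
    """Test double that records calls instead of talking to the Hub."""

    def __init__(self) -> None:
        self.calls: list[str] = []

    def create_repo(self, repo_id: str, *, repo_type: str, private: bool, exist_ok: bool) -> None:
        self.calls.append(f"create_repo {repo_type}:{repo_id} private={private}")

    def upload_folder(
        self, *, repo_id, folder_path, path_in_repo, repo_type, commit_message
    ) -> None:
        self.calls.append(f"upload_folder {path_in_repo} -> {repo_id}")


with tempfile.TemporaryDirectory() as tmp:
    snapshot_dir = Path(tmp)
    # Hypothetical snapshot layout: a manifest plus one cached review file.
    (snapshot_dir / "analysis-state").mkdir()
    (snapshot_dir / "analysis-state" / "reviews.json").write_text("{}", encoding="utf-8")
    (snapshot_dir / "manifest.json").write_text(
        json.dumps({"snapshot_id": "snap-0001", "repo": "huggingface/diffusers"}),
        encoding="utf-8",
    )
    result = _save_analysis_cache_api(
        RecordingHubApi(),
        snapshot_dir=snapshot_dir,
        hf_repo_id="example-user/example-dataset",  # placeholder dataset id
        private=False,
    )
    print(result["artifact_paths"])  # ['analysis-state/reviews.json']
```
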
+ + +def run_save_cache(options: SaveCacheOptions) -> dict[str, Any]: + snapshot_dir = resolve_snapshot_dir_from_output(options.output_dir, options.snapshot_dir) + return save_analysis_cache( + snapshot_dir=snapshot_dir, + hf_repo_id=options.hf_repo_id, + private=options.private_hf_repo, + ) + + +def save_analysis_cache( + *, + snapshot_dir: Path, + hf_repo_id: str, + private: bool, + log: Callable[[str], None] | None = None, +) -> dict[str, Any]: + return _save_analysis_cache_api( + cast("HubApiLike", HfApi()), + snapshot_dir=snapshot_dir, + hf_repo_id=hf_repo_id, + private=private, + log=log, + ) + + +def _save_analysis_cache_api( + api: HubApiLike, + *, + snapshot_dir: Path, + hf_repo_id: str, + private: bool, + log: Callable[[str], None] | None = None, +) -> dict[str, Any]: + cache_dir = snapshot_dir / ANALYSIS_STATE_DIRNAME + if not cache_dir.exists(): + raise FileNotFoundError(f"Analysis cache directory is missing: {cache_dir}") + if not cache_dir.is_dir(): + raise NotADirectoryError(f"Analysis cache path is not a directory: {cache_dir}") + artifact_paths = _cache_artifact_paths(cache_dir) + if not artifact_paths: + raise ValueError(f"Analysis cache directory is empty: {cache_dir}") + + manifest_path = snapshot_dir / ROOT_MANIFEST_FILENAME + manifest = read_json(manifest_path) if manifest_path.exists() else {} + if not isinstance(manifest, dict): + raise ValueError(f"Snapshot manifest at {manifest_path} must contain a JSON object.") + snapshot_id = str(manifest.get("snapshot_id") or snapshot_dir.name).strip() + repo = str(manifest.get("repo") or "").strip() + + if log: + log(f"Ensuring Hub dataset repo exists: {hf_repo_id}") + api.create_repo(hf_repo_id, repo_type="dataset", private=private, exist_ok=True) + if log: + log(f"Saving analysis cache for snapshot {snapshot_id}") + api.upload_folder( + repo_id=hf_repo_id, + folder_path=cache_dir, + path_in_repo=ANALYSIS_STATE_DIRNAME, + repo_type="dataset", + commit_message=f"Save analysis cache for snapshot {snapshot_id}", + ) + result = { + "dataset_id": hf_repo_id, + "snapshot_id": snapshot_id, + "artifact_paths": [f"{ANALYSIS_STATE_DIRNAME}/{path}" for path in artifact_paths], + } + if repo: + result["repo"] = repo + if log: + log(f"Saved analysis cache to {hf_repo_id}") + return result + + +def _cache_artifact_paths(cache_dir: Path) -> list[str]: + return sorted( + str(path.relative_to(cache_dir).as_posix()) + for path in cache_dir.rglob("*") + if path.is_file() + ) diff --git a/src/slop_farmer/app/snapshot_state.py b/src/slop_farmer/app/snapshot_state.py new file mode 100644 index 0000000000000000000000000000000000000000..491af7934b5041e4c5960d1790f78ba3e7c886ca --- /dev/null +++ b/src/slop_farmer/app/snapshot_state.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from pathlib import Path + +from slop_farmer.config import SnapshotAdoptOptions +from slop_farmer.data.parquet_io import read_json, read_parquet_rows, write_json + +__all__ = ["adopt_snapshot_for_pipeline"] + + +def adopt_snapshot_for_pipeline(options: SnapshotAdoptOptions) -> Path: + snapshot_dir = options.snapshot_dir.resolve() + manifest_path = snapshot_dir / "manifest.json" + manifest = read_json(manifest_path) + repo = str(manifest.get("repo") or "") + if not repo: + raise ValueError(f"Snapshot manifest has no repo: {manifest_path}") + snapshot_id = str(manifest.get("snapshot_id") or snapshot_dir.name) + manifest_watermark = ( + manifest.get("watermark") if isinstance(manifest.get("watermark"), dict) else {} + ) + next_since = ( + options.next_since + or 
manifest_watermark.get("next_since") + or manifest.get("crawl_started_at") + or _infer_snapshot_next_since(snapshot_dir, manifest) + or manifest.get("extracted_at") + ) + if not isinstance(next_since, str) or not next_since: + raise ValueError(f"Could not determine next_since from {manifest_path}") + + output_dir = options.output_dir.resolve() + latest_path = output_dir / "snapshots" / "latest.json" + watermark_path = output_dir / "state" / "watermark.json" + write_json( + { + "repo": repo, + "latest_snapshot_id": snapshot_id, + "snapshot_dir": str(snapshot_dir), + "manifest_path": str(manifest_path), + "next_since": next_since, + }, + latest_path, + ) + write_json( + { + "repo": repo, + "last_successful_snapshot_id": snapshot_id, + "snapshot_dir": str(snapshot_dir), + "effective_since": manifest_watermark.get("effective_since"), + "next_since": next_since, + "updated_at": manifest.get("imported_at") or manifest.get("extracted_at") or next_since, + }, + watermark_path, + ) + return latest_path + + +def _infer_snapshot_next_since(snapshot_dir: Path, manifest: dict[str, object]) -> str | None: + if manifest.get("source_type") != "hf_checkpoint_import": + return None + table_specs = ( + ("pull_requests.parquet", ("updated_at", "created_at")), + ("issues.parquet", ("updated_at", "created_at")), + ("comments.parquet", ("updated_at", "created_at")), + ("reviews.parquet", ("submitted_at",)), + ("review_comments.parquet", ("updated_at", "created_at")), + ("events.parquet", ("created_at",)), + ) + for filename, fields in table_specs: + candidates: list[str] = [] + for row in read_parquet_rows(snapshot_dir / filename): + for field in fields: + value = row.get(field) + if isinstance(value, str) and value: + candidates.append(value) + break + if candidates: + return max(candidates) + return None diff --git a/src/slop_farmer/app_config.py b/src/slop_farmer/app_config.py new file mode 100644 index 0000000000000000000000000000000000000000..c29946102a139633dba64174d4292c6baf38b1e6 --- /dev/null +++ b/src/slop_farmer/app_config.py @@ -0,0 +1,308 @@ +from __future__ import annotations + +import sys +import tomllib +from pathlib import Path +from typing import Any + +import yaml + +PATH_LIKE_DEFAULT_KEYS = { + "db", + "output-dir", + "workspace-root", + "workspace", + "pipeline-data-dir", + "web-dir", + "hf-materialize-dir", + "snapshot-dir", + "snapshot-root", + "analysis-input", + "contributors-input", + "pr-scope-input", +} + + +def _string_tuple(value: Any) -> tuple[str, ...]: + if not isinstance(value, list): + return () + return tuple(str(item) for item in value if str(item).strip()) + + +def _dict_tuple(value: Any) -> tuple[dict[str, Any], ...]: + if not isinstance(value, list): + return () + return tuple(item for item in value if isinstance(item, dict)) + + +def _bool_value(value: Any, *, field_name: str, config_path: Path) -> bool: + if isinstance(value, bool): + return value + raise ValueError(f"Expected boolean for {field_name} in config file: {config_path}") + + +def _find_project_root(start: Path) -> Path: + for directory in (start, *start.parents): + if (directory / "pyproject.toml").exists(): + return directory + return start + + +def _find_pyproject() -> Path | None: + for directory in (Path.cwd(), *Path.cwd().parents): + path = directory / "pyproject.toml" + if path.exists(): + return path + return None + + +def _pyproject_cli_defaults() -> dict[str, Any]: + path = _find_pyproject() + if path is None: + return {} + data = tomllib.loads(path.read_text(encoding="utf-8")) + tool = 
data.get("tool") + if not isinstance(tool, dict): + return {} + slop_farmer = tool.get("slop-farmer") + if not isinstance(slop_farmer, dict): + return {} + return slop_farmer + + +def _extract_command_config(raw: dict[str, Any], command: str) -> dict[str, Any]: + value = raw.get(command) + return value if isinstance(value, dict) else {} + + +def _config_base_dir(config_path: Path) -> Path: + return _find_project_root(config_path.parent.resolve()) + + +def _resolve_config_path(config_path: Path, raw: str) -> str: + path = Path(raw) + if path.is_absolute(): + return str(path) + return str((_config_base_dir(config_path) / path).resolve()) + + +def _resolve_command_paths(config_path: Path, values: dict[str, Any]) -> dict[str, Any]: + resolved: dict[str, Any] = {} + for key, value in values.items(): + if key in PATH_LIKE_DEFAULT_KEYS and isinstance(value, str) and value: + resolved[key] = _resolve_config_path(config_path, value) + else: + resolved[key] = value + return resolved + + +def _dashboard_config_defaults(config_path: Path) -> dict[str, dict[str, Any]]: + if yaml is None: + raise RuntimeError("PyYAML is required for --config support") + payload = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + if not isinstance(payload, dict): + raise ValueError(f"Expected mapping in config file: {config_path}") + + repo = payload.get("repo") + workspace_raw = payload.get("workspace") + dataset_id = payload.get("dataset_id") + dashboard = payload.get("dashboard") + analysis = payload.get("analysis") + scrape = payload.get("scrape") + pull_requests = payload.get("pull-requests") + if dashboard is None: + dashboard = {} + if analysis is None: + analysis = {} + if scrape is None: + scrape = {} + if pull_requests is None: + pull_requests = {} + if not isinstance(dashboard, dict): + raise ValueError(f"Expected dashboard mapping in config file: {config_path}") + if not isinstance(analysis, dict): + raise ValueError(f"Expected analysis mapping in config file: {config_path}") + if not isinstance(scrape, dict): + raise ValueError(f"Expected scrape mapping in config file: {config_path}") + if not isinstance(pull_requests, dict): + raise ValueError(f"Expected pull-requests mapping in config file: {config_path}") + + workspace_path = ( + Path(_resolve_config_path(config_path, workspace_raw)) + if isinstance(workspace_raw, str) and workspace_raw + else None + ) + data_dir = workspace_path / "data" if workspace_path else None + web_dir = workspace_path / "web" if workspace_path else None + dashboard_dir = web_dir / "public" / "data" if web_dir else None + dashboard_window_days = int(dashboard.get("window_days", 14)) + contributor_window_days = int(dashboard.get("contributor_window_days", dashboard_window_days)) + contributor_max_authors = int(dashboard.get("contributor_max_authors", 0)) + template_cleanup = pull_requests.get("template_cleanup") or {} + if not isinstance(template_cleanup, dict): + raise ValueError( + f"Expected pull-requests.template_cleanup mapping in config file: {config_path}" + ) + legacy_section_patterns = _string_tuple(pull_requests.get("template_strip_headings")) + legacy_line_patterns = _string_tuple(pull_requests.get("template_strip_line_patterns")) + pr_template_cleanup_mode = str( + template_cleanup.get("mode", pull_requests.get("template_cleanup_mode", "merge_defaults")) + ) + pr_template_strip_html_comments = _bool_value( + template_cleanup.get("strip_html_comments", True), + field_name="pull-requests.template_cleanup.strip_html_comments", + config_path=config_path, + ) + 
pr_template_trim_closing_reference_prefix = _bool_value( + template_cleanup.get("trim_closing_reference_prefix", True), + field_name="pull-requests.template_cleanup.trim_closing_reference_prefix", + config_path=config_path, + ) + pr_template_section_patterns = ( + _string_tuple(template_cleanup.get("section_patterns")) + legacy_section_patterns + ) + pr_template_line_patterns = ( + _string_tuple(template_cleanup.get("line_patterns")) + legacy_line_patterns + ) + cluster_suppression_rules = _dict_tuple(pull_requests.get("cluster_suppression_rules")) + tags = dashboard.get("tags") + if isinstance(tags, list): + tags_value = ",".join(str(tag).strip() for tag in tags if str(tag).strip()) + else: + tags_value = tags + + defaults: dict[str, dict[str, Any]] = { + "scrape": { + "repo": repo, + "output-dir": str(data_dir) if data_dir else None, + "hf-repo-id": dataset_id, + "new-contributor-window-days": contributor_window_days, + "new-contributor-max-authors": contributor_max_authors, + }, + "refresh-dataset": { + "repo": repo, + "hf-repo-id": dataset_id, + "fetch-timeline": scrape.get("fetch-timeline"), + "max-issues": scrape.get("max-issues"), + "max-prs": scrape.get("max-prs"), + "max-issue-comments": scrape.get("max-issue-comments"), + "max-reviews-per-pr": scrape.get("max-reviews-per-pr"), + "max-review-comments-per-pr": scrape.get("max-review-comments-per-pr"), + "new-contributor-window-days": contributor_window_days, + "new-contributor-max-authors": contributor_max_authors, + "cluster-suppression-rules": cluster_suppression_rules, + }, + "analyze": { + "output-dir": str(data_dir) if data_dir else None, + "hf-repo-id": analysis.get("hf-repo-id", dataset_id), + "model": analysis.get("model"), + "ranking-backend": analysis.get("ranking_backend"), + "max-clusters": analysis.get("max_clusters"), + "hybrid-llm-concurrency": analysis.get("hybrid_llm_concurrency"), + "cached_analysis": analysis.get("cached_analysis"), + "open-prs-only": analysis.get("open_prs_only"), + "pr-template-cleanup-mode": pr_template_cleanup_mode, + "pr-template-strip-html-comments": pr_template_strip_html_comments, + "pr-template-trim-closing-reference-prefix": pr_template_trim_closing_reference_prefix, + "pr-template-section-patterns": pr_template_section_patterns, + "pr-template-line-patterns": pr_template_line_patterns, + "cluster-suppression-rules": cluster_suppression_rules, + }, + "pr-scope": { + "output-dir": str(data_dir) if data_dir else None, + "hf-repo-id": dataset_id, + "cluster-suppression-rules": cluster_suppression_rules, + }, + "pr-search": { + "output-dir": str(data_dir) if data_dir else None, + "hf-repo-id": dataset_id, + "cluster-suppression-rules": cluster_suppression_rules, + }, + "new-contributor-report": { + "output-dir": str(data_dir) if data_dir else None, + "hf-repo-id": dataset_id, + "window-days": contributor_window_days, + "max-authors": contributor_max_authors, + }, + "dashboard-data": { + "output-dir": str(dashboard_dir) if dashboard_dir else None, + "snapshot-root": str(data_dir / "snapshots") if data_dir else None, + "hf-repo-id": dataset_id, + "window-days": dashboard_window_days, + }, + "publish-analysis-artifacts": { + "output-dir": str(data_dir) if data_dir else None, + "hf-repo-id": dataset_id, + }, + "save-cache": { + "output-dir": str(data_dir) if data_dir else None, + "hf-repo-id": dataset_id, + }, + "deploy-dashboard": { + "pipeline-data-dir": str(data_dir) if data_dir else None, + "web-dir": str(web_dir) if web_dir else None, + "hf-repo-id": dataset_id, + "dashboard-window-days": 
dashboard_window_days, + "contributor-window-days": contributor_window_days, + "contributor-max-authors": contributor_max_authors, + "space-id": dashboard.get("space_id"), + "space-title": dashboard.get("title"), + "space-emoji": dashboard.get("emoji"), + "space-color-from": dashboard.get("color_from"), + "space-color-to": dashboard.get("color_to"), + "space-short-description": dashboard.get("short_description"), + "dataset-id": dataset_id, + "space-tags": tags_value, + }, + "dataset-status": { + "repo": repo, + "output-dir": str(data_dir) if data_dir else None, + "hf-repo-id": dataset_id, + }, + } + for command, values in defaults.items(): + defaults[command] = {key: value for key, value in values.items() if value is not None} + + explicit_sections = {command: _extract_command_config(payload, command) for command in defaults} + for command, values in explicit_sections.items(): + if not values: + continue + defaults[command].update(_resolve_command_paths(config_path, values)) + + defaults["scrape"].update(_resolve_command_paths(config_path, scrape)) + defaults["refresh-dataset"].update(_resolve_command_paths(config_path, scrape)) + defaults["analyze"].update(_resolve_command_paths(config_path, analysis)) + return defaults + + +def project_cli_defaults(config_path: Path | None = None) -> dict[str, Any]: + defaults = _pyproject_cli_defaults() + if config_path is None: + return defaults + merged = dict(defaults) + for command, values in _dashboard_config_defaults(config_path).items(): + current = merged.get(command) + if isinstance(current, dict): + updated = dict(current) + updated.update(values) + merged[command] = updated + else: + merged[command] = dict(values) + return merged + + +def command_defaults(command: str, *, config_path: Path | None = None) -> dict[str, Any]: + defaults = project_cli_defaults(config_path=config_path).get(command) + if not isinstance(defaults, dict): + return {} + return defaults + + +def extract_cli_config_path(argv: list[str] | None = None) -> Path | None: + args = list(sys.argv[1:] if argv is None else argv) + for index, arg in enumerate(args): + if arg == "--config" and index + 1 < len(args): + return Path(args[index + 1]).resolve() + if arg.startswith("--config="): + return Path(arg.split("=", 1)[1]).resolve() + return None diff --git a/src/slop_farmer/config.py b/src/slop_farmer/config.py new file mode 100644 index 0000000000000000000000000000000000000000..353ce791ec5031416a5c4b2f67c37c4775abcd19 --- /dev/null +++ b/src/slop_farmer/config.py @@ -0,0 +1,269 @@ +from __future__ import annotations + +import os +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any + + +def _read_gh_token() -> str | None: + try: + result = subprocess.run( + ["gh", "auth", "token"], + check=True, + capture_output=True, + text=True, + ) + except (OSError, subprocess.CalledProcessError): + return None + token = result.stdout.strip() + return token or None + + +def _read_dotenv_token() -> str | None: + for directory in (Path.cwd(), *Path.cwd().parents): + path = directory / ".env" + if not path.exists(): + continue + values: dict[str, str] = {} + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + values[key.strip()] = value.strip().strip("'").strip('"') + for key in ("GITHUB_TOKEN", "GRAPHQL_TOKEN", "GH_TOKEN"): + token = values.get(key) + if token: + return token + return None + + +def 
resolve_github_token() -> str | None: + return ( + os.environ.get("GITHUB_TOKEN") + or os.environ.get("GRAPHQL_TOKEN") + or os.environ.get("GH_TOKEN") + or _read_dotenv_token() + or _read_gh_token() + ) + + +@dataclass(slots=True) +class RepoRef: + owner: str + name: str + + @classmethod + def parse(cls, raw: str) -> RepoRef: + owner, sep, name = raw.partition("/") + if not sep or not owner or not name: + raise ValueError(f"Expected REPO in owner/name form, got: {raw!r}") + return cls(owner=owner, name=name) + + @property + def slug(self) -> str: + return f"{self.owner}/{self.name}" + + +@dataclass(slots=True) +class PipelineOptions: + repo: RepoRef + output_dir: Path + since: str | None + resume: bool + http_timeout: int + http_max_retries: int + max_issues: int | None + max_prs: int | None + max_issue_comments: int | None + max_reviews_per_pr: int | None + max_review_comments_per_pr: int | None + fetch_timeline: bool + new_contributor_report: bool + new_contributor_window_days: int + new_contributor_max_authors: int + issue_max_age_days: int | None + pr_max_age_days: int | None + + +@dataclass(slots=True) +class AnalysisOptions: + snapshot_dir: Path | None + output_dir: Path + output: Path | None + hf_repo_id: str | None + hf_revision: str | None + hf_materialize_dir: Path | None + ranking_backend: str + model: str + max_clusters: int + hybrid_llm_concurrency: int = 1 + open_prs_only: bool = False + cached_analysis: bool = False + pr_template_cleanup_mode: str = "merge_defaults" + pr_template_strip_html_comments: bool = True + pr_template_trim_closing_reference_prefix: bool = True + pr_template_section_patterns: tuple[str, ...] = () + pr_template_line_patterns: tuple[str, ...] = () + cluster_suppression_rules: tuple[dict[str, Any], ...] = () + + def __post_init__(self) -> None: + if self.hybrid_llm_concurrency < 1: + raise ValueError("hybrid_llm_concurrency must be >= 1") + + +@dataclass(slots=True) +class MarkdownReportOptions: + input: Path + output: Path | None + snapshot_dir: Path | None + + +@dataclass(slots=True) +class NewContributorReportOptions: + snapshot_dir: Path | None + output_dir: Path + output: Path | None + json_output: Path | None + window_days: int + max_authors: int + hf_repo_id: str | None = None + hf_revision: str | None = None + hf_materialize_dir: Path | None = None + + +@dataclass(slots=True) +class DashboardDataOptions: + snapshot_dir: Path | None + output_dir: Path + analysis_input: Path | None + contributors_input: Path | None + pr_scope_input: Path | None + window_days: int + hf_repo_id: str | None = None + hf_revision: str | None = None + hf_materialize_dir: Path | None = None + snapshot_root: Path | None = None + + +@dataclass(slots=True) +class DeployDashboardOptions: + pipeline_data_dir: Path + web_dir: Path + snapshot_dir: Path | None + analysis_input: Path | None + contributors_input: Path | None + pr_scope_input: Path | None + hf_repo_id: str | None + hf_revision: str | None + hf_materialize_dir: Path | None + refresh_contributors: bool + dashboard_window_days: int + contributor_window_days: int + contributor_max_authors: int + private_space: bool + commit_message: str + space_id: str + space_title: str | None + space_emoji: str + space_color_from: str + space_color_to: str + space_short_description: str + dataset_id: str | None + space_tags: str | None + + +@dataclass(slots=True) +class PrScopeOptions: + snapshot_dir: Path | None + output_dir: Path + output: Path | None + hf_repo_id: str | None + hf_revision: str | None + hf_materialize_dir: Path | 
None + cluster_suppression_rules: tuple[dict[str, Any], ...] = () + + +@dataclass(slots=True) +class PrSearchRefreshOptions: + snapshot_dir: Path | None + output_dir: Path + db: Path | None + hf_repo_id: str | None + hf_revision: str | None + hf_materialize_dir: Path | None + include_drafts: bool = False + include_closed: bool = False + limit_prs: int | None = None + replace_active: bool = True + cluster_suppression_rules: tuple[dict[str, Any], ...] = () + + +@dataclass(slots=True) +class CheckpointImportOptions: + source_repo_id: str + output_dir: Path + checkpoint_id: str | None + checkpoint_root: str | None + publish_repo_id: str | None + private_hf_repo: bool + force: bool + + +@dataclass(slots=True) +class SnapshotAdoptOptions: + snapshot_dir: Path + output_dir: Path + next_since: str | None + + +@dataclass(slots=True) +class DatasetRefreshOptions: + repo: RepoRef + hf_repo_id: str + private_hf_repo: bool + max_issues: int | None + max_prs: int | None + max_issue_comments: int | None + max_reviews_per_pr: int | None + max_review_comments_per_pr: int | None + fetch_timeline: bool + new_contributor_report: bool + new_contributor_window_days: int + new_contributor_max_authors: int + http_timeout: int + http_max_retries: int + checkpoint_every_comments: int + checkpoint_every_prs: int + cluster_suppression_rules: tuple[dict[str, Any], ...] = () + + +@dataclass(slots=True) +class PublishAnalysisArtifactsOptions: + output_dir: Path + snapshot_dir: Path | None + analysis_input: Path | None + hf_repo_id: str + analysis_id: str + canonical: bool = False + save_cache: bool = False + private_hf_repo: bool = False + + +@dataclass(slots=True) +class SaveCacheOptions: + output_dir: Path + snapshot_dir: Path | None + hf_repo_id: str + private_hf_repo: bool = False + + +@dataclass(slots=True) +class DatasetStatusOptions: + output_dir: Path + hf_repo_id: str | None + hf_revision: str | None + repo: str | None = None + json_output: bool = False diff --git a/src/slop_farmer/data/__init__.py b/src/slop_farmer/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9d48db4f9f85e1752cf424c49ee18a6907c3f160 --- /dev/null +++ b/src/slop_farmer/data/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/src/slop_farmer/data/dataset_card.py b/src/slop_farmer/data/dataset_card.py new file mode 100644 index 0000000000000000000000000000000000000000..954e9ebf00a9e16cdf8733c374adf71d0f69b3b8 --- /dev/null +++ b/src/slop_farmer/data/dataset_card.py @@ -0,0 +1,107 @@ +from __future__ import annotations + + +def _repo_title(repo_slug: str) -> str: + name = repo_slug.split("/", 1)[-1] + return name.replace("-", " ").replace("_", " ").title() + + +def build_hf_dataset_card( + repo_slug: str, + snapshot_id: str, + *, + include_new_contributors: bool = False, + notes: list[str] | None = None, +) -> str: + repo_title = _repo_title(repo_slug) + dataset_title = f"{repo_title} PR Dataset" + new_contributor_config = "" + new_contributor_files = "" + if include_new_contributors: + new_contributor_config = """- config_name: new_contributors + data_files: + - split: train + path: new_contributors.parquet +""" + new_contributor_files = """- `new_contributors.parquet` +- `new-contributors-report.json` +- `new-contributors-report.md` +""" + note_lines = "\n".join(f"- {note}" for note in (notes or [])) + if note_lines: + note_lines = f"{note_lines}\n" + return f"""--- +pretty_name: {dataset_title} +configs: +- config_name: issues + data_files: + - split: train + path: issues.parquet + 
default: true +- config_name: prs + data_files: + - split: train + path: pull_requests.parquet +- config_name: issue_comments + data_files: + - split: train + path: issue_comments.parquet +- config_name: pr_comments + data_files: + - split: train + path: pr_comments.parquet +- config_name: pr_reviews + data_files: + - split: train + path: reviews.parquet +- config_name: pr_files + data_files: + - split: train + path: pr_files.parquet +- config_name: pr_diffs + data_files: + - split: train + path: pr_diffs.parquet +- config_name: review_comments + data_files: + - split: train + path: review_comments.parquet +- config_name: links + data_files: + - split: train + path: links.parquet +- config_name: events + data_files: + - split: train + path: events.parquet +{new_contributor_config}--- +--- + +# {dataset_title} + +Normalized snapshots of issues, pull requests, comments, reviews, and linkage data from `{repo_slug}`. + +Files: +- `issues.parquet` +- `pull_requests.parquet` +- `comments.parquet` +- `issue_comments.parquet` (derived view of issue discussion comments) +- `pr_comments.parquet` (derived view of pull request discussion comments) +- `reviews.parquet` +- `pr_files.parquet` +- `pr_diffs.parquet` +- `review_comments.parquet` +- `links.parquet` +- `events.parquet` +{new_contributor_files} +Use: +- duplicate PR and issue analysis +- triage and ranking experiments +- eval set creation + +Notes: +- latest snapshot: `{snapshot_id}` +- raw data only; no labels or moderation decisions +- PR metadata, file-level patch hunks, and full unified diffs are included +- full file contents for changed files are not included +{note_lines}""" diff --git a/src/slop_farmer/data/ghreplica_api.py b/src/slop_farmer/data/ghreplica_api.py new file mode 100644 index 0000000000000000000000000000000000000000..67ef3a0f0a90792e707f3e32393e26056cc2d274 --- /dev/null +++ b/src/slop_farmer/data/ghreplica_api.py @@ -0,0 +1,145 @@ +from __future__ import annotations + +import json +import urllib.error +import urllib.request +from collections.abc import Callable, Iterable +from typing import Any + +from slop_farmer.data.http import urlopen_with_retry + + +class GhReplicaApiRequestError(RuntimeError): + """Raised when ghreplica returns a non-recoverable HTTP response.""" + + def __init__(self, status_code: int, path: str, detail: str): + self.status_code = status_code + self.path = path + self.detail = detail + super().__init__(f"ghreplica API request failed: {status_code} {path} {detail}") + + +class GhReplicaProbeUnavailableError(RuntimeError): + """Raised when ghreplica cannot yet serve a live probe payload.""" + + def __init__(self, detail: str, *, status_code: int = 503): + self.status_code = status_code + super().__init__(detail) + + +class GhrProbeClient: + provider = "ghreplica" + + def __init__( + self, + *, + base_url: str, + timeout: int = 180, + max_retries: int = 5, + log: Callable[[str], None] | None = None, + ): + self.base_url = base_url.rstrip("/") + self.timeout = timeout + self.max_retries = max_retries + self.log = log + + def _request_json(self, path: str) -> Any: + request = urllib.request.Request(f"{self.base_url}{path}") + request.add_header("Accept", "application/json") + try: + with urlopen_with_retry( + request, + timeout=self.timeout, + max_retries=self.max_retries, + log=self.log, + label=path, + ) as response: + payload = response.read().decode("utf-8") + except urllib.error.HTTPError as exc: + detail = exc.read().decode("utf-8", errors="replace") + raise GhReplicaApiRequestError(exc.code, 
path, detail) from exc + return json.loads(payload) + + def _request_json_or_none(self, path: str) -> Any | None: + try: + return self._request_json(path) + except GhReplicaApiRequestError as exc: + if exc.status_code == 404: + return None + raise + + def get_pull_request(self, owner: str, repo: str, number: int) -> dict[str, Any]: + try: + payload = self._request_json(f"/v1/github/repos/{owner}/{repo}/pulls/{number}") + except GhReplicaApiRequestError as exc: + if exc.status_code == 404: + raise GhReplicaProbeUnavailableError( + f"PR #{number} was not found in ghreplica.", + status_code=404, + ) from exc + raise + if not isinstance(payload, dict): + raise RuntimeError(f"Expected dict payload for pull request, got {type(payload)!r}") + return payload + + def iter_pull_files(self, owner: str, repo: str, number: int) -> Iterable[dict[str, Any]]: + try: + payload = self._request_json(f"/v1/changes/repos/{owner}/{repo}/pulls/{number}/files") + except GhReplicaApiRequestError as exc: + if exc.status_code != 404: + raise + status = self.get_pull_request_status(owner, repo, number) + if isinstance(status, dict): + detail_bits = [] + for key in ( + "indexed", + "backfill_in_progress", + "changed_files", + "indexed_file_count", + ): + if key in status: + detail_bits.append(f"{key}={status[key]}") + suffix = f" ({', '.join(detail_bits)})" if detail_bits else "" + raise GhReplicaProbeUnavailableError( + f"PR #{number} is not available in ghreplica yet{suffix}.", + status_code=503, + ) from exc + raise GhReplicaProbeUnavailableError( + f"PR #{number} was not found in ghreplica changed-file replica.", + status_code=404, + ) from exc + rows = payload if isinstance(payload, list) else payload.get("files") + if not isinstance(rows, list): + raise RuntimeError( + f"Expected list payload for pull request files, got {type(payload)!r}" + ) + for row in rows: + if not isinstance(row, dict): + continue + additions = int(row.get("additions") or 0) + deletions = int(row.get("deletions") or 0) + yield { + "sha": row.get("sha"), + "filename": row.get("filename") or row.get("path"), + "status": row.get("status"), + "additions": additions, + "deletions": deletions, + "changes": row.get("changes") or additions + deletions, + "blob_url": row.get("blob_url"), + "raw_url": row.get("raw_url"), + "contents_url": row.get("contents_url"), + "previous_filename": row.get("previous_filename"), + "patch": row.get("patch"), + } + + def get_pull_request_status(self, owner: str, repo: str, number: int) -> dict[str, Any] | None: + payload = self._request_json_or_none( + f"/v1/changes/repos/{owner}/{repo}/pulls/{number}/status" + ) + if payload is None: + return None + if not isinstance(payload, dict): + raise RuntimeError( + f"Expected dict payload for pull request status, got {type(payload)!r}" + ) + return payload diff --git a/src/slop_farmer/data/github_api.py b/src/slop_farmer/data/github_api.py new file mode 100644 index 0000000000000000000000000000000000000000..ad98467cd3df809b9b0ce8c15c52152158ad9383 --- /dev/null +++ b/src/slop_farmer/data/github_api.py @@ -0,0 +1,309 @@ +from __future__ import annotations + +import json +import time +import urllib.error +import urllib.parse +import urllib.request +from collections.abc import Callable, Iterator +from datetime import UTC, datetime + +from slop_farmer.data.http import urlopen_with_retry + + +class PullRequestDiffTooLargeError(RuntimeError): + """Raised when GitHub refuses to render a PR diff because it is too large.""" + + +class GitHubApiRequestError(RuntimeError): + 
"""Raised when GitHub returns a non-recoverable HTTP response.""" + + def __init__(self, status_code: int, path: str, detail: str): + self.status_code = status_code + self.path = path + self.detail = detail + super().__init__(f"GitHub API request failed: {status_code} {path} {detail}") + + +class GitHubClient: + def __init__( + self, + token: str | None, + per_page: int = 100, + timeout: int = 180, + max_retries: int = 5, + log: Callable[[str], None] | None = None, + ): + self.token = token + self.per_page = per_page + self.timeout = timeout + self.max_retries = max_retries + self.log = log + self.base_url = "https://api.github.com" + self.request_count = 0 + self.rate_limit_log_every = 25 + self._rate_limit_thresholds_logged: dict[str, set[int]] = {} + + @staticmethod + def _header_int(headers: dict[str, str], name: str) -> int | None: + value = headers.get(name) + if value is None: + return None + try: + return int(value) + except ValueError: + return None + + def _maybe_log_rate_limit(self, path: str, headers: dict[str, str]) -> None: + self.request_count += 1 + if not self.log: + return + limit = self._header_int(headers, "x-ratelimit-limit") + remaining = self._header_int(headers, "x-ratelimit-remaining") + used = self._header_int(headers, "x-ratelimit-used") + reset_at = self._header_int(headers, "x-ratelimit-reset") + resource = headers.get("x-ratelimit-resource", "?") + if limit is None or remaining is None: + return + should_log = self.request_count == 1 or self.request_count % self.rate_limit_log_every == 0 + thresholds = (1000, 500, 250, 100, 50, 25, 10, 5, 1, 0) + logged = self._rate_limit_thresholds_logged.setdefault(resource, set()) + for threshold in thresholds: + if remaining <= threshold and threshold not in logged: + should_log = True + logged.add(threshold) + if not should_log: + return + reset_text = "?" + if reset_at is not None: + reset_text = datetime.fromtimestamp(reset_at, tz=UTC).strftime("%Y-%m-%dT%H:%M:%SZ") + used_text = "?" 
if used is None else str(used) + self.log( + f"GitHub rate limit: resource={resource} used={used_text} remaining={remaining}/{limit} " + f"reset={reset_text} after {path} (request {self.request_count})" + ) + + def _request( + self, + path: str, + params: dict[str, object] | None = None, + accept: str = "application/vnd.github+json", + ) -> tuple[object, dict[str, str]]: + query = f"?{urllib.parse.urlencode(params)}" if params else "" + url = f"{self.base_url}{path}{query}" + request = urllib.request.Request(url) + request.add_header("Accept", accept) + request.add_header("X-GitHub-Api-Version", "2022-11-28") + if self.token: + request.add_header("Authorization", f"Bearer {self.token}") + while True: + try: + with urlopen_with_retry( + request, + timeout=self.timeout, + max_retries=self.max_retries, + log=self.log, + label=path, + ) as response: + payload = response.read().decode("utf-8") + headers = {k.lower(): v for k, v in response.headers.items()} + self._maybe_log_rate_limit(path, headers) + return json.loads(payload), headers + except urllib.error.HTTPError as exc: + if exc.code == 403 and exc.headers.get("X-RateLimit-Remaining") == "0": + reset_at = int(exc.headers.get("X-RateLimit-Reset", "0") or "0") + sleep_for = max(reset_at - int(time.time()), 1) + if self.log: + self.log( + f"GitHub rate limit reached for {path}; sleeping {sleep_for}s until reset" + ) + time.sleep(sleep_for) + continue + detail = exc.read().decode("utf-8", errors="replace") + if ( + exc.code == 406 + and '"field":"diff"' in detail + and '"code":"too_large"' in detail + ): + raise PullRequestDiffTooLargeError( + f"GitHub diff too large: {path} {detail}" + ) from exc + raise GitHubApiRequestError(exc.code, path, detail) from exc + + def _request_text( + self, + path: str, + params: dict[str, object] | None = None, + accept: str = "application/vnd.github.diff", + ) -> tuple[str, dict[str, str]]: + query = f"?{urllib.parse.urlencode(params)}" if params else "" + url = f"{self.base_url}{path}{query}" + request = urllib.request.Request(url) + request.add_header("Accept", accept) + request.add_header("X-GitHub-Api-Version", "2022-11-28") + if self.token: + request.add_header("Authorization", f"Bearer {self.token}") + while True: + try: + with urlopen_with_retry( + request, + timeout=self.timeout, + max_retries=self.max_retries, + log=self.log, + label=path, + ) as response: + payload = response.read().decode("utf-8", errors="replace") + headers = {k.lower(): v for k, v in response.headers.items()} + self._maybe_log_rate_limit(path, headers) + return payload, headers + except urllib.error.HTTPError as exc: + if exc.code == 403 and exc.headers.get("X-RateLimit-Remaining") == "0": + reset_at = int(exc.headers.get("X-RateLimit-Reset", "0") or "0") + sleep_for = max(reset_at - int(time.time()), 1) + if self.log: + self.log( + f"GitHub rate limit reached for {path}; sleeping {sleep_for}s until reset" + ) + time.sleep(sleep_for) + continue + detail = exc.read().decode("utf-8", errors="replace") + if ( + exc.code == 406 + and '"field":"diff"' in detail + and '"code":"too_large"' in detail + ): + raise PullRequestDiffTooLargeError( + f"GitHub diff too large: {path} {detail}" + ) from exc + raise GitHubApiRequestError(exc.code, path, detail) from exc + + def paginate( + self, + path: str, + params: dict[str, object] | None = None, + accept: str = "application/vnd.github+json", + limit: int | None = None, + ) -> Iterator[dict]: + page = 1 + yielded = 0 + params = dict(params or {}) + params["per_page"] = self.per_page + while 
True: + params["page"] = page + payload, _headers = self._request(path, params=params, accept=accept) + if not isinstance(payload, list): + raise RuntimeError(f"Expected list payload from {path}, got {type(payload)!r}") + if not payload: + break + for item in payload: + if not isinstance(item, dict): + continue + yield item + yielded += 1 + if limit is not None and yielded >= limit: + return + if len(payload) < self.per_page: + break + page += 1 + + def get_json(self, path: str, accept: str = "application/vnd.github+json") -> dict: + payload, _headers = self._request(path, accept=accept) + if not isinstance(payload, dict): + raise RuntimeError(f"Expected dict payload from {path}, got {type(payload)!r}") + return payload + + def iter_repo_issues( + self, owner: str, repo: str, since: str | None, limit: int | None + ) -> Iterator[dict]: + direction = "asc" + if since is None and limit is not None: + # When the caller asks for a bounded first pass without a watermark, + # prefer the most recently updated items. Using ascending order here + # causes small smoke tests to read the stalest issue/PR stubs first, + # which can make recent-age filters appear to return an empty repo. + direction = "desc" + params: dict[str, object] = {"state": "all", "sort": "updated", "direction": direction} + if since: + params["since"] = since + yield from self.paginate(f"/repos/{owner}/{repo}/issues", params=params, limit=limit) + + def iter_issue_comments( + self, owner: str, repo: str, since: str | None, limit: int | None + ) -> Iterator[dict]: + params: dict[str, object] = {"sort": "updated", "direction": "asc"} + if since: + params["since"] = since + yield from self.paginate( + f"/repos/{owner}/{repo}/issues/comments", params=params, limit=limit + ) + + def iter_issue_comments_for_number( + self, + owner: str, + repo: str, + number: int, + since: str | None, + limit: int | None = None, + ) -> Iterator[dict]: + params: dict[str, object] = {"sort": "updated", "direction": "asc"} + if since: + params["since"] = since + yield from self.paginate( + f"/repos/{owner}/{repo}/issues/{number}/comments", params=params, limit=limit + ) + + def get_pull_request(self, owner: str, repo: str, number: int) -> dict: + return self.get_json(f"/repos/{owner}/{repo}/pulls/{number}") + + def iter_pull_reviews( + self, owner: str, repo: str, number: int, limit: int | None = None + ) -> Iterator[dict]: + yield from self.paginate(f"/repos/{owner}/{repo}/pulls/{number}/reviews", limit=limit) + + def iter_pull_review_comments( + self, owner: str, repo: str, number: int, limit: int | None = None + ) -> Iterator[dict]: + yield from self.paginate( + f"/repos/{owner}/{repo}/pulls/{number}/comments", + params={"sort": "updated", "direction": "asc"}, + limit=limit, + ) + + def iter_pull_files( + self, owner: str, repo: str, number: int, limit: int | None = None + ) -> Iterator[dict]: + yield from self.paginate(f"/repos/{owner}/{repo}/pulls/{number}/files", limit=limit) + + def get_pull_request_diff(self, owner: str, repo: str, number: int) -> str: + path = f"/repos/{owner}/{repo}/pulls/{number}" + try: + payload, _headers = self._request_text( + path, + accept="application/vnd.github.diff", + ) + return payload + except PullRequestDiffTooLargeError: + if self.log: + self.log( + f"Skipping unified diff for pull request #{number}; GitHub reports diff too large" + ) + return "" + + def iter_issue_timeline( + self, owner: str, repo: str, number: int, limit: int | None = None + ) -> Iterator[dict]: + path = 
f"/repos/{owner}/{repo}/issues/{number}/timeline" + try: + yield from self.paginate( + path, + accept="application/vnd.github+json, application/vnd.github.mockingbird-preview+json", + limit=limit, + ) + except GitHubApiRequestError as exc: + if exc.status_code < 500: + raise + if self.log: + self.log( + f"Skipping timeline fetch for issue #{number} after GitHub {exc.status_code}: {path}" + ) + return diff --git a/src/slop_farmer/data/hf_dataset_repo.py b/src/slop_farmer/data/hf_dataset_repo.py new file mode 100644 index 0000000000000000000000000000000000000000..db4bc48e10c4c9cf5c346bb484dfcf9b85f015dc --- /dev/null +++ b/src/slop_farmer/data/hf_dataset_repo.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any + +from huggingface_hub import HfApi, hf_hub_download + + +def load_remote_file( + api: HfApi, + repo_id: str, + path_in_repo: str, + local_dir: Path, + *, + revision: str | None = None, +) -> Path | None: + del api + try: + downloaded = hf_hub_download( + repo_id=repo_id, + filename=path_in_repo, + repo_type="dataset", + revision=revision, + local_dir=str(local_dir), + token=os.getenv("HF_TOKEN"), + ) + except Exception: + return None + return Path(downloaded) + + +def load_remote_json_file( + api: HfApi, + repo_id: str, + path_in_repo: str, + local_dir: Path, + *, + revision: str | None = None, +) -> dict[str, Any] | None: + downloaded = load_remote_file( + api, + repo_id, + path_in_repo, + local_dir, + revision=revision, + ) + if downloaded is None: + return None + return json.loads(downloaded.read_text(encoding="utf-8")) + + +def list_remote_paths(api: HfApi, repo_id: str, *, revision: str | None = None) -> set[str]: + try: + info = api.dataset_info(repo_id=repo_id, revision=revision, files_metadata=True) + except TypeError: + info = api.dataset_info(repo_id=repo_id, revision=revision) + except Exception: + return set() + return {sibling.rfilename for sibling in getattr(info, "siblings", [])} + + +def stable_snapshot_candidates(latest_payload: dict[str, Any] | None, filename: str) -> list[str]: + if latest_payload is None: + return [filename] + candidates: list[str] = [] + manifest_path = str(latest_payload.get("manifest_path") or "").strip("/") + snapshot_dir = str(latest_payload.get("snapshot_dir") or "").strip("/") + latest_snapshot_id = str(latest_payload.get("latest_snapshot_id") or "").strip() + + if filename == "manifest.json" and manifest_path: + candidates.append(manifest_path) + if snapshot_dir and snapshot_dir not in {".", "/"}: + candidates.append(f"{snapshot_dir}/{filename}") + archived_manifest_path = str(latest_payload.get("archived_manifest_path") or "").strip("/") + if filename == "manifest.json" and archived_manifest_path: + candidates.append(archived_manifest_path) + if manifest_path and "/" in manifest_path: + manifest_dir = manifest_path.rsplit("/", 1)[0] + candidates.append(f"{manifest_dir}/{filename}") + if latest_snapshot_id: + candidates.append(f"snapshots/{latest_snapshot_id}/{filename}") + candidates.append(filename) + + deduped: list[str] = [] + seen: set[str] = set() + for candidate in candidates: + normalized = candidate.lstrip("./") + if not normalized or normalized in seen: + continue + seen.add(normalized) + deduped.append(normalized) + return deduped diff --git a/src/slop_farmer/data/http.py b/src/slop_farmer/data/http.py new file mode 100644 index 0000000000000000000000000000000000000000..32710953f8dc234f0ade91b0d94c5d30d6d4c7df --- /dev/null +++ 
b/src/slop_farmer/data/http.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+import http.client
+import socket
+import time
+import urllib.error
+import urllib.request
+from collections.abc import Callable
+from typing import Any
+
+TRANSIENT_HTTP_ERRORS = (
+    TimeoutError,
+    socket.timeout,
+    urllib.error.URLError,
+    http.client.RemoteDisconnected,
+    ConnectionResetError,
+)
+
+
+def urlopen_with_retry(
+    request: urllib.request.Request | str,
+    *,
+    timeout: int,
+    max_retries: int = 5,
+    log: Callable[[str], None] | None = None,
+    label: str | None = None,
+    opener: Callable[..., Any] | None = None,
+    sleep: Callable[[float], None] = time.sleep,
+) -> Any:
+    attempt = 0
+    target = label or (request if isinstance(request, str) else request.full_url)
+    opener = opener or urllib.request.urlopen
+    while True:
+        try:
+            return opener(request, timeout=timeout)
+        except urllib.error.HTTPError:
+            raise
+        except TRANSIENT_HTTP_ERRORS as exc:
+            attempt += 1
+            if attempt > max_retries:
+                raise RuntimeError(
+                    f"HTTP request failed after {max_retries} retries: {target} {exc}"
+                ) from exc
+            sleep_for = min(2**attempt, 30)
+            if log is not None:
+                log(
+                    f"Transient network failure for {target} (attempt {attempt}/{max_retries}); retrying in {sleep_for}s"
+                )
+            sleep(sleep_for)
diff --git a/src/slop_farmer/data/links.py b/src/slop_farmer/data/links.py
new file mode 100644
index 0000000000000000000000000000000000000000..443a26dd571f76f5a1fd71b8e1935f58362687d6
--- /dev/null
+++ b/src/slop_farmer/data/links.py
@@ -0,0 +1,139 @@
+from __future__ import annotations
+
+import re
+from collections import defaultdict
+from typing import Any
+
+REFERENCE_PATTERN = re.compile(
+    r"(?:(?P<owner>[A-Za-z0-9_.-]+)/(?P<repo>[A-Za-z0-9_.-]+))?#(?P<number>\d+)"
+)
+CLOSING_KEYWORD_PATTERN = re.compile(
+    r"\b(?P<verb>close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s+(?:(?P<owner>[A-Za-z0-9_.-]+)/(?P<repo>[A-Za-z0-9_.-]+))?#(?P<number>\d+)\b",
+    flags=re.IGNORECASE,
+)
+DUPLICATE_REFERENCE_PATTERN = re.compile(
+    r"\bduplicate\s+of\s+(?:(?P<owner>[A-Za-z0-9_.-]+)/(?P<repo>[A-Za-z0-9_.-]+))?#(?P<number>\d+)\b",
+    flags=re.IGNORECASE,
+)
+
+
+def extract_references(
+    text: str | None, default_owner: str, default_repo: str
+) -> list[dict[str, Any]]:
+    if not text:
+        return []
+    refs: list[dict[str, Any]] = []
+    for match in REFERENCE_PATTERN.finditer(text):
+        refs.append(
+            {
+                "target_owner": match.group("owner") or default_owner,
+                "target_repo": match.group("repo") or default_repo,
+                "target_number": int(match.group("number")),
+                "reference_kind": "mention",
+            }
+        )
+    for match in CLOSING_KEYWORD_PATTERN.finditer(text):
+        refs.append(
+            {
+                "target_owner": match.group("owner") or default_owner,
+                "target_repo": match.group("repo") or default_repo,
+                "target_number": int(match.group("number")),
+                "reference_kind": "closing_reference",
+                "verb": match.group("verb").lower(),
+            }
+        )
+    for match in DUPLICATE_REFERENCE_PATTERN.finditer(text):
+        refs.append(
+            {
+                "target_owner": match.group("owner") or default_owner,
+                "target_repo": match.group("repo") or default_repo,
+                "target_number": int(match.group("number")),
+                "reference_kind": "duplicate_reference",
+            }
+        )
+    return refs
+
+
+def build_text_link_rows(
+    *,
+    repo: str,
+    owner: str,
+    repo_name: str,
+    source_type: str,
+    source_number: int,
+    source_id: int | None,
+    body: str | None,
+    snapshot_id: str,
+    extracted_at: str,
+) -> list[dict[str, Any]]:
+    rows: list[dict[str, Any]] = []
+    seen: set[tuple[str, str, int, str]] = set()
+    for ref in extract_references(body, owner, repo_name):
+        key = (ref["target_owner"],
ref["target_repo"], ref["target_number"], ref["reference_kind"]) + if key in seen: + continue + seen.add(key) + rows.append( + { + "repo": repo, + "source_type": source_type, + "source_number": source_number, + "source_github_id": source_id, + "target_owner": ref["target_owner"], + "target_repo": ref["target_repo"], + "target_number": ref["target_number"], + "link_type": ref["reference_kind"], + "link_origin": "text", + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + } + ) + return rows + + +def build_pr_duplicate_candidate_rows( + *, + repo: str, + pull_requests: list[dict[str, Any]], + link_rows: list[dict[str, Any]], + snapshot_id: str, + extracted_at: str, +) -> list[dict[str, Any]]: + pr_targets: dict[int, set[int]] = defaultdict(set) + for row in link_rows: + if row["source_type"] != "pull_request": + continue + if ( + row["target_owner"] != repo.split("/", 1)[0] + or row["target_repo"] != repo.split("/", 1)[1] + ): + continue + pr_targets[row["source_number"]].add(row["target_number"]) + + target_to_prs: dict[int, list[int]] = defaultdict(list) + for pr_row in pull_requests: + number = pr_row["number"] + for target in sorted(pr_targets.get(number, set())): + target_to_prs[target].append(number) + + rows: list[dict[str, Any]] = [] + for target_number, prs in target_to_prs.items(): + if len(prs) < 2: + continue + for pr_number in prs: + rows.append( + { + "repo": repo, + "source_type": "pull_request", + "source_number": pr_number, + "source_github_id": None, + "target_owner": repo.split("/", 1)[0], + "target_repo": repo.split("/", 1)[1], + "target_number": target_number, + "link_type": "shared_issue_target", + "link_origin": "derived", + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + } + ) + return rows diff --git a/src/slop_farmer/data/normalize.py b/src/slop_farmer/data/normalize.py new file mode 100644 index 0000000000000000000000000000000000000000..c87a721fb9241e139eb2ade3ab2a61caf3c7d4c9 --- /dev/null +++ b/src/slop_farmer/data/normalize.py @@ -0,0 +1,277 @@ +from __future__ import annotations + +from typing import Any +from urllib.parse import urlparse + + +def _user_fields(user: dict[str, Any] | None) -> dict[str, Any]: + user = user or {} + return { + "author_login": user.get("login"), + "author_id": user.get("id"), + "author_node_id": user.get("node_id"), + "author_type": user.get("type"), + "author_site_admin": user.get("site_admin"), + } + + +def _labels(labels: list[dict[str, Any]] | None) -> list[str]: + return [ + name + for label in labels or [] + if isinstance(label, dict) and isinstance((name := label.get("name")), str) and name + ] + + +def _assignees(users: list[dict[str, Any]] | None) -> list[str]: + return [ + login + for user in users or [] + if isinstance(user, dict) and isinstance((login := user.get("login")), str) and login + ] + + +def issue_url_to_number(issue_url: str | None) -> int | None: + if not issue_url: + return None + path = urlparse(issue_url).path.rstrip("/") + tail = path.rsplit("/", 1)[-1] + try: + return int(tail) + except ValueError: + return None + + +def normalize_issue( + repo: str, item: dict[str, Any], snapshot_id: str, extracted_at: str +) -> dict[str, Any]: + return { + "repo": repo, + "github_id": item.get("id"), + "github_node_id": item.get("node_id"), + "number": item.get("number"), + "html_url": item.get("html_url"), + "api_url": item.get("url"), + "title": item.get("title"), + "body": item.get("body"), + "state": item.get("state"), + "state_reason": item.get("state_reason"), + "locked": 
item.get("locked"), + "comments_count": item.get("comments"), + "labels": _labels(item.get("labels")), + "assignees": _assignees(item.get("assignees")), + "created_at": item.get("created_at"), + "updated_at": item.get("updated_at"), + "closed_at": item.get("closed_at"), + "author_association": item.get("author_association"), + "milestone_title": (item.get("milestone") or {}).get("title"), + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + **_user_fields(item.get("user")), + } + + +def normalize_pull_request( + repo: str, + issue_stub: dict[str, Any], + pr_detail: dict[str, Any], + snapshot_id: str, + extracted_at: str, +) -> dict[str, Any]: + head = pr_detail.get("head") or {} + base = pr_detail.get("base") or {} + return { + "repo": repo, + "github_id": pr_detail.get("id") or issue_stub.get("id"), + "github_node_id": pr_detail.get("node_id") or issue_stub.get("node_id"), + "number": issue_stub.get("number"), + "html_url": issue_stub.get("html_url"), + "api_url": issue_stub.get("url"), + "title": issue_stub.get("title"), + "body": issue_stub.get("body"), + "state": issue_stub.get("state"), + "state_reason": issue_stub.get("state_reason"), + "locked": issue_stub.get("locked"), + "comments_count": issue_stub.get("comments"), + "labels": _labels(issue_stub.get("labels")), + "assignees": _assignees(issue_stub.get("assignees")), + "created_at": issue_stub.get("created_at"), + "updated_at": issue_stub.get("updated_at"), + "closed_at": issue_stub.get("closed_at"), + "author_association": issue_stub.get("author_association") + or pr_detail.get("author_association"), + "merged_at": pr_detail.get("merged_at"), + "merge_commit_sha": pr_detail.get("merge_commit_sha"), + "merged": pr_detail.get("merged"), + "mergeable": pr_detail.get("mergeable"), + "mergeable_state": pr_detail.get("mergeable_state"), + "draft": pr_detail.get("draft"), + "additions": pr_detail.get("additions"), + "deletions": pr_detail.get("deletions"), + "changed_files": pr_detail.get("changed_files"), + "commits": pr_detail.get("commits"), + "review_comments_count": pr_detail.get("review_comments"), + "maintainer_can_modify": pr_detail.get("maintainer_can_modify"), + "head_ref": head.get("ref"), + "head_sha": head.get("sha"), + "head_repo_full_name": (head.get("repo") or {}).get("full_name"), + "base_ref": base.get("ref"), + "base_sha": base.get("sha"), + "base_repo_full_name": (base.get("repo") or {}).get("full_name"), + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + **_user_fields(issue_stub.get("user")), + } + + +def normalize_comment( + repo: str, + item: dict[str, Any], + parent_kind: str, + parent_number: int | None, + snapshot_id: str, + extracted_at: str, +) -> dict[str, Any]: + return { + "repo": repo, + "github_id": item.get("id"), + "github_node_id": item.get("node_id"), + "parent_kind": parent_kind, + "parent_number": parent_number, + "html_url": item.get("html_url"), + "api_url": item.get("url"), + "issue_api_url": item.get("issue_url"), + "body": item.get("body"), + "created_at": item.get("created_at"), + "updated_at": item.get("updated_at"), + "author_association": item.get("author_association"), + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + **_user_fields(item.get("user")), + } + + +def normalize_review( + repo: str, pr_number: int, item: dict[str, Any], snapshot_id: str, extracted_at: str +) -> dict[str, Any]: + return { + "repo": repo, + "github_id": item.get("id"), + "github_node_id": item.get("node_id"), + "pull_request_number": pr_number, + "html_url": 
item.get("html_url"), + "api_url": item.get("url"), + "body": item.get("body"), + "state": item.get("state"), + "submitted_at": item.get("submitted_at"), + "commit_id": item.get("commit_id"), + "author_association": item.get("author_association"), + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + **_user_fields(item.get("user")), + } + + +def normalize_review_comment( + repo: str, pr_number: int, item: dict[str, Any], snapshot_id: str, extracted_at: str +) -> dict[str, Any]: + return { + "repo": repo, + "github_id": item.get("id"), + "github_node_id": item.get("node_id"), + "pull_request_number": pr_number, + "review_id": item.get("pull_request_review_id"), + "html_url": item.get("html_url"), + "api_url": item.get("url"), + "pull_request_api_url": item.get("pull_request_url"), + "body": item.get("body"), + "path": item.get("path"), + "commit_id": item.get("commit_id"), + "original_commit_id": item.get("original_commit_id"), + "position": item.get("position"), + "original_position": item.get("original_position"), + "line": item.get("line"), + "start_line": item.get("start_line"), + "side": item.get("side"), + "start_side": item.get("start_side"), + "subject_type": item.get("subject_type"), + "created_at": item.get("created_at"), + "updated_at": item.get("updated_at"), + "author_association": item.get("author_association"), + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + **_user_fields(item.get("user")), + } + + +def normalize_pr_file( + repo: str, + pr_number: int, + item: dict[str, Any], + snapshot_id: str, + extracted_at: str, +) -> dict[str, Any]: + return { + "repo": repo, + "pull_request_number": pr_number, + "sha": item.get("sha"), + "filename": item.get("filename"), + "status": item.get("status"), + "additions": item.get("additions"), + "deletions": item.get("deletions"), + "changes": item.get("changes"), + "blob_url": item.get("blob_url"), + "raw_url": item.get("raw_url"), + "contents_url": item.get("contents_url"), + "previous_filename": item.get("previous_filename"), + "patch": item.get("patch"), + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + } + + +def normalize_pr_diff( + repo: str, + pr_number: int, + html_url: str | None, + api_url: str | None, + diff: str, + snapshot_id: str, + extracted_at: str, +) -> dict[str, Any]: + return { + "repo": repo, + "pull_request_number": pr_number, + "html_url": html_url, + "api_url": api_url, + "diff": diff, + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + } + + +def normalize_timeline_event( + repo: str, + number: int, + parent_kind: str, + item: dict[str, Any], + snapshot_id: str, + extracted_at: str, +) -> dict[str, Any]: + source = item.get("source") or {} + issue = source.get("issue") or {} + return { + "repo": repo, + "parent_kind": parent_kind, + "parent_number": number, + "event": item.get("event"), + "created_at": item.get("created_at"), + "actor_login": (item.get("actor") or {}).get("login"), + "source_issue_number": issue.get("number"), + "source_issue_title": issue.get("title"), + "source_issue_url": issue.get("html_url"), + "commit_id": item.get("commit_id"), + "label_name": (item.get("label") or {}).get("name"), + "snapshot_id": snapshot_id, + "extracted_at": extracted_at, + } diff --git a/src/slop_farmer/data/parquet_io.py b/src/slop_farmer/data/parquet_io.py new file mode 100644 index 0000000000000000000000000000000000000000..4a0ae8ac63d1bcc0e04bc4656cb95e4fa37f59e1 --- /dev/null +++ b/src/slop_farmer/data/parquet_io.py @@ -0,0 +1,322 @@ +from __future__ import 
annotations + +import json +import os +import tempfile +from pathlib import Path +from typing import Any + +import pyarrow as pa +import pyarrow.parquet as pq + +SCHEMAS: dict[str, pa.Schema] = { + "issues": pa.schema( + [ + ("repo", pa.string()), + ("github_id", pa.int64()), + ("github_node_id", pa.string()), + ("number", pa.int64()), + ("html_url", pa.string()), + ("api_url", pa.string()), + ("title", pa.string()), + ("body", pa.string()), + ("state", pa.string()), + ("state_reason", pa.string()), + ("locked", pa.bool_()), + ("comments_count", pa.int64()), + ("labels", pa.list_(pa.string())), + ("assignees", pa.list_(pa.string())), + ("created_at", pa.string()), + ("updated_at", pa.string()), + ("closed_at", pa.string()), + ("author_association", pa.string()), + ("milestone_title", pa.string()), + ("snapshot_id", pa.string()), + ("extracted_at", pa.string()), + ("author_login", pa.string()), + ("author_id", pa.int64()), + ("author_node_id", pa.string()), + ("author_type", pa.string()), + ("author_site_admin", pa.bool_()), + ] + ), + "pull_requests": pa.schema( + [ + ("repo", pa.string()), + ("github_id", pa.int64()), + ("github_node_id", pa.string()), + ("number", pa.int64()), + ("html_url", pa.string()), + ("api_url", pa.string()), + ("title", pa.string()), + ("body", pa.string()), + ("state", pa.string()), + ("state_reason", pa.string()), + ("locked", pa.bool_()), + ("comments_count", pa.int64()), + ("labels", pa.list_(pa.string())), + ("assignees", pa.list_(pa.string())), + ("created_at", pa.string()), + ("updated_at", pa.string()), + ("closed_at", pa.string()), + ("author_association", pa.string()), + ("merged_at", pa.string()), + ("merge_commit_sha", pa.string()), + ("merged", pa.bool_()), + ("mergeable", pa.bool_()), + ("mergeable_state", pa.string()), + ("draft", pa.bool_()), + ("additions", pa.int64()), + ("deletions", pa.int64()), + ("changed_files", pa.int64()), + ("commits", pa.int64()), + ("review_comments_count", pa.int64()), + ("maintainer_can_modify", pa.bool_()), + ("head_ref", pa.string()), + ("head_sha", pa.string()), + ("head_repo_full_name", pa.string()), + ("base_ref", pa.string()), + ("base_sha", pa.string()), + ("base_repo_full_name", pa.string()), + ("snapshot_id", pa.string()), + ("extracted_at", pa.string()), + ("author_login", pa.string()), + ("author_id", pa.int64()), + ("author_node_id", pa.string()), + ("author_type", pa.string()), + ("author_site_admin", pa.bool_()), + ] + ), + "comments": pa.schema( + [ + ("repo", pa.string()), + ("github_id", pa.int64()), + ("github_node_id", pa.string()), + ("parent_kind", pa.string()), + ("parent_number", pa.int64()), + ("html_url", pa.string()), + ("api_url", pa.string()), + ("issue_api_url", pa.string()), + ("body", pa.string()), + ("created_at", pa.string()), + ("updated_at", pa.string()), + ("author_association", pa.string()), + ("snapshot_id", pa.string()), + ("extracted_at", pa.string()), + ("author_login", pa.string()), + ("author_id", pa.int64()), + ("author_node_id", pa.string()), + ("author_type", pa.string()), + ("author_site_admin", pa.bool_()), + ] + ), + "reviews": pa.schema( + [ + ("repo", pa.string()), + ("github_id", pa.int64()), + ("github_node_id", pa.string()), + ("pull_request_number", pa.int64()), + ("html_url", pa.string()), + ("api_url", pa.string()), + ("body", pa.string()), + ("state", pa.string()), + ("submitted_at", pa.string()), + ("commit_id", pa.string()), + ("author_association", pa.string()), + ("snapshot_id", pa.string()), + ("extracted_at", pa.string()), + ("author_login", 
pa.string()), + ("author_id", pa.int64()), + ("author_node_id", pa.string()), + ("author_type", pa.string()), + ("author_site_admin", pa.bool_()), + ] + ), + "review_comments": pa.schema( + [ + ("repo", pa.string()), + ("github_id", pa.int64()), + ("github_node_id", pa.string()), + ("pull_request_number", pa.int64()), + ("review_id", pa.int64()), + ("html_url", pa.string()), + ("api_url", pa.string()), + ("pull_request_api_url", pa.string()), + ("body", pa.string()), + ("path", pa.string()), + ("commit_id", pa.string()), + ("original_commit_id", pa.string()), + ("position", pa.int64()), + ("original_position", pa.int64()), + ("line", pa.int64()), + ("start_line", pa.int64()), + ("side", pa.string()), + ("start_side", pa.string()), + ("subject_type", pa.string()), + ("created_at", pa.string()), + ("updated_at", pa.string()), + ("author_association", pa.string()), + ("snapshot_id", pa.string()), + ("extracted_at", pa.string()), + ("author_login", pa.string()), + ("author_id", pa.int64()), + ("author_node_id", pa.string()), + ("author_type", pa.string()), + ("author_site_admin", pa.bool_()), + ] + ), + "pr_files": pa.schema( + [ + ("repo", pa.string()), + ("pull_request_number", pa.int64()), + ("sha", pa.string()), + ("filename", pa.string()), + ("status", pa.string()), + ("additions", pa.int64()), + ("deletions", pa.int64()), + ("changes", pa.int64()), + ("blob_url", pa.string()), + ("raw_url", pa.string()), + ("contents_url", pa.string()), + ("previous_filename", pa.string()), + ("patch", pa.string()), + ("snapshot_id", pa.string()), + ("extracted_at", pa.string()), + ] + ), + "pr_diffs": pa.schema( + [ + ("repo", pa.string()), + ("pull_request_number", pa.int64()), + ("html_url", pa.string()), + ("api_url", pa.string()), + ("diff", pa.string()), + ("snapshot_id", pa.string()), + ("extracted_at", pa.string()), + ] + ), + "links": pa.schema( + [ + ("repo", pa.string()), + ("source_type", pa.string()), + ("source_number", pa.int64()), + ("source_github_id", pa.int64()), + ("target_owner", pa.string()), + ("target_repo", pa.string()), + ("target_number", pa.int64()), + ("link_type", pa.string()), + ("link_origin", pa.string()), + ("snapshot_id", pa.string()), + ("extracted_at", pa.string()), + ] + ), + "events": pa.schema( + [ + ("repo", pa.string()), + ("parent_kind", pa.string()), + ("parent_number", pa.int64()), + ("event", pa.string()), + ("created_at", pa.string()), + ("actor_login", pa.string()), + ("source_issue_number", pa.int64()), + ("source_issue_title", pa.string()), + ("source_issue_url", pa.string()), + ("commit_id", pa.string()), + ("label_name", pa.string()), + ("snapshot_id", pa.string()), + ("extracted_at", pa.string()), + ] + ), + "new_contributors": pa.schema( + [ + ("repo", pa.string()), + ("snapshot_id", pa.string()), + ("report_generated_at", pa.string()), + ("window_days", pa.int64()), + ("author_login", pa.string()), + ("name", pa.string()), + ("profile_url", pa.string()), + ("repo_pull_requests_url", pa.string()), + ("repo_issues_url", pa.string()), + ("repo_first_seen_at", pa.string()), + ("repo_last_seen_at", pa.string()), + ("repo_primary_artifact_count", pa.int64()), + ("repo_artifact_count", pa.int64()), + ("snapshot_issue_count", pa.int64()), + ("snapshot_pr_count", pa.int64()), + ("snapshot_comment_count", pa.int64()), + ("snapshot_review_count", pa.int64()), + ("snapshot_review_comment_count", pa.int64()), + ("repo_association", pa.string()), + ("new_to_repo", pa.bool_()), + ("first_seen_in_snapshot", pa.bool_()), + ("report_reason", pa.string()), + 
("account_age_days", pa.int64()), + ("young_account", pa.bool_()), + ("follow_through_score", pa.string()), + ("breadth_score", pa.string()), + ("automation_risk_signal", pa.string()), + ("heuristic_note", pa.string()), + ("public_orgs", pa.list_(pa.string())), + ("visible_authored_pr_count", pa.int64()), + ("merged_pr_count", pa.int64()), + ("closed_unmerged_pr_count", pa.int64()), + ("open_pr_count", pa.int64()), + ("merged_pr_rate", pa.float64()), + ("closed_unmerged_pr_rate", pa.float64()), + ("still_open_pr_rate", pa.float64()), + ("distinct_repos_with_authored_prs", pa.int64()), + ("distinct_repos_with_open_prs", pa.int64()), + ("fetch_error", pa.string()), + ] + ), +} + + +def _tmp_path(path: Path) -> Path: + path.parent.mkdir(parents=True, exist_ok=True) + fd, raw_path = tempfile.mkstemp(prefix=f".{path.name}.", suffix=".tmp", dir=path.parent) + os.close(fd) + return Path(raw_path) + + +def write_parquet(rows: list[dict[str, Any]], path: Path, table_name: str) -> None: + schema = SCHEMAS[table_name] + table = pa.Table.from_pylist(rows, schema=schema) + tmp_path = _tmp_path(path) + try: + pq.write_table(table, tmp_path) + tmp_path.replace(path) + finally: + if tmp_path.exists(): + tmp_path.unlink() + + +def write_json(data: Any, path: Path) -> None: + tmp_path = _tmp_path(path) + try: + tmp_path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8") + tmp_path.replace(path) + finally: + if tmp_path.exists(): + tmp_path.unlink() + + +def read_json(path: Path) -> Any: + return json.loads(path.read_text(encoding="utf-8")) + + +def write_text(content: str, path: Path) -> None: + tmp_path = _tmp_path(path) + try: + tmp_path.write_text(content, encoding="utf-8") + tmp_path.replace(path) + finally: + if tmp_path.exists(): + tmp_path.unlink() + + +def read_parquet_rows(path: Path) -> list[dict[str, Any]]: + if not path.exists(): + return [] + return pq.read_table(path).to_pylist() diff --git a/src/slop_farmer/data/search_duckdb.py b/src/slop_farmer/data/search_duckdb.py new file mode 100644 index 0000000000000000000000000000000000000000..0f24acf5ea2eaa26a2683ac491994a315754b7eb --- /dev/null +++ b/src/slop_farmer/data/search_duckdb.py @@ -0,0 +1,765 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +import duckdb + +TABLE_COLUMNS: dict[str, tuple[str, ...]] = { + "pr_search_runs": ( + "id", + "repo", + "snapshot_id", + "snapshot_dir", + "source_type", + "hf_repo_id", + "hf_revision", + "started_at", + "finished_at", + "status", + "settings_json", + "notes", + ), + "pr_search_active_run": ( + "repo", + "run_id", + "activated_at", + ), + "pr_search_documents": ( + "run_id", + "repo", + "pr_number", + "github_id", + "author_login", + "state", + "draft", + "merged", + "title", + "base_ref", + "created_at", + "updated_at", + "merged_at", + "additions", + "deletions", + "changed_files", + "comments_count", + "review_comments_count", + "html_url", + ), + "pr_search_contributors": ( + "run_id", + "repo", + "snapshot_id", + "report_generated_at", + "window_days", + "author_login", + "name", + "profile_url", + "repo_pull_requests_url", + "repo_issues_url", + "repo_first_seen_at", + "repo_last_seen_at", + "repo_primary_artifact_count", + "repo_artifact_count", + "snapshot_issue_count", + "snapshot_pr_count", + "snapshot_comment_count", + "snapshot_review_count", + "snapshot_review_comment_count", + "repo_association", + "new_to_repo", + "first_seen_in_snapshot", + "report_reason", + "account_age_days", + "young_account", 
+ "follow_through_score", + "breadth_score", + "automation_risk_signal", + "heuristic_note", + "public_orgs_json", + "visible_authored_pr_count", + "merged_pr_count", + "closed_unmerged_pr_count", + "open_pr_count", + "merged_pr_rate", + "closed_unmerged_pr_rate", + "still_open_pr_rate", + "distinct_repos_with_authored_prs", + "distinct_repos_with_open_prs", + "fetch_error", + ), + "pr_scope_features": ( + "run_id", + "repo", + "pr_number", + "feature_version", + "total_changed_lines", + "file_count", + "directory_count", + "dominant_dir_share", + "filenames_json", + "directories_json", + "vector_json", + "computed_at", + ), + "pr_scope_run_artifacts": ( + "run_id", + "repo", + "feature_version", + "idf_json", + "computed_at", + ), + "pr_scope_neighbors": ( + "run_id", + "repo", + "left_pr_number", + "right_pr_number", + "rank_from_left", + "rank_from_right", + "similarity", + "content_similarity", + "size_similarity", + "breadth_similarity", + "concentration_similarity", + "shared_filenames_json", + "shared_directories_json", + "created_at", + ), + "pr_scope_clusters": ( + "run_id", + "repo", + "cluster_id", + "representative_pr_number", + "cluster_size", + "average_similarity", + "summary", + "shared_filenames_json", + "shared_directories_json", + "created_at", + ), + "pr_scope_cluster_members": ( + "run_id", + "repo", + "cluster_id", + "pr_number", + "member_role", + ), + "pr_scope_cluster_candidates": ( + "run_id", + "repo", + "pr_number", + "cluster_id", + "candidate_rank", + "candidate_score", + "matched_member_count", + "best_member_pr_number", + "max_member_similarity", + "avg_top_member_similarity", + "evidence_json", + "assigned", + ), +} + + +SCHEMA_SQL = """ +CREATE TABLE IF NOT EXISTS pr_search_runs ( + id VARCHAR, + repo VARCHAR, + snapshot_id VARCHAR, + snapshot_dir VARCHAR, + source_type VARCHAR, + hf_repo_id VARCHAR, + hf_revision VARCHAR, + started_at VARCHAR, + finished_at VARCHAR, + status VARCHAR, + settings_json VARCHAR, + notes VARCHAR +); +CREATE TABLE IF NOT EXISTS pr_search_active_run ( + repo VARCHAR, + run_id VARCHAR, + activated_at VARCHAR +); +CREATE TABLE IF NOT EXISTS pr_search_documents ( + run_id VARCHAR, + repo VARCHAR, + pr_number BIGINT, + github_id BIGINT, + author_login VARCHAR, + state VARCHAR, + draft BOOLEAN, + merged BOOLEAN, + title VARCHAR, + base_ref VARCHAR, + created_at VARCHAR, + updated_at VARCHAR, + merged_at VARCHAR, + additions BIGINT, + deletions BIGINT, + changed_files BIGINT, + comments_count BIGINT, + review_comments_count BIGINT, + html_url VARCHAR +); +CREATE TABLE IF NOT EXISTS pr_search_contributors ( + run_id VARCHAR, + repo VARCHAR, + snapshot_id VARCHAR, + report_generated_at VARCHAR, + window_days BIGINT, + author_login VARCHAR, + name VARCHAR, + profile_url VARCHAR, + repo_pull_requests_url VARCHAR, + repo_issues_url VARCHAR, + repo_first_seen_at VARCHAR, + repo_last_seen_at VARCHAR, + repo_primary_artifact_count BIGINT, + repo_artifact_count BIGINT, + snapshot_issue_count BIGINT, + snapshot_pr_count BIGINT, + snapshot_comment_count BIGINT, + snapshot_review_count BIGINT, + snapshot_review_comment_count BIGINT, + repo_association VARCHAR, + new_to_repo BOOLEAN, + first_seen_in_snapshot BOOLEAN, + report_reason VARCHAR, + account_age_days BIGINT, + young_account BOOLEAN, + follow_through_score VARCHAR, + breadth_score VARCHAR, + automation_risk_signal VARCHAR, + heuristic_note VARCHAR, + public_orgs_json VARCHAR, + visible_authored_pr_count BIGINT, + merged_pr_count BIGINT, + closed_unmerged_pr_count BIGINT, + open_pr_count 
BIGINT, + merged_pr_rate DOUBLE, + closed_unmerged_pr_rate DOUBLE, + still_open_pr_rate DOUBLE, + distinct_repos_with_authored_prs BIGINT, + distinct_repos_with_open_prs BIGINT, + fetch_error VARCHAR +); +CREATE TABLE IF NOT EXISTS pr_scope_features ( + run_id VARCHAR, + repo VARCHAR, + pr_number BIGINT, + feature_version VARCHAR, + total_changed_lines BIGINT, + file_count BIGINT, + directory_count BIGINT, + dominant_dir_share DOUBLE, + filenames_json VARCHAR, + directories_json VARCHAR, + vector_json VARCHAR, + computed_at VARCHAR +); +CREATE TABLE IF NOT EXISTS pr_scope_run_artifacts ( + run_id VARCHAR, + repo VARCHAR, + feature_version VARCHAR, + idf_json VARCHAR, + computed_at VARCHAR +); +CREATE TABLE IF NOT EXISTS pr_scope_neighbors ( + run_id VARCHAR, + repo VARCHAR, + left_pr_number BIGINT, + right_pr_number BIGINT, + rank_from_left BIGINT, + rank_from_right BIGINT, + similarity DOUBLE, + content_similarity DOUBLE, + size_similarity DOUBLE, + breadth_similarity DOUBLE, + concentration_similarity DOUBLE, + shared_filenames_json VARCHAR, + shared_directories_json VARCHAR, + created_at VARCHAR +); +CREATE TABLE IF NOT EXISTS pr_scope_clusters ( + run_id VARCHAR, + repo VARCHAR, + cluster_id VARCHAR, + representative_pr_number BIGINT, + cluster_size BIGINT, + average_similarity DOUBLE, + summary VARCHAR, + shared_filenames_json VARCHAR, + shared_directories_json VARCHAR, + created_at VARCHAR +); +CREATE TABLE IF NOT EXISTS pr_scope_cluster_members ( + run_id VARCHAR, + repo VARCHAR, + cluster_id VARCHAR, + pr_number BIGINT, + member_role VARCHAR +); +CREATE TABLE IF NOT EXISTS pr_scope_cluster_candidates ( + run_id VARCHAR, + repo VARCHAR, + pr_number BIGINT, + cluster_id VARCHAR, + candidate_rank BIGINT, + candidate_score DOUBLE, + matched_member_count BIGINT, + best_member_pr_number BIGINT, + max_member_similarity DOUBLE, + avg_top_member_similarity DOUBLE, + evidence_json VARCHAR, + assigned BOOLEAN +); +CREATE INDEX IF NOT EXISTS idx_pr_search_active_run_repo ON pr_search_active_run (repo); +CREATE INDEX IF NOT EXISTS idx_pr_search_runs_repo_status ON pr_search_runs (repo, status); +CREATE INDEX IF NOT EXISTS idx_pr_search_documents_run_pr ON pr_search_documents (run_id, pr_number); +CREATE INDEX IF NOT EXISTS idx_pr_search_documents_run_author ON pr_search_documents (run_id, author_login); +CREATE INDEX IF NOT EXISTS idx_pr_search_contributors_run_author ON pr_search_contributors (run_id, author_login); +CREATE INDEX IF NOT EXISTS idx_pr_scope_features_run_pr ON pr_scope_features (run_id, pr_number); +CREATE INDEX IF NOT EXISTS idx_pr_scope_run_artifacts_run ON pr_scope_run_artifacts (run_id); +CREATE INDEX IF NOT EXISTS idx_pr_scope_neighbors_run_left ON pr_scope_neighbors (run_id, left_pr_number); +CREATE INDEX IF NOT EXISTS idx_pr_scope_neighbors_run_right ON pr_scope_neighbors (run_id, right_pr_number); +CREATE INDEX IF NOT EXISTS idx_pr_scope_clusters_run_cluster ON pr_scope_clusters (run_id, cluster_id); +CREATE INDEX IF NOT EXISTS idx_pr_scope_cluster_members_run_pr ON pr_scope_cluster_members (run_id, pr_number); +CREATE INDEX IF NOT EXISTS idx_pr_scope_cluster_candidates_run_pr ON pr_scope_cluster_candidates (run_id, pr_number); +""" + + +def connect_pr_search_db(path: Path, *, read_only: bool = False) -> duckdb.DuckDBPyConnection: + resolved = path.resolve() + if read_only and not resolved.exists(): + raise FileNotFoundError(f"PR search database does not exist: {resolved}") + if not read_only: + resolved.parent.mkdir(parents=True, exist_ok=True) + connection = 
duckdb.connect(str(resolved), read_only=read_only) + if not read_only: + ensure_pr_search_schema(connection) + return connection + + +def ensure_pr_search_schema(connection: duckdb.DuckDBPyConnection) -> None: + connection.execute(SCHEMA_SQL) + connection.execute( + "ALTER TABLE pr_search_documents ADD COLUMN IF NOT EXISTS author_login VARCHAR" + ) + + +def insert_rows( + connection: duckdb.DuckDBPyConnection, + table_name: str, + rows: list[dict[str, Any]], +) -> None: + if not rows: + return + columns = TABLE_COLUMNS[table_name] + placeholders = ", ".join("?" for _ in columns) + column_sql = ", ".join(columns) + values = [tuple(_db_value(row.get(column)) for column in columns) for row in rows] + connection.executemany( + f"INSERT INTO {table_name} ({column_sql}) VALUES ({placeholders})", + values, + ) + + +def update_run_status( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + status: str, + finished_at: str | None = None, + notes: str | None = None, +) -> None: + connection.execute( + """ + UPDATE pr_search_runs + SET status = ?, finished_at = COALESCE(?, finished_at), notes = COALESCE(?, notes) + WHERE id = ? + """, + [status, finished_at, notes, run_id], + ) + + +def replace_active_run( + connection: duckdb.DuckDBPyConnection, + *, + repo: str, + run_id: str, + activated_at: str, +) -> str | None: + previous = fetch_one( + connection, + "SELECT run_id FROM pr_search_active_run WHERE repo = ?", + [repo], + ) + connection.execute("DELETE FROM pr_search_active_run WHERE repo = ?", [repo]) + connection.execute( + "INSERT INTO pr_search_active_run (repo, run_id, activated_at) VALUES (?, ?, ?)", + [repo, run_id, activated_at], + ) + previous_run_id = None if previous is None else str(previous["run_id"]) + if previous_run_id is not None and previous_run_id != run_id: + connection.execute( + "UPDATE pr_search_runs SET status = 'superseded' WHERE id = ? AND status = 'succeeded'", + [previous_run_id], + ) + return previous_run_id + + +def resolve_active_run( + connection: duckdb.DuckDBPyConnection, + *, + repo: str | None = None, +) -> dict[str, Any]: + if repo is None: + active_repos = fetch_rows( + connection, + "SELECT repo FROM pr_search_active_run ORDER BY repo", + ) + if not active_repos: + raise ValueError("No active PR search run found.") + if len(active_repos) > 1: + raise ValueError("Multiple active repos found; pass --repo.") + repo = str(active_repos[0]["repo"]) + row = fetch_one( + connection, + """ + SELECT r.* + FROM pr_search_runs AS r + JOIN pr_search_active_run AS a + ON a.run_id = r.id AND a.repo = r.repo + WHERE a.repo = ? 
+ """, + [repo], + ) + if row is None: + raise ValueError(f"No active PR search run found for repo {repo!r}.") + return row + + +def get_run_counts(connection: duckdb.DuckDBPyConnection, *, run_id: str) -> dict[str, int]: + return { + "documents": _count(connection, "pr_search_documents", run_id), + "contributors": _count(connection, "pr_search_contributors", run_id), + "features": _count(connection, "pr_scope_features", run_id), + "run_artifacts": _count(connection, "pr_scope_run_artifacts", run_id), + "neighbors": _count(connection, "pr_scope_neighbors", run_id), + "clusters": _count(connection, "pr_scope_clusters", run_id), + "cluster_members": _count(connection, "pr_scope_cluster_members", run_id), + "cluster_candidates": _count(connection, "pr_scope_cluster_candidates", run_id), + } + + +def get_document( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + pr_number: int, +) -> dict[str, Any] | None: + return fetch_one( + connection, + "SELECT * FROM pr_search_documents WHERE run_id = ? AND pr_number = ?", + [run_id, pr_number], + ) + + +def get_contributor( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + author_login: str, +) -> dict[str, Any] | None: + return fetch_one( + connection, + """ + SELECT * + FROM pr_search_contributors + WHERE run_id = ? AND lower(author_login) = lower(?) + """, + [run_id, author_login], + ) + + +def get_contributor_pulls( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + author_login: str, + limit: int, +) -> list[dict[str, Any]]: + return fetch_rows( + connection, + """ + SELECT + pr_number, + github_id, + author_login, + state, + draft, + merged, + title, + base_ref, + created_at, + updated_at, + merged_at, + additions, + deletions, + changed_files, + comments_count, + review_comments_count, + html_url + FROM pr_search_documents + WHERE run_id = ? AND lower(author_login) = lower(?) + ORDER BY updated_at DESC NULLS LAST, pr_number DESC + LIMIT ? + """, + [run_id, author_login, limit], + ) + + +def get_feature( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + pr_number: int, +) -> dict[str, Any] | None: + return fetch_one( + connection, + "SELECT * FROM pr_scope_features WHERE run_id = ? AND pr_number = ?", + [run_id, pr_number], + ) + + +def get_scope_run_artifact( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, +) -> dict[str, Any] | None: + try: + return fetch_one( + connection, + """ + SELECT * + FROM pr_scope_run_artifacts + WHERE run_id = ? + """, + [run_id], + ) + except duckdb.Error: + return None + + +def get_similar_pr_rows( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + pr_number: int, + limit: int, +) -> list[dict[str, Any]]: + return fetch_rows( + connection, + """ + SELECT + CASE WHEN left_pr_number = ? THEN right_pr_number ELSE left_pr_number END AS neighbor_pr_number, + CASE WHEN left_pr_number = ? THEN rank_from_left ELSE rank_from_right END AS neighbor_rank, + similarity, + content_similarity, + size_similarity, + breadth_similarity, + concentration_similarity, + shared_filenames_json, + shared_directories_json + FROM pr_scope_neighbors + WHERE run_id = ? AND (? = left_pr_number OR ? = right_pr_number) + ORDER BY neighbor_rank IS NULL, neighbor_rank, similarity DESC, neighbor_pr_number + LIMIT ? 
+ """, + [pr_number, pr_number, run_id, pr_number, pr_number, limit], + ) + + +def get_candidate_cluster_rows( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + pr_number: int, + limit: int, +) -> list[dict[str, Any]]: + return fetch_rows( + connection, + """ + SELECT + c.cluster_id, + c.candidate_rank, + c.candidate_score, + c.matched_member_count, + c.best_member_pr_number, + c.max_member_similarity, + c.avg_top_member_similarity, + c.evidence_json, + c.assigned, + cl.representative_pr_number, + cl.cluster_size, + cl.average_similarity, + cl.summary, + cl.shared_filenames_json, + cl.shared_directories_json, + d.title AS representative_title + FROM pr_scope_cluster_candidates AS c + JOIN pr_scope_clusters AS cl + ON cl.run_id = c.run_id AND cl.cluster_id = c.cluster_id + LEFT JOIN pr_search_documents AS d + ON d.run_id = cl.run_id AND d.pr_number = cl.representative_pr_number + WHERE c.run_id = ? AND c.pr_number = ? + ORDER BY c.candidate_rank, c.candidate_score DESC, c.cluster_id + LIMIT ? + """, + [run_id, pr_number, limit], + ) + + +def get_cluster( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + cluster_id: str, +) -> dict[str, Any] | None: + return fetch_one( + connection, + "SELECT * FROM pr_scope_clusters WHERE run_id = ? AND cluster_id = ?", + [run_id, cluster_id], + ) + + +def get_cluster_members( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + cluster_id: str, +) -> list[dict[str, Any]]: + return fetch_rows( + connection, + """ + SELECT + m.pr_number, + m.member_role, + d.title, + d.html_url, + d.state, + d.draft + FROM pr_scope_cluster_members AS m + LEFT JOIN pr_search_documents AS d + ON d.run_id = m.run_id AND d.pr_number = m.pr_number + WHERE m.run_id = ? AND m.cluster_id = ? + ORDER BY m.member_role != 'representative', m.pr_number + """, + [run_id, cluster_id], + ) + + +def get_cluster_ids_for_prs( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + pr_numbers: list[int], +) -> dict[int, list[str]]: + if not pr_numbers: + return {} + placeholders = ", ".join("?" for _ in pr_numbers) + rows = fetch_rows( + connection, + f""" + SELECT pr_number, cluster_id + FROM pr_scope_cluster_members + WHERE run_id = ? AND pr_number IN ({placeholders}) + ORDER BY pr_number, cluster_id + """, + [run_id, *pr_numbers], + ) + result: dict[int, list[str]] = {} + for row in rows: + result.setdefault(int(row["pr_number"]), []).append(str(row["cluster_id"])) + return result + + +def get_shared_cluster_ids( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + left_pr_number: int, + right_pr_number: int, +) -> list[str]: + rows = fetch_rows( + connection, + """ + SELECT left_members.cluster_id + FROM pr_scope_cluster_members AS left_members + JOIN pr_scope_cluster_members AS right_members + ON right_members.run_id = left_members.run_id + AND right_members.cluster_id = left_members.cluster_id + WHERE left_members.run_id = ? + AND left_members.pr_number = ? + AND right_members.pr_number = ? + ORDER BY left_members.cluster_id + """, + [run_id, left_pr_number, right_pr_number], + ) + return [str(row["cluster_id"]) for row in rows] + + +def get_pair_neighbor_row( + connection: duckdb.DuckDBPyConnection, + *, + run_id: str, + left_pr_number: int, + right_pr_number: int, +) -> dict[str, Any] | None: + canonical_left = min(left_pr_number, right_pr_number) + canonical_right = max(left_pr_number, right_pr_number) + return fetch_one( + connection, + """ + SELECT * + FROM pr_scope_neighbors + WHERE run_id = ? AND left_pr_number = ? 
AND right_pr_number = ? + """, + [run_id, canonical_left, canonical_right], + ) + + +def fetch_rows( + connection: duckdb.DuckDBPyConnection, + sql: str, + parameters: list[Any] | tuple[Any, ...] | None = None, +) -> list[dict[str, Any]]: + cursor = connection.execute(sql, parameters or []) + columns = [column[0] for column in cursor.description] + return [dict(zip(columns, row, strict=False)) for row in cursor.fetchall()] + + +def fetch_one( + connection: duckdb.DuckDBPyConnection, + sql: str, + parameters: list[Any] | tuple[Any, ...] | None = None, +) -> dict[str, Any] | None: + rows = fetch_rows(connection, sql, parameters) + return rows[0] if rows else None + + +def _count(connection: duckdb.DuckDBPyConnection, table_name: str, run_id: str) -> int: + row = fetch_one( + connection, + f"SELECT COUNT(*) AS row_count FROM {table_name} WHERE run_id = ?", + [run_id], + ) + return 0 if row is None else int(row["row_count"]) + + +def _db_value(value: Any) -> Any: + if isinstance(value, (dict, list)): + return json.dumps(value, sort_keys=True) + return value diff --git a/src/slop_farmer/data/snapshot_materialize.py b/src/slop_farmer/data/snapshot_materialize.py new file mode 100644 index 0000000000000000000000000000000000000000..f5e87626b1baafba4ac5c37ae996932ad3942bc1 --- /dev/null +++ b/src/slop_farmer/data/snapshot_materialize.py @@ -0,0 +1,539 @@ +from __future__ import annotations + +import json +import shutil +import urllib.parse +import urllib.request +from datetime import UTC, datetime +from pathlib import Path, PurePosixPath +from typing import Any + +from huggingface_hub import HfApi, hf_hub_download + +from slop_farmer.data.http import urlopen_with_retry +from slop_farmer.data.parquet_io import read_json, write_text +from slop_farmer.data.snapshot_paths import ( + CONTRIBUTOR_ARTIFACT_FILENAMES, + CURRENT_ANALYSIS_MANIFEST_PATH, + LEGACY_ANALYSIS_FILENAMES, + PR_SCOPE_CLUSTERS_FILENAME, + RAW_TABLE_FILENAMES, + README_FILENAME, + ROOT_MANIFEST_FILENAME, + SNAPSHOTS_LATEST_PATH, + STATE_WATERMARK_PATH, + load_archived_analysis_run_manifest, + load_current_analysis_manifest, + repo_relative_path_to_local, +) + + +def materialize_hf_dataset_snapshot( + *, + repo_id: str, + local_dir: Path, + revision: str | None = None, +) -> Path: + info = _hf_dataset_info(repo_id=repo_id, revision=revision, files_metadata=True) + remote_paths = {sibling.rfilename for sibling in info.siblings} + resolved_revision = str(info.sha or revision or "main") + if SNAPSHOTS_LATEST_PATH in remote_paths: + return _materialize_hf_snapshot_repo_snapshot( + repo_id=repo_id, + local_dir=local_dir, + revision=resolved_revision, + requested_revision=revision, + hf_sha=info.sha, + remote_paths=remote_paths, + ) + if {"issues.parquet", "pull_requests.parquet"} <= remote_paths: + return _materialize_hf_root_snapshot( + repo_id=repo_id, + local_dir=local_dir, + revision=resolved_revision, + requested_revision=revision, + hf_sha=info.sha, + remote_paths=remote_paths, + ) + return _materialize_hf_dataset_viewer_snapshot( + repo_id=repo_id, + local_dir=local_dir, + revision=resolved_revision, + requested_revision=revision, + hf_sha=info.sha, + ) + + +def _materialize_hf_snapshot_repo_snapshot( + *, + repo_id: str, + local_dir: Path, + revision: str, + requested_revision: str | None, + hf_sha: str | None, + remote_paths: set[str], +) -> Path: + local_dir.mkdir(parents=True, exist_ok=True) + latest_download = Path( + hf_hub_download( + repo_id=repo_id, + repo_type="dataset", + filename=SNAPSHOTS_LATEST_PATH, + 
revision=revision, + ) + ) + latest_payload = json.loads(latest_download.read_text(encoding="utf-8")) + downloaded_files: set[str] = set() + _copy_downloaded_file( + latest_download, repo_relative_path_to_local(local_dir, SNAPSHOTS_LATEST_PATH) + ) + downloaded_files.add(SNAPSHOTS_LATEST_PATH) + + for filename in ( + *RAW_TABLE_FILENAMES, + ROOT_MANIFEST_FILENAME, + PR_SCOPE_CLUSTERS_FILENAME, + *CONTRIBUTOR_ARTIFACT_FILENAMES, + *LEGACY_ANALYSIS_FILENAMES, + ): + downloaded = _download_first_available_hf_file( + repo_id=repo_id, + revision=revision, + filenames=_hf_latest_snapshot_candidates(latest_payload, filename), + ) + if downloaded is None: + continue + _copy_downloaded_file(downloaded, local_dir / filename) + downloaded_files.add(filename) + + if STATE_WATERMARK_PATH in remote_paths: + _download_repo_file( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + repo_path=STATE_WATERMARK_PATH, + downloaded_files=downloaded_files, + ) + + _download_analysis_state_files( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + remote_paths=remote_paths, + downloaded_files=downloaded_files, + ) + + _download_published_analysis_files( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + remote_paths=remote_paths, + downloaded_files=downloaded_files, + ) + + _download_repo_file( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + repo_path=README_FILENAME, + downloaded_files=downloaded_files, + required=False, + ) + + manifest = ( + read_json(local_dir / ROOT_MANIFEST_FILENAME) + if (local_dir / ROOT_MANIFEST_FILENAME).exists() + else {} + ) + manifest.setdefault("repo", _infer_repo_from_materialized_snapshot(local_dir)) + manifest.setdefault( + "snapshot_id", + str(latest_payload.get("latest_snapshot_id") or hf_sha or local_dir.name), + ) + manifest.update( + { + "source_type": "hf_snapshot_repo", + "hf_repo_id": repo_id, + "hf_revision": requested_revision, + "hf_resolved_revision": revision, + "hf_sha": hf_sha, + "materialized_at": _iso_now(), + "downloaded_files": sorted(downloaded_files), + "hf_latest_pointer": latest_payload, + } + ) + write_text(json.dumps(manifest, indent=2) + "\n", local_dir / ROOT_MANIFEST_FILENAME) + return local_dir + + +def _materialize_hf_root_snapshot( + *, + repo_id: str, + local_dir: Path, + revision: str, + requested_revision: str | None, + hf_sha: str | None, + remote_paths: set[str], +) -> Path: + local_dir.mkdir(parents=True, exist_ok=True) + downloaded_files: set[str] = set() + for repo_path in ( + *RAW_TABLE_FILENAMES, + ROOT_MANIFEST_FILENAME, + PR_SCOPE_CLUSTERS_FILENAME, + *CONTRIBUTOR_ARTIFACT_FILENAMES, + *LEGACY_ANALYSIS_FILENAMES, + SNAPSHOTS_LATEST_PATH, + STATE_WATERMARK_PATH, + README_FILENAME, + ): + if repo_path not in remote_paths: + continue + _download_repo_file( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + repo_path=repo_path, + downloaded_files=downloaded_files, + ) + + _download_analysis_state_files( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + remote_paths=remote_paths, + downloaded_files=downloaded_files, + ) + + _download_published_analysis_files( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + remote_paths=remote_paths, + downloaded_files=downloaded_files, + ) + + manifest = ( + read_json(local_dir / ROOT_MANIFEST_FILENAME) + if (local_dir / ROOT_MANIFEST_FILENAME).exists() + else {} + ) + manifest.setdefault("repo", _infer_repo_from_materialized_snapshot(local_dir)) + manifest.setdefault("snapshot_id", hf_sha or local_dir.name) 
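+    # Record Hugging Face provenance (repo id, requested/resolved revision, sha) and the downloaded file list in the local manifest.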
+ manifest.update( + { + "source_type": "hf_root_snapshot", + "hf_repo_id": repo_id, + "hf_revision": requested_revision, + "hf_resolved_revision": revision, + "hf_sha": hf_sha, + "materialized_at": _iso_now(), + "downloaded_files": sorted(downloaded_files), + } + ) + write_text(json.dumps(manifest, indent=2) + "\n", local_dir / ROOT_MANIFEST_FILENAME) + return local_dir + + +def _materialize_hf_dataset_viewer_snapshot( + *, + repo_id: str, + local_dir: Path, + revision: str, + requested_revision: str | None, + hf_sha: str | None, +) -> Path: + local_dir.mkdir(parents=True, exist_ok=True) + downloaded_files: set[str] = set() + for index, url in enumerate(_hf_dataset_parquet_urls(repo_id, revision)): + temporary_path = local_dir / f"tmp-{index:04d}.parquet" + _download_url_to_path(url, temporary_path) + table_name = _parquet_table_name(temporary_path) + temporary_path.replace(local_dir / table_name) + downloaded_files.add(table_name) + + readme_path = hf_hub_download( + repo_id=repo_id, + repo_type="dataset", + filename=README_FILENAME, + revision=revision, + ) + shutil.copy2(readme_path, local_dir / README_FILENAME) + downloaded_files.add(README_FILENAME) + manifest = { + "repo": _infer_repo_from_materialized_snapshot(local_dir), + "snapshot_id": hf_sha or local_dir.name, + "source_type": "hf_dataset_viewer", + "hf_repo_id": repo_id, + "hf_revision": requested_revision, + "hf_resolved_revision": revision, + "hf_sha": hf_sha, + "materialized_at": _iso_now(), + "downloaded_files": sorted(downloaded_files), + } + write_text(json.dumps(manifest, indent=2) + "\n", local_dir / ROOT_MANIFEST_FILENAME) + return local_dir + + +def _download_published_analysis_files( + *, + repo_id: str, + revision: str, + local_dir: Path, + remote_paths: set[str], + downloaded_files: set[str], +) -> None: + if CURRENT_ANALYSIS_MANIFEST_PATH in remote_paths: + manifest_path = _download_repo_file( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + repo_path=CURRENT_ANALYSIS_MANIFEST_PATH, + downloaded_files=downloaded_files, + ) + current_manifest = load_current_analysis_manifest(manifest_path) + for repo_path in _manifest_artifact_paths(current_manifest, include_archived=True): + if repo_path not in remote_paths: + continue + _download_repo_file( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + repo_path=repo_path, + downloaded_files=downloaded_files, + ) + + for repo_path in sorted( + path for path in remote_paths if _is_archived_analysis_manifest_path(path) + ): + manifest_path = _download_repo_file( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + repo_path=repo_path, + downloaded_files=downloaded_files, + ) + archived_manifest = load_archived_analysis_run_manifest(manifest_path) + for artifact_path in _manifest_artifact_paths(archived_manifest, include_archived=False): + if artifact_path not in remote_paths: + continue + _download_repo_file( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + repo_path=artifact_path, + downloaded_files=downloaded_files, + ) + + +def _download_analysis_state_files( + *, + repo_id: str, + revision: str, + local_dir: Path, + remote_paths: set[str], + downloaded_files: set[str], +) -> None: + for repo_path in sorted( + path for path in remote_paths if PurePosixPath(path).parts[:1] == ("analysis-state",) + ): + _download_repo_file( + repo_id=repo_id, + revision=revision, + local_dir=local_dir, + repo_path=repo_path, + downloaded_files=downloaded_files, + ) + + +def _manifest_artifact_paths( + payload: dict[str, Any], + *, + 
include_archived: bool, +) -> list[str]: + paths = [ + str(value) for value in (payload.get("artifacts") or {}).values() if isinstance(value, str) + ] + if include_archived: + paths.extend( + str(value) + for value in (payload.get("archived_artifacts") or {}).values() + if isinstance(value, str) + ) + deduped: list[str] = [] + seen: set[str] = set() + for repo_path in paths: + normalized = repo_path.lstrip("./") + if not normalized or normalized in seen: + continue + seen.add(normalized) + deduped.append(normalized) + return deduped + + +def _is_archived_analysis_manifest_path(repo_path: str) -> bool: + parts = PurePosixPath(repo_path).parts + return ( + len(parts) == 5 + and parts[0] == "snapshots" + and parts[2] == "analysis-runs" + and parts[4] == ROOT_MANIFEST_FILENAME + ) + + +def _download_repo_file( + *, + repo_id: str, + revision: str, + local_dir: Path, + repo_path: str, + downloaded_files: set[str], + required: bool = True, +) -> Path: + try: + downloaded = Path( + hf_hub_download( + repo_id=repo_id, + repo_type="dataset", + filename=repo_path, + revision=revision, + ) + ) + except Exception: + if required: + raise + return local_dir / repo_path + destination = repo_relative_path_to_local(local_dir, repo_path) + _copy_downloaded_file(downloaded, destination) + downloaded_files.add(repo_path) + return destination + + +def _copy_downloaded_file(downloaded_path: Path, destination: Path) -> None: + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(downloaded_path, destination) + + +def _hf_dataset_info(repo_id: str, revision: str | None, *, files_metadata: bool) -> Any: + api = HfApi() + try: + return api.dataset_info(repo_id=repo_id, revision=revision, files_metadata=files_metadata) + except TypeError: + return api.dataset_info(repo_id=repo_id, revision=revision) + + +def _hf_dataset_parquet_urls(repo_id: str, revision: str | None = None) -> list[str]: + query = urllib.parse.urlencode({"revision": revision}) if revision else "" + api_url = ( + f"https://huggingface.co/api/datasets/{urllib.parse.quote(repo_id, safe='')}/parquet" + f"{f'?{query}' if query else ''}" + ) + with urlopen_with_retry(api_url, timeout=120, label=api_url) as response: + payload = json.loads(response.read().decode("utf-8")) + urls = payload.get("default", {}).get("train", []) + if not isinstance(urls, list) or not urls: + raise FileNotFoundError( + f"No parquet export URLs found for HF dataset {repo_id} at {api_url}" + ) + return [str(url) for url in urls] + + +def _download_first_available_hf_file( + *, + repo_id: str, + revision: str, + filenames: list[str], +) -> Path | None: + for filename in filenames: + try: + downloaded = Path( + hf_hub_download( + repo_id=repo_id, + repo_type="dataset", + filename=filename, + revision=revision, + ) + ) + except Exception: + continue + if downloaded.exists(): + return downloaded + return None + + +def _hf_latest_snapshot_candidates(latest_payload: dict[str, Any], filename: str) -> list[str]: + candidates: list[str] = [] + manifest_path = str(latest_payload.get("manifest_path") or "").strip("/") + snapshot_dir = str(latest_payload.get("snapshot_dir") or "").strip("/") + latest_snapshot_id = str(latest_payload.get("latest_snapshot_id") or "").strip() + archived_manifest_path = str(latest_payload.get("archived_manifest_path") or "").strip("/") + + if filename == ROOT_MANIFEST_FILENAME and manifest_path: + candidates.append(manifest_path) + if snapshot_dir and snapshot_dir not in {".", "/"}: + candidates.append(f"{snapshot_dir}/{filename}") + if filename 
== ROOT_MANIFEST_FILENAME and archived_manifest_path: + candidates.append(archived_manifest_path) + if manifest_path and "/" in manifest_path: + manifest_dir = manifest_path.rsplit("/", 1)[0] + candidates.append(f"{manifest_dir}/{filename}") + if latest_snapshot_id: + candidates.append(str(PurePosixPath("snapshots") / latest_snapshot_id / filename)) + candidates.append(filename) + + seen: set[str] = set() + deduped: list[str] = [] + for candidate in candidates: + normalized = candidate.lstrip("./") + if not normalized or normalized in seen: + continue + seen.add(normalized) + deduped.append(normalized) + return deduped + + +def _download_url_to_path(url: str, destination: Path) -> None: + destination.parent.mkdir(parents=True, exist_ok=True) + urllib.request.urlretrieve(url, destination) + + +def _parquet_table_name(path: Path) -> str: + import pyarrow.parquet as pq + + columns = set(pq.read_table(path).column_names) + if {"parent_kind", "issue_api_url", "body"} <= columns: + return "comments.parquet" + if {"event", "source_issue_number", "source_issue_url"} <= columns: + return "events.parquet" + if {"milestone_title", "comments_count"} <= columns and "merged_at" not in columns: + return "issues.parquet" + if {"link_type", "link_origin", "target_number"} <= columns: + return "links.parquet" + if {"pull_request_number", "filename", "blob_url", "patch"} <= columns: + return "pr_files.parquet" + if {"pull_request_number", "diff", "html_url", "api_url"} <= columns: + return "pr_diffs.parquet" + if {"merged_at", "head_ref", "base_ref"} <= columns: + return "pull_requests.parquet" + if {"review_id", "pull_request_api_url", "path"} <= columns: + return "review_comments.parquet" + if {"pull_request_number", "submitted_at"} <= columns and "review_id" not in columns: + return "reviews.parquet" + raise ValueError(f"Unrecognized HF parquet schema for {path.name}: {sorted(columns)}") + + +def _infer_repo_from_materialized_snapshot(local_dir: Path) -> str: + import pyarrow.parquet as pq + + for table_filename in RAW_TABLE_FILENAMES: + path = local_dir / table_filename + if not path.exists(): + continue + rows = pq.read_table(path).slice(0, 1).to_pylist() + if rows and rows[0].get("repo"): + return str(rows[0]["repo"]) + raise FileNotFoundError(f"Could not infer repo from materialized snapshot in {local_dir}") + + +def _iso_now() -> str: + return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") diff --git a/src/slop_farmer/data/snapshot_paths.py b/src/slop_farmer/data/snapshot_paths.py new file mode 100644 index 0000000000000000000000000000000000000000..57db00a9a35e9789d0ed5471769d8553d9abf2ef --- /dev/null +++ b/src/slop_farmer/data/snapshot_paths.py @@ -0,0 +1,430 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass +from pathlib import Path, PurePosixPath +from typing import Any + +from slop_farmer.data.parquet_io import read_json + +RAW_TABLE_FILENAMES: tuple[str, ...] = ( + "issues.parquet", + "pull_requests.parquet", + "comments.parquet", + "reviews.parquet", + "review_comments.parquet", + "pr_files.parquet", + "pr_diffs.parquet", + "links.parquet", + "events.parquet", +) +VIEWER_SPLIT_FILENAMES: tuple[str, ...] 
= ( + "issue_comments.parquet", + "pr_comments.parquet", +) +ROOT_MANIFEST_FILENAME = "manifest.json" +README_FILENAME = "README.md" +STATE_WATERMARK_PATH = "state/watermark.json" +SNAPSHOTS_LATEST_PATH = "snapshots/latest.json" +PR_SCOPE_CLUSTERS_FILENAME = "pr-scope-clusters.json" +NEW_CONTRIBUTORS_PARQUET_FILENAME = "new_contributors.parquet" +NEW_CONTRIBUTORS_REPORT_JSON_FILENAME = "new-contributors-report.json" +NEW_CONTRIBUTORS_REPORT_MARKDOWN_FILENAME = "new-contributors-report.md" +CONTRIBUTOR_ARTIFACT_FILENAMES: tuple[str, ...] = ( + NEW_CONTRIBUTORS_PARQUET_FILENAME, + NEW_CONTRIBUTORS_REPORT_JSON_FILENAME, + NEW_CONTRIBUTORS_REPORT_MARKDOWN_FILENAME, +) +ANALYSIS_REPORT_FILENAME_BY_VARIANT: dict[str, str] = { + "deterministic": "analysis-report.json", + "hybrid": "analysis-report-hybrid.json", +} +HYBRID_ANALYSIS_REVIEWS_FILENAME = "analysis-report-hybrid.llm-reviews.json" +LEGACY_ANALYSIS_FILENAMES: tuple[str, ...] = ( + ANALYSIS_REPORT_FILENAME_BY_VARIANT["deterministic"], + ANALYSIS_REPORT_FILENAME_BY_VARIANT["hybrid"], + HYBRID_ANALYSIS_REVIEWS_FILENAME, +) +CURRENT_ANALYSIS_DIR = PurePosixPath("analysis/current") +CURRENT_ANALYSIS_MANIFEST_PATH = str(CURRENT_ANALYSIS_DIR / ROOT_MANIFEST_FILENAME) +ANALYSIS_MANIFEST_SCHEMA_VERSION = 1 + + +@dataclass(frozen=True, slots=True) +class ResolvedAnalysisReportPath: + path: Path + variant: str + source: str + snapshot_id: str | None = None + analysis_id: str | None = None + + +def default_hf_materialize_dir(output_dir: Path, repo_id: str, revision: str | None) -> Path: + suffix = repo_id.replace("/", "--") + if revision: + suffix = f"{suffix}--{revision.replace('/', '--')}" + return output_dir.resolve() / "snapshots" / f"hf-{suffix}" + + +def repo_relative_path_to_local(base_dir: Path, repo_relative_path: str) -> Path: + return base_dir.joinpath(*PurePosixPath(repo_relative_path).parts) + + +def snapshot_artifact_path(snapshot_id: str, filename: str) -> str: + return str(PurePosixPath("snapshots") / snapshot_id / filename) + + +def archived_snapshot_manifest_path(snapshot_id: str) -> str: + return snapshot_artifact_path(snapshot_id, ROOT_MANIFEST_FILENAME) + + +def analysis_run_artifact_path(snapshot_id: str, analysis_id: str, filename: str) -> str: + return str(PurePosixPath("snapshots") / snapshot_id / "analysis-runs" / analysis_id / filename) + + +def analysis_run_manifest_path(snapshot_id: str, analysis_id: str) -> str: + return analysis_run_artifact_path(snapshot_id, analysis_id, ROOT_MANIFEST_FILENAME) + + +def current_analysis_artifact_path(filename: str) -> str: + return str(CURRENT_ANALYSIS_DIR / filename) + + +def repo_key(repo_slug: str) -> str: + return _path_key(repo_slug) + + +def model_key(model: str) -> str: + return _path_key(model) + + +def build_current_analysis_manifest( + *, + repo: str, + snapshot_id: str, + analysis_id: str, + variant: str, + channel: str, + model: str | None, + published_at: str, + include_hybrid_reviews: bool, +) -> dict[str, Any]: + artifacts = { + "hybrid": current_analysis_artifact_path(ANALYSIS_REPORT_FILENAME_BY_VARIANT["hybrid"]), + } + archived_artifacts = { + "hybrid": analysis_run_artifact_path( + snapshot_id, + analysis_id, + ANALYSIS_REPORT_FILENAME_BY_VARIANT["hybrid"], + ) + } + if include_hybrid_reviews: + artifacts["hybrid_reviews"] = current_analysis_artifact_path( + HYBRID_ANALYSIS_REVIEWS_FILENAME + ) + archived_artifacts["hybrid_reviews"] = analysis_run_artifact_path( + snapshot_id, + analysis_id, + HYBRID_ANALYSIS_REVIEWS_FILENAME, + ) + payload = { + "schema_version": 
ANALYSIS_MANIFEST_SCHEMA_VERSION, + "repo": repo, + "snapshot_id": snapshot_id, + "analysis_id": analysis_id, + "variant": variant, + "channel": channel, + "model": model, + "published_at": published_at, + "artifacts": artifacts, + "archived_artifacts": archived_artifacts, + } + return validate_current_analysis_manifest(payload) + + +def build_archived_analysis_run_manifest( + *, + repo: str, + snapshot_id: str, + analysis_id: str, + variant: str, + channel: str, + model: str | None, + published_at: str, + include_hybrid_reviews: bool, +) -> dict[str, Any]: + artifacts = { + "hybrid": analysis_run_artifact_path( + snapshot_id, + analysis_id, + ANALYSIS_REPORT_FILENAME_BY_VARIANT["hybrid"], + ) + } + if include_hybrid_reviews: + artifacts["hybrid_reviews"] = analysis_run_artifact_path( + snapshot_id, + analysis_id, + HYBRID_ANALYSIS_REVIEWS_FILENAME, + ) + payload = { + "schema_version": ANALYSIS_MANIFEST_SCHEMA_VERSION, + "repo": repo, + "snapshot_id": snapshot_id, + "analysis_id": analysis_id, + "variant": variant, + "channel": channel, + "model": model, + "published_at": published_at, + "artifacts": artifacts, + } + return validate_archived_analysis_run_manifest(payload) + + +def load_current_analysis_manifest(path: Path) -> dict[str, Any]: + payload = read_json(path) + if not isinstance(payload, dict): + raise ValueError(f"Current analysis manifest at {path} must contain a JSON object.") + return validate_current_analysis_manifest(payload) + + +def load_archived_analysis_run_manifest(path: Path) -> dict[str, Any]: + payload = read_json(path) + if not isinstance(payload, dict): + raise ValueError(f"Archived analysis manifest at {path} must contain a JSON object.") + return validate_archived_analysis_run_manifest(payload) + + +def resolve_default_dashboard_analysis_report( + snapshot_dir: Path, +) -> ResolvedAnalysisReportPath | None: + current = resolve_current_analysis_report(snapshot_dir) + if current is not None and _analysis_matches_snapshot(snapshot_dir, current): + return current + return resolve_snapshot_local_analysis_report(snapshot_dir, variant="auto") + + +def resolve_current_analysis_report( + snapshot_dir: Path, + *, + variant: str = "auto", +) -> ResolvedAnalysisReportPath | None: + normalized = _normalize_analysis_variant(variant) + manifest_path = repo_relative_path_to_local(snapshot_dir, CURRENT_ANALYSIS_MANIFEST_PATH) + if not manifest_path.exists(): + return None + manifest = load_current_analysis_manifest(manifest_path) + artifact_key = _analysis_artifact_key_for_variant(normalized, manifest_kind="current") + artifact_path = manifest.get("artifacts", {}).get(artifact_key) + if not isinstance(artifact_path, str) or not artifact_path: + message = ( + f"Published current analysis manifest does not provide the {normalized} artifact." + if normalized != "auto" + else "Published current analysis manifest does not provide the canonical hybrid artifact." + ) + raise ValueError(message) + report_path = repo_relative_path_to_local(snapshot_dir, artifact_path) + if not report_path.exists(): + raise ValueError( + f"Published current analysis artifact {artifact_path!r} is missing from the materialized snapshot." 
+ ) + return ResolvedAnalysisReportPath( + path=report_path, + variant="hybrid" if artifact_key == "hybrid" else normalized, + source="current", + snapshot_id=str(manifest["snapshot_id"]), + analysis_id=str(manifest["analysis_id"]), + ) + + +def resolve_snapshot_local_analysis_report( + snapshot_dir: Path, + *, + variant: str = "auto", +) -> ResolvedAnalysisReportPath | None: + normalized = _normalize_analysis_variant(variant) + if normalized == "auto": + hybrid_path = snapshot_dir / ANALYSIS_REPORT_FILENAME_BY_VARIANT["hybrid"] + if hybrid_path.exists(): + return ResolvedAnalysisReportPath( + path=hybrid_path, + variant="hybrid", + source="snapshot", + ) + deterministic_path = snapshot_dir / ANALYSIS_REPORT_FILENAME_BY_VARIANT["deterministic"] + if deterministic_path.exists(): + return ResolvedAnalysisReportPath( + path=deterministic_path, + variant="deterministic", + source="snapshot", + ) + return None + report_path = snapshot_dir / ANALYSIS_REPORT_FILENAME_BY_VARIANT[normalized] + if not report_path.exists(): + return None + return ResolvedAnalysisReportPath( + path=report_path, + variant=normalized, + source="snapshot", + ) + + +def validate_current_analysis_manifest(payload: dict[str, Any]) -> dict[str, Any]: + validated = _validate_analysis_manifest(payload, require_archived_artifacts=True) + archived_artifacts = _validate_artifacts( + dict(validated["archived_artifacts"]), + expected_prefix=analysis_run_artifact_path( + str(validated["snapshot_id"]), + str(validated["analysis_id"]), + "", + ), + ) + if set(archived_artifacts) != set(validated["artifacts"]): + raise ValueError("Current analysis manifest artifacts and archived_artifacts must match.") + validated["archived_artifacts"] = archived_artifacts + return validated + + +def validate_archived_analysis_run_manifest(payload: dict[str, Any]) -> dict[str, Any]: + return _validate_analysis_manifest(payload, require_archived_artifacts=False) + + +def load_latest_snapshot_pointer(snapshots_root: Path) -> Path | None: + resolved_snapshots_root = snapshots_root.resolve() + latest_path = resolved_snapshots_root / "latest.json" + if not latest_path.exists(): + return None + payload = read_json(latest_path) + snapshot_dir = payload.get("snapshot_dir") + if isinstance(snapshot_dir, str) and snapshot_dir: + path = Path(snapshot_dir) + if path.is_absolute(): + return path.resolve() + return (resolved_snapshots_root.parent / path).resolve() + return None + + +def resolve_snapshot_dir_from_output(output_dir: Path, snapshot_dir: Path | None) -> Path: + return resolve_snapshot_dir_from_snapshots_root( + output_dir.resolve() / "snapshots", snapshot_dir + ) + + +def resolve_snapshot_dir_from_snapshots_root( + snapshots_root: Path, + snapshot_dir: Path | None, +) -> Path: + if snapshot_dir is not None: + return snapshot_dir.resolve() + + resolved_snapshots_root = snapshots_root.resolve() + latest_path = resolved_snapshots_root / "latest.json" + latest_snapshot_dir = load_latest_snapshot_pointer(resolved_snapshots_root) + if latest_snapshot_dir is not None: + return latest_snapshot_dir + + snapshot_dirs = sorted(path for path in resolved_snapshots_root.glob("*") if path.is_dir()) + if snapshot_dirs: + return snapshot_dirs[-1].resolve() + raise FileNotFoundError(f"Could not resolve a snapshot directory from {latest_path}") + + +def _validate_analysis_manifest( + payload: dict[str, Any], + *, + require_archived_artifacts: bool, +) -> dict[str, Any]: + validated = {str(key): value for key, value in payload.items()} + if validated.get("schema_version") 
!= ANALYSIS_MANIFEST_SCHEMA_VERSION: + raise ValueError( + f"Unsupported analysis manifest schema version: {validated.get('schema_version')!r}" + ) + for field in ("repo", "snapshot_id", "analysis_id", "variant", "channel", "published_at"): + if not isinstance(validated.get(field), str) or not str(validated[field]).strip(): + raise ValueError(f"Analysis manifest field {field!r} must be a non-empty string.") + validated[field] = str(validated[field]).strip() + model = validated.get("model") + if model is not None and not isinstance(model, str): + raise ValueError("Analysis manifest field 'model' must be a string when present.") + artifacts = validated.get("artifacts") + if not isinstance(artifacts, dict): + raise ValueError("Analysis manifest field 'artifacts' must be an object.") + expected_prefix = ( + current_analysis_artifact_path("") + if require_archived_artifacts + else analysis_run_artifact_path( + str(validated["snapshot_id"]), + str(validated["analysis_id"]), + "", + ) + ) + validated["artifacts"] = _validate_artifacts(dict(artifacts), expected_prefix=expected_prefix) + if require_archived_artifacts: + archived_artifacts = validated.get("archived_artifacts") + if not isinstance(archived_artifacts, dict): + raise ValueError( + "Current analysis manifest field 'archived_artifacts' must be an object." + ) + validated["archived_artifacts"] = { + str(key): value for key, value in archived_artifacts.items() + } + return validated + + +def _validate_artifacts(artifacts: dict[str, Any], *, expected_prefix: str) -> dict[str, str]: + normalized = {str(key): value for key, value in artifacts.items()} + hybrid_path = normalized.get("hybrid") + if not isinstance(hybrid_path, str) or not hybrid_path: + raise ValueError("Analysis manifest must include artifacts.hybrid.") + validated = {"hybrid": hybrid_path} + hybrid_reviews_path = normalized.get("hybrid_reviews") + if hybrid_reviews_path is not None: + if not isinstance(hybrid_reviews_path, str) or not hybrid_reviews_path: + raise ValueError( + "Analysis manifest artifacts.hybrid_reviews must be a non-empty string." + ) + validated["hybrid_reviews"] = hybrid_reviews_path + for key, value in validated.items(): + if not value.startswith(expected_prefix): + raise ValueError( + f"Analysis manifest artifact {key!r} must live under {expected_prefix!r}, got {value!r}." + ) + return validated + + +def _path_key(value: str) -> str: + normalized = re.sub(r"[^a-z0-9]+", "-", value.strip().lower()) + normalized = re.sub(r"-+", "-", normalized).strip("-") + if not normalized: + raise ValueError("Expected a non-empty path key value.") + return normalized + + +def _analysis_matches_snapshot( + snapshot_dir: Path, + analysis_path: ResolvedAnalysisReportPath, +) -> bool: + snapshot_manifest_path = snapshot_dir / ROOT_MANIFEST_FILENAME + if snapshot_manifest_path.exists(): + snapshot_manifest = read_json(snapshot_manifest_path) + snapshot_id = snapshot_manifest.get("snapshot_id") + if snapshot_id is not None: + return str(snapshot_id) == str(analysis_path.snapshot_id) + return snapshot_dir.name == str(analysis_path.snapshot_id) + + +def _normalize_analysis_variant(variant: str) -> str: + normalized = variant.strip().lower() + if normalized not in {"auto", "deterministic", "hybrid"}: + raise ValueError( + f"Unsupported analysis variant {variant!r}; expected auto, hybrid, or deterministic." 
+ ) + return normalized + + +def _analysis_artifact_key_for_variant(variant: str, *, manifest_kind: str) -> str: + if variant in {"auto", "hybrid"}: + return "hybrid" + raise ValueError( + f"Published {manifest_kind} analysis only serves canonical hybrid artifacts; requested {variant!r}." + ) diff --git a/src/slop_farmer/data/snapshot_source.py b/src/slop_farmer/data/snapshot_source.py new file mode 100644 index 0000000000000000000000000000000000000000..dd21ad5d4d725d2cc7261c9cc82f86e1747c7390 --- /dev/null +++ b/src/slop_farmer/data/snapshot_source.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from pathlib import Path + +from slop_farmer.data.snapshot_materialize import materialize_hf_dataset_snapshot +from slop_farmer.data.snapshot_paths import ( + default_hf_materialize_dir, + resolve_snapshot_dir_from_snapshots_root, +) + + +def resolve_snapshot_source_dir( + *, + snapshot_dir: Path | None, + local_snapshots_root: Path, + hf_repo_id: str | None, + hf_revision: str | None, + hf_materialize_dir: Path | None, + hf_output_dir: Path | None = None, +) -> Path: + if snapshot_dir is not None: + return snapshot_dir.resolve() + if hf_repo_id: + output_dir = (hf_output_dir or local_snapshots_root.parent).resolve() + return materialize_hf_dataset_snapshot( + repo_id=hf_repo_id, + local_dir=hf_materialize_dir + or default_hf_materialize_dir(output_dir, hf_repo_id, hf_revision), + revision=hf_revision, + ).resolve() + return resolve_snapshot_dir_from_snapshots_root(local_snapshots_root.resolve(), None) diff --git a/src/slop_farmer/reports/__init__.py b/src/slop_farmer/reports/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9d48db4f9f85e1752cf424c49ee18a6907c3f160 --- /dev/null +++ b/src/slop_farmer/reports/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/src/slop_farmer/reports/analysis.py b/src/slop_farmer/reports/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..cfa96054581106bb58c7f47b2b7525b759eaff56 --- /dev/null +++ b/src/slop_farmer/reports/analysis.py @@ -0,0 +1,3506 @@ +from __future__ import annotations + +import asyncio +import copy +import json +import os +import re +import shutil +import sys +from collections import Counter, defaultdict +from dataclasses import dataclass +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, Field +from rank_bm25 import BM25Okapi + +from slop_farmer.config import AnalysisOptions, MarkdownReportOptions +from slop_farmer.data.links import build_text_link_rows +from slop_farmer.data.parquet_io import read_json, read_parquet_rows, write_text +from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir +from slop_farmer.reports.analysis_cache import ( + HYBRID_REVIEW_CACHE_SCHEMA_VERSION, + PREPARED_REVIEW_UNIT_SCHEMA_VERSION, + HybridReviewCacheEntry, + HybridReviewCacheKey, + HybridReviewCacheManifest, + HybridReviewCacheStore, + HybridReviewSettingsFingerprint, + build_hybrid_review_cache_key, + hybrid_review_cache_dir, +) +from slop_farmer.reports.pr_heuristics import ( + build_template_cleanup_settings, + compile_cluster_suppression_rules, + strip_pull_request_template, + suppressed_pull_request_reasons, +) + +LINK_KEY_FIELDS = ( + "repo", + "source_type", + "source_number", + "source_github_id", + "target_owner", + "target_repo", + "target_number", + "link_type", + "link_origin", +) +STOPWORDS = { + "a", + "an", + "and", + "are", + "as", + "at", + "be", + "by", + "for", + 
"from", + "how", + "if", + "in", + "into", + "is", + "it", + "of", + "on", + "or", + "that", + "the", + "this", + "to", + "was", + "were", + "with", +} +TOKEN_PATTERN = re.compile(r"[a-z0-9_]+") +HUNK_HEADER_PATTERN = re.compile(r"^@@ -\d+(?:,\d+)? \+(?P\d+)(?:,(?P\d+))? @@") +LLM_PROVIDER_ENV_VARS = ( + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "GOOGLE_API_KEY", + "DEEPSEEK_API_KEY", +) +LLM_PACKET_CHARS_PER_TOKEN = 4 +LLM_MAX_INPUT_TOKENS = 60_000 +LLM_MAX_NODES_PER_PACKET = 48 +LLM_MAX_SOFT_PAIRS_PER_PACKET = 72 +LLM_MAX_DIFF_CHARS_PER_ITEM = 1_200 +LLM_MAX_FILENAMES_PER_ITEM = 16 +LLM_SKIP_EVALUATOR_ABOVE_TOKENS = 60_000 +LLM_OVERFLOW_POLICY = "truncate_then_skip" +LLM_SHARED_TARGET_MAX_NEIGHBORS_PER_PR = 3 +LLM_SHARED_TARGET_MAX_EXTRA_PAIRS_PER_TARGET = 18 +LLM_SHARED_TARGET_MIN_TEXT_JACCARD = 0.1 +CLUSTER_ANALYST_PROMPT_VERSION = "1.0" +CLUSTER_EVALUATOR_PROMPT_VERSION = "1.0" +CLUSTER_ANALYST_INSTRUCTION = ( + "You analyze clustered GitHub issues and pull requests for duplicate triage. " + "Return a short summary, confidence between 0 and 1, concise reasons for canonical issue/PR choices, " + "concise reasons for global best issue/PR suitability, and accept/reject verdicts for each soft edge candidate. " + "Only accept a soft edge when the two artifacts look like the same underlying bug or change. " + "Use titles, descriptions, explicit issue targets, changed filenames, and diff previews when available. " + "For pull requests, be strict: accept only when the PRs appear to fix the same concrete code-path problem and could plausibly be merged into one PR. " + "Do not merge PRs just because they mention the same tracking issue, touch the same broad subsystem, or both change documentation/tests." +) +CLUSTER_EVALUATOR_INSTRUCTION = ( + "You review the analyst output for precision. Accept only when the summary is grounded in the packet " + "and every soft-edge verdict is conservative. Reject if the analyst overstates evidence. " + "For pull-request pairs, reject if the two changes do not look mergeable into a single PR for the same bugfix." 
+) + + +class SoftEdgeVerdict(BaseModel): + left: str + right: str + accept: bool + reason: str + + +class ClusterAnalystResponse(BaseModel): + summary: str + confidence: float + canonical_issue_reason: str | None = None + canonical_pr_reason: str | None = None + best_issue_reason: str | None = None + best_pr_reason: str | None = None + soft_edge_verdicts: list[SoftEdgeVerdict] = Field(default_factory=list) + + +class ClusterEvaluatorResponse(BaseModel): + accept: bool + feedback: str = "" + + +class PrFileAreaEntry(BaseModel): + filename: str + left_ranges: list[list[int]] + right_ranges: list[list[int]] + + +class PrComparisonEntry(BaseModel): + left_pr_number: int + right_pr_number: int + code_similarity: float + size_similarity: float + file_overlap: float + area_overlap: float + patch_similarity: float + shared_filenames: list[str] + shared_file_areas: list[PrFileAreaEntry] + + +class MetaBugEntry(BaseModel): + cluster_id: str + summary: str + status: str + confidence: float + canonical_issue_number: int | None + canonical_pr_number: int | None + issue_numbers: list[int] + pr_numbers: list[int] + evidence_types: list[str] + pr_comparisons: list[PrComparisonEntry] = Field(default_factory=list) + + +class DuplicateIssuesEntry(BaseModel): + cluster_id: str + canonical_issue_number: int + duplicate_issue_numbers: list[int] + reason: str + + +class DuplicatePrsEntry(BaseModel): + cluster_id: str + canonical_pr_number: int + duplicate_pr_numbers: list[int] + target_issue_number: int | None + reason: str + + +class BestIssueEntry(BaseModel): + cluster_id: str + issue_number: int + reason: str + score: float + + +class BestPrEntry(BaseModel): + cluster_id: str + pr_number: int + reason: str + score: float + + +class AnalysisReport(BaseModel): + schema_version: str + repo: str + snapshot_id: str + generated_at: str + evidence_quality: str + llm_enrichment: bool + meta_bugs: list[MetaBugEntry] + duplicate_issues: list[DuplicateIssuesEntry] + duplicate_prs: list[DuplicatePrsEntry] + best_issue: BestIssueEntry | None + best_pr: BestPrEntry | None + + +@dataclass(slots=True) +class SnapshotData: + repo: str + snapshot_id: str + snapshot_dir: Path + manifest: dict[str, Any] + issues: list[dict[str, Any]] + pull_requests: list[dict[str, Any]] + comments: list[dict[str, Any]] + reviews: list[dict[str, Any]] + review_comments: list[dict[str, Any]] + pr_files: list[dict[str, Any]] + pr_diffs: list[dict[str, Any]] + links: list[dict[str, Any]] + events: list[dict[str, Any]] + evidence_quality: str + + +@dataclass(slots=True) +class ArtifactFeature: + node_id: str + kind: str + number: int + row: dict[str, Any] + tokens: list[str] + title_tokens: set[str] + title_length: int + body_length: int + discussion_activity: int + review_activity: int + inbound_references: int + explicit_issue_links: int + explicit_issue_targets: list[int] + diff_size: int + filenames: list[str] + diff_preview: str | None + file_ranges_by_name: dict[str, list[tuple[int, int]]] + patch_tokens: list[str] + + +@dataclass(slots=True) +class ClusterRecord: + cluster_id: str + nodes: list[str] + issue_numbers: list[int] + pr_numbers: list[int] + evidence_types: list[str] + canonical_issue_number: int | None + canonical_pr_number: int | None + target_issue_number: int | None + summary: str + status: str + confidence: float + canonical_issue_reason: str | None + canonical_pr_reason: str | None + best_issue_reason: str | None + best_pr_reason: str | None + cluster_score: float + best_issue_score: float | None + best_pr_score: float | None 
+ + +@dataclass(frozen=True, slots=True) +class PacketBudget: + node_count: int + item_count: int + soft_pair_count: int + serialized_chars: int + estimated_input_tokens: int + estimated_eval_tokens: int + + +@dataclass(frozen=True, slots=True) +class PreparedLlmPacket: + packet: dict[str, Any] + budget: PacketBudget + original_budget: PacketBudget + trimmed: bool + aggressively_trimmed: bool + split: bool + + +@dataclass(frozen=True, slots=True) +class ClusterAnalysisCallResult: + analyst_result: ClusterAnalystResponse | None + evaluator_result: ClusterEvaluatorResponse | None + error_kind: str | None + error_message: str | None + evaluator_used: bool + retried: bool + + +@dataclass(frozen=True, slots=True) +class AnalysisBuildResult: + report: AnalysisReport + llm_reviews: list[dict[str, Any]] + + +@dataclass(frozen=True, slots=True) +class SoftPairReviewUnitMeta: + label: str + component_index: int + component_count: int + review_unit_index: int + review_unit_count: int + cluster_id: str + prefix: str + nodes: tuple[str, ...] + soft_pairs: tuple[str, ...] + component_budget: PacketBudget + budget: PacketBudget + prepared_review_unit_hash: str | None + trimmed: bool + aggressively_trimmed: bool + split: bool + + +@dataclass(frozen=True, slots=True) +class PendingSoftPairReview: + meta: SoftPairReviewUnitMeta + prepared: PreparedLlmPacket + cache_key: HybridReviewCacheKey + + +@dataclass(frozen=True, slots=True) +class CompletedSoftPairReview: + meta: SoftPairReviewUnitMeta + result: ClusterAnalysisCallResult | None + status: str + reason: str | None + source: str | None + cache_hit: bool + + +def _hybrid_review_cache_manifest() -> HybridReviewCacheManifest: + return HybridReviewCacheManifest( + cache_schema_version=HYBRID_REVIEW_CACHE_SCHEMA_VERSION, + prepared_review_unit_schema_version=PREPARED_REVIEW_UNIT_SCHEMA_VERSION, + analyst_prompt_version=CLUSTER_ANALYST_PROMPT_VERSION, + evaluator_prompt_version=CLUSTER_EVALUATOR_PROMPT_VERSION, + hybrid_review_settings=HybridReviewSettingsFingerprint( + llm_max_input_tokens=LLM_MAX_INPUT_TOKENS, + llm_max_nodes_per_packet=LLM_MAX_NODES_PER_PACKET, + llm_max_soft_pairs_per_packet=LLM_MAX_SOFT_PAIRS_PER_PACKET, + llm_max_diff_chars_per_item=LLM_MAX_DIFF_CHARS_PER_ITEM, + llm_max_filenames_per_item=LLM_MAX_FILENAMES_PER_ITEM, + llm_skip_evaluator_above_tokens=LLM_SKIP_EVALUATOR_ABOVE_TOKENS, + llm_overflow_policy=LLM_OVERFLOW_POLICY, + ), + ) + + +def _prepared_review_unit_payload(prepared: PreparedLlmPacket) -> dict[str, Any]: + return { + "packet": copy.deepcopy(prepared.packet), + "budget": _packet_budget_json(prepared.budget), + "original_budget": _packet_budget_json(prepared.original_budget), + "trimmed": prepared.trimmed, + "aggressively_trimmed": prepared.aggressively_trimmed, + "split": prepared.split, + } + + +def _cluster_analysis_call_result_payload(result: ClusterAnalysisCallResult) -> dict[str, Any]: + return { + "analyst_result": ( + None if result.analyst_result is None else result.analyst_result.model_dump(mode="json") + ), + "evaluator_result": ( + None + if result.evaluator_result is None + else result.evaluator_result.model_dump(mode="json") + ), + "error_kind": result.error_kind, + "error_message": result.error_message, + "evaluator_used": result.evaluator_used, + "retried": result.retried, + } + + +def _cluster_analysis_call_result_from_payload( + payload: dict[str, Any], +) -> ClusterAnalysisCallResult: + return ClusterAnalysisCallResult( + analyst_result=( + None + if payload.get("analyst_result") is None + else 
ClusterAnalystResponse.model_validate(payload["analyst_result"]) + ), + evaluator_result=( + None + if payload.get("evaluator_result") is None + else ClusterEvaluatorResponse.model_validate(payload["evaluator_result"]) + ), + error_kind=payload.get("error_kind"), + error_message=payload.get("error_message"), + evaluator_used=bool(payload.get("evaluator_used", False)), + retried=bool(payload.get("retried", False)), + ) + + +def _cacheable_cluster_analysis_result(result: ClusterAnalysisCallResult) -> bool: + return result.analyst_result is not None and result.error_kind is None + + +def run_analysis(options: AnalysisOptions) -> Path: + if options.snapshot_dir is not None and options.hf_repo_id: + raise ValueError("--snapshot-dir and --hf-repo-id are mutually exclusive") + warning = _llm_fallback_warning(options) + if warning: + _analysis_log(warning) + snapshot_dir = _resolve_snapshot_dir(options) + snapshot = _load_snapshot(snapshot_dir) + _maybe_carry_forward_hybrid_review_cache(snapshot, enabled=options.cached_analysis) + build = asyncio.run(_build_report(snapshot, options)) + output_path = options.output or (snapshot_dir / "analysis-report.json") + output_path.parent.mkdir(parents=True, exist_ok=True) + write_text(json.dumps(build.report.model_dump(mode="json"), indent=2) + "\n", output_path) + llm_reviews_path = _llm_reviews_output_path(output_path) + if build.llm_reviews: + write_text( + json.dumps( + { + "schema_version": "1.0", + "repo": build.report.repo, + "snapshot_id": build.report.snapshot_id, + "generated_at": build.report.generated_at, + "model": options.model, + "reviews": build.llm_reviews, + }, + indent=2, + ) + + "\n", + llm_reviews_path, + ) + elif llm_reviews_path.exists(): + llm_reviews_path.unlink() + _log_hybrid_review_cache_summary(build.llm_reviews, enabled=options.cached_analysis) + return output_path + + +def _analysis_log(message: str) -> None: + stamp = datetime.now(tz=UTC).strftime("%H:%M:%SZ") + print(f"[{stamp}] {message}", file=sys.stderr, flush=True) + + +def _llm_reviews_output_path(output_path: Path) -> Path: + return output_path.with_name(f"{output_path.stem}.llm-reviews.json") + + +def _llm_fallback_warning(options: AnalysisOptions) -> str | None: + if options.ranking_backend != "hybrid": + return None + if _can_use_fast_agent(): + return None + return ( + "Analyze requested ranking-backend=hybrid but fast-agent LLM enrichment is unavailable; " + "reusing cached hybrid review results when available and falling back to deterministic-only clustering " + "for cache misses. " + "Install the llm extra and set one of " + f"{', '.join(LLM_PROVIDER_ENV_VARS)}." 
+ ) + + +def _maybe_carry_forward_hybrid_review_cache(snapshot: SnapshotData, *, enabled: bool) -> None: + if not enabled: + return + current_cache_dir = hybrid_review_cache_dir(snapshot.snapshot_dir) + if current_cache_dir.exists(): + _analysis_log( + f"Cached analysis enabled: using existing analysis-state in {current_cache_dir}" + ) + return + watermark = snapshot.manifest.get("watermark") + if not isinstance(watermark, dict): + _analysis_log("Cached analysis enabled: no previous snapshot recorded; starting fresh") + return + previous_snapshot_dir = watermark.get("previous_snapshot_dir") + if not isinstance(previous_snapshot_dir, str) or not previous_snapshot_dir: + _analysis_log("Cached analysis enabled: no previous snapshot recorded; starting fresh") + return + previous_cache_dir = hybrid_review_cache_dir(Path(previous_snapshot_dir)) + if not previous_cache_dir.exists(): + _analysis_log( + "Cached analysis enabled: previous snapshot has no analysis-state; starting fresh" + ) + return + shutil.copytree(previous_cache_dir, current_cache_dir) + _analysis_log( + f"Cached analysis enabled: copied analysis-state from {previous_cache_dir} to {current_cache_dir}" + ) + + +def _log_hybrid_review_cache_summary(llm_reviews: list[dict[str, Any]], *, enabled: bool) -> None: + if not enabled: + return + if not llm_reviews: + _analysis_log("Hybrid review cache summary: no LLM review units were produced") + return + reviewed = [review for review in llm_reviews if review.get("status") == "reviewed"] + cache_hits = [review for review in reviewed if review.get("cache_hit")] + cache_sourced = [review for review in reviewed if review.get("source") == "cache"] + llm_sourced = [review for review in reviewed if review.get("source") == "llm"] + skipped = [review for review in llm_reviews if review.get("status") != "reviewed"] + hit_rate = 100.0 * len(cache_hits) / len(reviewed) if reviewed else 0.0 + _analysis_log( + "Hybrid review cache summary: " + f"{len(cache_hits)}/{len(reviewed)} reviewed units reused from cache " + f"({hit_rate:.1f}%); " + f"source_cache={len(cache_sourced)}, source_llm={len(llm_sourced)}, skipped={len(skipped)}" + ) + if skipped: + reasons = Counter(str(review.get("reason")) for review in skipped if review.get("reason")) + if reasons: + formatted = ", ".join(f"{reason}={count}" for reason, count in reasons.most_common(5)) + _analysis_log(f"Hybrid review cache skipped reasons: {formatted}") + + +def render_markdown_report(options: MarkdownReportOptions) -> Path: + input_path = options.input.resolve() + report = AnalysisReport.model_validate(read_json(input_path)) + snapshot_dir = _resolve_markdown_snapshot_dir(input_path, options.snapshot_dir) + issue_map, pr_map = _report_artifact_maps(snapshot_dir) + output_path = (options.output or input_path.with_suffix(".md")).resolve() + markdown = _markdown_report_text( + report=report, + issue_map=issue_map, + pr_map=pr_map, + ) + write_text(markdown, output_path) + return output_path + + +def _resolve_markdown_snapshot_dir(input_path: Path, snapshot_dir: Path | None) -> Path | None: + if snapshot_dir is not None: + return snapshot_dir.resolve() + candidate = input_path.parent.resolve() + if (candidate / "issues.parquet").exists() or (candidate / "pull_requests.parquet").exists(): + return candidate + return None + + +def _report_artifact_maps( + snapshot_dir: Path | None, +) -> tuple[dict[int, dict[str, Any]], dict[int, dict[str, Any]]]: + if snapshot_dir is None: + return {}, {} + issues = { + int(row["number"]): row + for row in 
read_parquet_rows(snapshot_dir / "issues.parquet") + if row.get("number") is not None + } + pull_requests = { + int(row["number"]): row + for row in read_parquet_rows(snapshot_dir / "pull_requests.parquet") + if row.get("number") is not None + } + return issues, pull_requests + + +def _markdown_report_text( + *, + report: AnalysisReport, + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], +) -> str: + lines = [ + f"# Analysis Report: {report.repo}", + "", + f"- Snapshot: `{report.snapshot_id}`", + f"- Generated: `{report.generated_at}`", + f"- Evidence quality: `{report.evidence_quality}`", + f"- LLM enrichment: `{str(report.llm_enrichment).lower()}`", + f"- Meta bugs: `{len(report.meta_bugs)}`", + ] + if report.best_issue is not None: + lines.append( + f"- Best issue: {_artifact_markdown_link(report.repo, 'issue', report.best_issue.issue_number, issue_map.get(report.best_issue.issue_number))}" + ) + if report.best_pr is not None: + lines.append( + f"- Best PR: {_artifact_markdown_link(report.repo, 'pull_request', report.best_pr.pr_number, pr_map.get(report.best_pr.pr_number))}" + ) + lines.append("") + + ordered_meta_bugs = sorted( + report.meta_bugs, + key=lambda entry: _meta_bug_sort_key(entry, issue_map, pr_map), + ) + if not ordered_meta_bugs: + lines.append("No meta bugs found.") + lines.append("") + return "\n".join(lines) + + for meta_bug in ordered_meta_bugs: + lines.extend(_meta_bug_markdown_lines(report.repo, meta_bug, issue_map, pr_map)) + return "\n".join(lines).rstrip() + "\n" + + +def _meta_bug_markdown_lines( + repo: str, + meta_bug: MetaBugEntry, + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], +) -> list[str]: + artifact_count = len(meta_bug.issue_numbers) + len(meta_bug.pr_numbers) + latest_activity = _meta_bug_latest_activity(meta_bug, issue_map, pr_map) + issue_numbers_to_render = [ + number for number in meta_bug.issue_numbers if number != meta_bug.canonical_issue_number + ] + lines = [ + f"## {meta_bug.summary}", + "", + f"- Cluster: `{meta_bug.cluster_id}`", + f"- Status: `{meta_bug.status}`", + f"- Confidence: `{meta_bug.confidence:.3f}`", + f"- Artifacts: `{artifact_count}`", + f"- Latest activity: `{latest_activity}`", + ] + if meta_bug.canonical_issue_number is not None: + lines.append( + f"- Canonical issue: {_artifact_markdown_link(repo, 'issue', meta_bug.canonical_issue_number, issue_map.get(meta_bug.canonical_issue_number))}" + ) + if meta_bug.canonical_pr_number is not None: + lines.append( + f"- Canonical PR: {_artifact_markdown_link(repo, 'pull_request', meta_bug.canonical_pr_number, pr_map.get(meta_bug.canonical_pr_number))}" + ) + if meta_bug.evidence_types: + lines.append(f"- Evidence: `{', '.join(meta_bug.evidence_types)}`") + lines.append("") + + if issue_numbers_to_render: + lines.append("### Issues") + lines.append("") + for number in _sorted_artifact_numbers(issue_numbers_to_render, issue_map): + lines.append( + f"- {_artifact_markdown_link(repo, 'issue', number, issue_map.get(number))}{_artifact_suffix(issue_map.get(number), 'issue')}" + ) + lines.append("") + + if meta_bug.pr_numbers: + lines.append("### PRs") + lines.append("") + for number in _sorted_artifact_numbers(meta_bug.pr_numbers, pr_map): + lines.append( + f"- {_artifact_markdown_link(repo, 'pull_request', number, pr_map.get(number))}{_artifact_suffix(pr_map.get(number), 'pull_request')}" + ) + lines.append("") + + if meta_bug.pr_comparisons: + lines.append("### PR comparison") + lines.append("") + for comparison in 
meta_bug.pr_comparisons: + shared_files = ", ".join(f"`{name}`" for name in comparison.shared_filenames) or "none" + lines.append( + f"- PR #{comparison.left_pr_number} vs PR #{comparison.right_pr_number}: " + f"code `{comparison.code_similarity:.3f}`, " + f"size `{comparison.size_similarity:.3f}`, " + f"files `{comparison.file_overlap:.3f}`, " + f"areas `{comparison.area_overlap:.3f}`, " + f"patch `{comparison.patch_similarity:.3f}`; " + f"shared files: {shared_files}" + ) + lines.append("") + + return lines + + +def _meta_bug_sort_key( + meta_bug: MetaBugEntry, + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], +) -> tuple[int, float, int, str]: + artifact_count = len(meta_bug.issue_numbers) + len(meta_bug.pr_numbers) + latest_activity = _meta_bug_latest_activity_dt(meta_bug, issue_map, pr_map).timestamp() + largest_number = max([*meta_bug.issue_numbers, *meta_bug.pr_numbers], default=0) + return (-artifact_count, -latest_activity, -largest_number, meta_bug.cluster_id) + + +def _meta_bug_latest_activity( + meta_bug: MetaBugEntry, issue_map: dict[int, dict[str, Any]], pr_map: dict[int, dict[str, Any]] +) -> str: + latest_row = _meta_bug_latest_row(meta_bug, issue_map, pr_map) + if latest_row is None: + return "unknown" + return str( + latest_row.get("updated_at") + or latest_row.get("created_at") + or latest_row.get("closed_at") + or "unknown" + ) + + +def _meta_bug_latest_activity_dt( + meta_bug: MetaBugEntry, + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], +) -> datetime: + latest_row = _meta_bug_latest_row(meta_bug, issue_map, pr_map) + if latest_row is None: + return datetime(1970, 1, 1, tzinfo=UTC) + return _row_activity_dt(latest_row) + + +def _meta_bug_latest_row( + meta_bug: MetaBugEntry, + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], +) -> dict[str, Any] | None: + rows = [issue_map[number] for number in meta_bug.issue_numbers if number in issue_map] + rows.extend(pr_map[number] for number in meta_bug.pr_numbers if number in pr_map) + if not rows: + return None + return max(rows, key=_row_activity_dt) + + +def _sorted_artifact_numbers(numbers: list[int], row_map: dict[int, dict[str, Any]]) -> list[int]: + return sorted( + numbers, + key=lambda number: ( + -_row_activity_dt(row_map.get(number)).timestamp(), + -number, + ), + ) + + +def _row_activity_dt(row: dict[str, Any] | None) -> datetime: + if not row: + return datetime(1970, 1, 1, tzinfo=UTC) + for field in ("updated_at", "created_at", "closed_at", "merged_at"): + value = row.get(field) + if not value: + continue + try: + return _parse_dt(str(value)) + except ValueError: + continue + return datetime(1970, 1, 1, tzinfo=UTC) + + +def _artifact_markdown_link(repo: str, kind: str, number: int, row: dict[str, Any] | None) -> str: + title = _artifact_title(kind, number, row) + url = _artifact_url(repo, kind, number, row) + return f"[{title}]({url})" + + +def _artifact_title(kind: str, number: int, row: dict[str, Any] | None) -> str: + prefix = "PR" if kind == "pull_request" else "Issue" + title = str((row or {}).get("title") or "").strip() + if not title and kind == "pull_request": + body = str((row or {}).get("body") or "").strip() + if body: + title = body.splitlines()[0].strip()[:120] + if title: + return f"{prefix} #{number}: {title}" + return f"{prefix} #{number}" + + +def _artifact_url(repo: str, kind: str, number: int, row: dict[str, Any] | None) -> str: + html_url = str((row or {}).get("html_url") or "").strip() + if html_url: + return 
html_url + if repo: + path = "pull" if kind == "pull_request" else "issues" + return f"https://github.com/{repo}/{path}/{number}" + return "#" + + +def _artifact_suffix(row: dict[str, Any] | None, kind: str) -> str: + if not row: + return "" + details: list[str] = [] + state = str(row.get("state") or "").strip() + if state: + details.append(state) + if kind == "pull_request": + if bool(row.get("merged")): + details.append("merged") + if bool(row.get("draft")): + details.append("draft") + timestamp = row.get("updated_at") or row.get("created_at") + if timestamp: + details.append(str(timestamp)) + if not details: + return "" + return f" ({', '.join(details)})" + + +def _resolve_snapshot_dir(options: AnalysisOptions) -> Path: + return resolve_snapshot_source_dir( + snapshot_dir=options.snapshot_dir, + local_snapshots_root=options.output_dir.resolve() / "snapshots", + hf_repo_id=options.hf_repo_id, + hf_revision=options.hf_revision, + hf_materialize_dir=options.hf_materialize_dir, + hf_output_dir=options.output_dir, + ) + + +def _load_snapshot(snapshot_dir: Path) -> SnapshotData: + manifest_path = snapshot_dir / "manifest.json" + manifest = read_json(manifest_path) if manifest_path.exists() else {} + + issues = read_parquet_rows(snapshot_dir / "issues.parquet") + pull_requests = read_parquet_rows(snapshot_dir / "pull_requests.parquet") + comments = read_parquet_rows(snapshot_dir / "comments.parquet") + reviews = read_parquet_rows(snapshot_dir / "reviews.parquet") + review_comments = read_parquet_rows(snapshot_dir / "review_comments.parquet") + pr_files = read_parquet_rows(snapshot_dir / "pr_files.parquet") + pr_diffs = read_parquet_rows(snapshot_dir / "pr_diffs.parquet") + links = read_parquet_rows(snapshot_dir / "links.parquet") + events_path = snapshot_dir / "events.parquet" + events = read_parquet_rows(events_path) + if not any( + [ + issues, + pull_requests, + comments, + reviews, + review_comments, + pr_files, + pr_diffs, + links, + events, + ] + ): + parquet_files = sorted(str(path.name) for path in snapshot_dir.glob("*.parquet")) + raise FileNotFoundError( + f"No analysis tables found in {snapshot_dir}. " + f"Expected local files like issues.parquet/pull_requests.parquet. " + f"Found parquet files: {parquet_files or 'none'}. " + "Use --hf-repo-id for Hugging Face datasets or point --snapshot-dir at a local slop-farmer snapshot." 
+ ) + + repo = ( + manifest.get("repo") + or (issues[0]["repo"] if issues else None) + or (pull_requests[0]["repo"] if pull_requests else None) + or (comments[0]["repo"] if comments else None) + or "" + ) + snapshot_id = manifest.get("snapshot_id") or snapshot_dir.name + evidence_quality = "full" if events_path.exists() and events else "partial" + return SnapshotData( + repo=repo, + snapshot_id=snapshot_id, + snapshot_dir=snapshot_dir, + manifest=manifest, + issues=issues, + pull_requests=pull_requests, + comments=comments, + reviews=reviews, + review_comments=review_comments, + pr_files=pr_files, + pr_diffs=pr_diffs, + links=links, + events=events, + evidence_quality=evidence_quality, + ) + + +async def _build_report(snapshot: SnapshotData, options: AnalysisOptions) -> AnalysisBuildResult: + combined_links = _combined_links(snapshot) + llm_available = _can_use_fast_agent() + hybrid_review_cache = HybridReviewCacheStore( + hybrid_review_cache_dir(snapshot.snapshot_dir), + _hybrid_review_cache_manifest(), + enabled=options.ranking_backend == "hybrid", + ) + if hybrid_review_cache.invalidation_reason is not None: + _analysis_log( + "Hybrid review cache invalidated; ignoring cached entries " + f"({hybrid_review_cache.invalidation_reason})" + ) + issue_map = {int(row["number"]): row for row in snapshot.issues} + pr_map = {int(row["number"]): row for row in snapshot.pull_requests} + suppressed_pr_reasons = suppressed_pull_request_reasons( + snapshot.pull_requests, + snapshot.pr_files, + compile_cluster_suppression_rules(options.cluster_suppression_rules), + ) + if suppressed_pr_reasons: + original_pr_count = len(pr_map) + pr_map = { + number: row for number, row in pr_map.items() if number not in suppressed_pr_reasons + } + _analysis_log( + f"Suppressing {len(suppressed_pr_reasons)} routine PRs from clustering: " + f"{len(pr_map)}/{original_pr_count} PRs kept" + ) + if options.open_prs_only: + original_pr_count = len(pr_map) + pr_map = { + number: row + for number, row in pr_map.items() + if str(row.get("state") or "").lower() == "open" + } + _analysis_log( + f"Restricting PR analysis to open PRs only: {len(pr_map)}/{original_pr_count} PRs kept " + "(draft PRs remain eligible)" + ) + comment_map = { + int(row["github_id"]): row for row in snapshot.comments if row.get("github_id") is not None + } + review_map = { + int(row["github_id"]): row for row in snapshot.reviews if row.get("github_id") is not None + } + review_comment_map = { + int(row["github_id"]): row + for row in snapshot.review_comments + if row.get("github_id") is not None + } + + inbound_references, _ = _reference_counts( + snapshot.repo, + combined_links, + issue_map, + pr_map, + comment_map, + review_map, + review_comment_map, + ) + explicit_issue_link_targets = _explicit_pr_issue_targets( + repo=snapshot.repo, + combined_links=combined_links, + issue_map=issue_map, + pr_map=pr_map, + ) + features = _artifact_features( + snapshot, + options=options, + issue_map=issue_map, + pr_map=pr_map, + inbound_references=inbound_references, + explicit_issue_link_targets=explicit_issue_link_targets, + ) + issue_hard_pairs = _issue_hard_pairs( + repo=snapshot.repo, + combined_links=combined_links, + issue_map=issue_map, + pr_map=pr_map, + comment_map=comment_map, + review_map=review_map, + review_comment_map=review_comment_map, + ) + issue_soft_candidates = _issue_soft_candidates(issue_map, features, issue_hard_pairs) + pr_soft_candidates, pr_pair_target_issues = _pr_duplicate_candidates( + options=options, + snapshot=snapshot, + 
issue_map=issue_map, + pr_map=pr_map, + features=features, + ) + review_semaphore = asyncio.Semaphore(options.hybrid_llm_concurrency) + ( + (accepted_issue_pairs, issue_llm_enabled, issue_llm_reviews), + (accepted_pr_pairs, pr_llm_enabled, pr_llm_reviews), + ) = await asyncio.gather( + _accepted_soft_pairs( + options=options, + snapshot=snapshot, + features=features, + hard_pairs=issue_hard_pairs, + soft_candidates=issue_soft_candidates, + label="issue", + hybrid_review_cache=hybrid_review_cache, + llm_available=llm_available, + review_semaphore=review_semaphore, + ), + _accepted_soft_pairs( + options=options, + snapshot=snapshot, + features=features, + hard_pairs={}, + soft_candidates=pr_soft_candidates, + label="pull_request", + hybrid_review_cache=hybrid_review_cache, + llm_available=llm_available, + review_semaphore=review_semaphore, + ), + ) + issue_pairs = dict(issue_hard_pairs) + for pair, detail in accepted_issue_pairs.items(): + issue_pairs.setdefault(pair, set()).update( + detail.get("evidence_types") or {"soft_similarity"} + ) + pr_pairs: dict[tuple[str, str], set[str]] = {} + for pair, detail in accepted_pr_pairs.items(): + pr_pairs.setdefault(pair, set()).update(detail.get("evidence_types") or {"soft_similarity"}) + + issue_clusters = _clusters( + snapshot=snapshot, + features=features, + final_pairs=issue_pairs, + pair_target_issues=defaultdict(set), + llm_cluster_payloads={}, + ) + pr_clusters = _clusters( + snapshot=snapshot, + features=features, + final_pairs=pr_pairs, + pair_target_issues=pr_pair_target_issues, + llm_cluster_payloads={}, + ) + clusters = _meta_bug_clusters( + features=features, + issue_clusters=issue_clusters, + pr_clusters=pr_clusters, + explicit_issue_link_targets=explicit_issue_link_targets, + issue_map=issue_map, + pr_map=pr_map, + ) + + meta_clusters = sorted( + clusters, key=lambda cluster: (-cluster.cluster_score, cluster.cluster_id) + )[: options.max_clusters] + duplicate_issues = [ + DuplicateIssuesEntry( + cluster_id=cluster.cluster_id, + canonical_issue_number=cluster.canonical_issue_number, + duplicate_issue_numbers=[ + number + for number in cluster.issue_numbers + if number != cluster.canonical_issue_number + ], + reason=_duplicate_issue_reason(cluster), + ) + for cluster in clusters + if cluster.canonical_issue_number is not None and len(cluster.issue_numbers) >= 2 + ] + duplicate_prs = [ + DuplicatePrsEntry( + cluster_id=cluster.cluster_id, + canonical_pr_number=cluster.canonical_pr_number, + duplicate_pr_numbers=[ + number for number in cluster.pr_numbers if number != cluster.canonical_pr_number + ], + target_issue_number=cluster.target_issue_number, + reason=_duplicate_pr_reason(cluster), + ) + for cluster in clusters + if cluster.canonical_pr_number is not None and len(cluster.pr_numbers) >= 2 + ] + best_issue = _best_issue(meta_clusters, features) + best_pr = _best_pr(meta_clusters, features) + return AnalysisBuildResult( + report=AnalysisReport( + schema_version="1.0", + repo=snapshot.repo, + snapshot_id=snapshot.snapshot_id, + generated_at=_iso_now(), + evidence_quality=snapshot.evidence_quality, + llm_enrichment=issue_llm_enabled or pr_llm_enabled, + meta_bugs=[ + MetaBugEntry( + cluster_id=cluster.cluster_id, + summary=cluster.summary, + status=cluster.status, + confidence=round(cluster.confidence, 3), + canonical_issue_number=cluster.canonical_issue_number, + canonical_pr_number=cluster.canonical_pr_number, + issue_numbers=cluster.issue_numbers, + pr_numbers=cluster.pr_numbers, + evidence_types=cluster.evidence_types, + 
pr_comparisons=_cluster_pr_comparisons(cluster, features), + ) + for cluster in meta_clusters + ], + duplicate_issues=duplicate_issues, + duplicate_prs=duplicate_prs, + best_issue=best_issue, + best_pr=best_pr, + ), + llm_reviews=issue_llm_reviews + pr_llm_reviews, + ) + + +def _iso_now() -> str: + return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def _combined_links(snapshot: SnapshotData) -> list[dict[str, Any]]: + owner, repo_name = snapshot.repo.split("/", 1) + extracted_at = snapshot.manifest.get("extracted_at") or _iso_now() + rows = list(snapshot.links) + for issue in snapshot.issues: + rows.extend( + build_text_link_rows( + repo=snapshot.repo, + owner=owner, + repo_name=repo_name, + source_type="issue", + source_number=int(issue["number"]), + source_id=issue.get("github_id"), + body=issue.get("body"), + snapshot_id=snapshot.snapshot_id, + extracted_at=extracted_at, + ) + ) + for pr in snapshot.pull_requests: + rows.extend( + build_text_link_rows( + repo=snapshot.repo, + owner=owner, + repo_name=repo_name, + source_type="pull_request", + source_number=int(pr["number"]), + source_id=pr.get("github_id"), + body=pr.get("body"), + snapshot_id=snapshot.snapshot_id, + extracted_at=extracted_at, + ) + ) + for comment in snapshot.comments: + if comment.get("parent_number") is None: + continue + rows.extend( + build_text_link_rows( + repo=snapshot.repo, + owner=owner, + repo_name=repo_name, + source_type="comment", + source_number=int(comment["parent_number"]), + source_id=comment.get("github_id"), + body=comment.get("body"), + snapshot_id=snapshot.snapshot_id, + extracted_at=extracted_at, + ) + ) + for review in snapshot.reviews: + rows.extend( + build_text_link_rows( + repo=snapshot.repo, + owner=owner, + repo_name=repo_name, + source_type="review", + source_number=int(review["pull_request_number"]), + source_id=review.get("github_id"), + body=review.get("body"), + snapshot_id=snapshot.snapshot_id, + extracted_at=extracted_at, + ) + ) + for review_comment in snapshot.review_comments: + rows.extend( + build_text_link_rows( + repo=snapshot.repo, + owner=owner, + repo_name=repo_name, + source_type="review_comment", + source_number=int(review_comment["pull_request_number"]), + source_id=review_comment.get("github_id"), + body=review_comment.get("body"), + snapshot_id=snapshot.snapshot_id, + extracted_at=extracted_at, + ) + ) + deduped: dict[tuple[Any, ...], dict[str, Any]] = {} + for row in rows: + key = tuple(row.get(field) for field in LINK_KEY_FIELDS) + deduped[key] = row + return list(deduped.values()) + + +def _reference_counts( + repo: str, + links: list[dict[str, Any]], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + comment_map: dict[int, dict[str, Any]], + review_map: dict[int, dict[str, Any]], + review_comment_map: dict[int, dict[str, Any]], +) -> tuple[Counter[str], defaultdict[int, set[int]]]: + inbound_references: Counter[str] = Counter() + explicit_issue_link_targets: defaultdict[int, set[int]] = defaultdict(set) + for row in links: + source_node = _resolve_source_node( + row, issue_map, pr_map, comment_map, review_map, review_comment_map + ) + target_node = _resolve_target_node(repo, row, issue_map, pr_map) + if source_node is not None and target_node is not None: + inbound_references[target_node] += 1 + if ( + source_node + and target_node + and source_node.startswith("pull_request:") + and target_node.startswith("issue:") + ): + explicit_issue_link_targets[int(source_node.split(":", 1)[1])].add( + 
int(target_node.split(":", 1)[1]) + ) + return inbound_references, explicit_issue_link_targets + + +def _artifact_features( + snapshot: SnapshotData, + *, + options: AnalysisOptions, + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + inbound_references: Counter[str], + explicit_issue_link_targets: defaultdict[int, set[int]], +) -> dict[str, ArtifactFeature]: + template_cleanup = build_template_cleanup_settings( + mode=options.pr_template_cleanup_mode, + strip_html_comments=options.pr_template_strip_html_comments, + trim_closing_reference_prefix=options.pr_template_trim_closing_reference_prefix, + section_patterns=options.pr_template_section_patterns, + line_patterns=options.pr_template_line_patterns, + ) + comments_by_parent: defaultdict[int, int] = defaultdict(int) + reviews_by_pr: defaultdict[int, int] = defaultdict(int) + review_comments_by_pr: defaultdict[int, int] = defaultdict(int) + filenames_by_pr: defaultdict[int, set[str]] = defaultdict(set) + file_ranges_by_pr: defaultdict[int, dict[str, list[tuple[int, int]]]] = defaultdict( + lambda: defaultdict(list) + ) + patch_tokens_by_pr: defaultdict[int, list[str]] = defaultdict(list) + diff_preview_by_pr: dict[int, str] = {} + for comment in snapshot.comments: + parent_number = comment.get("parent_number") + if parent_number is not None: + comments_by_parent[int(parent_number)] += 1 + for review in snapshot.reviews: + reviews_by_pr[int(review["pull_request_number"])] += 1 + for review_comment in snapshot.review_comments: + review_comments_by_pr[int(review_comment["pull_request_number"])] += 1 + for pr_file in snapshot.pr_files: + pr_number = pr_file.get("pull_request_number") + filename = pr_file.get("filename") + if pr_number is None or not filename: + continue + filenames_by_pr[int(pr_number)].add(str(filename)) + patch = pr_file.get("patch") + if patch: + file_ranges_by_pr[int(pr_number)][str(filename)].extend(_patch_ranges(str(patch))) + patch_tokens_by_pr[int(pr_number)].extend(_patch_content_tokens(str(patch))) + for pr_diff in snapshot.pr_diffs: + pr_number = pr_diff.get("pull_request_number") + diff = pr_diff.get("diff") + if pr_number is None or not diff: + continue + diff_preview_by_pr[int(pr_number)] = str(diff)[:1200] + + features: dict[str, ArtifactFeature] = {} + for number, issue in issue_map.items(): + title = issue.get("title") or "" + body = issue.get("body") or "" + node_id = f"issue:{number}" + title_tokens = set(_tokenize(title, remove_stopwords=True)) + features[node_id] = ArtifactFeature( + node_id=node_id, + kind="issue", + number=number, + row=issue, + tokens=_tokenize(f"{title} {body}", remove_stopwords=True), + title_tokens=title_tokens, + title_length=len(title), + body_length=len(body), + discussion_activity=max( + int(issue.get("comments_count") or 0), comments_by_parent[number] + ), + review_activity=0, + inbound_references=inbound_references[node_id], + explicit_issue_links=0, + explicit_issue_targets=[], + diff_size=0, + filenames=[], + diff_preview=None, + file_ranges_by_name={}, + patch_tokens=[], + ) + for number, pr in pr_map.items(): + title = pr.get("title") or "" + body = pr.get("body") or "" + similarity_body = _strip_pull_request_template(body, settings=template_cleanup) + node_id = f"pull_request:{number}" + discussion_activity = max(int(pr.get("comments_count") or 0), comments_by_parent[number]) + review_activity = reviews_by_pr[number] + max( + int(pr.get("review_comments_count") or 0), review_comments_by_pr[number] + ) + diff_size = ( + int(pr.get("additions") or 
0) + + int(pr.get("deletions") or 0) + + int(pr.get("changed_files") or 0) * 10 + ) + features[node_id] = ArtifactFeature( + node_id=node_id, + kind="pull_request", + number=number, + row=pr, + tokens=_tokenize(f"{title} {similarity_body}", remove_stopwords=True), + title_tokens=set(_tokenize(title, remove_stopwords=True)), + title_length=len(title), + body_length=len(body), + discussion_activity=discussion_activity, + review_activity=review_activity, + inbound_references=inbound_references[node_id], + explicit_issue_links=len(explicit_issue_link_targets[number]), + explicit_issue_targets=sorted(explicit_issue_link_targets[number]), + diff_size=diff_size, + filenames=sorted(filenames_by_pr[number]), + diff_preview=diff_preview_by_pr.get(number), + file_ranges_by_name={ + filename: sorted(ranges) for filename, ranges in file_ranges_by_pr[number].items() + }, + patch_tokens=patch_tokens_by_pr[number], + ) + return features + + +def _explicit_pr_issue_targets( + *, + repo: str, + combined_links: list[dict[str, Any]], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], +) -> defaultdict[int, set[int]]: + targets: defaultdict[int, set[int]] = defaultdict(set) + owner, repo_name = repo.split("/", 1) + for row in combined_links: + if row.get("source_type") != "pull_request": + continue + if row.get("link_type") != "closing_reference": + continue + if row.get("target_owner") != owner or row.get("target_repo") != repo_name: + continue + source_number = row.get("source_number") + target_number = row.get("target_number") + if source_number is None or target_number is None: + continue + pr_number = int(source_number) + issue_number = int(target_number) + if pr_number not in pr_map or issue_number not in issue_map: + continue + targets[pr_number].add(issue_number) + return targets + + +def _issue_hard_pairs( + *, + repo: str, + combined_links: list[dict[str, Any]], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + comment_map: dict[int, dict[str, Any]], + review_map: dict[int, dict[str, Any]], + review_comment_map: dict[int, dict[str, Any]], +) -> dict[tuple[str, str], set[str]]: + hard_pairs: dict[tuple[str, str], set[str]] = defaultdict(set) + for row in combined_links: + source_node = _resolve_source_node( + row, issue_map, pr_map, comment_map, review_map, review_comment_map + ) + target_node = _resolve_target_node(repo, row, issue_map, pr_map) + if source_node is None or target_node is None or source_node == target_node: + continue + if ( + row["link_type"] == "duplicate_reference" + and source_node.startswith("issue:") + and target_node.startswith("issue:") + ): + hard_pairs[_pair_key(source_node, target_node)].add("duplicate_reference") + return hard_pairs + + +def _issue_soft_candidates( + issue_map: dict[int, dict[str, Any]], + features: dict[str, ArtifactFeature], + hard_pairs: dict[tuple[str, str], set[str]], +) -> dict[tuple[str, str], dict[str, Any]]: + hard_neighbors: defaultdict[str, set[str]] = defaultdict(set) + for left, right in hard_pairs: + hard_neighbors[left].add(right) + hard_neighbors[right].add(left) + candidates = _bm25_candidates( + numbers=sorted(issue_map), + kind="issue", + features=features, + hard_neighbors=hard_neighbors, + max_candidates=5, + extra_filter=_issue_soft_filter, + ) + for detail in candidates.values(): + detail["evidence_types"] = {"soft_similarity"} + detail["deterministic_accept"] = False + return candidates + + +def _pr_duplicate_candidates( + *, + options: AnalysisOptions, + snapshot: SnapshotData | 
None, + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + features: dict[str, ArtifactFeature], +) -> tuple[dict[tuple[str, str], dict[str, Any]], dict[tuple[str, str], set[int]]]: + del options, snapshot + candidates: dict[tuple[str, str], dict[str, Any]] = {} + pair_target_issues: dict[tuple[str, str], set[int]] = defaultdict(set) + explicit_targets = { + number: set(features[f"pull_request:{number}"].explicit_issue_targets) + for number in pr_map + if features[f"pull_request:{number}"].explicit_issue_targets + } + + for pair, detail in _bm25_candidates( + numbers=sorted(pr_map), + kind="pull_request", + features=features, + hard_neighbors=defaultdict(set), + max_candidates=5, + extra_filter=_pr_soft_filter, + ).items(): + left = features[pair[0]] + right = features[pair[1]] + shared_files = _shared_filenames(left, right) + _merge_candidate_detail( + candidates, + pair, + { + **detail, + "evidence_types": {"soft_similarity"}, + "shared_filenames": shared_files, + "shared_targets": sorted( + explicit_targets.get(left.number, set()) + & explicit_targets.get(right.number, set()) + ), + "deterministic_accept": detail["jaccard"] >= 0.35, + }, + ) + + prs_by_target: defaultdict[int, set[str]] = defaultdict(set) + for pr_number, targets in explicit_targets.items(): + for target in targets: + prs_by_target[target].add(f"pull_request:{pr_number}") + for target_issue, pr_nodes in prs_by_target.items(): + pr_nodes_list = sorted(pr_nodes) + if len(pr_nodes_list) < 2: + continue + scored_pairs: list[tuple[tuple[Any, ...], tuple[str, str], dict[str, Any]]] = [] + for index, left_node in enumerate(pr_nodes_list): + for right_node in pr_nodes_list[index + 1 :]: + pair = _pair_key(left_node, right_node) + left = features[left_node] + right = features[right_node] + if not _pr_soft_filter(left, right): + continue + shared_files = _shared_filenames(left, right) + text_jaccard = _jaccard_sets(set(left.tokens), set(right.tokens)) + if text_jaccard < LLM_SHARED_TARGET_MIN_TEXT_JACCARD and not shared_files: + continue + score = max(5.0, text_jaccard * 10.0 + len(shared_files)) + scored_pairs.append( + ( + ( + -len(shared_files), + -text_jaccard, + -score, + pair[0], + pair[1], + ), + pair, + { + "left": pair[0], + "right": pair[1], + "kind": "pull_request", + "score": score, + "jaccard": text_jaccard, + "evidence_types": {"shared_issue_target"}, + "shared_targets": [target_issue], + "shared_filenames": shared_files, + "deterministic_accept": text_jaccard >= 0.2, + }, + ) + ) + for pair, detail in _bounded_shared_target_pairs(scored_pairs): + _merge_candidate_detail(candidates, pair, detail) + pair_target_issues[pair].add(target_issue) + return candidates, pair_target_issues + + +def _bounded_shared_target_pairs( + scored_pairs: list[tuple[tuple[Any, ...], tuple[str, str], dict[str, Any]]], +) -> list[tuple[tuple[str, str], dict[str, Any]]]: + ordered = sorted(scored_pairs, key=lambda item: item[0]) + if not ordered: + return [] + + parent = {node: node for _, pair, _ in ordered for node in pair} + + def find(node: str) -> str: + root = node + while parent[root] != root: + root = parent[root] + while parent[node] != node: + next_node = parent[node] + parent[node] = root + node = next_node + return root + + def union(left: str, right: str) -> None: + left_root = find(left) + right_root = find(right) + if left_root != right_root: + parent[right_root] = left_root + + neighbor_counts: Counter[str] = Counter() + selected_pairs: set[tuple[str, str]] = set() + selected: list[tuple[tuple[str, 
str], dict[str, Any]]] = [] + + def record(pair: tuple[str, str], detail: dict[str, Any]) -> None: + selected_pairs.add(pair) + selected.append((pair, detail)) + neighbor_counts[pair[0]] += 1 + neighbor_counts[pair[1]] += 1 + + extra_pairs_added = 0 + + def can_add_redundant_pair(pair: tuple[str, str]) -> bool: + return ( + extra_pairs_added < LLM_SHARED_TARGET_MAX_EXTRA_PAIRS_PER_TARGET + and neighbor_counts[pair[0]] < LLM_SHARED_TARGET_MAX_NEIGHBORS_PER_PR + and neighbor_counts[pair[1]] < LLM_SHARED_TARGET_MAX_NEIGHBORS_PER_PR + ) + + for _, pair, detail in ordered: + if pair in selected_pairs: + continue + if find(pair[0]) == find(pair[1]): + continue + record(pair, detail) + union(pair[0], pair[1]) + + for _, pair, detail in ordered: + if pair in selected_pairs or not can_add_redundant_pair(pair): + continue + record(pair, detail) + extra_pairs_added += 1 + + return selected + + +def _merge_candidate_detail( + candidates: dict[tuple[str, str], dict[str, Any]], + pair: tuple[str, str], + detail: dict[str, Any], +) -> None: + current = candidates.get(pair) + if current is None: + copied = dict(detail) + copied["evidence_types"] = set(detail.get("evidence_types") or []) + copied["shared_targets"] = list(detail.get("shared_targets") or []) + copied["shared_filenames"] = list(detail.get("shared_filenames") or []) + copied["deterministic_accept"] = bool(detail.get("deterministic_accept")) + candidates[pair] = copied + return + current["score"] = max(float(current.get("score") or 0.0), float(detail.get("score") or 0.0)) + current["jaccard"] = max( + float(current.get("jaccard") or 0.0), float(detail.get("jaccard") or 0.0) + ) + current["evidence_types"] = set(current.get("evidence_types") or []) | set( + detail.get("evidence_types") or [] + ) + current["shared_targets"] = sorted( + set(current.get("shared_targets") or []) | set(detail.get("shared_targets") or []) + ) + current["shared_filenames"] = sorted( + set(current.get("shared_filenames") or []) | set(detail.get("shared_filenames") or []) + )[:10] + current["deterministic_accept"] = bool(current.get("deterministic_accept")) or bool( + detail.get("deterministic_accept") + ) + + +def _shared_filenames(left: ArtifactFeature, right: ArtifactFeature) -> list[str]: + return sorted(set(left.filenames) & set(right.filenames))[:10] + + +def _hard_pairs( + snapshot: SnapshotData, + *, + combined_links: list[dict[str, Any]], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + comment_map: dict[int, dict[str, Any]], + review_map: dict[int, dict[str, Any]], + review_comment_map: dict[int, dict[str, Any]], +) -> tuple[dict[tuple[str, str], set[str]], dict[tuple[str, str], set[int]]]: + hard_pairs: dict[tuple[str, str], set[str]] = defaultdict(set) + pair_target_issues: dict[tuple[str, str], set[int]] = defaultdict(set) + + for row in combined_links: + source_node = _resolve_source_node( + row, issue_map, pr_map, comment_map, review_map, review_comment_map + ) + target_node = _resolve_target_node(snapshot.repo, row, issue_map, pr_map) + if source_node is None or target_node is None or source_node == target_node: + continue + if ( + row["link_type"] == "duplicate_reference" + and source_node.startswith("issue:") + and target_node.startswith("issue:") + ): + hard_pairs[_pair_key(source_node, target_node)].add("duplicate_reference") + if ( + row["link_type"] == "closing_reference" + and source_node.startswith("pull_request:") + and target_node.startswith("issue:") + ): + hard_pairs[_pair_key(source_node, 
target_node)].add("closing_reference") + + prs_by_target: defaultdict[int, set[str]] = defaultdict(set) + for row in combined_links: + source_node = _resolve_source_node( + row, issue_map, pr_map, comment_map, review_map, review_comment_map + ) + target_node = _resolve_target_node(snapshot.repo, row, issue_map, pr_map) + if ( + source_node + and target_node + and source_node.startswith("pull_request:") + and target_node.startswith("issue:") + ): + prs_by_target[int(target_node.split(":", 1)[1])].add(source_node) + for target_issue, pr_nodes in prs_by_target.items(): + pr_nodes_list = sorted(pr_nodes) + if len(pr_nodes_list) < 2: + continue + for index, left in enumerate(pr_nodes_list): + for right in pr_nodes_list[index + 1 :]: + pair = _pair_key(left, right) + hard_pairs[pair].add("shared_issue_target") + pair_target_issues[pair].add(target_issue) + + for event in snapshot.events: + source_number = event.get("source_issue_number") + if event.get("event") != "cross-referenced" or source_number is None: + continue + parent_kind = event.get("parent_kind") + parent_number = event.get("parent_number") + if parent_kind not in {"issue", "pull_request"} or parent_number is None: + continue + parent_node = f"{parent_kind}:{int(parent_number)}" + if parent_node not in features_from_maps(issue_map, pr_map): + continue + target_node = _node_from_number(int(source_number), issue_map, pr_map) + if target_node is None or target_node == parent_node: + continue + hard_pairs[_pair_key(parent_node, target_node)].add("timeline:cross-referenced") + + return hard_pairs, pair_target_issues + + +def features_from_maps( + issue_map: dict[int, dict[str, Any]], pr_map: dict[int, dict[str, Any]] +) -> set[str]: + return {f"issue:{number}" for number in issue_map} | { + f"pull_request:{number}" for number in pr_map + } + + +def _soft_candidates( + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + features: dict[str, ArtifactFeature], + hard_pairs: dict[tuple[str, str], set[str]], +) -> dict[tuple[str, str], dict[str, Any]]: + candidates: dict[tuple[str, str], dict[str, Any]] = {} + hard_neighbors: defaultdict[str, set[str]] = defaultdict(set) + for left, right in hard_pairs: + if left.split(":", 1)[0] == right.split(":", 1)[0]: + hard_neighbors[left].add(right) + hard_neighbors[right].add(left) + issue_candidates = _bm25_candidates( + numbers=sorted(issue_map), + kind="issue", + features=features, + hard_neighbors=hard_neighbors, + max_candidates=5, + extra_filter=_issue_soft_filter, + ) + pr_candidates = _bm25_candidates( + numbers=sorted(pr_map), + kind="pull_request", + features=features, + hard_neighbors=hard_neighbors, + max_candidates=5, + extra_filter=_pr_soft_filter, + ) + candidates.update(issue_candidates) + candidates.update(pr_candidates) + return candidates + + +def _bm25_candidates( + *, + numbers: list[int], + kind: str, + features: dict[str, ArtifactFeature], + hard_neighbors: defaultdict[str, set[str]], + max_candidates: int, + extra_filter: Any, +) -> dict[tuple[str, str], dict[str, Any]]: + if not numbers: + return {} + nodes = [f"{kind}:{number}" for number in numbers] + token_sets = [set(features[node].tokens) for node in nodes] + if len(nodes) > 4000: + return _sparse_token_candidates( + nodes=nodes, + kind=kind, + features=features, + token_sets=token_sets, + hard_neighbors=hard_neighbors, + max_candidates=max_candidates, + extra_filter=extra_filter, + ) + corpus = [features[node].tokens or ["empty"] for node in nodes] + bm25 = BM25Okapi(corpus) + candidates: 
dict[tuple[str, str], dict[str, Any]] = {} + for index, node in enumerate(nodes): + feature = features[node] + if not feature.tokens: + continue + scores = bm25.get_scores(feature.tokens) + ranked = sorted(range(len(nodes)), key=lambda position: scores[position], reverse=True) + accepted = 0 + for candidate_index in ranked: + if candidate_index == index: + continue + candidate_node = nodes[candidate_index] + if candidate_node in hard_neighbors[node]: + continue + score = float(scores[candidate_index]) + if score <= 0: + continue + candidate_feature = features[candidate_node] + jaccard = _jaccard_sets(token_sets[index], token_sets[candidate_index]) + if jaccard < 0.2: + continue + if not extra_filter(feature, candidate_feature): + continue + pair = _pair_key(node, candidate_node) + current = candidates.get(pair) + if current is None or score > current["score"]: + candidates[pair] = { + "left": pair[0], + "right": pair[1], + "kind": kind, + "score": score, + "jaccard": jaccard, + } + accepted += 1 + if accepted >= max_candidates: + break + return candidates + + +def _sparse_token_candidates( + *, + nodes: list[str], + kind: str, + features: dict[str, ArtifactFeature], + token_sets: list[set[str]], + hard_neighbors: defaultdict[str, set[str]], + max_candidates: int, + extra_filter: Any, +) -> dict[tuple[str, str], dict[str, Any]]: + anchor_tokens: list[set[str]] = [] + inverted: defaultdict[str, list[int]] = defaultdict(list) + for index, node in enumerate(nodes): + tokens = features[node].title_tokens or token_sets[index] + anchor_tokens.append(tokens) + for token in tokens: + inverted[token].append(index) + + candidates: dict[tuple[str, str], dict[str, Any]] = {} + for index, node in enumerate(nodes): + feature = features[node] + if not token_sets[index]: + continue + probe_tokens = sorted(anchor_tokens[index], key=lambda token: len(inverted[token]))[:8] + overlap_scores: Counter[int] = Counter() + for token in probe_tokens: + for candidate_index in inverted[token]: + if candidate_index != index: + overlap_scores[candidate_index] += 1 + accepted = 0 + for candidate_index, overlap in overlap_scores.most_common(): + candidate_node = nodes[candidate_index] + if candidate_node in hard_neighbors[node]: + continue + candidate_feature = features[candidate_node] + jaccard = _jaccard_sets(token_sets[index], token_sets[candidate_index]) + if jaccard < 0.2: + continue + if not extra_filter(feature, candidate_feature): + continue + pair = _pair_key(node, candidate_node) + score = float(overlap) + current = candidates.get(pair) + if current is None or score > current["score"]: + candidates[pair] = { + "left": pair[0], + "right": pair[1], + "kind": kind, + "score": score, + "jaccard": jaccard, + } + accepted += 1 + if accepted >= max_candidates: + break + return candidates + + +def _issue_soft_filter(left: ArtifactFeature, right: ArtifactFeature) -> bool: + if _days_between(left.row.get("created_at"), right.row.get("created_at")) <= 365: + return True + return len(left.title_tokens & right.title_tokens) >= 3 + + +def _pr_soft_filter(left: ArtifactFeature, right: ArtifactFeature) -> bool: + if not left.row.get("base_ref") or left.row.get("base_ref") != right.row.get("base_ref"): + return False + return _days_between(left.row.get("created_at"), right.row.get("created_at")) <= 180 + + +def _estimate_packet_size(packet: dict[str, Any], model: str) -> PacketBudget: + del model + serialized = json.dumps(packet, indent=2, sort_keys=True) + estimated_input_tokens = max( + 1, (len(serialized) + 
LLM_PACKET_CHARS_PER_TOKEN - 1) // LLM_PACKET_CHARS_PER_TOKEN + ) + return PacketBudget( + node_count=len(packet["nodes"]), + item_count=len(packet["items"]), + soft_pair_count=len(packet["soft_pairs"]), + serialized_chars=len(serialized), + estimated_input_tokens=estimated_input_tokens, + estimated_eval_tokens=estimated_input_tokens * 2 + 256, + ) + + +def _packet_budget_json(budget: PacketBudget) -> dict[str, int]: + return { + "node_count": budget.node_count, + "item_count": budget.item_count, + "soft_pair_count": budget.soft_pair_count, + "serialized_chars": budget.serialized_chars, + "estimated_input_tokens": budget.estimated_input_tokens, + "estimated_eval_tokens": budget.estimated_eval_tokens, + } + + +def _packet_over_budget(budget: PacketBudget) -> bool: + return ( + budget.node_count > LLM_MAX_NODES_PER_PACKET + or budget.soft_pair_count > LLM_MAX_SOFT_PAIRS_PER_PACKET + or budget.estimated_input_tokens > LLM_MAX_INPUT_TOKENS + ) + + +def _soft_pair_review_sort_key(pair: dict[str, Any]) -> tuple[Any, ...]: + return ( + 0 if not bool(pair.get("deterministic_accept", True)) else 1, + -len(pair.get("shared_targets") or []), + -float(pair.get("score") or 0.0), + -float(pair.get("jaccard") or 0.0), + str(pair["left"]), + str(pair["right"]), + ) + + +def _review_subpacket(packet: dict[str, Any], soft_pairs: list[dict[str, Any]]) -> dict[str, Any]: + node_ids = { + node_id for pair in soft_pairs for node_id in (str(pair["left"]), str(pair["right"])) + } + soft_pair_keys = {_pair_key(str(pair["left"]), str(pair["right"])) for pair in soft_pairs} + items_by_node = {str(item["node_id"]): item for item in packet["items"]} + pair_evidence: dict[str, list[str]] = {} + for key, evidence in packet["pair_evidence"].items(): + left, right = key.split("|", 1) + if left not in node_ids or right not in node_ids: + continue + filtered = sorted(value for value in evidence if value != "soft_similarity") + if key in soft_pair_keys: + pair_evidence[key] = sorted(evidence) + elif filtered: + pair_evidence[key] = filtered + nodes = sorted(node_ids) + return { + "nodes": nodes, + "items": [dict(items_by_node[node]) for node in nodes], + "pair_evidence": pair_evidence, + "soft_pairs": [dict(pair) for pair in soft_pairs], + } + + +def _split_packet_for_review(packet: dict[str, Any], model: str) -> list[dict[str, Any]]: + if not packet["soft_pairs"]: + return [packet] + if not _packet_over_budget(_estimate_packet_size(packet, model)): + return [packet] + batches: list[list[dict[str, Any]]] = [] + current_batch: list[dict[str, Any]] = [] + for soft_pair in sorted(packet["soft_pairs"], key=_soft_pair_review_sort_key): + candidate_batch = [*current_batch, soft_pair] + candidate_packet = _review_subpacket(packet, candidate_batch) + if current_batch and _packet_over_budget(_estimate_packet_size(candidate_packet, model)): + batches.append(current_batch) + current_batch = [soft_pair] + continue + current_batch = candidate_batch + if current_batch: + batches.append(current_batch) + return [_review_subpacket(packet, batch) for batch in batches] + + +def _trim_packet_for_llm( + packet: dict[str, Any], *, max_diff_chars: int, max_filenames: int +) -> dict[str, Any]: + return { + "nodes": list(packet["nodes"]), + "items": [ + { + **item, + "filenames": list(item.get("filenames") or [])[:max_filenames], + "diff_preview": ( + None + if item.get("diff_preview") is None + else str(item["diff_preview"])[:max_diff_chars] + ), + } + for item in packet["items"] + ], + "pair_evidence": {key: list(values) for key, values in 
packet["pair_evidence"].items()}, + "soft_pairs": [dict(pair) for pair in packet["soft_pairs"]], + } + + +def _prepare_packet_for_llm( + packet: dict[str, Any], model: str, *, split: bool +) -> PreparedLlmPacket | None: + original_budget = _estimate_packet_size(packet, model) + if not _packet_over_budget(original_budget): + return PreparedLlmPacket( + packet=packet, + budget=original_budget, + original_budget=original_budget, + trimmed=False, + aggressively_trimmed=False, + split=split, + ) + trim_levels = ( + (LLM_MAX_DIFF_CHARS_PER_ITEM, LLM_MAX_FILENAMES_PER_ITEM, False), + ( + max(120, LLM_MAX_DIFF_CHARS_PER_ITEM // 2), + max(2, LLM_MAX_FILENAMES_PER_ITEM // 2), + True, + ), + ) + for max_diff_chars, max_filenames, aggressively_trimmed in trim_levels: + trimmed_packet = _trim_packet_for_llm( + packet, + max_diff_chars=max_diff_chars, + max_filenames=max_filenames, + ) + budget = _estimate_packet_size(trimmed_packet, model) + if not _packet_over_budget(budget): + return PreparedLlmPacket( + packet=trimmed_packet, + budget=budget, + original_budget=original_budget, + trimmed=True, + aggressively_trimmed=aggressively_trimmed, + split=split, + ) + return None + + +def _accepted_nontrivial_soft_edge( + packet: dict[str, Any], analyst_result: ClusterAnalystResponse +) -> bool: + accepted = { + _pair_key(verdict.left, verdict.right) + for verdict in analyst_result.soft_edge_verdicts + if verdict.accept + } + return any( + not bool(pair.get("deterministic_accept", True)) + and _pair_key(str(pair["left"]), str(pair["right"])) in accepted + for pair in packet["soft_pairs"] + ) + + +def _should_run_evaluator( + packet: dict[str, Any], + budget: PacketBudget, + *, + split: bool, + aggressively_trimmed: bool, + analyst_result: ClusterAnalystResponse, +) -> bool: + del split + if aggressively_trimmed: + return False + if budget.estimated_eval_tokens > LLM_SKIP_EVALUATOR_ABOVE_TOKENS: + return False + return _accepted_nontrivial_soft_edge(packet, analyst_result) + + +def _classify_llm_error(exc: Exception) -> str: + message = f"{type(exc).__name__}: {exc}".lower() + type_name = type(exc).__name__.lower() + if ( + "context window" in message + or "maximum context length" in message + or "exceeds the context" in message + ): + return "context_window_exceeded" + if "timeout" in message or "timed out" in message: + return "provider_timeout" + if any(term in message for term in ("auth", "api key", "unauthorized", "forbidden")): + return "provider_auth_error" + if any(term in type_name for term in ("validation", "decode")) or "parse" in message: + return "structured_parse_error" + return "unknown_provider_error" + + +def _summarize_llm_error(exc: Exception) -> str: + return re.sub(r"\s+", " ", str(exc)).strip()[:300] + + +def _packet_soft_pair_ids(packet: dict[str, Any]) -> list[str]: + return [ + "|".join(_pair_key(str(pair["left"]), str(pair["right"]))) for pair in packet["soft_pairs"] + ] + + +def _soft_pair_review_meta( + *, + label: str, + component_index: int, + component_count: int, + review_unit_index: int, + review_unit_count: int, + cluster_id: str, + component_budget: PacketBudget, + budget: PacketBudget, + prepared_review_unit_hash: str | None, + trimmed: bool, + aggressively_trimmed: bool, + split: bool, + packet: dict[str, Any], +) -> SoftPairReviewUnitMeta: + prefix = ( + f"LLM {label} soft-edge review {component_index}/{component_count}" + f" unit {review_unit_index}/{review_unit_count}" + ) + return SoftPairReviewUnitMeta( + label=label, + component_index=component_index, + 
component_count=component_count, + review_unit_index=review_unit_index, + review_unit_count=review_unit_count, + cluster_id=cluster_id, + prefix=prefix, + nodes=tuple(str(node) for node in packet["nodes"]), + soft_pairs=tuple(_packet_soft_pair_ids(packet)), + component_budget=component_budget, + budget=budget, + prepared_review_unit_hash=prepared_review_unit_hash, + trimmed=trimmed, + aggressively_trimmed=aggressively_trimmed, + split=split, + ) + + +def _completed_soft_pair_review_sort_key(review: CompletedSoftPairReview) -> tuple[int, int]: + return ( + review.meta.component_index, + review.meta.review_unit_index, + ) + + +def _soft_pair_review_record( + *, + review: CompletedSoftPairReview, + model: str, + accepted_nontrivial_soft_edge: bool, +) -> dict[str, Any]: + result = review.result + return { + "label": review.meta.label, + "component_index": review.meta.component_index, + "component_count": review.meta.component_count, + "review_unit_index": review.meta.review_unit_index, + "review_unit_count": review.meta.review_unit_count, + "status": review.status, + "reason": review.reason, + "source": review.source, + "cache_hit": review.cache_hit, + "model": model, + "cluster_id": review.meta.cluster_id, + "nodes": list(review.meta.nodes), + "soft_pairs": list(review.meta.soft_pairs), + "prepared_review_unit_hash": review.meta.prepared_review_unit_hash, + "component_budget": _packet_budget_json(review.meta.component_budget), + "budget": _packet_budget_json(review.meta.budget), + "overflow_policy": LLM_OVERFLOW_POLICY, + "trimmed": review.meta.trimmed, + "aggressively_trimmed": review.meta.aggressively_trimmed, + "split": review.meta.split, + "analyst_result": ( + None + if result is None or result.analyst_result is None + else result.analyst_result.model_dump(mode="json") + ), + "evaluator_result": ( + None + if result is None or result.evaluator_result is None + else result.evaluator_result.model_dump(mode="json") + ), + "evaluator_used": False if result is None else result.evaluator_used, + "retried": False if result is None else result.retried, + "accepted_nontrivial_soft_edge": accepted_nontrivial_soft_edge, + "error_kind": None if result is None else result.error_kind, + "error_message": None if result is None else result.error_message, + } + + +def _completed_soft_pair_review_from_result( + pending: PendingSoftPairReview, + result: ClusterAnalysisCallResult, +) -> CompletedSoftPairReview: + return CompletedSoftPairReview( + meta=pending.meta, + result=result, + status="reviewed" if result.analyst_result is not None else "error", + reason=None, + source="llm", + cache_hit=False, + ) + + +async def _run_pending_soft_pair_review( + pending: PendingSoftPairReview, + *, + model: str, + review_semaphore: asyncio.Semaphore, +) -> CompletedSoftPairReview: + async with review_semaphore: + try: + result = await _fast_agent_cluster_analysis(pending.prepared, model) + except Exception as exc: + result = ClusterAnalysisCallResult( + analyst_result=None, + evaluator_result=None, + error_kind=_classify_llm_error(exc), + error_message=_summarize_llm_error(exc), + evaluator_used=False, + retried=False, + ) + return _completed_soft_pair_review_from_result(pending, result) + + +async def _run_pending_soft_pair_reviews( + pending_reviews: list[PendingSoftPairReview], + *, + concurrency: int, + model: str, + review_semaphore: asyncio.Semaphore, +) -> list[CompletedSoftPairReview]: + if not pending_reviews: + return [] + if concurrency <= 1: + completed: list[CompletedSoftPairReview] = [] + for pending in 
pending_reviews: + completed.append( + await _run_pending_soft_pair_review( + pending, + model=model, + review_semaphore=review_semaphore, + ) + ) + return completed + tasks = [ + asyncio.create_task( + _run_pending_soft_pair_review( + pending, + model=model, + review_semaphore=review_semaphore, + ) + ) + for pending in pending_reviews + ] + return await asyncio.gather(*tasks) + + +async def _accepted_soft_pairs( + *, + options: AnalysisOptions, + snapshot: SnapshotData, + features: dict[str, ArtifactFeature], + hard_pairs: dict[tuple[str, str], set[str]], + soft_candidates: dict[tuple[str, str], dict[str, Any]], + label: str, + hybrid_review_cache: HybridReviewCacheStore, + llm_available: bool, + review_semaphore: asyncio.Semaphore, +) -> tuple[dict[tuple[str, str], dict[str, Any]], bool, list[dict[str, Any]]]: + del snapshot + if not soft_candidates: + return {}, False, [] + deterministic_accepts = { + pair: detail + for pair, detail in soft_candidates.items() + if bool(detail.get("deterministic_accept", True)) + } + if options.ranking_backend != "hybrid": + return deterministic_accepts, False, [] + if not llm_available and not hybrid_review_cache.has_entries: + return deterministic_accepts, False, [] + + candidate_graph = dict(hard_pairs) + for pair in soft_candidates: + candidate_graph.setdefault(pair, set()).add("soft_similarity") + component_payloads = _component_packets(features, candidate_graph, soft_candidates) + pending_reviews: list[PendingSoftPairReview] = [] + completed_reviews: list[CompletedSoftPairReview] = [] + accepted: dict[tuple[str, str], dict[str, Any]] = dict(deterministic_accepts) + llm_used = False + review_records: list[dict[str, Any]] = [] + total_components = len(component_payloads) + for index, payload in enumerate(component_payloads, start=1): + component_budget = _estimate_packet_size(payload, options.model) + cluster_id = _cluster_id_from_nodes(payload["nodes"]) + review_units = _split_packet_for_review(payload, options.model) + if len(review_units) > 1: + _analysis_log( + f"LLM {label} soft-edge review {index}/{total_components}: " + f"split oversized component into {len(review_units)} review units " + f"(nodes={component_budget.node_count}, soft_pairs={component_budget.soft_pair_count}, " + f"est_tokens={component_budget.estimated_input_tokens})" + ) + for unit_index, review_unit in enumerate(review_units, start=1): + prepared = _prepare_packet_for_llm( + review_unit, + options.model, + split=len(review_units) > 1, + ) + if prepared is None: + unit_budget = _estimate_packet_size(review_unit, options.model) + completed_reviews.append( + CompletedSoftPairReview( + meta=_soft_pair_review_meta( + label=label, + component_index=index, + component_count=total_components, + review_unit_index=unit_index, + review_unit_count=len(review_units), + cluster_id=cluster_id, + component_budget=component_budget, + budget=unit_budget, + prepared_review_unit_hash=None, + trimmed=True, + aggressively_trimmed=True, + split=len(review_units) > 1, + packet=review_unit, + ), + result=None, + status="skipped", + reason="over_budget_after_truncate", + source=None, + cache_hit=False, + ) + ) + continue + prepared_review_unit = _prepared_review_unit_payload(prepared) + cache_key = build_hybrid_review_cache_key( + manifest=hybrid_review_cache.manifest, + model=options.model, + prepared_review_unit=prepared_review_unit, + ) + meta = _soft_pair_review_meta( + label=label, + component_index=index, + component_count=total_components, + review_unit_index=unit_index, + 
review_unit_count=len(review_units), + cluster_id=cluster_id, + component_budget=component_budget, + budget=prepared.budget, + prepared_review_unit_hash=cache_key.prepared_review_unit_hash, + trimmed=prepared.trimmed, + aggressively_trimmed=prepared.aggressively_trimmed, + split=prepared.split, + packet=prepared.packet, + ) + cached_entry = hybrid_review_cache.get(cache_key) + if cached_entry is not None: + completed_reviews.append( + CompletedSoftPairReview( + meta=meta, + result=_cluster_analysis_call_result_from_payload(cached_entry.result), + status=( + "reviewed" + if cached_entry.result.get("analyst_result") is not None + else "error" + ), + reason=None, + source="cache", + cache_hit=True, + ) + ) + continue + if not llm_available: + completed_reviews.append( + CompletedSoftPairReview( + meta=meta, + result=None, + status="skipped", + reason="llm_unavailable_cache_miss", + source=None, + cache_hit=False, + ) + ) + continue + pending_reviews.append( + PendingSoftPairReview( + meta=meta, + prepared=prepared, + cache_key=cache_key, + ) + ) + reviewed_from_cache = sum(1 for review in completed_reviews if review.cache_hit) + skipped_reviews = sum(1 for review in completed_reviews if review.status == "skipped") + _analysis_log( + f"LLM {label} soft-edge review scheduling: " + f"units={len(pending_reviews) + len(completed_reviews)}, " + f"cache_hits={reviewed_from_cache}, " + f"cache_misses={len(pending_reviews)}, " + f"skipped={skipped_reviews}, " + f"concurrency={options.hybrid_llm_concurrency}" + ) + completed_reviews.extend( + await _run_pending_soft_pair_reviews( + pending_reviews, + concurrency=options.hybrid_llm_concurrency, + model=options.model, + review_semaphore=review_semaphore, + ) + ) + pending_by_position = { + (pending.meta.component_index, pending.meta.review_unit_index): pending + for pending in pending_reviews + } + for review in sorted(completed_reviews, key=_completed_soft_pair_review_sort_key): + accepted_nontrivial = False + pending = pending_by_position.get( + (review.meta.component_index, review.meta.review_unit_index) + ) + result = review.result + if review.reason == "over_budget_after_truncate": + _analysis_log( + f"{review.meta.prefix}: skipped over-budget packet " + f"(nodes={review.meta.budget.node_count}, soft_pairs={review.meta.budget.soft_pair_count}, " + f"est_tokens={review.meta.budget.estimated_input_tokens}, overflow_policy={LLM_OVERFLOW_POLICY})" + ) + elif review.reason == "llm_unavailable_cache_miss": + _analysis_log( + f"{review.meta.prefix}: cache miss with fast-agent unavailable; " + "keeping deterministic-only soft edges" + ) + else: + if review.cache_hit: + _analysis_log( + f"{review.meta.prefix}: cache hit " + f"(nodes={review.meta.budget.node_count}, soft_pairs={review.meta.budget.soft_pair_count}, " + f"est_tokens={review.meta.budget.estimated_input_tokens}, model={options.model})" + ) + if result is None or result.analyst_result is None: + if result is not None and result.error_kind is not None: + _analysis_log( + f"{review.meta.prefix}: {result.error_kind}" + f" (nodes={review.meta.budget.node_count}, soft_pairs={review.meta.budget.soft_pair_count}, " + f"est_tokens={review.meta.budget.estimated_input_tokens}, " + f"overflow_policy={LLM_OVERFLOW_POLICY})" + ) + else: + _analysis_log(f"{review.meta.prefix}: no result") + else: + llm_used = True + verdicts = { + _pair_key(verdict.left, verdict.right): verdict + for verdict in result.analyst_result.soft_edge_verdicts + } + accepted_count = sum(1 for verdict in verdicts.values() if 
verdict.accept) + rejected_count = sum(1 for verdict in verdicts.values() if not verdict.accept) + accepted_nontrivial = any( + verdicts.get(_pair_key(*pair_id.split("|", 1))) is not None + and verdicts[_pair_key(*pair_id.split("|", 1))].accept + and not bool( + soft_candidates[_pair_key(*pair_id.split("|", 1))].get( + "deterministic_accept", + True, + ) + ) + for pair_id in review.meta.soft_pairs + ) + evaluator_status = "used" if result.evaluator_used else "skipped" + _analysis_log( + f"{review.meta.prefix}: {accepted_count} accepted, {rejected_count} rejected, " + f"evaluator={evaluator_status}, source={review.source}" + ) + if result.error_kind is not None: + _analysis_log( + f"{review.meta.prefix}: {result.error_kind}; keeping analyst result" + ) + for pair_id in review.meta.soft_pairs: + normalized_pair = _pair_key(*pair_id.split("|", 1)) + verdict = verdicts.get(normalized_pair) + if verdict is None: + continue + if verdict.accept: + accepted[normalized_pair] = soft_candidates[normalized_pair] + else: + accepted.pop(normalized_pair, None) + if ( + pending is not None + and review.source == "llm" + and _cacheable_cluster_analysis_result(result) + ): + hybrid_review_cache.put( + HybridReviewCacheEntry( + key=pending.cache_key, + result=_cluster_analysis_call_result_payload(result), + cached_at=_iso_now(), + nodes=tuple(pending.prepared.packet["nodes"]), + soft_pairs=tuple(_packet_soft_pair_ids(pending.prepared.packet)), + budget=_packet_budget_json(pending.prepared.budget), + split=pending.prepared.split, + trimmed=pending.prepared.trimmed, + aggressively_trimmed=pending.prepared.aggressively_trimmed, + ) + ) + review_records.append( + _soft_pair_review_record( + review=review, + model=options.model, + accepted_nontrivial_soft_edge=accepted_nontrivial, + ) + ) + return accepted, llm_used, review_records + + +def _component_packets( + features: dict[str, ArtifactFeature], + pairs: dict[tuple[str, str], set[str]], + soft_candidates: dict[tuple[str, str], dict[str, Any]], +) -> list[dict[str, Any]]: + components = _connected_components(features, pairs) + packets: list[dict[str, Any]] = [] + for nodes in components: + pair_members = { + pair: evidence + for pair, evidence in pairs.items() + if pair[0] in nodes and pair[1] in nodes + } + soft_pairs = [ + { + "left": pair[0], + "right": pair[1], + "score": detail["score"], + "jaccard": detail["jaccard"], + "evidence_types": sorted(detail.get("evidence_types") or []), + "shared_targets": detail.get("shared_targets") or [], + "shared_filenames": detail.get("shared_filenames") or [], + "deterministic_accept": bool(detail.get("deterministic_accept", True)), + } + for pair, detail in soft_candidates.items() + if pair[0] in nodes and pair[1] in nodes + ] + packets.append( + { + "nodes": nodes, + "items": [_cluster_item(features[node]) for node in nodes], + "pair_evidence": { + f"{left}|{right}": sorted(evidence) + for (left, right), evidence in pair_members.items() + }, + "soft_pairs": soft_pairs, + } + ) + return packets + + +def _clusters( + *, + snapshot: SnapshotData, + features: dict[str, ArtifactFeature], + final_pairs: dict[tuple[str, str], set[str]], + pair_target_issues: dict[tuple[str, str], set[int]], + llm_cluster_payloads: dict[str, ClusterAnalystResponse], +) -> list[ClusterRecord]: + clusters: list[ClusterRecord] = [] + for nodes in _connected_components(features, final_pairs): + issue_numbers = sorted( + int(node.split(":", 1)[1]) for node in nodes if node.startswith("issue:") + ) + pr_numbers = sorted( + int(node.split(":", 
1)[1]) for node in nodes if node.startswith("pull_request:") + ) + evidence_types = sorted( + { + evidence + for (left, right), evidences in final_pairs.items() + if left in nodes and right in nodes + for evidence in evidences + } + ) + target_counter: Counter[int] = Counter() + for pair, targets in pair_target_issues.items(): + if pair[0] in nodes and pair[1] in nodes: + target_counter.update(targets) + target_issue_number = target_counter.most_common(1)[0][0] if target_counter else None + llm_payload = llm_cluster_payloads.get(_cluster_id_from_nodes(nodes)) + clusters.append( + _cluster_record_from_members( + features=features, + issue_numbers=issue_numbers, + pr_numbers=pr_numbers, + evidence_types=evidence_types, + target_issue_number=target_issue_number, + llm_payload=llm_payload, + ) + ) + return clusters + + +def _cluster_record_from_members( + *, + features: dict[str, ArtifactFeature], + issue_numbers: list[int], + pr_numbers: list[int], + evidence_types: list[str], + target_issue_number: int | None, + llm_payload: ClusterAnalystResponse | None = None, +) -> ClusterRecord: + nodes = sorted( + [f"issue:{number}" for number in issue_numbers] + + [f"pull_request:{number}" for number in pr_numbers] + ) + cluster_id = _cluster_id_from_nodes(nodes) + canonical_issue_number = _canonical_issue(issue_numbers, features) + canonical_pr_number = _canonical_pr(pr_numbers, features) + status = _cluster_status(issue_numbers, pr_numbers, features) + confidence = _cluster_confidence(evidence_types) + summary = _cluster_summary(issue_numbers, pr_numbers, target_issue_number, evidence_types) + canonical_issue_reason = ( + _canonical_issue_reason(canonical_issue_number, features, issue_numbers) + if canonical_issue_number is not None + else None + ) + canonical_pr_reason = ( + _canonical_pr_reason(canonical_pr_number, features, pr_numbers) + if canonical_pr_number is not None + else None + ) + best_issue_reason = ( + _best_issue_reason(canonical_issue_number, features, len(nodes)) + if canonical_issue_number is not None + else None + ) + best_pr_reason = ( + _best_pr_reason(canonical_pr_number, features, len(nodes)) + if canonical_pr_number is not None + else None + ) + if llm_payload is not None: + summary = llm_payload.summary or summary + confidence = max(0.0, min(1.0, llm_payload.confidence)) + canonical_issue_reason = llm_payload.canonical_issue_reason or canonical_issue_reason + canonical_pr_reason = llm_payload.canonical_pr_reason or canonical_pr_reason + best_issue_reason = llm_payload.best_issue_reason or best_issue_reason + best_pr_reason = llm_payload.best_pr_reason or best_pr_reason + cluster_score = _cluster_score(issue_numbers, pr_numbers, features, status) + best_issue_score = ( + _issue_score(canonical_issue_number, features, len(nodes)) + if canonical_issue_number is not None + else None + ) + best_pr_score = ( + _pr_score(canonical_pr_number, features, len(nodes)) + if canonical_pr_number is not None + else None + ) + return ClusterRecord( + cluster_id=cluster_id, + nodes=nodes, + issue_numbers=issue_numbers, + pr_numbers=pr_numbers, + evidence_types=evidence_types, + canonical_issue_number=canonical_issue_number, + canonical_pr_number=canonical_pr_number, + target_issue_number=target_issue_number, + summary=summary, + status=status, + confidence=confidence, + canonical_issue_reason=canonical_issue_reason, + canonical_pr_reason=canonical_pr_reason, + best_issue_reason=best_issue_reason, + best_pr_reason=best_pr_reason, + cluster_score=cluster_score, + 
best_issue_score=best_issue_score, + best_pr_score=best_pr_score, + ) + + +def _meta_bug_clusters( + *, + features: dict[str, ArtifactFeature], + issue_clusters: list[ClusterRecord], + pr_clusters: list[ClusterRecord], + explicit_issue_link_targets: defaultdict[int, set[int]], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], +) -> list[ClusterRecord]: + issue_cluster_by_issue: dict[int, ClusterRecord] = {} + for cluster in issue_clusters: + for issue_number in cluster.issue_numbers: + issue_cluster_by_issue[issue_number] = cluster + + pr_cluster_by_pr: dict[int, ClusterRecord] = {} + for cluster in pr_clusters: + for pr_number in cluster.pr_numbers: + pr_cluster_by_pr[pr_number] = cluster + + prs_by_target_issue: defaultdict[int, set[int]] = defaultdict(set) + for pr_number, targets in explicit_issue_link_targets.items(): + for target in targets: + if target in issue_map: + prs_by_target_issue[target].add(pr_number) + + issue_anchors: list[ClusterRecord] = list(issue_clusters) + targeted_issue_numbers = sorted(prs_by_target_issue) + for issue_number in targeted_issue_numbers: + if issue_number in issue_cluster_by_issue: + continue + singleton = _cluster_record_from_members( + features=features, + issue_numbers=[issue_number], + pr_numbers=[], + evidence_types=["closing_reference"], + target_issue_number=issue_number, + ) + issue_anchors.append(singleton) + issue_cluster_by_issue[issue_number] = singleton + + pr_groups: list[ClusterRecord] = list(pr_clusters) + for pr_number, targets in explicit_issue_link_targets.items(): + if pr_number in pr_cluster_by_pr or pr_number not in pr_map: + continue + singleton = _cluster_record_from_members( + features=features, + issue_numbers=[], + pr_numbers=[pr_number], + evidence_types=["closing_reference"], + target_issue_number=min(targets) if targets else None, + ) + pr_groups.append(singleton) + pr_cluster_by_pr[pr_number] = singleton + + anchor_buckets: dict[str, dict[str, Any]] = {} + issue_anchor_for_issue: dict[int, str] = {} + for cluster in issue_anchors: + has_attached_prs = any( + prs_by_target_issue.get(issue_number) for issue_number in cluster.issue_numbers + ) + if len(cluster.issue_numbers) < 2 and not has_attached_prs: + continue + anchor_buckets[cluster.cluster_id] = { + "issue_numbers": set(cluster.issue_numbers), + "pr_numbers": set(), + "evidence_types": set(cluster.evidence_types), + "target_issue_number": cluster.canonical_issue_number + or (cluster.issue_numbers[0] if cluster.issue_numbers else None), + } + for issue_number in cluster.issue_numbers: + issue_anchor_for_issue[issue_number] = cluster.cluster_id + + attached_pr_clusters: set[str] = set() + for cluster in pr_groups: + anchor_id = _select_issue_anchor_for_pr_cluster( + cluster=cluster, + explicit_issue_link_targets=explicit_issue_link_targets, + issue_map=issue_map, + issue_anchor_for_issue=issue_anchor_for_issue, + anchor_buckets=anchor_buckets, + ) + if anchor_id is not None: + bucket = anchor_buckets[anchor_id] + bucket["pr_numbers"].update(cluster.pr_numbers) + bucket["evidence_types"].update(cluster.evidence_types) + bucket["evidence_types"].add("closing_reference") + attached_pr_clusters.add(cluster.cluster_id) + + meta_clusters: list[ClusterRecord] = [] + for bucket in anchor_buckets.values(): + if len(bucket["pr_numbers"]) < 2: + continue + meta_clusters.append( + _cluster_record_from_members( + features=features, + issue_numbers=sorted(bucket["issue_numbers"]), + pr_numbers=sorted(bucket["pr_numbers"]), + 
evidence_types=sorted(bucket["evidence_types"]), + target_issue_number=bucket["target_issue_number"], + ) + ) + + for cluster in pr_groups: + if cluster.cluster_id in attached_pr_clusters: + continue + if len(cluster.pr_numbers) < 2: + continue + meta_clusters.append( + _cluster_record_from_members( + features=features, + issue_numbers=[], + pr_numbers=cluster.pr_numbers, + evidence_types=cluster.evidence_types, + target_issue_number=cluster.target_issue_number, + ) + ) + + return sorted( + {cluster.cluster_id: cluster for cluster in meta_clusters}.values(), + key=lambda cluster: cluster.cluster_id, + ) + + +def _select_issue_anchor_for_pr_cluster( + *, + cluster: ClusterRecord, + explicit_issue_link_targets: defaultdict[int, set[int]], + issue_map: dict[int, dict[str, Any]], + issue_anchor_for_issue: dict[int, str], + anchor_buckets: dict[str, dict[str, Any]], +) -> str | None: + anchor_counts: Counter[str] = Counter() + targeted_pr_count = 0 + for pr_number in cluster.pr_numbers: + anchor_ids = { + issue_anchor_for_issue[target] + for target in explicit_issue_link_targets.get(pr_number, set()) + if target in issue_map and target in issue_anchor_for_issue + } + if not anchor_ids: + continue + targeted_pr_count += 1 + anchor_counts.update(anchor_ids) + if not anchor_counts or targeted_pr_count <= 0: + return None + ranked = sorted( + anchor_counts.items(), + key=lambda item: ( + -item[1], + min(anchor_buckets[item[0]]["issue_numbers"]), + ), + ) + winner_id, winner_count = ranked[0] + runner_up_count = ranked[1][1] if len(ranked) > 1 else 0 + if winner_count <= runner_up_count: + return None + if winner_count * 2 < targeted_pr_count: + return None + return winner_id + + +def _best_issue( + clusters: list[ClusterRecord], features: dict[str, ArtifactFeature] +) -> BestIssueEntry | None: + candidates = [cluster for cluster in clusters if cluster.canonical_issue_number is not None] + if not candidates: + return None + winner = min( + candidates, + key=lambda cluster: ( + 0 + if features[f"issue:{cluster.canonical_issue_number}"].row.get("state") == "open" + else 1, + -len(cluster.nodes), + -features[f"issue:{cluster.canonical_issue_number}"].discussion_activity, + -features[f"issue:{cluster.canonical_issue_number}"].inbound_references, + _sort_timestamp( + features[f"issue:{cluster.canonical_issue_number}"].row.get("created_at") + ), + cluster.canonical_issue_number, + ), + ) + issue_number = winner.canonical_issue_number + assert issue_number is not None + issue_reason = winner.best_issue_reason + if issue_reason is None: + issue_reason = _best_issue_reason(issue_number, features, len(winner.nodes)) + assert issue_reason is not None + return BestIssueEntry( + cluster_id=winner.cluster_id, + issue_number=issue_number, + reason=issue_reason, + score=round(float(winner.best_issue_score or 0.0), 3), + ) + + +def _best_pr( + clusters: list[ClusterRecord], features: dict[str, ArtifactFeature] +) -> BestPrEntry | None: + candidates = [cluster for cluster in clusters if cluster.canonical_pr_number is not None] + if not candidates: + return None + open_candidates = [ + cluster + for cluster in candidates + if features[f"pull_request:{cluster.canonical_pr_number}"].row.get("state") == "open" + and not bool(features[f"pull_request:{cluster.canonical_pr_number}"].row.get("draft")) + ] + pool = ( + open_candidates + or [ + cluster + for cluster in candidates + if bool(features[f"pull_request:{cluster.canonical_pr_number}"].row.get("merged")) + ] + or candidates + ) + winner = min( + pool, + key=lambda 
cluster: ( + 0 + if features[f"pull_request:{cluster.canonical_pr_number}"].row.get("state") == "open" + and not bool(features[f"pull_request:{cluster.canonical_pr_number}"].row.get("draft")) + else 1, + -len(cluster.nodes), + -features[f"pull_request:{cluster.canonical_pr_number}"].explicit_issue_links, + -( + features[f"pull_request:{cluster.canonical_pr_number}"].discussion_activity + + features[f"pull_request:{cluster.canonical_pr_number}"].review_activity + ), + features[f"pull_request:{cluster.canonical_pr_number}"].diff_size, + _sort_timestamp( + features[f"pull_request:{cluster.canonical_pr_number}"].row.get("created_at") + ), + cluster.canonical_pr_number, + ), + ) + pr_number = winner.canonical_pr_number + assert pr_number is not None + pr_reason = winner.best_pr_reason + if pr_reason is None: + pr_reason = _best_pr_reason(pr_number, features, len(winner.nodes)) + assert pr_reason is not None + return BestPrEntry( + cluster_id=winner.cluster_id, + pr_number=pr_number, + reason=pr_reason, + score=round(float(winner.best_pr_score or 0.0), 3), + ) + + +def _resolve_source_node( + row: dict[str, Any], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + comment_map: dict[int, dict[str, Any]], + review_map: dict[int, dict[str, Any]], + review_comment_map: dict[int, dict[str, Any]], +) -> str | None: + source_type = row.get("source_type") + source_number = row.get("source_number") + if source_type in {"issue", "pull_request"} and source_number is not None: + return _node_from_number(int(source_number), issue_map, pr_map) + source_id = row.get("source_github_id") + if source_type == "comment" and source_id is not None: + comment = comment_map.get(int(source_id)) + if comment and comment.get("parent_number") is not None: + parent_kind = comment.get("parent_kind") + if parent_kind in {"issue", "pull_request"}: + return _node_from_number(int(comment["parent_number"]), issue_map, pr_map) + if source_type == "review" and source_id is not None: + review = review_map.get(int(source_id)) + if review: + return _node_from_number(int(review["pull_request_number"]), issue_map, pr_map) + if source_type == "review_comment" and source_id is not None: + review_comment = review_comment_map.get(int(source_id)) + if review_comment: + return _node_from_number(int(review_comment["pull_request_number"]), issue_map, pr_map) + if source_number is None: + return None + return _node_from_number(int(source_number), issue_map, pr_map) + + +def _resolve_target_node( + repo: str, + row: dict[str, Any], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], +) -> str | None: + if ( + row.get("target_owner") != repo.split("/", 1)[0] + or row.get("target_repo") != repo.split("/", 1)[1] + ): + return None + target_number = row.get("target_number") + if target_number is None: + return None + return _node_from_number(int(target_number), issue_map, pr_map) + + +def _node_from_number( + number: int, issue_map: dict[int, dict[str, Any]], pr_map: dict[int, dict[str, Any]] +) -> str | None: + if number in issue_map: + return f"issue:{number}" + if number in pr_map: + return f"pull_request:{number}" + return None + + +def _connected_components( + features: dict[str, ArtifactFeature], + pairs: dict[tuple[str, str], set[str]], +) -> list[list[str]]: + adjacency: defaultdict[str, set[str]] = defaultdict(set) + for left, right in pairs: + adjacency[left].add(right) + adjacency[right].add(left) + visited: set[str] = set() + components: list[list[str]] = [] + for node in 
sorted(adjacency): + if node in visited: + continue + stack = [node] + component: list[str] = [] + while stack: + current = stack.pop() + if current in visited: + continue + visited.add(current) + component.append(current) + stack.extend(sorted(adjacency[current] - visited)) + components.append(sorted(component)) + return components + + +def _canonical_issue(issue_numbers: list[int], features: dict[str, ArtifactFeature]) -> int | None: + if not issue_numbers: + return None + return min( + issue_numbers, + key=lambda number: ( + 0 if features[f"issue:{number}"].row.get("state") == "open" else 1, + -features[f"issue:{number}"].inbound_references, + -features[f"issue:{number}"].discussion_activity, + _sort_timestamp(features[f"issue:{number}"].row.get("created_at")), + number, + ), + ) + + +def _canonical_pr(pr_numbers: list[int], features: dict[str, ArtifactFeature]) -> int | None: + if not pr_numbers: + return None + return min( + pr_numbers, + key=lambda number: ( + 0 if bool(features[f"pull_request:{number}"].row.get("merged")) else 1, + 0 + if features[f"pull_request:{number}"].row.get("state") == "open" + and not bool(features[f"pull_request:{number}"].row.get("draft")) + else 1, + -features[f"pull_request:{number}"].explicit_issue_links, + -( + features[f"pull_request:{number}"].discussion_activity + + features[f"pull_request:{number}"].review_activity + ), + features[f"pull_request:{number}"].diff_size, + _sort_timestamp(features[f"pull_request:{number}"].row.get("created_at")), + number, + ), + ) + + +def _cluster_status( + issue_numbers: list[int], pr_numbers: list[int], features: dict[str, ArtifactFeature] +) -> str: + if any(features[f"issue:{number}"].row.get("state") == "open" for number in issue_numbers): + return "open" + if any( + features[f"pull_request:{number}"].row.get("state") == "open" + and not bool(features[f"pull_request:{number}"].row.get("draft")) + for number in pr_numbers + ): + return "open" + if any(bool(features[f"pull_request:{number}"].row.get("merged")) for number in pr_numbers): + return "merged" + return "closed" + + +def _cluster_confidence(evidence_types: list[str]) -> float: + confidence = 0.45 + if "duplicate_reference" in evidence_types: + confidence += 0.25 + if "shared_issue_target" in evidence_types: + confidence += 0.2 + if "closing_reference" in evidence_types: + confidence += 0.1 + if "timeline:cross-referenced" in evidence_types: + confidence += 0.1 + if "soft_similarity" in evidence_types: + confidence += 0.05 + return min(confidence, 0.99) + + +def _cluster_summary( + issue_numbers: list[int], + pr_numbers: list[int], + target_issue_number: int | None, + evidence_types: list[str], +) -> str: + if issue_numbers and pr_numbers and target_issue_number is not None: + return f"Cluster of {len(issue_numbers)} issues and {len(pr_numbers)} PRs centered on issue #{target_issue_number}." + if pr_numbers and target_issue_number is not None: + return f"Cluster of {len(pr_numbers)} PRs targeting issue #{target_issue_number}." + if issue_numbers: + return f"Cluster of {len(issue_numbers)} related issues linked by {', '.join(evidence_types[:2]) or 'duplicate evidence'}." + return f"Cluster of {len(pr_numbers)} related pull requests linked by {', '.join(evidence_types[:2]) or 'shared evidence'}." 
+ + +def _cluster_score( + issue_numbers: list[int], + pr_numbers: list[int], + features: dict[str, ArtifactFeature], + status: str, +) -> float: + cluster_size = len(issue_numbers) + len(pr_numbers) + has_mixed = 1 if issue_numbers and pr_numbers else 0 + duplicate_pressure = max(len(issue_numbers) - 1, 0) + max(len(pr_numbers) - 1, 0) + open_bonus = 1 if status == "open" else 0 + discussion = sum( + features[f"issue:{number}"].discussion_activity for number in issue_numbers + ) + sum( + features[f"pull_request:{number}"].discussion_activity + + features[f"pull_request:{number}"].review_activity + for number in pr_numbers + ) + return float( + cluster_size * 100 + has_mixed * 50 + duplicate_pressure * 25 + open_bonus * 20 + discussion + ) + + +def _issue_score( + number: int | None, features: dict[str, ArtifactFeature], cluster_size: int +) -> float | None: + if number is None: + return None + feature = features[f"issue:{number}"] + score = 0.0 + if feature.row.get("state") == "open": + score += 100.0 + score += cluster_size * 10.0 + score += feature.discussion_activity * 2.0 + score += feature.inbound_references + return score + + +def _pr_score( + number: int | None, features: dict[str, ArtifactFeature], cluster_size: int +) -> float | None: + if number is None: + return None + feature = features[f"pull_request:{number}"] + score = 0.0 + if feature.row.get("state") == "open" and not bool(feature.row.get("draft")): + score += 120.0 + elif bool(feature.row.get("merged")): + score += 60.0 + score += cluster_size * 10.0 + score += feature.explicit_issue_links * 5.0 + score += (feature.discussion_activity + feature.review_activity) * 2.0 + score -= feature.diff_size / 1000.0 + return score + + +def _canonical_issue_reason( + number: int | None, features: dict[str, ArtifactFeature], issue_numbers: list[int] +) -> str | None: + if number is None: + return None + feature = features[f"issue:{number}"] + return ( + f"Issue #{number} is canonical because it is {'open' if feature.row.get('state') == 'open' else 'closed'}, " + f"has {feature.inbound_references} inbound references, and has the strongest discussion signal in a cluster of {len(issue_numbers)} issues." + ) + + +def _canonical_pr_reason( + number: int | None, features: dict[str, ArtifactFeature], pr_numbers: list[int] +) -> str | None: + if number is None: + return None + feature = features[f"pull_request:{number}"] + review_signal = feature.discussion_activity + feature.review_activity + return ( + f"PR #{number} is canonical because it is {'merged' if feature.row.get('merged') else 'open' if feature.row.get('state') == 'open' else 'closed'}, " + f"links to {feature.explicit_issue_links} issues, and has {review_signal} review/discussion events across {len(pr_numbers)} related PRs." + ) + + +def _best_issue_reason( + number: int | None, features: dict[str, ArtifactFeature], cluster_size: int +) -> str | None: + if number is None: + return None + feature = features[f"issue:{number}"] + return ( + f"Issue #{number} is the strongest global issue candidate because it is {'open' if feature.row.get('state') == 'open' else 'closed'}, " + f"belongs to a cluster with {cluster_size} artifacts, and carries {feature.discussion_activity} discussion comments plus {feature.inbound_references} inbound references." 
+ ) + + +def _best_pr_reason( + number: int | None, features: dict[str, ArtifactFeature], cluster_size: int +) -> str | None: + if number is None: + return None + feature = features[f"pull_request:{number}"] + return ( + f"PR #{number} is the strongest global PR candidate because it is {'open' if feature.row.get('state') == 'open' else 'merged' if feature.row.get('merged') else 'closed'}, " + f"belongs to a cluster with {cluster_size} artifacts, links to {feature.explicit_issue_links} issues, and carries {feature.discussion_activity + feature.review_activity} review/discussion events." + ) + + +def _duplicate_issue_reason(cluster: ClusterRecord) -> str: + return f"Issues in {cluster.cluster_id} are treated as duplicates because they share {', '.join(cluster.evidence_types)} evidence." + + +def _duplicate_pr_reason(cluster: ClusterRecord) -> str: + if cluster.target_issue_number is not None: + return f"PRs in {cluster.cluster_id} are treated as duplicates because they converge on issue #{cluster.target_issue_number} with {', '.join(cluster.evidence_types)} evidence." + return f"PRs in {cluster.cluster_id} are treated as duplicates because they share {', '.join(cluster.evidence_types)} evidence." + + +def _cluster_pr_comparisons( + cluster: ClusterRecord, features: dict[str, ArtifactFeature] +) -> list[PrComparisonEntry]: + comparisons: list[PrComparisonEntry] = [] + numbers = sorted(cluster.pr_numbers) + for index, left_number in enumerate(numbers): + left = features[f"pull_request:{left_number}"] + for right_number in numbers[index + 1 :]: + right = features[f"pull_request:{right_number}"] + comparisons.append(_pr_comparison(left, right)) + return comparisons + + +def _pr_comparison(left: ArtifactFeature, right: ArtifactFeature) -> PrComparisonEntry: + shared_filenames = sorted(set(left.filenames) & set(right.filenames)) + size_similarity = _size_similarity(left.diff_size, right.diff_size) + file_overlap = _jaccard_sets(set(left.filenames), set(right.filenames)) + area_overlap, shared_file_areas = _file_area_overlap( + left.file_ranges_by_name, right.file_ranges_by_name + ) + patch_similarity = _jaccard_sets(set(left.patch_tokens), set(right.patch_tokens)) + code_similarity = ( + size_similarity * 0.20 + file_overlap * 0.30 + area_overlap * 0.35 + patch_similarity * 0.15 + ) + return PrComparisonEntry( + left_pr_number=left.number, + right_pr_number=right.number, + code_similarity=round(code_similarity, 3), + size_similarity=round(size_similarity, 3), + file_overlap=round(file_overlap, 3), + area_overlap=round(area_overlap, 3), + patch_similarity=round(patch_similarity, 3), + shared_filenames=shared_filenames, + shared_file_areas=shared_file_areas, + ) + + +def _cluster_item(feature: ArtifactFeature) -> dict[str, Any]: + return { + "node_id": feature.node_id, + "kind": feature.kind, + "number": feature.number, + "title": feature.row.get("title"), + "state": feature.row.get("state"), + "draft": feature.row.get("draft"), + "merged": feature.row.get("merged"), + "created_at": feature.row.get("created_at"), + "body_length": feature.body_length, + "discussion_activity": feature.discussion_activity, + "review_activity": feature.review_activity, + "inbound_references": feature.inbound_references, + "explicit_issue_links": feature.explicit_issue_links, + "explicit_issue_targets": feature.explicit_issue_targets, + "diff_size": feature.diff_size, + "filenames": feature.filenames[:20], + "diff_preview": feature.diff_preview, + } + + +async def _fast_agent_cluster_analysis( + prepared: 
PreparedLlmPacket, model: str +) -> ClusterAnalysisCallResult: + try: + from fast_agent import FastAgent + except Exception as exc: + return ClusterAnalysisCallResult( + analyst_result=None, + evaluator_result=None, + error_kind=_classify_llm_error(exc), + error_message=_summarize_llm_error(exc), + evaluator_used=False, + retried=False, + ) + + fast = FastAgent("slop-farmer-analysis") + + @fast.agent( + name="cluster_analyst", + instruction=CLUSTER_ANALYST_INSTRUCTION, + model=model, + use_history=False, + ) + async def analyst_stub() -> None: + return None + + @fast.agent( + name="cluster_evaluator", + instruction=CLUSTER_EVALUATOR_INSTRUCTION, + model=model, + use_history=False, + ) + async def evaluator_stub() -> None: + return None + + packet = prepared.packet + prompt = json.dumps(packet, indent=2, sort_keys=True) + try: + async with fast.run() as agent: + analyst_result, _ = await agent.cluster_analyst.structured( + prompt, ClusterAnalystResponse + ) + if analyst_result is None: + return ClusterAnalysisCallResult( + analyst_result=None, + evaluator_result=None, + error_kind=None, + error_message=None, + evaluator_used=False, + retried=False, + ) + if not _should_run_evaluator( + packet, + prepared.budget, + split=prepared.split, + aggressively_trimmed=prepared.aggressively_trimmed, + analyst_result=analyst_result, + ): + return ClusterAnalysisCallResult( + analyst_result=analyst_result, + evaluator_result=None, + error_kind=None, + error_message=None, + evaluator_used=False, + retried=False, + ) + evaluation_prompt = json.dumps( + {"packet": packet, "analyst_result": analyst_result.model_dump(mode="json")}, + indent=2, + sort_keys=True, + ) + try: + evaluation_result, _ = await agent.cluster_evaluator.structured( + evaluation_prompt, ClusterEvaluatorResponse + ) + except Exception as exc: + return ClusterAnalysisCallResult( + analyst_result=analyst_result, + evaluator_result=None, + error_kind=_classify_llm_error(exc), + error_message=_summarize_llm_error(exc), + evaluator_used=True, + retried=False, + ) + if evaluation_result is None or evaluation_result.accept: + return ClusterAnalysisCallResult( + analyst_result=analyst_result, + evaluator_result=evaluation_result, + error_kind=None, + error_message=None, + evaluator_used=True, + retried=False, + ) + retry_prompt = json.dumps( + { + "packet": packet, + "previous_result": analyst_result.model_dump(mode="json"), + "feedback": evaluation_result.feedback, + }, + indent=2, + sort_keys=True, + ) + try: + retry_result, _ = await agent.cluster_analyst.structured( + retry_prompt, ClusterAnalystResponse + ) + except Exception as exc: + return ClusterAnalysisCallResult( + analyst_result=analyst_result, + evaluator_result=evaluation_result, + error_kind=_classify_llm_error(exc), + error_message=_summarize_llm_error(exc), + evaluator_used=True, + retried=True, + ) + return ClusterAnalysisCallResult( + analyst_result=retry_result or analyst_result, + evaluator_result=evaluation_result, + error_kind=None, + error_message=None, + evaluator_used=True, + retried=True, + ) + except Exception as exc: + return ClusterAnalysisCallResult( + analyst_result=None, + evaluator_result=None, + error_kind=_classify_llm_error(exc), + error_message=_summarize_llm_error(exc), + evaluator_used=False, + retried=False, + ) + + +def _can_use_fast_agent() -> bool: + try: + import fast_agent # noqa: F401 + except Exception: + return False + return any(os.environ.get(name) for name in LLM_PROVIDER_ENV_VARS) + + +def _tokenize(text: str | None, *, remove_stopwords: bool) 
-> list[str]: + tokens = TOKEN_PATTERN.findall((text or "").lower()) + if not remove_stopwords: + return tokens + return [token for token in tokens if token not in STOPWORDS] + + +def _strip_pull_request_template( + body: str | None, + *, + settings: Any | None = None, +) -> str: + return strip_pull_request_template(body, settings=settings) + + +def _patch_ranges(patch: str) -> list[tuple[int, int]]: + ranges: list[tuple[int, int]] = [] + for line in patch.splitlines(): + match = HUNK_HEADER_PATTERN.match(line) + if match is None: + continue + start = int(match.group("start")) + count = int(match.group("count") or "1") + end = start if count == 0 else start + count - 1 + ranges.append((start, end)) + return ranges + + +def _patch_content_tokens(patch: str) -> list[str]: + lines = [] + for line in patch.splitlines(): + if line.startswith("+++") or line.startswith("---"): + continue + if line.startswith("+") or line.startswith("-"): + lines.append(line[1:]) + return _tokenize("\n".join(lines), remove_stopwords=True) + + +def _size_similarity(left: int, right: int) -> float: + largest = max(left, right) + if largest <= 0: + return 1.0 + return min(left, right) / largest + + +def _file_area_overlap( + left_ranges_by_name: dict[str, list[tuple[int, int]]], + right_ranges_by_name: dict[str, list[tuple[int, int]]], +) -> tuple[float, list[PrFileAreaEntry]]: + shared_names = sorted(set(left_ranges_by_name) & set(right_ranges_by_name)) + if not shared_names: + return 0.0, [] + total_overlap = 0 + total_union = 0 + entries: list[PrFileAreaEntry] = [] + for filename in shared_names: + left_ranges = _merge_ranges(left_ranges_by_name.get(filename) or []) + right_ranges = _merge_ranges(right_ranges_by_name.get(filename) or []) + overlap = _ranges_overlap_size(left_ranges, right_ranges) + union = _ranges_size(_merge_ranges([*left_ranges, *right_ranges])) + total_overlap += overlap + total_union += union + entries.append( + PrFileAreaEntry( + filename=filename, + left_ranges=[[start, end] for start, end in left_ranges], + right_ranges=[[start, end] for start, end in right_ranges], + ) + ) + if total_union == 0: + return 0.0, entries + return total_overlap / total_union, entries + + +def _merge_ranges(ranges: list[tuple[int, int]]) -> list[tuple[int, int]]: + if not ranges: + return [] + merged: list[tuple[int, int]] = [] + for start, end in sorted(ranges): + if not merged or start > merged[-1][1] + 1: + merged.append((start, end)) + continue + merged[-1] = (merged[-1][0], max(merged[-1][1], end)) + return merged + + +def _ranges_size(ranges: list[tuple[int, int]]) -> int: + return sum(end - start + 1 for start, end in ranges) + + +def _ranges_overlap_size(left: list[tuple[int, int]], right: list[tuple[int, int]]) -> int: + overlap = 0 + left_index = 0 + right_index = 0 + while left_index < len(left) and right_index < len(right): + left_start, left_end = left[left_index] + right_start, right_end = right[right_index] + overlap_start = max(left_start, right_start) + overlap_end = min(left_end, right_end) + if overlap_start <= overlap_end: + overlap += overlap_end - overlap_start + 1 + if left_end <= right_end: + left_index += 1 + else: + right_index += 1 + return overlap + + +def _days_between(left: str | None, right: str | None) -> int: + if not left or not right: + return 10**9 + return abs((_parse_dt(left) - _parse_dt(right)).days) + + +def _parse_dt(value: str) -> datetime: + return datetime.fromisoformat(value.replace("Z", "+00:00")).astimezone(UTC) + + +def _sort_timestamp(value: str | None) -> str: + 
return value or "9999-99-99T99:99:99Z" + + +def _pair_key(left: str, right: str) -> tuple[str, str]: + return (left, right) if left <= right else (right, left) + + +def _jaccard(left: list[str], right: list[str]) -> float: + return _jaccard_sets(set(left), set(right)) + + +def _jaccard_sets(left_set: set[str], right_set: set[str]) -> float: + if not left_set or not right_set: + return 0.0 + return len(left_set & right_set) / len(left_set | right_set) + + +def _cluster_id_from_nodes(nodes: list[str]) -> str: + numbers = sorted(int(node.split(":", 1)[1]) for node in nodes) + return f"cluster-{numbers[0]}-{len(nodes)}" diff --git a/src/slop_farmer/reports/analysis_cache.py b/src/slop_farmer/reports/analysis_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..9e32c1c9fa801c0c3540538f8df0ed199715dc17 --- /dev/null +++ b/src/slop_farmer/reports/analysis_cache.py @@ -0,0 +1,336 @@ +from __future__ import annotations + +import hashlib +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from slop_farmer.data.parquet_io import read_json, write_json + +HYBRID_REVIEW_CACHE_MANIFEST_FILENAME = "hybrid-review-cache-manifest.json" +HYBRID_REVIEW_CACHE_ENTRIES_FILENAME = "hybrid-review-cache.jsonl" +HYBRID_REVIEW_CACHE_SCHEMA_VERSION = "1.0" +PREPARED_REVIEW_UNIT_SCHEMA_VERSION = "1.0" + + +def _canonical_json_bytes(data: Any) -> bytes: + return json.dumps( + data, + ensure_ascii=False, + separators=(",", ":"), + sort_keys=True, + ).encode("utf-8") + + +def _normalize_review_item_for_hash(item: dict[str, Any]) -> dict[str, Any]: + normalized = dict(item) + filenames = normalized.get("filenames") + if filenames is not None: + normalized["filenames"] = sorted(str(filename) for filename in filenames) + explicit_issue_targets = normalized.get("explicit_issue_targets") + if explicit_issue_targets is not None: + normalized["explicit_issue_targets"] = sorted( + int(target) for target in explicit_issue_targets + ) + return normalized + + +def _normalize_soft_pair_for_hash(pair: dict[str, Any]) -> dict[str, Any]: + normalized = dict(pair) + evidence_types = normalized.get("evidence_types") + if evidence_types is not None: + normalized["evidence_types"] = sorted(str(value) for value in evidence_types) + shared_targets = normalized.get("shared_targets") + if shared_targets is not None: + normalized["shared_targets"] = sorted(int(target) for target in shared_targets) + shared_filenames = normalized.get("shared_filenames") + if shared_filenames is not None: + normalized["shared_filenames"] = sorted(str(filename) for filename in shared_filenames) + return normalized + + +def _normalize_prepared_review_unit_for_hash( + prepared_review_unit: dict[str, Any], +) -> dict[str, Any]: + normalized = dict(prepared_review_unit) + packet = dict(normalized.get("packet") or {}) + packet["nodes"] = sorted(str(node) for node in packet.get("nodes") or []) + packet["items"] = sorted( + (_normalize_review_item_for_hash(dict(item)) for item in packet.get("items") or []), + key=lambda item: str(item.get("node_id") or ""), + ) + packet["pair_evidence"] = { + str(pair): sorted(str(value) for value in values) + for pair, values in sorted(dict(packet.get("pair_evidence") or {}).items()) + } + packet["soft_pairs"] = sorted( + (_normalize_soft_pair_for_hash(dict(pair)) for pair in packet.get("soft_pairs") or []), + key=lambda pair: ( + str(pair.get("left") or ""), + str(pair.get("right") or ""), + ), + ) + normalized["packet"] = packet + return normalized + + 
+@dataclass(frozen=True, slots=True) +class HybridReviewSettingsFingerprint: + llm_max_input_tokens: int + llm_max_nodes_per_packet: int + llm_max_soft_pairs_per_packet: int + llm_max_diff_chars_per_item: int + llm_max_filenames_per_item: int + llm_skip_evaluator_above_tokens: int + llm_overflow_policy: str + + @property + def value(self) -> str: + return hashlib.sha256(_canonical_json_bytes(self.to_json())).hexdigest() + + def to_json(self) -> dict[str, Any]: + return { + "llm_max_input_tokens": self.llm_max_input_tokens, + "llm_max_nodes_per_packet": self.llm_max_nodes_per_packet, + "llm_max_soft_pairs_per_packet": self.llm_max_soft_pairs_per_packet, + "llm_max_diff_chars_per_item": self.llm_max_diff_chars_per_item, + "llm_max_filenames_per_item": self.llm_max_filenames_per_item, + "llm_skip_evaluator_above_tokens": self.llm_skip_evaluator_above_tokens, + "llm_overflow_policy": self.llm_overflow_policy, + } + + @classmethod + def from_json(cls, payload: dict[str, Any]) -> HybridReviewSettingsFingerprint: + return cls( + llm_max_input_tokens=int(payload["llm_max_input_tokens"]), + llm_max_nodes_per_packet=int(payload["llm_max_nodes_per_packet"]), + llm_max_soft_pairs_per_packet=int(payload["llm_max_soft_pairs_per_packet"]), + llm_max_diff_chars_per_item=int(payload["llm_max_diff_chars_per_item"]), + llm_max_filenames_per_item=int(payload["llm_max_filenames_per_item"]), + llm_skip_evaluator_above_tokens=int(payload["llm_skip_evaluator_above_tokens"]), + llm_overflow_policy=str(payload["llm_overflow_policy"]), + ) + + +@dataclass(frozen=True, slots=True) +class HybridReviewCacheManifest: + cache_schema_version: str + prepared_review_unit_schema_version: str + analyst_prompt_version: str + evaluator_prompt_version: str + hybrid_review_settings: HybridReviewSettingsFingerprint + + @property + def hybrid_review_settings_fingerprint(self) -> str: + return self.hybrid_review_settings.value + + def to_json(self) -> dict[str, Any]: + return { + "cache_schema_version": self.cache_schema_version, + "prepared_review_unit_schema_version": self.prepared_review_unit_schema_version, + "analyst_prompt_version": self.analyst_prompt_version, + "evaluator_prompt_version": self.evaluator_prompt_version, + "hybrid_review_settings": self.hybrid_review_settings.to_json(), + "hybrid_review_settings_fingerprint": self.hybrid_review_settings_fingerprint, + } + + @classmethod + def from_json(cls, payload: dict[str, Any]) -> HybridReviewCacheManifest: + return cls( + cache_schema_version=str(payload["cache_schema_version"]), + prepared_review_unit_schema_version=str(payload["prepared_review_unit_schema_version"]), + analyst_prompt_version=str(payload["analyst_prompt_version"]), + evaluator_prompt_version=str(payload["evaluator_prompt_version"]), + hybrid_review_settings=HybridReviewSettingsFingerprint.from_json( + payload["hybrid_review_settings"] + ), + ) + + +@dataclass(frozen=True, slots=True) +class HybridReviewCacheKey: + cache_schema_version: str + prepared_review_unit_schema_version: str + analyst_prompt_version: str + evaluator_prompt_version: str + hybrid_review_settings_fingerprint: str + model: str + prepared_review_unit_hash: str + + def to_json(self) -> dict[str, Any]: + return { + "cache_schema_version": self.cache_schema_version, + "prepared_review_unit_schema_version": self.prepared_review_unit_schema_version, + "analyst_prompt_version": self.analyst_prompt_version, + "evaluator_prompt_version": self.evaluator_prompt_version, + "hybrid_review_settings_fingerprint": 
self.hybrid_review_settings_fingerprint, + "model": self.model, + "prepared_review_unit_hash": self.prepared_review_unit_hash, + } + + @classmethod + def from_json(cls, payload: dict[str, Any]) -> HybridReviewCacheKey: + return cls( + cache_schema_version=str(payload["cache_schema_version"]), + prepared_review_unit_schema_version=str(payload["prepared_review_unit_schema_version"]), + analyst_prompt_version=str(payload["analyst_prompt_version"]), + evaluator_prompt_version=str(payload["evaluator_prompt_version"]), + hybrid_review_settings_fingerprint=str(payload["hybrid_review_settings_fingerprint"]), + model=str(payload["model"]), + prepared_review_unit_hash=str(payload["prepared_review_unit_hash"]), + ) + + +@dataclass(frozen=True, slots=True) +class HybridReviewCacheEntry: + key: HybridReviewCacheKey + result: dict[str, Any] + cached_at: str + nodes: tuple[str, ...] = () + soft_pairs: tuple[str, ...] = () + budget: dict[str, int] | None = None + split: bool = False + trimmed: bool = False + aggressively_trimmed: bool = False + + def to_json(self) -> dict[str, Any]: + return { + "key": self.key.to_json(), + "result": self.result, + "cached_at": self.cached_at, + "nodes": list(self.nodes), + "soft_pairs": list(self.soft_pairs), + "budget": self.budget, + "split": self.split, + "trimmed": self.trimmed, + "aggressively_trimmed": self.aggressively_trimmed, + } + + @classmethod + def from_json(cls, payload: dict[str, Any]) -> HybridReviewCacheEntry: + return cls( + key=HybridReviewCacheKey.from_json(payload["key"]), + result=dict(payload["result"]), + cached_at=str(payload["cached_at"]), + nodes=tuple(str(node) for node in payload.get("nodes") or []), + soft_pairs=tuple(str(pair) for pair in payload.get("soft_pairs") or []), + budget=( + None + if payload.get("budget") is None + else {str(key): int(value) for key, value in dict(payload["budget"]).items()} + ), + split=bool(payload.get("split", False)), + trimmed=bool(payload.get("trimmed", False)), + aggressively_trimmed=bool(payload.get("aggressively_trimmed", False)), + ) + + +def prepared_review_unit_hash(prepared_review_unit: dict[str, Any]) -> str: + normalized = _normalize_prepared_review_unit_for_hash(prepared_review_unit) + return hashlib.sha256(_canonical_json_bytes(normalized)).hexdigest() + + +def build_hybrid_review_cache_key( + *, + manifest: HybridReviewCacheManifest, + model: str, + prepared_review_unit: dict[str, Any], +) -> HybridReviewCacheKey: + return HybridReviewCacheKey( + cache_schema_version=manifest.cache_schema_version, + prepared_review_unit_schema_version=manifest.prepared_review_unit_schema_version, + analyst_prompt_version=manifest.analyst_prompt_version, + evaluator_prompt_version=manifest.evaluator_prompt_version, + hybrid_review_settings_fingerprint=manifest.hybrid_review_settings_fingerprint, + model=model, + prepared_review_unit_hash=prepared_review_unit_hash(prepared_review_unit), + ) + + +def hybrid_review_cache_dir(snapshot_dir: Path) -> Path: + return snapshot_dir / "analysis-state" + + +class HybridReviewCacheStore: + def __init__( + self, + cache_dir: Path, + manifest: HybridReviewCacheManifest, + *, + enabled: bool = True, + ) -> None: + self.cache_dir = cache_dir + self.manifest = manifest + self.enabled = enabled + self.invalidation_reason: str | None = None + self._entries: dict[HybridReviewCacheKey, HybridReviewCacheEntry] = {} + self._needs_reset = False + if self.enabled: + self._load() + + @property + def manifest_path(self) -> Path: + return self.cache_dir / 
HYBRID_REVIEW_CACHE_MANIFEST_FILENAME + + @property + def entries_path(self) -> Path: + return self.cache_dir / HYBRID_REVIEW_CACHE_ENTRIES_FILENAME + + @property + def has_entries(self) -> bool: + return bool(self._entries) + + def get(self, key: HybridReviewCacheKey) -> HybridReviewCacheEntry | None: + if not self.enabled: + return None + return self._entries.get(key) + + def put(self, entry: HybridReviewCacheEntry) -> None: + if not self.enabled or entry.key in self._entries: + return + self._prepare_for_write() + with self.entries_path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(entry.to_json(), sort_keys=True) + "\n") + self._entries[entry.key] = entry + + def _load(self) -> None: + if not self.manifest_path.exists(): + if self.entries_path.exists(): + self.invalidation_reason = "missing_manifest" + self._needs_reset = True + return + try: + existing_manifest = HybridReviewCacheManifest.from_json(read_json(self.manifest_path)) + except Exception: + self.invalidation_reason = "invalid_manifest" + self._needs_reset = True + return + if existing_manifest != self.manifest: + self.invalidation_reason = "manifest_mismatch" + self._needs_reset = True + return + if not self.entries_path.exists(): + return + try: + with self.entries_path.open("r", encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if not line: + continue + entry = HybridReviewCacheEntry.from_json(json.loads(line)) + self._entries[entry.key] = entry + except Exception: + self._entries.clear() + self.invalidation_reason = "invalid_entries" + self._needs_reset = True + + def _prepare_for_write(self) -> None: + self.cache_dir.mkdir(parents=True, exist_ok=True) + if self._needs_reset: + self.entries_path.write_text("", encoding="utf-8") + elif not self.entries_path.exists(): + self.entries_path.touch() + if self._needs_reset or not self.manifest_path.exists(): + write_json(self.manifest.to_json(), self.manifest_path) + self._needs_reset = False diff --git a/src/slop_farmer/reports/analysis_service.py b/src/slop_farmer/reports/analysis_service.py new file mode 100644 index 0000000000000000000000000000000000000000..c6d920e7cf7ad4610156a9a9fb3325e7613cf780 --- /dev/null +++ b/src/slop_farmer/reports/analysis_service.py @@ -0,0 +1,497 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from slop_farmer.data.parquet_io import read_json +from slop_farmer.data.search_duckdb import connect_pr_search_db, resolve_active_run +from slop_farmer.data.snapshot_paths import ( + ANALYSIS_REPORT_FILENAME_BY_VARIANT, + CURRENT_ANALYSIS_MANIFEST_PATH, + analysis_run_manifest_path, + load_archived_analysis_run_manifest, + load_current_analysis_manifest, + repo_relative_path_to_local, +) + +ANALYSIS_VARIANTS = {"auto", "deterministic", "hybrid"} + + +@dataclass(frozen=True, slots=True) +class ActiveSnapshotContext: + active_run: dict[str, Any] + snapshot_dir: Path + + +@dataclass(frozen=True, slots=True) +class AnalysisContext: + active_run: dict[str, Any] + report: dict[str, Any] + report_path: Path + report_source: str + variant_requested: str + variant_used: str + analysis_id: str | None + + +def get_analysis_status( + db_path: Path, + *, + repo: str | None = None, + variant: str = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, +) -> dict[str, Any]: + active = _resolve_active_snapshot_context(db_path, repo=repo) + report_path, variant_used, report_source, resolved_analysis_id = 
_resolve_analysis_report_path( + active.snapshot_dir, + variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + required=False, + ) + payload = { + "repo": str(active.active_run["repo"]), + "active_snapshot_id": str(active.active_run["snapshot_id"]), + "run_id": str(active.active_run["id"]), + "variant_requested": _normalize_analysis_variant(variant), + "available": report_path is not None, + } + if report_path is None or variant_used is None or report_source is None: + return payload + report = _load_report(report_path) + status = { + **payload, + "snapshot_id": str(report.get("snapshot_id") or active.active_run["snapshot_id"]), + "variant_used": variant_used, + "analysis_source": report_source, + "llm_enrichment": bool(report.get("llm_enrichment")), + "generated_at": report.get("generated_at"), + "counts": _analysis_counts(report), + } + if resolved_analysis_id is not None: + status["analysis_id"] = resolved_analysis_id + return status + + +def get_pr_analysis( + db_path: Path, + *, + pr_number: int, + repo: str | None = None, + variant: str = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, +) -> dict[str, Any]: + context = _load_analysis_context( + db_path, + repo=repo, + variant=variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + meta_bug, rank = _find_meta_bug_for_pr(context.report, pr_number) + duplicate_pr = _find_duplicate_pr_for_pr(context.report, pr_number) + return { + **_analysis_base_payload(context), + "pr_number": pr_number, + "found": meta_bug is not None or duplicate_pr is not None, + "meta_bug": None if meta_bug is None else _meta_bug_payload(meta_bug, rank=rank), + "duplicate_pr": duplicate_pr, + } + + +def list_analysis_meta_bugs( + db_path: Path, + *, + repo: str | None = None, + variant: str = "auto", + limit: int = 50, + snapshot_id: str | None = None, + analysis_id: str | None = None, +) -> dict[str, Any]: + context = _load_analysis_context( + db_path, + repo=repo, + variant=variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + meta_bugs = [ + _meta_bug_payload(cluster, rank=index) + for index, cluster in enumerate(context.report.get("meta_bugs", [])[:limit], start=1) + ] + return { + **_analysis_base_payload(context), + "meta_bugs": meta_bugs, + "meta_bug_count": len(meta_bugs), + } + + +def get_analysis_meta_bug( + db_path: Path, + *, + cluster_id: str, + repo: str | None = None, + variant: str = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, +) -> dict[str, Any]: + context = _load_analysis_context( + db_path, + repo=repo, + variant=variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + for index, cluster in enumerate(context.report.get("meta_bugs", []), start=1): + if str(cluster.get("cluster_id")) != cluster_id: + continue + return { + **_analysis_base_payload(context), + "meta_bug": _meta_bug_payload(cluster, rank=index), + "duplicate_pr": _find_duplicate_pr_by_cluster_id(context.report, cluster_id), + } + raise ValueError(f"Analysis cluster {cluster_id!r} was not found in the active analysis view.") + + +def list_analysis_duplicate_prs( + db_path: Path, + *, + repo: str | None = None, + variant: str = "auto", + limit: int = 50, + snapshot_id: str | None = None, + analysis_id: str | None = None, +) -> dict[str, Any]: + context = _load_analysis_context( + db_path, + repo=repo, + variant=variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + duplicate_prs = [ + {"rank": index, **dict(entry)} + for index, entry in 
enumerate(context.report.get("duplicate_prs", [])[:limit], start=1) + ] + return { + **_analysis_base_payload(context), + "duplicate_prs": duplicate_prs, + "duplicate_pr_count": len(duplicate_prs), + } + + +def get_analysis_best( + db_path: Path, + *, + repo: str | None = None, + variant: str = "auto", + snapshot_id: str | None = None, + analysis_id: str | None = None, +) -> dict[str, Any]: + context = _load_analysis_context( + db_path, + repo=repo, + variant=variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + ) + return { + **_analysis_base_payload(context), + "best_issue": _best_entry_with_cluster_id( + context.report, + context.report.get("best_issue"), + number_key="issue_number", + numbers_key="issue_numbers", + ), + "best_pr": _best_entry_with_cluster_id( + context.report, + context.report.get("best_pr"), + number_key="pr_number", + numbers_key="pr_numbers", + ), + } + + +def _resolve_active_snapshot_context( + db_path: Path, + *, + repo: str | None, +) -> ActiveSnapshotContext: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + finally: + connection.close() + return ActiveSnapshotContext( + active_run={str(key): value for key, value in active_run.items()}, + snapshot_dir=Path(str(active_run["snapshot_dir"])).resolve(), + ) + + +def _load_analysis_context( + db_path: Path, + *, + repo: str | None, + variant: str, + snapshot_id: str | None, + analysis_id: str | None, +) -> AnalysisContext: + active = _resolve_active_snapshot_context(db_path, repo=repo) + report_path, variant_used, report_source, resolved_analysis_id = _resolve_analysis_report_path( + active.snapshot_dir, + variant, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + required=True, + ) + assert report_path is not None + assert variant_used is not None + assert report_source is not None + return AnalysisContext( + active_run=active.active_run, + report=_load_report(report_path), + report_path=report_path, + report_source=report_source, + variant_requested=_normalize_analysis_variant(variant), + variant_used=variant_used, + analysis_id=resolved_analysis_id, + ) + + +def _resolve_analysis_report_path( + snapshot_dir: Path, + variant: str, + *, + snapshot_id: str | None, + analysis_id: str | None, + required: bool, +) -> tuple[Path | None, str | None, str | None, str | None]: + normalized = _normalize_analysis_variant(variant) + if (snapshot_id is None) != (analysis_id is None): + raise ValueError("snapshot_id and analysis_id must be provided together.") + if snapshot_id is not None and analysis_id is not None: + selection = _resolve_archived_analysis_report_path( + snapshot_dir, + snapshot_id=snapshot_id, + analysis_id=analysis_id, + variant=normalized, + ) + if selection is not None: + return (*selection, analysis_id) + if not required: + return None, None, None, None + raise ValueError( + f"Published analysis run {analysis_id!r} for snapshot {snapshot_id!r} was not found." 
+ ) + + current_manifest_path = repo_relative_path_to_local( + snapshot_dir, CURRENT_ANALYSIS_MANIFEST_PATH + ) + if normalized == "deterministic": + selection = _resolve_snapshot_local_report_path(snapshot_dir, variant=normalized) + if selection is not None: + return (*selection, None) + + if current_manifest_path.exists(): + report_path, variant_used = _resolve_manifest_report_path( + snapshot_dir, + load_current_analysis_manifest(current_manifest_path), + variant=normalized, + manifest_kind="current", + ) + return ( + report_path, + variant_used, + "current", + str(load_current_analysis_manifest(current_manifest_path)["analysis_id"]), + ) + + selection = _resolve_snapshot_local_report_path(snapshot_dir, variant=normalized) + if selection is not None: + return (*selection, None) + if not required: + return None, None, None, None + raise ValueError( + "No analysis report was found for the current analysis view or active snapshot." + ) + + +def _resolve_archived_analysis_report_path( + snapshot_dir: Path, + *, + snapshot_id: str, + analysis_id: str, + variant: str, +) -> tuple[Path, str, str] | None: + manifest_path = repo_relative_path_to_local( + snapshot_dir, + analysis_run_manifest_path(snapshot_id, analysis_id), + ) + if not manifest_path.exists(): + return None + report_path, variant_used = _resolve_manifest_report_path( + snapshot_dir, + load_archived_analysis_run_manifest(manifest_path), + variant=variant, + manifest_kind="archived", + ) + return report_path, variant_used, "archived" + + +def _resolve_manifest_report_path( + snapshot_dir: Path, + manifest: dict[str, Any], + *, + variant: str, + manifest_kind: str, +) -> tuple[Path, str]: + artifact_key = _artifact_key_for_variant(variant, manifest_kind=manifest_kind) + artifacts = manifest.get("artifacts") or {} + artifact_path = artifacts.get(artifact_key) + if not isinstance(artifact_path, str) or not artifact_path: + message = ( + f"Published {manifest_kind} analysis manifest does not provide the {variant} artifact." + if variant != "auto" + else f"Published {manifest_kind} analysis manifest does not provide the canonical hybrid artifact." + ) + raise ValueError(message) + report_path = repo_relative_path_to_local(snapshot_dir, artifact_path) + if not report_path.exists(): + raise ValueError( + f"Published {manifest_kind} analysis artifact {artifact_path!r} is missing from the materialized snapshot." + ) + variant_used = "hybrid" if artifact_key == "hybrid" else variant + return report_path, variant_used + + +def _artifact_key_for_variant(variant: str, *, manifest_kind: str) -> str: + if variant == "auto": + return "hybrid" + if variant == "hybrid": + return "hybrid" + raise ValueError( + f"Published {manifest_kind} analysis only serves canonical hybrid artifacts; requested {variant!r}." 
+ ) + + +def _resolve_snapshot_local_report_path( + snapshot_dir: Path, + *, + variant: str, +) -> tuple[Path, str, str] | None: + if variant == "auto": + hybrid_path = snapshot_dir / ANALYSIS_REPORT_FILENAME_BY_VARIANT["hybrid"] + if hybrid_path.exists(): + return hybrid_path, "hybrid", "snapshot" + deterministic_path = snapshot_dir / ANALYSIS_REPORT_FILENAME_BY_VARIANT["deterministic"] + if deterministic_path.exists(): + return deterministic_path, "deterministic", "snapshot" + return None + report_path = snapshot_dir / ANALYSIS_REPORT_FILENAME_BY_VARIANT[variant] + if not report_path.exists(): + return None + return report_path, variant, "snapshot" + + +def _normalize_analysis_variant(variant: str) -> str: + normalized = variant.strip().lower() + if normalized not in ANALYSIS_VARIANTS: + raise ValueError( + f"Unsupported analysis variant {variant!r}; expected auto, hybrid, or deterministic." + ) + return normalized + + +def _analysis_base_payload(context: AnalysisContext) -> dict[str, Any]: + active_snapshot_id = str(context.active_run["snapshot_id"]) + snapshot_id = str(context.report.get("snapshot_id") or active_snapshot_id) + payload = { + "repo": str(context.active_run["repo"]), + "snapshot_id": snapshot_id, + "active_snapshot_id": active_snapshot_id, + "run_id": str(context.active_run["id"]), + "variant_requested": context.variant_requested, + "variant_used": context.variant_used, + "analysis_source": context.report_source, + "llm_enrichment": bool(context.report.get("llm_enrichment")), + "generated_at": context.report.get("generated_at"), + } + if context.analysis_id is not None: + payload["analysis_id"] = context.analysis_id + return payload + + +def _analysis_counts(report: dict[str, Any]) -> dict[str, int]: + return { + "meta_bugs": len(report.get("meta_bugs") or []), + "duplicate_issues": len(report.get("duplicate_issues") or []), + "duplicate_prs": len(report.get("duplicate_prs") or []), + } + + +def _meta_bug_payload(cluster: dict[str, Any], *, rank: int | None = None) -> dict[str, Any]: + payload = dict(cluster) + if rank is not None: + payload["rank"] = rank + return payload + + +def _find_meta_bug_for_pr( + report: dict[str, Any], + pr_number: int, +) -> tuple[dict[str, Any] | None, int | None]: + for index, cluster in enumerate(report.get("meta_bugs", []), start=1): + pr_numbers = {int(number) for number in cluster.get("pr_numbers", [])} + if pr_number in pr_numbers: + return dict(cluster), index + return None, None + + +def _find_duplicate_pr_for_pr(report: dict[str, Any], pr_number: int) -> dict[str, Any] | None: + for entry in report.get("duplicate_prs", []): + numbers = { + int(entry["canonical_pr_number"]), + *(int(number) for number in entry.get("duplicate_pr_numbers", [])), + } + if pr_number in numbers: + return dict(entry) + return None + + +def _find_duplicate_pr_by_cluster_id( + report: dict[str, Any], + cluster_id: str, +) -> dict[str, Any] | None: + for entry in report.get("duplicate_prs", []): + if str(entry.get("cluster_id")) == cluster_id: + return dict(entry) + return None + + +def _best_entry_with_cluster_id( + report: dict[str, Any], + entry: Any, + *, + number_key: str, + numbers_key: str, +) -> dict[str, Any] | None: + if not isinstance(entry, dict): + return None + number = entry.get(number_key) + if number is None: + return dict(entry) + for cluster in report.get("meta_bugs", []): + numbers = {int(value) for value in cluster.get(numbers_key, [])} + if int(number) in numbers: + return {"cluster_id": cluster.get("cluster_id"), **dict(entry)} + return 
dict(entry) + + +def _load_report(path: Path) -> dict[str, Any]: + payload = read_json(path) + if not isinstance(payload, dict): + raise ValueError(f"Analysis report at {path} must contain a JSON object.") + return {str(key): value for key, value in payload.items()} diff --git a/src/slop_farmer/reports/canonical_duplicate_pr.py b/src/slop_farmer/reports/canonical_duplicate_pr.py new file mode 100644 index 0000000000000000000000000000000000000000..fb273c1368cb9afffffe37a1baa64f09d5929fd4 --- /dev/null +++ b/src/slop_farmer/reports/canonical_duplicate_pr.py @@ -0,0 +1,649 @@ +from __future__ import annotations + +import argparse +import json +from collections.abc import Iterable +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Protocol + +from slop_farmer.config import resolve_github_token +from slop_farmer.data.github_api import GitHubClient +from slop_farmer.data.normalize import ( + normalize_comment, + normalize_pr_diff, + normalize_pr_file, + normalize_review, + normalize_review_comment, +) +from slop_farmer.data.parquet_io import read_json, read_parquet_rows, write_json, write_text + +PROMPT_TEMPLATE_PATH = Path(__file__).resolve().parent / "prompts" / "canonical_duplicate_pr.md" +SCHEMA_PATH = ( + Path(__file__).resolve().parent / "schemas" / "canonical_duplicate_pr_output.schema.json" +) + +# Navigation: +# - snapshot loading + ranked cluster selection +# - stage_run_bundle(): prepare prompt/context artifacts for synthesis +# - prepare_publish_artifacts(): turn a successful Codex result into PR-ready output +# - context builders + GitHub fallback helpers +# - tiny CLI at the bottom for manual staging / publish prep + + +@dataclass(slots=True) +class SnapshotBundle: + report_path: Path + snapshot_dir: Path + repo: str + snapshot_id: str + extracted_at: str + report: dict[str, Any] + issues: list[dict[str, Any]] + pull_requests: list[dict[str, Any]] + comments: list[dict[str, Any]] + reviews: list[dict[str, Any]] + review_comments: list[dict[str, Any]] + pr_files: list[dict[str, Any]] + pr_diffs: list[dict[str, Any]] + + +class GitHubClientLike(Protocol): + def iter_pull_files(self, owner: str, repo: str, number: int) -> Iterable[dict[str, Any]]: ... + + def get_pull_request_diff(self, owner: str, repo: str, number: int) -> str: ... + + def iter_issue_comments_for_number( + self, owner: str, repo: str, number: int, since: str | None + ) -> Iterable[dict[str, Any]]: ... + + def iter_pull_reviews(self, owner: str, repo: str, number: int) -> Iterable[dict[str, Any]]: ... + + def iter_pull_review_comments( + self, owner: str, repo: str, number: int + ) -> Iterable[dict[str, Any]]: ... 
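+
+# GitHubClientLike mirrors the subset of GitHubClient that the context builders
+# below rely on: when the snapshot parquet files lack files, diffs, comments, or
+# reviews for a PR, the builders fall back to these live GitHub API calls.
+# Because it is a Protocol, callers (and tests) can supply a lightweight stub
+# instead of a real client.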
+ + +# Snapshot loading and cluster selection + + +def load_snapshot_bundle(report_path: Path) -> SnapshotBundle: + resolved_report = report_path.resolve() + snapshot_dir = resolved_report.parent + manifest_path = snapshot_dir / "manifest.json" + manifest = read_json(manifest_path) if manifest_path.exists() else {} + report = read_json(resolved_report) + repo = str(report.get("repo") or manifest.get("repo") or "") + snapshot_id = str(report.get("snapshot_id") or manifest.get("snapshot_id") or snapshot_dir.name) + extracted_at = str(manifest.get("extracted_at") or report.get("generated_at") or "") + return SnapshotBundle( + report_path=resolved_report, + snapshot_dir=snapshot_dir, + repo=repo, + snapshot_id=snapshot_id, + extracted_at=extracted_at, + report=report, + issues=read_parquet_rows(snapshot_dir / "issues.parquet"), + pull_requests=read_parquet_rows(snapshot_dir / "pull_requests.parquet"), + comments=read_parquet_rows(snapshot_dir / "comments.parquet"), + reviews=read_parquet_rows(snapshot_dir / "reviews.parquet"), + review_comments=read_parquet_rows(snapshot_dir / "review_comments.parquet"), + pr_files=read_parquet_rows(snapshot_dir / "pr_files.parquet"), + pr_diffs=read_parquet_rows(snapshot_dir / "pr_diffs.parquet"), + ) + + +def select_ranked_duplicate_pr_clusters( + bundle: SnapshotBundle, + *, + limit: int | None = None, +) -> list[dict[str, Any]]: + duplicate_prs = { + str(row.get("cluster_id")): row + for row in bundle.report.get("duplicate_prs", []) + if row.get("cluster_id") + } + pull_request_map = { + int(row["number"]): row for row in bundle.pull_requests if row.get("number") is not None + } + candidates: list[dict[str, Any]] = [] + for rank_index, meta_bug in enumerate(bundle.report.get("meta_bugs", []), start=1): + cluster_id = str(meta_bug.get("cluster_id") or "") + duplicate_cluster = duplicate_prs.get(cluster_id) + if not duplicate_cluster: + continue + all_pr_numbers = _ordered_ints(meta_bug.get("pr_numbers")) + open_source_pr_numbers = [ + number + for number in all_pr_numbers + if _is_open_non_draft_pull_request(pull_request_map.get(number)) + ] + if len(open_source_pr_numbers) < 2: + continue + candidate = { + "cluster_id": cluster_id, + "rank_index": rank_index, + "canonical_pr_number": _coerce_int(meta_bug.get("canonical_pr_number")), + "canonical_issue_number": _coerce_int(meta_bug.get("canonical_issue_number")), + "target_issue_number": _coerce_int(duplicate_cluster.get("target_issue_number")), + "all_pr_numbers": all_pr_numbers, + "duplicate_pr_numbers": _ordered_ints(duplicate_cluster.get("duplicate_pr_numbers")), + "source_pr_numbers": open_source_pr_numbers, + "issue_numbers": _ordered_ints(meta_bug.get("issue_numbers")), + "summary": meta_bug.get("summary"), + "status": meta_bug.get("status"), + "confidence": meta_bug.get("confidence"), + "evidence_types": list(meta_bug.get("evidence_types") or []), + "reason": duplicate_cluster.get("reason"), + } + candidates.append(candidate) + if limit is not None and len(candidates) >= limit: + break + return candidates + + +def select_ranked_duplicate_pr_cluster( + bundle: SnapshotBundle, + *, + cluster_id: str | None = None, + max_clusters: int = 1, +) -> dict[str, Any]: + if max_clusters < 1: + raise ValueError("--max-clusters must be at least 1") + candidates = select_ranked_duplicate_pr_clusters(bundle) + if cluster_id: + for candidate in candidates: + if candidate["cluster_id"] == cluster_id: + return candidate + known_cluster_ids = { + str(row.get("cluster_id")) + for row in 
bundle.report.get("duplicate_prs", []) + if row.get("cluster_id") + } + if cluster_id in known_cluster_ids: + raise ValueError( + f"Cluster {cluster_id} does not have at least 2 open non-draft pull requests." + ) + raise ValueError(f"Unknown duplicate PR cluster: {cluster_id}") + limited = candidates[:max_clusters] + if not limited: + raise ValueError("No duplicate PR cluster has at least 2 open non-draft pull requests.") + return limited[0] + + +# Prompt/context staging and publish prep + + +def stage_run_bundle( + report_path: Path, + run_dir: Path, + *, + selected_cluster: dict[str, Any] | None = None, + cluster_id: str | None = None, + max_clusters: int = 1, + github_client: GitHubClientLike | None = None, + prompt_repo: str | None = None, + prompt_default_branch: str | None = None, + prompt_file_policy_instruction: str | None = None, +) -> dict[str, Any]: + bundle = load_snapshot_bundle(report_path) + candidates = select_ranked_duplicate_pr_clusters(bundle) + if selected_cluster is None: + selected_cluster = select_ranked_duplicate_pr_cluster( + bundle, + cluster_id=cluster_id, + max_clusters=max_clusters, + ) + pr_contexts = _build_pull_request_contexts( + bundle, selected_cluster, github_client=github_client + ) + issue_context = _build_issue_context(bundle, selected_cluster) + + selected_cluster_path = run_dir / "selected-cluster.json" + context_dir = run_dir / "context" + pr_context_dir = context_dir / "prs" + cluster_context_path = context_dir / "cluster.json" + issue_context_path = context_dir / "issue.json" + prompt_path = run_dir / "codex-prompt.md" + manifest_path = run_dir / "run-manifest.json" + result_path = run_dir / "codex-final.json" + last_message_path = run_dir / "codex-last-message.json" + publish_metadata_path = run_dir / "publish-metadata.json" + pr_body_path = run_dir / "pr-body.md" + pr_url_path = run_dir / "pr-url.txt" + + write_json(selected_cluster, selected_cluster_path) + pr_context_files: list[dict[str, Any]] = [] + for pr_context in pr_contexts: + pr_number = int(pr_context["pull_request"]["number"]) + path = pr_context_dir / f"pr-{pr_number}.json" + write_json(pr_context, path) + pr_context_files.append( + { + "pr_number": pr_number, + "path": str(path.resolve()), + } + ) + + issue_context_file: str | None = None + if issue_context is not None: + write_json(issue_context, issue_context_path) + issue_context_file = str(issue_context_path.resolve()) + + cluster_context = { + "report_path": str(bundle.report_path), + "snapshot_dir": str(bundle.snapshot_dir), + "repo": bundle.repo, + "snapshot_id": bundle.snapshot_id, + "default_branch": prompt_default_branch or _infer_default_branch(bundle, selected_cluster), + "selected_cluster": selected_cluster, + "target_issue_context_path": issue_context_file, + "pull_request_context_files": pr_context_files, + } + write_json(cluster_context, cluster_context_path) + + prompt_text = _render_prompt( + selected_cluster=selected_cluster, + selected_cluster_path=selected_cluster_path.resolve(), + cluster_context_path=cluster_context_path.resolve(), + pr_context_dir=pr_context_dir.resolve(), + issue_context_path=issue_context_file, + repo=prompt_repo or bundle.repo, + default_branch=prompt_default_branch or _infer_default_branch(bundle, selected_cluster), + file_policy_instruction=prompt_file_policy_instruction + or _default_file_policy_instruction(), + ) + write_text(prompt_text, prompt_path) + + manifest = { + "report_path": str(bundle.report_path), + "snapshot_dir": str(bundle.snapshot_dir), + "repo": bundle.repo, + 
"snapshot_id": bundle.snapshot_id, + "max_clusters": max_clusters, + "prompt_repo": prompt_repo or bundle.repo, + "default_branch": prompt_default_branch or _infer_default_branch(bundle, selected_cluster), + "file_policy_instruction": prompt_file_policy_instruction + or _default_file_policy_instruction(), + "candidate_clusters": candidates[:max_clusters] if max_clusters > 0 else [], + "selected_cluster": selected_cluster, + "artifacts": { + "selected_cluster_path": str(selected_cluster_path.resolve()), + "cluster_context_path": str(cluster_context_path.resolve()), + "pr_context_dir": str(pr_context_dir.resolve()), + "issue_context_path": issue_context_file, + "prompt_path": str(prompt_path.resolve()), + "schema_path": str(SCHEMA_PATH.resolve()), + "result_path": str(result_path.resolve()), + "last_message_path": str(last_message_path.resolve()), + "publish_metadata_path": str(publish_metadata_path.resolve()), + "pr_body_path": str(pr_body_path.resolve()), + "pr_url_path": str(pr_url_path.resolve()), + }, + } + write_json(manifest, manifest_path) + return manifest + + +def prepare_publish_artifacts(manifest_path: Path, result_path: Path) -> dict[str, Any]: + manifest = read_json(manifest_path.resolve()) + result = json.loads(result_path.resolve().read_text(encoding="utf-8")) + selected_cluster = manifest["selected_cluster"] + if result.get("status") != "success": + raise ValueError("Codex result did not report status=success.") + if result.get("cluster_id") != selected_cluster["cluster_id"]: + raise ValueError("Codex result cluster_id does not match the selected cluster.") + expected_source_pr_numbers = _ordered_ints(selected_cluster.get("source_pr_numbers")) + actual_source_pr_numbers = _ordered_ints(result.get("source_pr_numbers")) + if len(actual_source_pr_numbers) < 2: + raise ValueError("Codex result must reference at least two open source PRs.") + expected_source_pr_set = set(expected_source_pr_numbers) + unknown_source_pr_numbers = [ + number for number in actual_source_pr_numbers if number not in expected_source_pr_set + ] + if unknown_source_pr_numbers: + raise ValueError( + "Codex result source_pr_numbers included PRs outside the selected open PR set: " + + ", ".join(str(number) for number in unknown_source_pr_numbers) + ) + actual_source_pr_numbers = [ + number for number in expected_source_pr_numbers if number in set(actual_source_pr_numbers) + ] + pr_title = str(result.get("pr_title") or "").strip() + if not pr_title: + raise ValueError("Codex result did not provide a PR title.") + summary = str(result.get("summary") or "").strip() + if not summary: + raise ValueError("Codex result did not provide a summary.") + tests_run = [ + str(value).strip() for value in result.get("tests_run") or [] if str(value).strip() + ] + if not tests_run: + raise ValueError("Codex result did not provide any executed test commands.") + final_body = _render_minimal_pr_body( + summary=summary, + target_issue_number=_coerce_int(selected_cluster.get("target_issue_number")), + source_pr_numbers=actual_source_pr_numbers, + tests_run=tests_run, + ) + + pr_body_path = Path(manifest["artifacts"]["pr_body_path"]) + publish_metadata_path = Path(manifest["artifacts"]["publish_metadata_path"]) + write_text(final_body, pr_body_path) + publish_metadata = { + "cluster_id": selected_cluster["cluster_id"], + "canonical_pr_number": selected_cluster.get("canonical_pr_number"), + "source_pr_numbers": actual_source_pr_numbers, + "pr_title": pr_title, + "pr_body_path": str(pr_body_path.resolve()), + "summary": summary, + 
"tests_run": tests_run, + } + write_json(publish_metadata, publish_metadata_path) + return publish_metadata + + +# Snapshot/GitHub context builders + + +def _build_pull_request_contexts( + bundle: SnapshotBundle, + selected_cluster: dict[str, Any], + *, + github_client: GitHubClientLike | None = None, +) -> list[dict[str, Any]]: + pull_request_map = { + int(row["number"]): row for row in bundle.pull_requests if row.get("number") is not None + } + owner, repo = _split_repo(bundle.repo) + contexts: list[dict[str, Any]] = [] + client = github_client + for pr_number in selected_cluster["source_pr_numbers"]: + pull_request = pull_request_map.get(pr_number) + if pull_request is None: + raise ValueError(f"Missing pull request row for #{pr_number}") + + files = _matching_rows(bundle.pr_files, "pull_request_number", pr_number) + files_source = "snapshot" + if not files: + client = client or _build_github_client() + files = [ + normalize_pr_file( + bundle.repo, pr_number, item, bundle.snapshot_id, bundle.extracted_at + ) + for item in client.iter_pull_files(owner, repo, pr_number) + ] + files_source = "github_api" + + diff_rows = _matching_rows(bundle.pr_diffs, "pull_request_number", pr_number) + diff_source = "snapshot" + if diff_rows: + diff_row = diff_rows[0] + else: + client = client or _build_github_client() + diff_row = normalize_pr_diff( + bundle.repo, + pr_number, + pull_request.get("html_url"), + pull_request.get("api_url"), + client.get_pull_request_diff(owner, repo, pr_number), + bundle.snapshot_id, + bundle.extracted_at, + ) + diff_source = "github_api" + + comments = [ + row + for row in bundle.comments + if row.get("parent_kind") == "pull_request" + and _coerce_int(row.get("parent_number")) == pr_number + ] + comments_source = "snapshot" + if not comments: + client = client or _build_github_client() + comments = [ + normalize_comment( + bundle.repo, + item, + "pull_request", + pr_number, + bundle.snapshot_id, + bundle.extracted_at, + ) + for item in client.iter_issue_comments_for_number( + owner, repo, pr_number, since=None + ) + ] + comments_source = "github_api" + + reviews = _matching_rows(bundle.reviews, "pull_request_number", pr_number) + reviews_source = "snapshot" + if not reviews: + client = client or _build_github_client() + reviews = [ + normalize_review( + bundle.repo, pr_number, item, bundle.snapshot_id, bundle.extracted_at + ) + for item in client.iter_pull_reviews(owner, repo, pr_number) + ] + reviews_source = "github_api" + + review_comments = _matching_rows(bundle.review_comments, "pull_request_number", pr_number) + review_comments_source = "snapshot" + if not review_comments: + client = client or _build_github_client() + review_comments = [ + normalize_review_comment( + bundle.repo, + pr_number, + item, + bundle.snapshot_id, + bundle.extracted_at, + ) + for item in client.iter_pull_review_comments(owner, repo, pr_number) + ] + review_comments_source = "github_api" + + contexts.append( + { + "pull_request": pull_request, + "files": files, + "diff": diff_row, + "discussion_comments": comments, + "reviews": reviews, + "review_comments": review_comments, + "context_source": { + "files": files_source, + "diff": diff_source, + "discussion_comments": comments_source, + "reviews": reviews_source, + "review_comments": review_comments_source, + }, + } + ) + return contexts + + +def _build_issue_context( + bundle: SnapshotBundle, + selected_cluster: dict[str, Any], +) -> dict[str, Any] | None: + target_issue_number = selected_cluster.get("target_issue_number") + if 
target_issue_number is None: + return None + issue_map = {int(row["number"]): row for row in bundle.issues if row.get("number") is not None} + issue = issue_map.get(int(target_issue_number)) + issue_comments = [ + row + for row in bundle.comments + if row.get("parent_kind") == "issue" + and _coerce_int(row.get("parent_number")) == int(target_issue_number) + ] + return { + "issue": issue, + "comments": issue_comments, + } + + +def _render_prompt( + *, + selected_cluster: dict[str, Any], + selected_cluster_path: Path, + cluster_context_path: Path, + pr_context_dir: Path, + issue_context_path: str | None, + repo: str, + default_branch: str, + file_policy_instruction: str, +) -> str: + template = PROMPT_TEMPLATE_PATH.read_text(encoding="utf-8") + replacements = { + "CLUSTER_ID": selected_cluster["cluster_id"], + "SOURCE_PR_NUMBERS": ", ".join( + str(number) for number in selected_cluster["source_pr_numbers"] + ), + "SELECTED_CLUSTER_PATH": str(selected_cluster_path), + "CLUSTER_CONTEXT_PATH": str(cluster_context_path), + "PR_CONTEXT_DIR": str(pr_context_dir), + "ISSUE_CONTEXT_PATH": issue_context_path or "No separate issue context file is available.", + "REPO": repo, + "DEFAULT_BRANCH": default_branch, + "FILE_POLICY_INSTRUCTION": file_policy_instruction, + } + for key, value in replacements.items(): + template = template.replace(f"{{{{{key}}}}}", value) + return template + + +def _render_minimal_pr_body( + *, + summary: str, + target_issue_number: int | None, + source_pr_numbers: list[int], + tests_run: list[str], +) -> str: + lines = [summary] + if target_issue_number is not None: + lines.extend(["", f"Target issue: #{target_issue_number}."]) + lines.append("") + lines.append("Tests:") + lines.extend(f"- `{command}`" for command in tests_run) + lines.append("") + lines.append("Source PRs:") + lines.extend(f"- #{number}" for number in source_pr_numbers) + return "\n".join(lines).rstrip() + "\n" + + +def _build_github_client() -> GitHubClient: + return GitHubClient(token=resolve_github_token()) + + +def _infer_default_branch(bundle: SnapshotBundle, selected_cluster: dict[str, Any]) -> str: + pull_request_map = { + int(row["number"]): row for row in bundle.pull_requests if row.get("number") is not None + } + for pr_number in selected_cluster.get("source_pr_numbers") or []: + row = pull_request_map.get(int(pr_number)) + base_ref = str((row or {}).get("base_ref") or "").strip() + if base_ref: + return base_ref + return "main" + + +def _default_file_policy_instruction() -> str: + return ( + "Do not touch README files, changelogs, markdown docs, prose-only files, " + "or commentary artifacts. Fail instead of submitting a noisy branch." 
+ ) + + +def _matching_rows(rows: list[dict[str, Any]], key: str, value: int) -> list[dict[str, Any]]: + matched: list[dict[str, Any]] = [] + for row in rows: + row_value = _coerce_int(row.get(key)) + if row_value == value: + matched.append(row) + return matched + + +def _is_open_non_draft_pull_request(row: dict[str, Any] | None) -> bool: + return bool(row) and row.get("state") == "open" and not bool(row.get("draft")) + + +def _ordered_ints(values: Any) -> list[int]: + ordered: list[int] = [] + seen: set[int] = set() + for value in values or []: + number = _coerce_int(value) + if number is None or number in seen: + continue + ordered.append(number) + seen.add(number) + return ordered + + +def _coerce_int(value: Any) -> int | None: + if value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _split_repo(repo: str) -> tuple[str, str]: + owner, _, name = repo.partition("/") + if not owner or not name: + raise ValueError(f"Expected repo in owner/name form, got: {repo!r}") + return owner, name + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="python -m slop_farmer.reports.canonical_duplicate_pr") + subparsers = parser.add_subparsers(dest="command", required=True) + + stage = subparsers.add_parser( + "stage-run", help="Select an eligible duplicate PR cluster and stage context files." + ) + stage.add_argument("--report", type=Path, required=True, help="Path to analysis-report.json.") + stage.add_argument("--run-dir", type=Path, required=True, help="Run artifact directory.") + stage.add_argument("--cluster-id", help="Optional cluster override.") + stage.add_argument( + "--max-clusters", + type=int, + default=1, + help="Maximum number of ranked eligible candidates to record.", + ) + + publish = subparsers.add_parser( + "prepare-publish", help="Normalize a Codex result into deterministic publish metadata." + ) + publish.add_argument( + "--manifest", type=Path, required=True, help="Path to a staged run-manifest.json." + ) + publish.add_argument( + "--result", type=Path, required=True, help="Path to the Codex JSON result." 
+ ) + return parser + + +def main() -> None: + parser = _build_parser() + args = parser.parse_args() + if args.command == "stage-run": + manifest = stage_run_bundle( + args.report, + args.run_dir, + cluster_id=args.cluster_id, + max_clusters=args.max_clusters, + ) + print(manifest["artifacts"]["prompt_path"]) + return + if args.command == "prepare-publish": + publish_metadata = prepare_publish_artifacts(args.manifest, args.result) + print(publish_metadata["pr_body_path"]) + return + + +if __name__ == "__main__": + main() diff --git a/src/slop_farmer/reports/dashboard.py b/src/slop_farmer/reports/dashboard.py new file mode 100644 index 0000000000000000000000000000000000000000..8a74ee54c849c141811112cee15e7bdc95672ade --- /dev/null +++ b/src/slop_farmer/reports/dashboard.py @@ -0,0 +1,673 @@ +from __future__ import annotations + +import json +from collections import Counter, defaultdict +from datetime import UTC, datetime, timedelta +from pathlib import Path +from typing import Any + +from slop_farmer.config import DashboardDataOptions +from slop_farmer.data.parquet_io import read_json, read_parquet_rows +from slop_farmer.data.snapshot_paths import ( + ResolvedAnalysisReportPath, + resolve_default_dashboard_analysis_report, +) +from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir + + +def run_dashboard_data(options: DashboardDataOptions) -> Path: + snapshot_dir = _resolve_snapshot_dir(options) + manifest = _read_optional_json(snapshot_dir / "manifest.json") + issues = read_parquet_rows(snapshot_dir / "issues.parquet") + pull_requests = read_parquet_rows(snapshot_dir / "pull_requests.parquet") + analysis_path = _resolve_analysis_input(snapshot_dir, options.analysis_input) + analysis = _read_optional_json(analysis_path.path) if analysis_path is not None else {} + contributor_report = _read_optional_json( + options.contributors_input or snapshot_dir / "new-contributors-report.json" + ) + pr_scope_report = _read_optional_json( + options.pr_scope_input or snapshot_dir / "pr-scope-clusters.json" + ) + + repo = ( + manifest.get("repo") + or (pull_requests[0]["repo"] if pull_requests else None) + or (issues[0]["repo"] if issues else None) + or "" + ) + snapshot_id = manifest.get("snapshot_id") or snapshot_dir.name + reference_time = _reference_time(snapshot_id, pull_requests) + cutoff = reference_time - timedelta(days=options.window_days) + + issue_map = {int(row["number"]): row for row in issues if row.get("number") is not None} + pr_map = {int(row["number"]): row for row in pull_requests if row.get("number") is not None} + recent_pull_requests = [] + for row in pull_requests: + created_at = _coerce_datetime(row.get("created_at")) + if created_at is not None and created_at >= cutoff: + recent_pull_requests.append(row) + recent_pull_requests.sort(key=lambda row: row.get("created_at") or "", reverse=True) + recent_numbers = { + int(row["number"]) for row in recent_pull_requests if row.get("number") is not None + } + + clusters, memberships = _cluster_rows(analysis, issue_map, pr_map, recent_numbers) + pr_scope_clusters = _pr_scope_cluster_rows(pr_scope_report, pr_map, recent_numbers) + contributors = _contributor_rows(contributor_report, recent_pull_requests, memberships) + prs = _pr_rows(recent_pull_requests, memberships) + + summary = { + "repo": repo, + "snapshot_id": snapshot_id, + "generated_at": datetime.now(tz=UTC) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z"), + "window_days": options.window_days, + "reference_time": 
reference_time.isoformat().replace("+00:00", "Z"), + "pr_count": len(prs), + "open_pr_count": sum(1 for row in prs if row["state"] == "open"), + "merged_pr_count": sum(1 for row in prs if row["merged"]), + "cluster_count": len(clusters), + "clustered_pr_count": sum(1 for row in prs if row["cluster_id"]), + "contributor_count": len(contributors), + "analysis_available": bool(analysis), + "analysis_source": None if analysis_path is None else analysis_path.source, + "analysis_variant": None if analysis_path is None else analysis_path.variant, + "analysis_snapshot_id": ( + None + if analysis_path is None + else ( + analysis_path.snapshot_id + or ( + str(analysis.get("snapshot_id")).strip() + if analysis.get("snapshot_id") is not None + else None + ) + ) + ), + "analysis_id": None if analysis_path is None else analysis_path.analysis_id, + "contributors_available": bool(contributor_report), + "pr_scope_available": bool(pr_scope_report), + "pr_scope_cluster_count": len(pr_scope_clusters), + } + + output_dir = options.output_dir.resolve() + output_dir.mkdir(parents=True, exist_ok=True) + _write_json(summary, output_dir / "summary.json") + _write_json(clusters, output_dir / "clusters.json") + _write_json(pr_scope_clusters, output_dir / "pr_scope_clusters.json") + _write_json(prs, output_dir / "prs.json") + _write_json(contributors, output_dir / "contributors.json") + return output_dir + + +def _resolve_snapshot_dir(options: DashboardDataOptions) -> Path: + snapshots_root = ( + options.snapshot_root.resolve() + if options.snapshot_root is not None + else (Path("data") / "snapshots").resolve() + ) + return resolve_snapshot_source_dir( + snapshot_dir=options.snapshot_dir, + local_snapshots_root=snapshots_root, + hf_repo_id=options.hf_repo_id, + hf_revision=options.hf_revision, + hf_materialize_dir=options.hf_materialize_dir, + hf_output_dir=snapshots_root.parent, + ) + + +def _resolve_analysis_input( + snapshot_dir: Path, override_path: Path | None +) -> ResolvedAnalysisReportPath | None: + if override_path is not None: + resolved = override_path.resolve() + if not resolved.exists(): + raise FileNotFoundError(f"Dashboard analysis input not found: {resolved}") + return ResolvedAnalysisReportPath( + path=resolved, + variant=_analysis_variant_for_path(resolved), + source="override", + ) + return resolve_default_dashboard_analysis_report(snapshot_dir) + + +def _read_optional_json(path: Path) -> dict[str, Any]: + if path.exists(): + return read_json(path) + return {} + + +def _write_json(payload: Any, path: Path) -> None: + path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + + +def _reference_time(snapshot_id: str, pull_requests: list[dict[str, Any]]) -> datetime: + parsed = _parse_snapshot_id(snapshot_id) + if parsed is not None: + return parsed + timestamps = [ + timestamp + for row in pull_requests + for timestamp in ( + _coerce_datetime(row.get("updated_at")), + _coerce_datetime(row.get("created_at")), + ) + if timestamp is not None + ] + if timestamps: + return max(timestamps) + return datetime.now(tz=UTC) + + +def _parse_snapshot_id(value: str) -> datetime | None: + try: + return datetime.strptime(value, "%Y%m%dT%H%M%SZ").replace(tzinfo=UTC) + except ValueError: + return None + + +def _coerce_datetime(value: Any) -> datetime | None: + if not value or not isinstance(value, str): + return None + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + + +def _coerce_int(value: Any) -> int | None: + if value is None: + 
return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _excerpt(value: Any, limit: int = 240) -> str | None: + if not value or not isinstance(value, str): + return None + compact = " ".join(value.split()) + if len(compact) <= limit: + return compact + return compact[: limit - 1].rstrip() + "…" + + +def _analysis_variant_for_path(path: Path) -> str: + if path.name == "analysis-report-hybrid.json": + return "hybrid" + if path.name == "analysis-report.json": + return "deterministic" + return "override" + + +def _cluster_rows( + analysis: dict[str, Any], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + recent_numbers: set[int], +) -> tuple[list[dict[str, Any]], dict[int, list[dict[str, str]]]]: + rows: list[dict[str, Any]] = [] + memberships: dict[int, list[dict[str, str]]] = defaultdict(list) + for cluster in analysis.get("meta_bugs", []): + pr_numbers = [_coerce_int(value) for value in cluster.get("pr_numbers", [])] + pr_numbers = [value for value in pr_numbers if value is not None] + recent_pr_numbers = [number for number in pr_numbers if number in recent_numbers] + outside_window_pr_numbers = [ + number for number in pr_numbers if number not in recent_numbers + ] + if not recent_pr_numbers: + continue + canonical_pr_number = _coerce_int(cluster.get("canonical_pr_number")) + canonical_issue_number = _coerce_int(cluster.get("canonical_issue_number")) + cluster_id = str(cluster.get("cluster_id") or f"cluster-{recent_pr_numbers[0]}") + title = _cluster_title( + cluster, issue_map, pr_map, canonical_issue_number, canonical_pr_number + ) + recent_authors = sorted( + { + str(pr_map[number].get("author_login")) + for number in recent_pr_numbers + if number in pr_map and pr_map[number].get("author_login") + } + ) + last_activity_at = max( + ( + pr_map[number].get("updated_at") or pr_map[number].get("created_at") + for number in recent_pr_numbers + if number in pr_map + ), + default=None, + ) + row = { + "cluster_id": cluster_id, + "title": title, + "summary": cluster.get("summary"), + "status": cluster.get("status"), + "confidence": cluster.get("confidence"), + "canonical_issue_number": canonical_issue_number, + "canonical_pr_number": canonical_pr_number, + "issue_numbers": [ + _coerce_int(value) + for value in cluster.get("issue_numbers", []) + if _coerce_int(value) is not None + ], + "pr_numbers": pr_numbers, + "recent_pr_numbers": recent_pr_numbers, + "pr_count": len(pr_numbers), + "recent_pr_count": len(recent_pr_numbers), + "outside_window_prs": [ + _pr_member_stub(number, pr_map.get(number, {})) + for number in outside_window_pr_numbers + ], + "authors": recent_authors, + "last_activity_at": last_activity_at, + "evidence_types": list(cluster.get("evidence_types", [])), + "pr_similarity": _cluster_similarity_map(cluster, canonical_pr_number), + "pairwise_similarity": _cluster_pairwise_similarity(cluster), + "github_url": _cluster_github_url( + issue_map, pr_map, canonical_issue_number, canonical_pr_number + ), + } + rows.append(row) + for number in recent_pr_numbers: + role = "canonical" if canonical_pr_number == number else "member" + memberships[number].append({"cluster_id": cluster_id, "role": role}) + rows.sort( + key=lambda row: ( + -int(row["recent_pr_count"]), + -int(row["pr_count"]), + -(float(row["confidence"]) if row["confidence"] is not None else 0.0), + row["last_activity_at"] or "", + ), + reverse=False, + ) + return rows, memberships + + +def _cluster_title( + cluster: dict[str, Any], + issue_map: dict[int, 
dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + canonical_issue_number: int | None, + canonical_pr_number: int | None, +) -> str: + if canonical_issue_number is not None and canonical_issue_number in issue_map: + return str( + issue_map[canonical_issue_number].get("title") or f"Issue #{canonical_issue_number}" + ) + if canonical_pr_number is not None and canonical_pr_number in pr_map: + return str(pr_map[canonical_pr_number].get("title") or f"PR #{canonical_pr_number}") + summary = cluster.get("summary") + if summary: + return str(summary) + cluster_id = cluster.get("cluster_id") or "cluster" + return str(cluster_id) + + +def _cluster_github_url( + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + canonical_issue_number: int | None, + canonical_pr_number: int | None, +) -> str | None: + if canonical_issue_number is not None and canonical_issue_number in issue_map: + return issue_map[canonical_issue_number].get("html_url") + if canonical_pr_number is not None and canonical_pr_number in pr_map: + return pr_map[canonical_pr_number].get("html_url") + return None + + +def _cluster_similarity_map( + cluster: dict[str, Any], canonical_pr_number: int | None +) -> dict[str, dict[str, float]]: + if canonical_pr_number is None: + return {} + scores: dict[str, dict[str, float]] = {} + for comparison in cluster.get("pr_comparisons", []): + left = _coerce_int(comparison.get("left_pr_number")) + right = _coerce_int(comparison.get("right_pr_number")) + if left != canonical_pr_number and right != canonical_pr_number: + continue + other = right if left == canonical_pr_number else left + if other is None: + continue + scores[str(other)] = { + "patch_similarity": float(comparison.get("patch_similarity") or 0.0), + "code_similarity": float(comparison.get("code_similarity") or 0.0), + "size_similarity": float(comparison.get("size_similarity") or 0.0), + "file_overlap": float(comparison.get("file_overlap") or 0.0), + "area_overlap": float(comparison.get("area_overlap") or 0.0), + } + return scores + + +def _cluster_pairwise_similarity(cluster: dict[str, Any]) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for comparison in cluster.get("pr_comparisons", []): + left = _coerce_int(comparison.get("left_pr_number")) + right = _coerce_int(comparison.get("right_pr_number")) + if left is None or right is None: + continue + rows.append( + { + "left_pr_number": left, + "right_pr_number": right, + "patch_similarity": float(comparison.get("patch_similarity") or 0.0), + "code_similarity": float(comparison.get("code_similarity") or 0.0), + "size_similarity": float(comparison.get("size_similarity") or 0.0), + "file_overlap": float(comparison.get("file_overlap") or 0.0), + "area_overlap": float(comparison.get("area_overlap") or 0.0), + } + ) + return rows + + +def _pr_scope_cluster_rows( + pr_scope_report: dict[str, Any], + pr_map: dict[int, dict[str, Any]], + recent_numbers: set[int], +) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for cluster in pr_scope_report.get("pr_scope_clusters", []): + pr_numbers = [_coerce_int(value) for value in cluster.get("pr_numbers", [])] + pr_numbers = [value for value in pr_numbers if value is not None] + recent_pr_numbers = [number for number in pr_numbers if number in recent_numbers] + outside_window_pr_numbers = [ + number for number in pr_numbers if number not in recent_numbers + ] + if not recent_pr_numbers: + continue + representative_pr_number = _coerce_int(cluster.get("representative_pr_number")) + recent_authors = sorted( 
+ { + str(pr_map[number].get("author_login")) + for number in recent_pr_numbers + if number in pr_map and pr_map[number].get("author_login") + } + ) + last_activity_at = max( + ( + pr_map[number].get("updated_at") or pr_map[number].get("created_at") + for number in recent_pr_numbers + if number in pr_map + ), + default=None, + ) + representative = pr_map.get(representative_pr_number or -1, {}) + rows.append( + { + "kind": "pr_scope", + "cluster_id": str(cluster.get("cluster_id") or f"pr-scope-{recent_pr_numbers[0]}"), + "title": _pr_scope_title(cluster, pr_map, representative_pr_number), + "summary": cluster.get("summary"), + "representative_pr_number": representative_pr_number, + "representative_title": representative.get("title"), + "representative_url": representative.get("html_url"), + "pr_numbers": pr_numbers, + "recent_pr_numbers": recent_pr_numbers, + "pr_count": len(pr_numbers), + "recent_pr_count": len(recent_pr_numbers), + "outside_window_prs": [ + _pr_member_stub(number, pr_map.get(number, {})) + for number in outside_window_pr_numbers + ], + "authors": recent_authors, + "last_activity_at": last_activity_at, + "average_similarity": float(cluster.get("average_similarity") or 0.0), + "shared_filenames": list(cluster.get("shared_filenames") or []), + "shared_directories": list(cluster.get("shared_directories") or []), + "pairwise": _pr_scope_pairwise_rows(cluster), + } + ) + rows.sort( + key=lambda row: ( + -int(row["recent_pr_count"]), + -int(row["pr_count"]), + -(float(row["average_similarity"]) if row["average_similarity"] is not None else 0.0), + row["last_activity_at"] or "", + str(row["cluster_id"]), + ) + ) + return rows + + +def _pr_scope_title( + cluster: dict[str, Any], + pr_map: dict[int, dict[str, Any]], + representative_pr_number: int | None, +) -> str: + if representative_pr_number is not None and representative_pr_number in pr_map: + title = pr_map[representative_pr_number].get("title") + if title: + return f"Scope: {title}" + shared_filenames = [str(value) for value in (cluster.get("shared_filenames") or []) if value] + if shared_filenames: + return f"Scope: {shared_filenames[0]}" + shared_directories = [ + str(value) for value in (cluster.get("shared_directories") or []) if value + ] + if shared_directories: + return f"Scope: {shared_directories[0]}" + summary = cluster.get("summary") + if summary: + return str(summary) + return str(cluster.get("cluster_id") or "pr-scope") + + +def _pr_scope_pairwise_rows(cluster: dict[str, Any]) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for comparison in cluster.get("pairwise", []): + left = _coerce_int(comparison.get("left_pr_number")) + right = _coerce_int(comparison.get("right_pr_number")) + if left is None or right is None: + continue + rows.append( + { + "left_pr_number": left, + "right_pr_number": right, + "similarity": float(comparison.get("similarity") or 0.0), + "content_similarity": float(comparison.get("content_similarity") or 0.0), + "size_similarity": float(comparison.get("size_similarity") or 0.0), + "breadth_similarity": float(comparison.get("breadth_similarity") or 0.0), + "concentration_similarity": float( + comparison.get("concentration_similarity") or 0.0 + ), + "shared_filenames": list(comparison.get("shared_filenames") or []), + "shared_directories": list(comparison.get("shared_directories") or []), + } + ) + return rows + + +def _pr_member_stub(number: int, row: dict[str, Any]) -> dict[str, Any]: + html_url = row.get("html_url") + return { + "number": number, + "title": row.get("title"), + 
"author": row.get("author_login"), + "state": row.get("state"), + "merged": bool(row.get("merged")), + "draft": bool(row.get("draft")), + "created_at": row.get("created_at"), + "updated_at": row.get("updated_at"), + "changed_files": _coerce_int(row.get("changed_files")), + "additions": _coerce_int(row.get("additions")), + "deletions": _coerce_int(row.get("deletions")), + "html_url": html_url, + "files_url": f"{html_url}/files" if html_url else None, + } + + +def _pr_rows( + pull_requests: list[dict[str, Any]], + memberships: dict[int, list[dict[str, str]]], +) -> list[dict[str, Any]]: + rows = [] + for row in pull_requests: + number = _coerce_int(row.get("number")) + if number is None: + continue + cluster_memberships = memberships.get(number, []) + primary_membership = cluster_memberships[0] if cluster_memberships else None + html_url = row.get("html_url") + rows.append( + { + "number": number, + "title": row.get("title"), + "author": row.get("author_login"), + "state": row.get("state"), + "author_association": row.get("author_association"), + "merged": bool(row.get("merged")), + "draft": bool(row.get("draft")), + "created_at": row.get("created_at"), + "updated_at": row.get("updated_at"), + "changed_files": _coerce_int(row.get("changed_files")), + "additions": _coerce_int(row.get("additions")), + "deletions": _coerce_int(row.get("deletions")), + "comments_count": _coerce_int(row.get("comments_count")), + "review_comments_count": _coerce_int(row.get("review_comments_count")), + "labels": list(row.get("labels") or []), + "body_excerpt": _excerpt(row.get("body")), + "cluster_id": primary_membership["cluster_id"] if primary_membership else None, + "cluster_role": primary_membership["role"] if primary_membership else None, + "cluster_ids": [membership["cluster_id"] for membership in cluster_memberships], + "html_url": html_url, + "files_url": f"{html_url}/files" if html_url else None, + "conversation_url": html_url, + } + ) + return rows + + +def _contributor_rows( + contributor_report: dict[str, Any], + pull_requests: list[dict[str, Any]], + memberships: dict[int, list[dict[str, str]]], +) -> list[dict[str, Any]]: + recent_pr_counts = Counter( + str(row.get("author_login")) for row in pull_requests if row.get("author_login") + ) + recent_associations = _recent_repo_associations(pull_requests) + recent_cluster_counts = Counter( + str(row.get("author_login")) + for row in pull_requests + if row.get("author_login") + for _membership in memberships.get(_coerce_int(row.get("number")) or -1, []) + ) + report_rows = contributor_report.get("contributors", []) + if not report_rows: + rows = [ + { + "author": author, + "name": None, + "profile_url": f"https://github.com/{author}", + "repo_pull_requests_url": None, + "repo_issues_url": None, + "snapshot_pr_count": count, + "snapshot_issue_count": 0, + "recent_pr_count": count, + "cluster_count": recent_cluster_counts.get(author, 0), + "repo_association": recent_associations.get(author), + "new_to_repo": None, + "first_seen_in_snapshot": None, + "report_reason": None, + "known_contributor": _is_known_repo_association(recent_associations.get(author)), + "follow_through_score": None, + "breadth_score": None, + "automation_risk_signal": None, + "heuristic_note": None, + "account_age_days": None, + "quality_score": None, + "public_pr_count_42d": None, + "public_repo_count_42d": None, + } + for author, count in recent_pr_counts.items() + ] + rows.sort(key=lambda row: (-int(row["recent_pr_count"]), row["author"])) + return rows + + rows = [] + for contributor 
in report_rows: + author = contributor.get("author_login") + if not author: + continue + recent_pr_count = recent_pr_counts.get(str(author), 0) + if recent_pr_count == 0 and not contributor.get("snapshot_pr_count"): + continue + rows.append( + { + "author": author, + "name": contributor.get("name"), + "profile_url": contributor.get("profile_url"), + "repo_pull_requests_url": contributor.get("repo_pull_requests_url"), + "repo_issues_url": contributor.get("repo_issues_url"), + "snapshot_pr_count": _coerce_int(contributor.get("snapshot_pr_count")) or 0, + "snapshot_issue_count": _coerce_int(contributor.get("snapshot_issue_count")) or 0, + "recent_pr_count": recent_pr_count, + "cluster_count": recent_cluster_counts.get(str(author), 0), + "repo_association": contributor.get("repo_association") + or recent_associations.get(str(author)), + "new_to_repo": contributor.get("new_to_repo"), + "first_seen_in_snapshot": contributor.get("first_seen_in_snapshot"), + "report_reason": contributor.get("report_reason"), + "known_contributor": _known_contributor(contributor), + "follow_through_score": contributor.get("follow_through_score"), + "breadth_score": contributor.get("breadth_score"), + "automation_risk_signal": contributor.get("automation_risk_signal"), + "heuristic_note": contributor.get("heuristic_note"), + "account_age_days": _coerce_int(contributor.get("account_age_days")), + "quality_score": None, + "public_pr_count_42d": _coerce_int( + (contributor.get("activity") or {}).get("visible_authored_pr_count") + ), + "public_repo_count_42d": _coerce_int( + (contributor.get("activity") or {}).get("distinct_repos_with_authored_prs") + ), + } + ) + rows.sort( + key=lambda row: ( + -int(row["recent_pr_count"]), + -int(row["snapshot_pr_count"]), + -int(row["cluster_count"]), + str(row["author"]), + ) + ) + return rows + + +def _known_contributor(contributor: dict[str, Any]) -> bool: + return _is_known_repo_association(contributor.get("repo_association")) + + +def _recent_repo_associations(pull_requests: list[dict[str, Any]]) -> dict[str, str | None]: + grouped: dict[str, set[str]] = defaultdict(set) + for row in pull_requests: + login = str(row.get("author_login") or "").strip() + association = str(row.get("author_association") or "").strip() + if not login or not association: + continue + grouped[login].add(association) + return {login: _select_repo_association(sorted(values)) for login, values in grouped.items()} + + +def _select_repo_association(values: list[str]) -> str | None: + if not values: + return None + priority = { + "OWNER": 70, + "MEMBER": 60, + "COLLABORATOR": 50, + "CONTRIBUTOR": 40, + "FIRST_TIME_CONTRIBUTOR": 30, + "FIRST_TIMER": 20, + "NONE": 10, + } + return max(values, key=lambda value: (priority.get(value, 0), value)) + + +def _is_known_repo_association(value: Any) -> bool: + return str(value or "") in {"OWNER", "MEMBER", "COLLABORATOR"} diff --git a/src/slop_farmer/reports/duplicate_prs.py b/src/slop_farmer/reports/duplicate_prs.py new file mode 100644 index 0000000000000000000000000000000000000000..0bfb6b8cef31d9a77c9ce1650feeaaed2ce789d6 --- /dev/null +++ b/src/slop_farmer/reports/duplicate_prs.py @@ -0,0 +1,366 @@ +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any + +from pydantic import BaseModel + +from slop_farmer.config import AnalysisOptions +from slop_farmer.data.parquet_io import read_json +from slop_farmer.reports.analysis import LLM_PROVIDER_ENV_VARS, run_analysis +from slop_farmer.reports.canonical_duplicate_pr 
import ( + SnapshotBundle, + load_snapshot_bundle, + select_ranked_duplicate_pr_cluster, + select_ranked_duplicate_pr_clusters, +) + +DEFAULT_DUPLICATE_PR_MODEL = "gpt-5.4-mini?service_tier=flex" +HYBRID_REPORT_FILENAME = "analysis-report-hybrid.json" + + +class DuplicatePrClusterMergeabilityResponse(BaseModel): + accept: bool + confidence: float + reason: str + + +def ensure_hybrid_report( + *, + report_path: Path | None, + snapshot_dir: Path | None, + model: str = DEFAULT_DUPLICATE_PR_MODEL, +) -> Path: + resolved_report, resolved_snapshot_dir = _resolve_duplicate_pr_inputs( + report_path=report_path, + snapshot_dir=snapshot_dir, + ) + if resolved_report is not None and _report_has_llm_enrichment(resolved_report): + return resolved_report + + cached_hybrid_report = resolved_snapshot_dir / HYBRID_REPORT_FILENAME + if cached_hybrid_report.exists() and _report_has_llm_enrichment(cached_hybrid_report): + return cached_hybrid_report.resolve() + + assert_hybrid_analysis_prerequisites() + output_path = cached_hybrid_report.resolve() + generated_report = run_analysis( + AnalysisOptions( + snapshot_dir=resolved_snapshot_dir, + output_dir=resolved_snapshot_dir.parent, + output=output_path, + hf_repo_id=None, + hf_revision=None, + hf_materialize_dir=None, + ranking_backend="hybrid", + model=model, + max_clusters=10, + ) + ).resolve() + if not _report_has_llm_enrichment(generated_report): + raise RuntimeError( + f"Hybrid analysis for {resolved_snapshot_dir} completed without LLM enrichment. " + "Install the optional fast-agent dependency, configure a provider API key, and retry." + ) + return generated_report + + +def assert_hybrid_analysis_prerequisites() -> None: + problems: list[str] = [] + try: + import fast_agent # noqa: F401 + except Exception: + problems.append( + "Install `slop-farmer[llm]` or `fast-agent-mcp` so hybrid duplicate-PR gating can run." + ) + + if not any(bool(os.environ.get(name)) for name in LLM_PROVIDER_ENV_VARS): + problems.append( + "Set one of OPENAI_API_KEY, ANTHROPIC_API_KEY, GOOGLE_API_KEY, or DEEPSEEK_API_KEY." + ) + + if problems: + raise RuntimeError( + "Hybrid duplicate-PR analysis prerequisites are missing. 
" + " ".join(problems) + ) + + +def load_duplicate_pr_bundle( + *, + report_path: Path | None, + snapshot_dir: Path | None, + model: str = DEFAULT_DUPLICATE_PR_MODEL, +) -> SnapshotBundle: + hybrid_report_path = ensure_hybrid_report( + report_path=report_path, + snapshot_dir=snapshot_dir, + model=model, + ) + return load_snapshot_bundle(hybrid_report_path) + + +def list_mergeable_duplicate_pr_clusters( + *, + report_path: Path | None, + snapshot_dir: Path | None, + limit: int | None, + model: str = DEFAULT_DUPLICATE_PR_MODEL, +) -> list[dict[str, Any]]: + if limit is not None and limit < 1: + raise ValueError("--limit must be at least 1") + + bundle = load_duplicate_pr_bundle( + report_path=report_path, + snapshot_dir=snapshot_dir, + model=model, + ) + assert_hybrid_analysis_prerequisites() + + mergeable_clusters: list[dict[str, Any]] = [] + for candidate in select_ranked_duplicate_pr_clusters(bundle): + gate_result = assess_duplicate_pr_cluster_mergeability(bundle, candidate, model=model) + if not gate_result.accept: + continue + mergeable_clusters.append( + { + **candidate, + "repo": bundle.repo, + "snapshot_id": bundle.snapshot_id, + "report_path": str(bundle.report_path), + "mergeability_confidence": round(float(gate_result.confidence), 3), + "mergeability_reason": gate_result.reason, + } + ) + if limit is not None and len(mergeable_clusters) >= limit: + break + return mergeable_clusters + + +def select_mergeable_duplicate_pr_cluster( + bundle: SnapshotBundle, + *, + cluster_id: str | None, + model: str = DEFAULT_DUPLICATE_PR_MODEL, +) -> dict[str, Any]: + assert_hybrid_analysis_prerequisites() + if cluster_id is not None: + candidate = select_ranked_duplicate_pr_cluster(bundle, cluster_id=cluster_id) + gate_result = assess_duplicate_pr_cluster_mergeability(bundle, candidate, model=model) + if not gate_result.accept: + raise ValueError( + f"Cluster {cluster_id} did not pass the mergeability gate: {gate_result.reason}" + ) + return { + **candidate, + "mergeability_confidence": round(float(gate_result.confidence), 3), + "mergeability_reason": gate_result.reason, + } + + for candidate in select_ranked_duplicate_pr_clusters(bundle): + gate_result = assess_duplicate_pr_cluster_mergeability(bundle, candidate, model=model) + if gate_result.accept: + return { + **candidate, + "mergeability_confidence": round(float(gate_result.confidence), 3), + "mergeability_reason": gate_result.reason, + } + raise ValueError("No duplicate PR cluster passed the mergeability gate.") + + +def assess_duplicate_pr_cluster_mergeability( + bundle: SnapshotBundle, + candidate: dict[str, Any], + *, + model: str = DEFAULT_DUPLICATE_PR_MODEL, +) -> DuplicatePrClusterMergeabilityResponse: + packet = _duplicate_pr_cluster_packet(bundle, candidate) + result = _run_duplicate_pr_cluster_gate(packet, model=model) + if result is None: + raise RuntimeError("Hybrid duplicate-PR mergeability gate failed to return a result.") + return result + + +def _resolve_duplicate_pr_inputs( + *, + report_path: Path | None, + snapshot_dir: Path | None, +) -> tuple[Path | None, Path]: + if (report_path is None) == (snapshot_dir is None): + raise ValueError("Provide exactly one of --report or --snapshot-dir.") + if report_path is not None: + resolved_report = report_path.resolve() + return resolved_report, resolved_report.parent.resolve() + assert snapshot_dir is not None + return None, snapshot_dir.resolve() + + +def _report_has_llm_enrichment(report_path: Path) -> bool: + if not report_path.exists(): + return False + try: + payload = 
read_json(report_path) + except Exception: + return False + return bool(payload.get("llm_enrichment")) + + +def _duplicate_pr_cluster_packet( + bundle: SnapshotBundle, candidate: dict[str, Any] +) -> dict[str, Any]: + pr_rows = { + int(row["number"]): row for row in bundle.pull_requests if row.get("number") is not None + } + issue_rows = {int(row["number"]): row for row in bundle.issues if row.get("number") is not None} + + pull_request_packets: list[dict[str, Any]] = [] + for pr_number in candidate["source_pr_numbers"]: + pull_request = pr_rows.get(int(pr_number)) + if pull_request is None: + continue + files = [ + row + for row in bundle.pr_files + if _coerce_int(row.get("pull_request_number")) == int(pr_number) + ] + diff_row = next( + ( + row + for row in bundle.pr_diffs + if _coerce_int(row.get("pull_request_number")) == int(pr_number) + ), + None, + ) + comments = [ + row + for row in bundle.comments + if row.get("parent_kind") == "pull_request" + and _coerce_int(row.get("parent_number")) == int(pr_number) + ] + reviews = [ + row + for row in bundle.reviews + if _coerce_int(row.get("pull_request_number")) == int(pr_number) + ] + review_comments = [ + row + for row in bundle.review_comments + if _coerce_int(row.get("pull_request_number")) == int(pr_number) + ] + pull_request_packets.append( + { + "number": int(pr_number), + "title": pull_request.get("title"), + "body_excerpt": _excerpt(pull_request.get("body"), 600), + "filenames": sorted( + {str(row.get("filename")) for row in files if row.get("filename")} + )[:20], + "diff_preview": _excerpt((diff_row or {}).get("diff"), 900), + "discussion_comments": [ + _excerpt(row.get("body"), 180) for row in comments[:2] if row.get("body") + ], + "reviews": [ + { + "state": row.get("state"), + "body_excerpt": _excerpt(row.get("body"), 180), + } + for row in reviews[:2] + ], + "review_comments": [ + { + "path": row.get("path"), + "body_excerpt": _excerpt(row.get("body"), 180), + } + for row in review_comments[:2] + ], + } + ) + + target_issue_packet: dict[str, Any] | None = None + target_issue_number = _coerce_int(candidate.get("target_issue_number")) + if target_issue_number is not None and target_issue_number in issue_rows: + issue = issue_rows[target_issue_number] + issue_comments = [ + row + for row in bundle.comments + if row.get("parent_kind") == "issue" + and _coerce_int(row.get("parent_number")) == target_issue_number + ] + target_issue_packet = { + "number": target_issue_number, + "title": issue.get("title"), + "body_excerpt": _excerpt(issue.get("body"), 500), + "comments": [ + _excerpt(row.get("body"), 180) for row in issue_comments[:2] if row.get("body") + ], + } + + return { + "repo": bundle.repo, + "snapshot_id": bundle.snapshot_id, + "cluster_id": candidate["cluster_id"], + "summary": candidate.get("summary"), + "canonical_issue_number": _coerce_int(candidate.get("canonical_issue_number")), + "canonical_pr_number": _coerce_int(candidate.get("canonical_pr_number")), + "target_issue": target_issue_packet, + "source_pr_numbers": candidate["source_pr_numbers"], + "pull_requests": pull_request_packets, + } + + +def _run_duplicate_pr_cluster_gate( + packet: dict[str, Any], + *, + model: str, +) -> DuplicatePrClusterMergeabilityResponse | None: + try: + from fast_agent import FastAgent + except Exception: + return None + + instruction = ( + "You decide whether a cluster of open GitHub pull requests should be synthesized into one " + "canonical pull request. 
Accept only when the PRs appear to implement the same concrete " + "code-path fix and one small patch could replace them. Reject when the root cause, scope, " + "or implementation strategy diverges, or when the overlap is only docs/tests/chatter." + ) + fast = FastAgent("slop-farmer-duplicate-pr-mergeability") + + @fast.agent(name="mergeability_gate", instruction=instruction, model=model, use_history=False) + async def mergeability_gate_stub() -> None: + return None + + prompt = json.dumps(packet, indent=2, sort_keys=True) + try: + import asyncio + + async def _run() -> DuplicatePrClusterMergeabilityResponse | None: + async with fast.run() as agent: + result, _ = await agent.mergeability_gate.structured( + prompt, + DuplicatePrClusterMergeabilityResponse, + ) + return result + + return asyncio.run(_run()) + except Exception: + return None + + +def _excerpt(value: Any, limit: int) -> str | None: + text = str(value or "").strip() + if not text: + return None + if len(text) <= limit: + return text + return text[: limit - 1].rstrip() + "…" + + +def _coerce_int(value: Any) -> int | None: + if value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None diff --git a/src/slop_farmer/reports/new_contributor_report.py b/src/slop_farmer/reports/new_contributor_report.py new file mode 100644 index 0000000000000000000000000000000000000000..0295aa5b2296de3d604f7c26d7b4dea8d66c7507 --- /dev/null +++ b/src/slop_farmer/reports/new_contributor_report.py @@ -0,0 +1,1256 @@ +from __future__ import annotations + +import json +import time +import urllib.error +import urllib.parse +import urllib.request +from datetime import UTC, datetime, timedelta +from pathlib import Path +from typing import Any + +from slop_farmer.config import NewContributorReportOptions, resolve_github_token +from slop_farmer.data.http import urlopen_with_retry +from slop_farmer.data.parquet_io import read_json, read_parquet_rows, write_parquet, write_text +from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir +from slop_farmer.reports.user_activity import summarize_user + +GRAPHQL_URL = "https://api.github.com/graphql" +PROFILE_QUERY = """ +query UserActivityProfile($login: String!, $from: DateTime!, $to: DateTime!) { + rateLimit { cost remaining resetAt } + user(login: $login) { + login + name + createdAt + organizations(first: 100) { + nodes { + login + name + } + } + starredRepositories(first: 100, orderBy: {field: STARRED_AT, direction: DESC}) { + totalCount + nodes { + nameWithOwner + stargazerCount + owner { + login + } + } + } + contributionsCollection(from: $from, to: $to) { + contributionCalendar { + totalContributions + weeks { + contributionDays { + date + contributionCount + } + } + } + totalIssueContributions + totalPullRequestContributions + pullRequestContributionsByRepository(maxRepositories: 10) { + repository { nameWithOwner } + contributions { totalCount } + } + issueContributionsByRepository(maxRepositories: 10) { + repository { nameWithOwner } + contributions { totalCount } + } + } + } +} +""".strip() +SEARCH_PRS_QUERY = """ +query SearchPullRequests($query: String!, $cursor: String) { + search(type: ISSUE, query: $query, first: 100, after: $cursor) { + issueCount + pageInfo { hasNextPage endCursor } + nodes { + ... 
on PullRequest { + number + title + url + state + merged + createdAt + updatedAt + repository { nameWithOwner stargazerCount } + } + } + } +} +""".strip() +SEARCH_ISSUES_QUERY = """ +query SearchIssues($query: String!, $cursor: String) { + search(type: ISSUE, query: $query, first: 100, after: $cursor) { + issueCount + pageInfo { hasNextPage endCursor } + nodes { + ... on Issue { + number + title + url + state + createdAt + updatedAt + repository { nameWithOwner } + } + } + } +} +""".strip() +PREVIOUS_REPORT_REUSE_MAX_AGE = timedelta(days=2) + + +def run_new_contributor_report(options: NewContributorReportOptions) -> Path: + snapshot_dir = _resolve_snapshot_dir(options) + snapshot = _load_snapshot(snapshot_dir) + _report_log(f"Building contributor report for {snapshot['repo']} from {snapshot_dir}") + contributors, instrumentation = _report_contributors(snapshot, options) + report = { + "schema_version": "1.0", + "repo": snapshot["repo"], + "snapshot_id": snapshot["snapshot_id"], + "generated_at": _iso_now(), + "window_days": options.window_days, + "max_authors": options.max_authors, + "instrumentation": instrumentation, + "contributors": contributors, + } + markdown_path = (options.output or (snapshot_dir / "new-contributors-report.md")).resolve() + json_path = (options.json_output or (snapshot_dir / "new-contributors-report.json")).resolve() + parquet_path = snapshot_dir / "new_contributors.parquet" + write_text(json.dumps(report, indent=2) + "\n", json_path) + write_text(_markdown_report_text(report), markdown_path) + write_parquet(_parquet_rows(report), parquet_path, "new_contributors") + _report_log(f"Wrote contributor report for {len(contributors)} authors to {json_path}") + return markdown_path + + +def _resolve_snapshot_dir(options: NewContributorReportOptions) -> Path: + return resolve_snapshot_source_dir( + snapshot_dir=options.snapshot_dir, + local_snapshots_root=options.output_dir.resolve() / "snapshots", + hf_repo_id=options.hf_repo_id, + hf_revision=options.hf_revision, + hf_materialize_dir=options.hf_materialize_dir, + hf_output_dir=options.output_dir, + ) + + +def _load_snapshot(snapshot_dir: Path) -> dict[str, Any]: + manifest_path = snapshot_dir / "manifest.json" + manifest = read_json(manifest_path) if manifest_path.exists() else {} + issues = read_parquet_rows(snapshot_dir / "issues.parquet") + pull_requests = read_parquet_rows(snapshot_dir / "pull_requests.parquet") + comments = read_parquet_rows(snapshot_dir / "comments.parquet") + reviews = read_parquet_rows(snapshot_dir / "reviews.parquet") + review_comments = read_parquet_rows(snapshot_dir / "review_comments.parquet") + repo = ( + manifest.get("repo") + or (issues[0]["repo"] if issues else None) + or (pull_requests[0]["repo"] if pull_requests else None) + or "" + ) + snapshot_id = manifest.get("snapshot_id") or snapshot_dir.name + return { + "repo": repo, + "snapshot_id": snapshot_id, + "manifest": manifest, + "issues": issues, + "pull_requests": pull_requests, + "comments": comments, + "reviews": reviews, + "review_comments": review_comments, + } + + +def _report_contributors( + snapshot: dict[str, Any], options: NewContributorReportOptions +) -> tuple[list[dict[str, Any]], dict[str, Any]]: + author_rows = _author_rows(snapshot) + previous_snapshot_dir = _previous_snapshot_dir(snapshot) + previous_primary_authors = _snapshot_primary_authors(previous_snapshot_dir) + previous_merged_pr_authors = _snapshot_merged_pr_authors(previous_snapshot_dir) + previous_report_payload = 
_previous_report_payload(previous_snapshot_dir) + previous_report_contributors = _previous_report_contributors(previous_report_payload) + previous_report_entries = _previous_report_contributor_entries(previous_report_payload) + snapshot_reference_time = _snapshot_reference_time(snapshot) + previous_report_reusable = _previous_report_reuse_allowed( + previous_report_payload, + window_days=options.window_days, + reference_time=snapshot_reference_time, + ) + cutoff = snapshot_reference_time - timedelta(days=options.window_days) + selected = [] + for row in author_rows: + if row["pr_count"] == 0: + continue + recent_pr_count = 0 + for pr in row["pull_requests"]: + created_at = _coerce_datetime(pr.get("created_at")) + if created_at is not None and created_at >= cutoff: + recent_pr_count += 1 + row["recent_pr_count"] = recent_pr_count + if row["recent_pr_count"] == 0: + continue + selected.append(row) + selected.sort( + key=lambda row: ( + -int(row["recent_pr_count"]), + -int(row["pr_count"]), + -int(row["issue_count"]), + row["author_login"].casefold(), + ) + ) + if options.max_authors > 0: + selected = selected[: options.max_authors] + selected_logins = [str(row["author_login"]) for row in selected] + selected_login_set = set(selected_logins) + selected_seen_in_previous_primary = selected_login_set & previous_primary_authors + selected_seen_in_previous_report = selected_login_set & previous_report_contributors + selected_known_merged = selected_login_set & previous_merged_pr_authors + first_seen_count = sum( + 1 for row in selected if row["author_login"] not in previous_primary_authors + ) + _report_log( + f"Selected {len(selected)} contributor authors for enrichment " + f"(window={options.window_days}d, max_authors={'unlimited' if options.max_authors == 0 else options.max_authors})" + ) + _report_log( + "Contributor cacheability estimate: " + f"selected={len(selected)}, " + f"first_seen={first_seen_count}, " + f"known_via_prior_merged_pr={len(selected_known_merged)}, " + f"seen_in_previous_primary={len(selected_seen_in_previous_primary)}, " + f"overlap_previous_report={len(selected_seen_in_previous_report)}/" + f"{len(selected)}" + f" ({(100.0 * len(selected_seen_in_previous_report) / len(selected) if selected else 0.0):.1f}%), " + f"window_anchor=run_time" + ) + contributors: list[dict[str, Any]] = [] + total_selected = len(selected) + started = time.perf_counter() + reused_previous_report = 0 + live_fetches = 0 + reused_known_merged = 0 + for index, row in enumerate(selected, start=1): + first_seen_in_snapshot = row["author_login"] not in previous_primary_authors + known_via_prior_merged_pr = row["author_login"] in previous_merged_pr_authors + previous_entry = previous_report_entries.get(row["author_login"]) + if index == 1 or index == total_selected or index % 10 == 0: + _report_log( + f"Enriching contributors: {index}/{total_selected} " + f"(current={row['author_login']}, first_seen={str(first_seen_in_snapshot).lower()}, " + f"known_via_prior_merged_pr={str(known_via_prior_merged_pr).lower()})" + ) + if ( + previous_report_reusable + and previous_entry is not None + and not previous_entry.get("fetch_error") + ): + contributors.append( + _reused_previous_report_entry( + snapshot["repo"], + row, + previous_entry, + first_seen_in_snapshot=first_seen_in_snapshot, + known_via_prior_merged_pr=known_via_prior_merged_pr, + ) + ) + reused_previous_report += 1 + if known_via_prior_merged_pr: + reused_known_merged += 1 + continue + try: + summary = summarize_user(row["author_login"], 
options.window_days, None) + fetch_error = None + live_fetches += 1 + except ( + Exception + ) as exc: # pragma: no cover - network failures are exercised via monkeypatch in tests + summary = None + fetch_error = str(exc) + _report_log(f"Contributor enrichment failed for {row['author_login']}: {fetch_error}") + live_fetches += 1 + contributors.append( + _contributor_entry( + snapshot["repo"], + row, + summary, + fetch_error, + first_seen_in_snapshot=first_seen_in_snapshot, + known_via_prior_merged_pr=known_via_prior_merged_pr, + ) + ) + elapsed = time.perf_counter() - started + avg_seconds = elapsed / total_selected if total_selected else 0.0 + failures = sum(1 for contributor in contributors if contributor.get("fetch_error")) + _report_log( + "Contributor enrichment timing: " + f"authors={total_selected}, " + f"failures={failures}, " + f"elapsed={elapsed:.2f}s, " + f"avg={avg_seconds:.2f}s/author" + ) + instrumentation = { + "selected_contributors": total_selected, + "first_seen_in_snapshot": first_seen_count, + "known_via_prior_merged_pr": len(selected_known_merged), + "previous_primary_author_count": len(previous_primary_authors), + "selected_seen_in_previous_primary": len(selected_seen_in_previous_primary), + "previous_report_contributor_count": len(previous_report_contributors), + "selected_overlap_previous_report": len(selected_seen_in_previous_report), + "selected_overlap_previous_report_rate": round( + (len(selected_seen_in_previous_report) / total_selected) if total_selected else 0.0, + 4, + ), + "reused_previous_report_entries": reused_previous_report, + "reused_known_merged_contributors": reused_known_merged, + "live_fetches": live_fetches, + "window_anchor": "run_time", + "window_anchor_cacheable": False, + "enrichment_elapsed_seconds": round(elapsed, 3), + "enrichment_avg_seconds_per_author": round(avg_seconds, 3), + "enrichment_failures": failures, + } + return contributors, instrumentation + + +def _author_rows(snapshot: dict[str, Any]) -> list[dict[str, Any]]: + authors: dict[str, dict[str, Any]] = {} + + def ensure(login: str, author_id: int | None) -> dict[str, Any]: + row = authors.get(login) + if row is not None: + return row + row = { + "author_login": login, + "author_id": author_id, + "issue_count": 0, + "pr_count": 0, + "comment_count": 0, + "review_count": 0, + "review_comment_count": 0, + "first_seen_at": None, + "last_seen_at": None, + "issues": [], + "pull_requests": [], + "repo_association_values": set(), + } + authors[login] = row + return row + + def touch(row: dict[str, Any], timestamp: str | None) -> None: + if not timestamp: + return + if row["first_seen_at"] is None or timestamp < row["first_seen_at"]: + row["first_seen_at"] = timestamp + if row["last_seen_at"] is None or timestamp > row["last_seen_at"]: + row["last_seen_at"] = timestamp + + for issue in snapshot["issues"]: + login = str(issue.get("author_login") or "").strip() + if not login: + continue + row = ensure(login, _coerce_int(issue.get("author_id"))) + row["issue_count"] += 1 + row["issues"].append(issue) + _track_repo_association(row, issue.get("author_association")) + touch(row, issue.get("created_at") or issue.get("updated_at")) + + for pr in snapshot["pull_requests"]: + login = str(pr.get("author_login") or "").strip() + if not login: + continue + row = ensure(login, _coerce_int(pr.get("author_id"))) + row["pr_count"] += 1 + row["pull_requests"].append(pr) + _track_repo_association(row, pr.get("author_association")) + touch(row, pr.get("created_at") or pr.get("updated_at")) + + for comment 
in snapshot["comments"]: + login = str(comment.get("author_login") or "").strip() + if not login: + continue + row = ensure(login, _coerce_int(comment.get("author_id"))) + row["comment_count"] += 1 + _track_repo_association(row, comment.get("author_association")) + touch(row, comment.get("created_at") or comment.get("updated_at")) + + for review in snapshot["reviews"]: + login = str(review.get("author_login") or "").strip() + if not login: + continue + row = ensure(login, _coerce_int(review.get("author_id"))) + row["review_count"] += 1 + _track_repo_association(row, review.get("author_association")) + touch(row, review.get("submitted_at") or review.get("created_at")) + + for review_comment in snapshot["review_comments"]: + login = str(review_comment.get("author_login") or "").strip() + if not login: + continue + row = ensure(login, _coerce_int(review_comment.get("author_id"))) + row["review_comment_count"] += 1 + _track_repo_association(row, review_comment.get("author_association")) + touch(row, review_comment.get("created_at") or review_comment.get("updated_at")) + + rows = [] + for row in authors.values(): + row["primary_artifact_count"] = row["issue_count"] + row["pr_count"] + row["artifact_count"] = ( + row["primary_artifact_count"] + + row["comment_count"] + + row["review_count"] + + row["review_comment_count"] + ) + row["repo_association_values"] = sorted( + str(value) for value in row["repo_association_values"] + ) + row["repo_association"] = _select_repo_association(row["repo_association_values"]) + row["issues"] = sorted( + row["issues"], + key=lambda item: ( + str(item.get("created_at") or ""), + int(item.get("number") or 0), + ), + reverse=True, + )[:3] + row["pull_requests"] = sorted( + row["pull_requests"], + key=lambda item: ( + str(item.get("created_at") or ""), + int(item.get("number") or 0), + ), + reverse=True, + )[:3] + rows.append(row) + return sorted( + rows, + key=lambda row: ( + -row["pr_count"], + -row["issue_count"], + row["primary_artifact_count"], + str(row["last_seen_at"] or ""), + row["author_login"].casefold(), + ), + reverse=False, + ) + + +def _previous_snapshot_dir(snapshot: dict[str, Any]) -> Path | None: + manifest = snapshot.get("manifest") or {} + watermark = manifest.get("watermark") or {} + raw = watermark.get("previous_snapshot_dir") + if not raw: + return None + path = Path(str(raw)) + if not path.exists(): + return None + return path.resolve() + + +def _snapshot_primary_authors(snapshot_dir: Path | None) -> set[str]: + if snapshot_dir is None: + return set() + authors = set() + for table_name in ("issues", "pull_requests"): + for row in read_parquet_rows(snapshot_dir / f"{table_name}.parquet"): + login = str(row.get("author_login") or "").strip() + if login: + authors.add(login) + return authors + + +def _snapshot_merged_pr_authors(snapshot_dir: Path | None) -> set[str]: + if snapshot_dir is None: + return set() + authors = set() + for row in read_parquet_rows(snapshot_dir / "pull_requests.parquet"): + login = str(row.get("author_login") or "").strip() + if login and bool(row.get("merged")): + authors.add(login) + return authors + + +def _previous_report_payload(snapshot_dir: Path | None) -> dict[str, Any] | None: + if snapshot_dir is None: + return None + path = snapshot_dir / "new-contributors-report.json" + if not path.exists(): + return None + try: + payload = read_json(path) + except Exception: + return None + return payload if isinstance(payload, dict) else None + + +def _previous_report_contributors(payload: dict[str, Any] | None) -> 
set[str]: + if payload is None: + return set() + contributors = payload.get("contributors") + if not isinstance(contributors, list): + return set() + return { + str(row.get("author_login")).strip() + for row in contributors + if isinstance(row, dict) and str(row.get("author_login") or "").strip() + } + + +def _previous_report_contributor_entries( + payload: dict[str, Any] | None, +) -> dict[str, dict[str, Any]]: + if payload is None: + return {} + contributors = payload.get("contributors") + if not isinstance(contributors, list): + return {} + return { + login: row + for row in contributors + if isinstance(row, dict) and (login := str(row.get("author_login") or "").strip()) + } + + +def _previous_report_reuse_allowed( + payload: dict[str, Any] | None, + *, + window_days: int, + reference_time: datetime, +) -> bool: + if payload is None: + return False + if _coerce_int(payload.get("window_days")) != window_days: + return False + generated_at = _coerce_datetime(payload.get("generated_at")) + if generated_at is None: + return False + return abs(reference_time - generated_at) <= PREVIOUS_REPORT_REUSE_MAX_AGE + + +def _reused_previous_report_entry( + repo: str, + row: dict[str, Any], + previous_entry: dict[str, Any], + *, + first_seen_in_snapshot: bool, + known_via_prior_merged_pr: bool, +) -> dict[str, Any]: + login = row["author_login"] + age_days = _coerce_int(previous_entry.get("account_age_days")) + return { + "author_login": login, + "name": previous_entry.get("name"), + "profile_url": _profile_url(login), + "repo_pull_requests_url": _repo_search_url(repo, login, is_pr=True), + "repo_issues_url": _repo_search_url(repo, login, is_pr=False), + "repo_first_seen_at": row["first_seen_at"], + "repo_last_seen_at": row["last_seen_at"], + "repo_primary_artifact_count": row["primary_artifact_count"], + "repo_artifact_count": row["artifact_count"], + "snapshot_issue_count": row["issue_count"], + "snapshot_pr_count": row["pr_count"], + "snapshot_comment_count": row["comment_count"], + "snapshot_review_count": row["review_count"], + "snapshot_review_comment_count": row["review_comment_count"], + "repo_association": row.get("repo_association"), + "new_to_repo": first_seen_in_snapshot, + "first_seen_in_snapshot": first_seen_in_snapshot, + "known_via_prior_merged_pr": known_via_prior_merged_pr, + "report_reason": "first_seen_in_snapshot" if first_seen_in_snapshot else None, + "enrichment_source": "previous_report", + "live_refetch_skipped": True, + "account_age_days": age_days, + "young_account": age_days is not None and age_days <= 365, + "follow_through_score": previous_entry.get("follow_through_score"), + "breadth_score": previous_entry.get("breadth_score"), + "automation_risk_signal": previous_entry.get("automation_risk_signal"), + "heuristic_note": previous_entry.get("heuristic_note"), + "public_orgs": _previous_report_public_orgs(previous_entry), + "activity": _previous_report_activity(previous_entry), + "examples": { + "pull_requests": [ + _artifact_example(item, "pull_request") for item in row["pull_requests"] + ], + "issues": [_artifact_example(item, "issue") for item in row["issues"]], + }, + "fetch_error": None, + } + + +def _previous_report_public_orgs(previous_entry: dict[str, Any]) -> list[str]: + values = previous_entry.get("public_orgs") + if not isinstance(values, list): + return [] + public_orgs: list[str] = [] + for value in values: + if isinstance(value, str) and value.strip(): + public_orgs.append(value.strip()) + elif isinstance(value, dict): + login = str(value.get("login") or 
"").strip() + if login: + public_orgs.append(login) + return public_orgs + + +def _previous_report_activity(previous_entry: dict[str, Any]) -> dict[str, Any]: + activity = previous_entry.get("activity") + if not isinstance(activity, dict): + activity = previous_entry + return { + "visible_authored_pr_count": activity.get("visible_authored_pr_count"), + "merged_pr_count": activity.get("merged_pr_count"), + "closed_unmerged_pr_count": activity.get("closed_unmerged_pr_count"), + "open_pr_count": activity.get("open_pr_count"), + "merged_pr_rate": activity.get("merged_pr_rate"), + "closed_unmerged_pr_rate": activity.get("closed_unmerged_pr_rate"), + "still_open_pr_rate": activity.get("still_open_pr_rate"), + "distinct_repos_with_authored_prs": activity.get("distinct_repos_with_authored_prs"), + "distinct_repos_with_open_prs": activity.get("distinct_repos_with_open_prs"), + } + + +def _contributor_entry( + repo: str, + row: dict[str, Any], + summary: dict[str, Any] | None, + fetch_error: str | None, + *, + first_seen_in_snapshot: bool, + known_via_prior_merged_pr: bool, +) -> dict[str, Any]: + login = row["author_login"] + account = (summary or {}).get("account") or {} + activity = (summary or {}).get("activity") or {} + orgs = ((summary or {}).get("organization_membership") or {}).get("public_orgs") or [] + return { + "author_login": login, + "name": (summary or {}).get("name"), + "profile_url": _profile_url(login), + "repo_pull_requests_url": _repo_search_url(repo, login, is_pr=True), + "repo_issues_url": _repo_search_url(repo, login, is_pr=False), + "repo_first_seen_at": row["first_seen_at"], + "repo_last_seen_at": row["last_seen_at"], + "repo_primary_artifact_count": row["primary_artifact_count"], + "repo_artifact_count": row["artifact_count"], + "snapshot_issue_count": row["issue_count"], + "snapshot_pr_count": row["pr_count"], + "snapshot_comment_count": row["comment_count"], + "snapshot_review_count": row["review_count"], + "snapshot_review_comment_count": row["review_comment_count"], + "repo_association": row.get("repo_association"), + "new_to_repo": first_seen_in_snapshot, + "first_seen_in_snapshot": first_seen_in_snapshot, + "known_via_prior_merged_pr": known_via_prior_merged_pr, + "report_reason": "first_seen_in_snapshot" if first_seen_in_snapshot else None, + "enrichment_source": "live", + "live_refetch_skipped": False, + "account_age_days": account.get("age_days"), + "young_account": _coerce_int(account.get("age_days")) is not None + and int(account["age_days"]) <= 365, + "follow_through_score": score_follow_through(summary), + "breadth_score": score_breadth(summary), + "automation_risk_signal": score_automation_risk( + summary, repo_snapshot_pr_count=row["pr_count"] + ), + "heuristic_note": activity_note(summary, repo_snapshot_pr_count=row["pr_count"]), + "public_orgs": [org["login"] for org in orgs if isinstance(org, dict) and org.get("login")], + "activity": { + "visible_authored_pr_count": activity.get("visible_authored_pr_count"), + "merged_pr_count": activity.get("merged_pr_count"), + "closed_unmerged_pr_count": activity.get("closed_unmerged_pr_count"), + "open_pr_count": activity.get("open_pr_count"), + "merged_pr_rate": activity.get("merged_pr_rate"), + "closed_unmerged_pr_rate": activity.get("closed_unmerged_pr_rate"), + "still_open_pr_rate": activity.get("still_open_pr_rate"), + "distinct_repos_with_authored_prs": activity.get("distinct_repos_with_authored_prs"), + "distinct_repos_with_open_prs": activity.get("distinct_repos_with_open_prs"), + }, + "examples": { + 
"pull_requests": [ + _artifact_example(item, "pull_request") for item in row["pull_requests"] + ], + "issues": [_artifact_example(item, "issue") for item in row["issues"]], + }, + "fetch_error": fetch_error, + } + + +def _artifact_example(row: dict[str, Any], kind: str) -> dict[str, Any]: + return { + "kind": kind, + "number": _coerce_int(row.get("number")), + "title": row.get("title"), + "url": row.get("html_url"), + "state": row.get("state"), + "merged": row.get("merged"), + "draft": row.get("draft"), + "created_at": row.get("created_at"), + } + + +def _markdown_report_text(report: dict[str, Any]) -> str: + lines = [ + f"# New Contributor Report: {report['repo']}", + "", + f"- Snapshot: `{report['snapshot_id']}`", + f"- Generated: `{report['generated_at']}`", + f"- Activity window: `{report['window_days']}d`", + f"- Contributors: `{len(report['contributors'])}`", + "", + "## Summary", + "", + "| Author | Profile | First seen? | Association | Account age | Repo first seen | Snapshot PRs | Snapshot issues | Follow-through | Breadth | Automation risk | Notes |", + "| --- | --- | --- | --- | ---: | --- | ---: | ---: | --- | --- | --- | --- |", + ] + for contributor in report["contributors"]: + lines.append( + "| " + f"`{contributor['author_login']}` | " + f"[profile]({contributor['profile_url']}) | " + f"{'yes' if contributor['first_seen_in_snapshot'] else 'no'} | " + f"{contributor.get('repo_association') or 'unknown'} | " + f"{_display_int(contributor['account_age_days'])}d | " + f"`{contributor['repo_first_seen_at'] or 'unknown'}` | " + f"{contributor['snapshot_pr_count']} | " + f"{contributor['snapshot_issue_count']} | " + f"{contributor['follow_through_score']} | " + f"{contributor['breadth_score']} | " + f"{contributor['automation_risk_signal']} | " + f"{contributor['heuristic_note']} |" + ) + lines.extend(["", "## Contributors", ""]) + if not report["contributors"]: + lines.extend(["No new contributors found.", ""]) + return "\n".join(lines).rstrip() + "\n" + for contributor in report["contributors"]: + lines.extend(_contributor_markdown_lines(report["repo"], contributor)) + return "\n".join(lines).rstrip() + "\n" + + +def _parquet_rows(report: dict[str, Any]) -> list[dict[str, Any]]: + rows = [] + for contributor in report["contributors"]: + activity = contributor["activity"] + rows.append( + { + "repo": report["repo"], + "snapshot_id": report["snapshot_id"], + "report_generated_at": report["generated_at"], + "window_days": report["window_days"], + "author_login": contributor["author_login"], + "name": contributor["name"], + "profile_url": contributor["profile_url"], + "repo_pull_requests_url": contributor["repo_pull_requests_url"], + "repo_issues_url": contributor["repo_issues_url"], + "repo_first_seen_at": contributor["repo_first_seen_at"], + "repo_last_seen_at": contributor["repo_last_seen_at"], + "repo_primary_artifact_count": contributor["repo_primary_artifact_count"], + "repo_artifact_count": contributor["repo_artifact_count"], + "snapshot_issue_count": contributor["snapshot_issue_count"], + "snapshot_pr_count": contributor["snapshot_pr_count"], + "snapshot_comment_count": contributor["snapshot_comment_count"], + "snapshot_review_count": contributor["snapshot_review_count"], + "snapshot_review_comment_count": contributor["snapshot_review_comment_count"], + "repo_association": contributor.get("repo_association"), + "new_to_repo": contributor["new_to_repo"], + "first_seen_in_snapshot": contributor["first_seen_in_snapshot"], + "report_reason": contributor["report_reason"], + 
"account_age_days": contributor["account_age_days"], + "young_account": contributor["young_account"], + "follow_through_score": contributor["follow_through_score"], + "breadth_score": contributor["breadth_score"], + "automation_risk_signal": contributor["automation_risk_signal"], + "heuristic_note": contributor["heuristic_note"], + "public_orgs": contributor["public_orgs"], + "visible_authored_pr_count": activity.get("visible_authored_pr_count"), + "merged_pr_count": activity.get("merged_pr_count"), + "closed_unmerged_pr_count": activity.get("closed_unmerged_pr_count"), + "open_pr_count": activity.get("open_pr_count"), + "merged_pr_rate": activity.get("merged_pr_rate"), + "closed_unmerged_pr_rate": activity.get("closed_unmerged_pr_rate"), + "still_open_pr_rate": activity.get("still_open_pr_rate"), + "distinct_repos_with_authored_prs": activity.get( + "distinct_repos_with_authored_prs" + ), + "distinct_repos_with_open_prs": activity.get("distinct_repos_with_open_prs"), + "fetch_error": contributor.get("fetch_error"), + } + ) + return rows + + +def _contributor_markdown_lines(repo: str, contributor: dict[str, Any]) -> list[str]: + orgs = ", ".join(f"`{org}`" for org in contributor["public_orgs"]) or "none visible" + activity = contributor["activity"] + lines = [ + f"### `{contributor['author_login']}`", + "", + f"- profile: [github.com/{contributor['author_login']}]({contributor['profile_url']})", + f"- repo PR search: [open search]({contributor['repo_pull_requests_url']})", + f"- repo issue search: [open search]({contributor['repo_issues_url']})", + f"- repo first seen: `{contributor['repo_first_seen_at'] or 'unknown'}`", + f"- repo last seen: `{contributor['repo_last_seen_at'] or 'unknown'}`", + f"- first seen in snapshot: **{'yes' if contributor['first_seen_in_snapshot'] else 'no'}**", + f"- repo association: **{contributor.get('repo_association') or 'unknown'}**", + f"- repo primary artifacts: **{contributor['repo_primary_artifact_count']}**", + f"- snapshot authored PRs/issues: **{contributor['snapshot_pr_count']} PRs / {contributor['snapshot_issue_count']} issues**", + f"- snapshot discussion activity: **{contributor['snapshot_comment_count']} comments / {contributor['snapshot_review_count']} reviews / {contributor['snapshot_review_comment_count']} review comments**", + f"- account age: **{_display_int(contributor['account_age_days'])} days**", + ( + "- recent public PR activity: " + f"**{_display_int(activity.get('visible_authored_pr_count'))} PRs** " + f"({_display_int(activity.get('merged_pr_count'))} merged / " + f"{_display_int(activity.get('closed_unmerged_pr_count'))} closed-unmerged / " + f"{_display_int(activity.get('open_pr_count'))} open; " + f"merged rate **{_display_rate(activity.get('merged_pr_rate'))}**, " + f"closed-unmerged rate **{_display_rate(activity.get('closed_unmerged_pr_rate'))}**, " + f"open rate **{_display_rate(activity.get('still_open_pr_rate'))}**)" + ), + f"- public orgs: {orgs}", + f"- compact scores: follow-through **{contributor['follow_through_score']}**, breadth **{contributor['breadth_score']}**, automation risk **{contributor['automation_risk_signal']}**", + f"- heuristic read: **{contributor['heuristic_note']}**", + ] + if contributor.get("fetch_error"): + lines.append(f"- enrichment fetch error: `{contributor['fetch_error']}`") + lines.extend(["", "Example artifacts:"]) + examples = contributor["examples"]["pull_requests"] + contributor["examples"]["issues"] + if not examples: + lines.append("- none") + for example in examples: + prefix = "PR" if 
example["kind"] == "pull_request" else "Issue" + suffix_parts = [str(example["state"] or "unknown")] + if example["kind"] == "pull_request" and example.get("merged"): + suffix_parts.append("merged") + if example["kind"] == "pull_request" and example.get("draft"): + suffix_parts.append("draft") + lines.append( + f"- [{prefix} #{example['number']}: {example['title']}]({example['url']})" + f" — {', '.join(suffix_parts)}" + ) + lines.append("") + return lines + + +def _legacy_summarize_user(login: str, days: int, repo: str | None) -> dict[str, Any]: + now = datetime.now(tz=UTC) + start = (now - timedelta(days=days)).replace(microsecond=0) + from_iso = start.isoformat().replace("+00:00", "Z") + from_date = start.date().isoformat() + to_iso = now.replace(microsecond=0).isoformat().replace("+00:00", "Z") + profile = _post_graphql(PROFILE_QUERY, {"login": login, "from": from_iso, "to": to_iso})["user"] + if profile is None: + raise RuntimeError(f"unknown user {login!r}") + contributions = profile["contributionsCollection"] + calendar = contributions["contributionCalendar"] + repo_term = f" repo:{repo}" if repo else "" + pr_query = f"author:{login} is:pr created:>={from_date}{repo_term} sort:created-desc" + open_pr_query = ( + f"author:{login} is:pr is:open created:>={from_date}{repo_term} sort:created-desc" + ) + issue_query = f"author:{login} is:issue created:>={from_date}{repo_term} sort:created-desc" + pr_count, prs = _search_all(pr_query, prs=True) + open_pr_count, open_prs = _search_all(open_pr_query, prs=True) + issue_count, issues = _search_all(issue_query, prs=False) + merged_prs = [pr for pr in prs if pr.get("merged")] + closed_unmerged_prs = [pr for pr in prs if pr.get("state") == "CLOSED" and not pr.get("merged")] + still_open_prs = [pr for pr in prs if pr.get("state") == "OPEN"] + pr_repos = sorted({pr["repository"]["nameWithOwner"] for pr in prs if pr.get("repository")}) + open_pr_repos = sorted( + {pr["repository"]["nameWithOwner"] for pr in open_prs if pr.get("repository")} + ) + starred = profile.get("starredRepositories") or {} + starred_nodes = [row for row in starred.get("nodes") or [] if isinstance(row, dict)] + non_self_starred = [ + row + for row in starred_nodes + if ((row.get("owner") or {}).get("login") or "").casefold() != login.casefold() + ] + created_at = datetime.fromisoformat(profile["createdAt"].replace("Z", "+00:00")) + account_age_days = (now - created_at).days + return { + "login": profile["login"], + "name": profile.get("name"), + "repo_scope": repo, + "window": {"days": days, "from": from_iso, "to": to_iso}, + "account": {"created_at": profile["createdAt"], "age_days": account_age_days}, + "activity": { + "visible_contributions_total": calendar["totalContributions"], + **_contribution_calendar_summary(calendar["weeks"]), + "authored_issues": contributions["totalIssueContributions"], + "authored_pull_requests": contributions["totalPullRequestContributions"], + "visible_authored_issue_count": len(issues), + "visible_authored_pr_count": len(prs), + "search_authored_issue_count": issue_count, + "search_authored_pr_count": pr_count, + "search_open_pr_count": open_pr_count, + "distinct_repos_with_authored_prs": len(pr_repos), + "distinct_repos_with_open_prs": len(open_pr_repos), + "open_pr_count": len(open_prs), + "merged_pr_count": len(merged_prs), + "closed_unmerged_pr_count": len(closed_unmerged_prs), + "still_open_pr_count": len(still_open_prs), + "merged_pr_rate": _rate(len(merged_prs), len(prs)), + "closed_unmerged_pr_rate": _rate(len(closed_unmerged_prs), 
len(prs)), + "still_open_pr_rate": _rate(len(still_open_prs), len(prs)), + }, + "stars": { + "starred_repositories_total": starred.get("totalCount"), + "visible_non_self_starred_repositories": len(non_self_starred), + "recent_pr_repo_star_distribution": _star_distribution( + [int((pr.get("repository") or {}).get("stargazerCount") or 0) for pr in prs] + ), + }, + "organization_membership": { + "public_orgs": _public_orgs(login), + }, + } + + +def _has_single_repo_burst( + summary: dict[str, Any] | None, *, repo_snapshot_pr_count: int | None +) -> bool: + if not summary or repo_snapshot_pr_count is None: + return False + activity = summary["activity"] + closed_rate = activity.get("closed_unmerged_pr_rate") + repo_count = activity.get("distinct_repos_with_authored_prs") + return ( + repo_snapshot_pr_count >= 25 + and closed_rate is not None + and closed_rate >= 0.7 + and repo_count is not None + and repo_count <= 2 + ) + + +def activity_note( + summary: dict[str, Any] | None, *, repo_snapshot_pr_count: int | None = None +) -> str: + if not summary: + return "summary unavailable" + activity = summary["activity"] + stars = summary["stars"] + notes = [] + if summary["account"]["age_days"] < 90: + notes.append("very new account") + elif summary["account"]["age_days"] < 365: + notes.append("young account") + if _has_single_repo_burst(summary, repo_snapshot_pr_count=repo_snapshot_pr_count): + notes.append("very high single-repo PR burst") + if activity["visible_authored_pr_count"] >= 100: + notes.append("extremely high recent PR volume") + elif activity["visible_authored_pr_count"] >= 25: + notes.append("high recent PR volume") + if activity["distinct_repos_with_authored_prs"] >= 25: + notes.append("very broad repo spread") + elif activity["distinct_repos_with_authored_prs"] >= 10: + notes.append("broad repo spread") + if ( + activity["closed_unmerged_pr_rate"] is not None + and activity["closed_unmerged_pr_rate"] >= 0.4 + ): + notes.append("many recent closed-unmerged PRs") + if activity["still_open_pr_rate"] is not None and activity["still_open_pr_rate"] >= 0.4: + notes.append("high open PR share") + if stars["visible_non_self_starred_repositories"] == 0: + notes.append("no visible non-self stars") + median = stars["recent_pr_repo_star_distribution"]["median"] + if median is not None and median >= 10000: + notes.append("targets high-star repos") + return "; ".join(notes) or "—" + + +def score_follow_through(summary: dict[str, Any] | None) -> str: + if not summary: + return "n/a" + activity = summary["activity"] + rate = activity["merged_pr_rate"] + if rate is None: + return "n/a" + if rate >= 0.7 and (activity["closed_unmerged_pr_rate"] or 0.0) <= 0.1: + return "strong" + if rate >= 0.35: + return "mixed" + return "weak" + + +def score_breadth(summary: dict[str, Any] | None) -> str: + if not summary: + return "n/a" + repos = summary["activity"]["distinct_repos_with_authored_prs"] + if repos >= 25: + return "very high" + if repos >= 10: + return "high" + if repos >= 4: + return "moderate" + return "low" + + +def automation_risk_score( + summary: dict[str, Any] | None, *, repo_snapshot_pr_count: int | None = None +) -> int | None: + if not summary: + return None + activity = summary["activity"] + stars = summary["stars"] + risk = 0 + if summary["account"]["age_days"] < 30: + risk += 3 + elif summary["account"]["age_days"] < 180: + risk += 2 + if activity["visible_authored_pr_count"] >= 200: + risk += 3 + elif activity["visible_authored_pr_count"] >= 50: + risk += 2 + if 
activity["distinct_repos_with_authored_prs"] >= 50: + risk += 3 + elif activity["distinct_repos_with_authored_prs"] >= 15: + risk += 2 + if ( + activity["closed_unmerged_pr_rate"] is not None + and activity["closed_unmerged_pr_rate"] >= 0.4 + ): + risk += 2 + if activity["still_open_pr_rate"] is not None and activity["still_open_pr_rate"] >= 0.4: + risk += 2 + if stars["visible_non_self_starred_repositories"] == 0: + risk += 1 + if _has_single_repo_burst(summary, repo_snapshot_pr_count=repo_snapshot_pr_count): + risk = max(risk, 8) + return risk + + +def score_automation_risk( + summary: dict[str, Any] | None, *, repo_snapshot_pr_count: int | None = None +) -> str: + risk = automation_risk_score(summary, repo_snapshot_pr_count=repo_snapshot_pr_count) + if risk is None: + return "n/a" + if risk >= 8: + return "high" + if risk >= 4: + return "medium" + return "low" + + +def _post_graphql(query: str, variables: dict[str, Any]) -> dict[str, Any]: + token = resolve_github_token() + if not token: + raise RuntimeError("missing GITHUB_TOKEN/GRAPHQL_TOKEN/GH_TOKEN") + body = json.dumps({"query": query, "variables": variables}).encode() + request = urllib.request.Request( + GRAPHQL_URL, + data=body, + headers={ + "Authorization": f"bearer {token}", + "User-Agent": "slop-farmer", + "Content-Type": "application/json", + "Accept": "application/json", + }, + method="POST", + ) + try: + with urlopen_with_retry( + request, + timeout=120, + log=_report_log, + label="GitHub GraphQL contributor report", + ) as response: + payload = json.load(response) + except urllib.error.HTTPError as exc: # pragma: no cover - exercised only in live use + detail = exc.read().decode("utf-8", "replace") + raise RuntimeError(f"graphql request failed: {exc.code} {detail}") from exc + errors = payload.get("errors") or [] + if errors: + raise RuntimeError(json.dumps(errors)) + return payload["data"] + + +def _search_all(query: str, *, prs: bool) -> tuple[int, list[dict[str, Any]]]: + nodes: list[dict[str, Any]] = [] + cursor: str | None = None + issue_count = 0 + search_query = SEARCH_PRS_QUERY if prs else SEARCH_ISSUES_QUERY + while True: + data = _post_graphql(search_query, {"query": query, "cursor": cursor}) + search = data["search"] + issue_count = search["issueCount"] + nodes.extend(node for node in search["nodes"] if isinstance(node, dict)) + if not search["pageInfo"]["hasNextPage"] or len(nodes) >= 1000: + break + cursor = search["pageInfo"]["endCursor"] + return issue_count, nodes + + +def _public_orgs(login: str) -> list[dict[str, Any]]: + token = resolve_github_token() + if not token: + return [] + request = urllib.request.Request( + f"https://api.github.com/users/{login}/orgs", + headers={ + "Authorization": f"Bearer {token}", + "User-Agent": "slop-farmer", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }, + ) + with urlopen_with_retry( + request, + timeout=120, + log=_report_log, + label=f"https://api.github.com/users/{login}/orgs", + ) as response: # pragma: no cover - exercised only in live use + payload = json.load(response) + if not isinstance(payload, list): + return [] + return [ + {"login": row.get("login"), "name": row.get("name")} + for row in payload + if isinstance(row, dict) and row.get("login") + ] + + +def _contribution_calendar_summary(weeks: list[dict[str, Any]]) -> dict[str, Any]: + daily = [day for week in weeks for day in week["contributionDays"]] + active_days = sum(1 for day in daily if day["contributionCount"] > 0) + weekly = [sum(day["contributionCount"] for 
day in week["contributionDays"]) for week in weeks] + return { + "active_days": active_days, + "weekly_totals": weekly, + } + + +def _star_distribution(values: list[int]) -> dict[str, Any]: + if not values: + return { + "count": 0, + "min": None, + "median": None, + "max": None, + } + ordered = sorted(values) + mid = len(ordered) // 2 + median = ordered[mid] if len(ordered) % 2 else round((ordered[mid - 1] + ordered[mid]) / 2, 1) + return { + "count": len(values), + "min": min(values), + "median": median, + "max": max(values), + } + + +def _rate(numerator: int, denominator: int) -> float | None: + if denominator == 0: + return None + return round(numerator / denominator, 4) + + +def _profile_url(login: str) -> str: + return f"https://github.com/{login}" + + +def _repo_search_url(repo: str, login: str, *, is_pr: bool) -> str: + path = "pulls" if is_pr else "issues" + query = f"is:{'pr' if is_pr else 'issue'} author:{login}" + return f"https://github.com/{repo}/{path}?q={urllib.parse.quote_plus(query)}" + + +def _coerce_int(value: Any) -> int | None: + if value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _display_int(value: Any) -> str: + number = _coerce_int(value) + return str(number) if number is not None else "?" + + +def _display_rate(value: Any) -> str: + if value is None: + return "n/a" + try: + return f"{float(value):.0%}" + except (TypeError, ValueError): + return "n/a" + + +def _report_reason_label(value: str) -> str: + if value == "first_seen_in_snapshot": + return "first seen this snapshot" + return value or "unknown" + + +def _snapshot_reference_time(snapshot: dict[str, Any]) -> datetime: + snapshot_id = str(snapshot.get("snapshot_id") or "") + try: + return datetime.strptime(snapshot_id, "%Y%m%dT%H%M%SZ").replace(tzinfo=UTC) + except ValueError: + pass + manifest = snapshot.get("manifest") or {} + extracted_at = manifest.get("extracted_at") + if isinstance(extracted_at, str): + parsed = _coerce_datetime(extracted_at) + if parsed is not None: + return parsed + timestamps = [ + timestamp + for row in snapshot.get("pull_requests", []) + for timestamp in ( + _coerce_datetime(row.get("created_at")), + _coerce_datetime(row.get("updated_at")), + ) + if timestamp is not None + ] + return max(timestamps, default=datetime.now(tz=UTC)) + + +def _coerce_datetime(value: Any) -> datetime | None: + if not value or not isinstance(value, str): + return None + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + + +def _track_repo_association(row: dict[str, Any], value: Any) -> None: + if not value: + return + row["repo_association_values"].add(str(value)) + + +def _select_repo_association(values: list[str]) -> str | None: + if not values: + return None + priority = { + "OWNER": 70, + "MEMBER": 60, + "COLLABORATOR": 50, + "CONTRIBUTOR": 40, + "FIRST_TIME_CONTRIBUTOR": 30, + "FIRST_TIMER": 20, + "NONE": 10, + } + return max(values, key=lambda value: (priority.get(value, 0), value)) + + +def _iso_now() -> str: + return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def _report_log(message: str) -> None: + stamp = datetime.now(tz=UTC).strftime("%H:%M:%SZ") + print(f"[{stamp}] {message}", flush=True) diff --git a/src/slop_farmer/reports/pr_heuristics.py b/src/slop_farmer/reports/pr_heuristics.py new file mode 100644 index 0000000000000000000000000000000000000000..1b98aafed27a6166d840236a4690a565693ec084 --- /dev/null +++ 
b/src/slop_farmer/reports/pr_heuristics.py
@@ -0,0 +1,224 @@
+from __future__ import annotations
+
+import re
+from collections import defaultdict
+from collections.abc import Mapping, Sequence
+from dataclasses import dataclass
+from typing import Any
+
+HTML_COMMENT_PATTERN = re.compile(r"<!--.*?-->", re.DOTALL)
+DEFAULT_TEMPLATE_CLEANUP_MODE = "merge_defaults"
+DEFAULT_STRIP_HTML_COMMENTS = True
+DEFAULT_TRIM_CLOSING_REFERENCE_PREFIX = True
+DEFAULT_TEMPLATE_SECTION_PATTERNS = (
+    r"^#{1,6}\s*code agent policy\s*$",
+    r"^#{1,6}\s*before submitting\s*$",
+    r"^#{1,6}\s*who can review\?\s*$",
+)
+DEFAULT_TEMPLATE_LINE_PATTERNS = (
+    r"^#{1,6}\s*what does this pr do\?\s*$",
+    r"^(?:fix(?:e[sd])?|close[sd]?|resolve[sd]?)\s*#\s*\(?issue\)?\s*$",
+)
+PR_TEMPLATE_CLOSING_REFERENCE_PREFIX_PATTERN = re.compile(
+    r"""
+    ^
+    (?P<prefix>\s*(?:fix(?:e[sd])?|close[sd]?|resolve[sd]?)\s+)
+    (?:
+        (?:[a-z0-9_.-]+/[a-z0-9_.-]+)?\#\s*\d+
+        (?:\s*(?:,|and)\s*(?:[a-z0-9_.-]+/[a-z0-9_.-]+)?\#\s*\d+)*
+    )
+    \s*(?:[:\-\u2013\u2014]\s*)?
+    (?P<rest>.*)
+    $
+    """,
+    re.IGNORECASE | re.VERBOSE,
+)
+
+
+def compile_casefold_patterns(patterns: Sequence[str]) -> tuple[re.Pattern[str], ...]:
+    return tuple(re.compile(pattern, re.IGNORECASE) for pattern in patterns if pattern.strip())
+
+
+@dataclass(slots=True, frozen=True)
+class TemplateCleanupSettings:
+    strip_html_comments: bool
+    trim_closing_reference_prefix: bool
+    section_patterns: tuple[re.Pattern[str], ...]
+    line_patterns: tuple[re.Pattern[str], ...]
+
+
+def build_template_cleanup_settings(
+    *,
+    mode: str = DEFAULT_TEMPLATE_CLEANUP_MODE,
+    strip_html_comments: bool = DEFAULT_STRIP_HTML_COMMENTS,
+    trim_closing_reference_prefix: bool = DEFAULT_TRIM_CLOSING_REFERENCE_PREFIX,
+    section_patterns: Sequence[str] = (),
+    line_patterns: Sequence[str] = (),
+) -> TemplateCleanupSettings:
+    if mode == "off":
+        return TemplateCleanupSettings(
+            strip_html_comments=False,
+            trim_closing_reference_prefix=False,
+            section_patterns=(),
+            line_patterns=(),
+        )
+    if mode == "merge_defaults":
+        section_sources = (*DEFAULT_TEMPLATE_SECTION_PATTERNS, *section_patterns)
+        line_sources = (*DEFAULT_TEMPLATE_LINE_PATTERNS, *line_patterns)
+    elif mode == "replace_defaults":
+        section_sources = tuple(section_patterns)
+        line_sources = tuple(line_patterns)
+    else:
+        raise ValueError(f"Unknown PR template cleanup mode: {mode}")
+    return TemplateCleanupSettings(
+        strip_html_comments=strip_html_comments,
+        trim_closing_reference_prefix=trim_closing_reference_prefix,
+        section_patterns=compile_casefold_patterns(section_sources),
+        line_patterns=compile_casefold_patterns(line_sources),
+    )
+
+
+def strip_pull_request_template(
+    body: str | None,
+    *,
+    settings: TemplateCleanupSettings | None = None,
+) -> str:
+    text = (body or "").replace("\r\n", "\n").replace("\r", "\n")
+    if not text:
+        return ""
+    cleanup = settings or build_template_cleanup_settings()
+    if cleanup.strip_html_comments:
+        text = HTML_COMMENT_PATTERN.sub("\n", text)
+    cleaned_lines: list[str] = []
+    skip_section = False
+    for raw_line in text.splitlines():
+        line = raw_line.rstrip()
+        normalized = line.strip()
+        if any(pattern.match(normalized) for pattern in cleanup.line_patterns):
+            continue
+        if any(pattern.match(normalized) for pattern in cleanup.section_patterns):
+            skip_section = True
+            continue
+        if skip_section:
+            if normalized.startswith("#"):
+                skip_section = False
+            else:
+                continue
+        if cleanup.trim_closing_reference_prefix:
+            trimmed_reference = _trim_closing_reference_prefix(normalized)
+            if trimmed_reference == "":
continue + if trimmed_reference is not None: + cleaned_lines.append(trimmed_reference) + continue + cleaned_lines.append(line) + return collapse_blank_lines(cleaned_lines) + + +def collapse_blank_lines(lines: list[str]) -> str: + collapsed: list[str] = [] + previous_blank = True + for line in lines: + stripped = line.strip() + if not stripped: + if previous_blank: + continue + collapsed.append("") + previous_blank = True + continue + collapsed.append(stripped) + previous_blank = False + while collapsed and not collapsed[-1]: + collapsed.pop() + return "\n".join(collapsed) + + +def _trim_closing_reference_prefix(line: str) -> str | None: + match = PR_TEMPLATE_CLOSING_REFERENCE_PREFIX_PATTERN.match(line) + if match is None: + return None + return match.group("rest").strip() + + +@dataclass(slots=True, frozen=True) +class ClusterSuppressionRule: + id: str + title_patterns: tuple[re.Pattern[str], ...] = () + body_patterns: tuple[re.Pattern[str], ...] = () + path_patterns: tuple[re.Pattern[str], ...] = () + + def matches(self, *, title: str, body: str, paths: Sequence[str]) -> bool: + if not (self.title_patterns or self.body_patterns or self.path_patterns): + return False + if self.title_patterns and not any( + pattern.search(title) for pattern in self.title_patterns + ): + return False + if self.body_patterns and not any(pattern.search(body) for pattern in self.body_patterns): + return False + return not self.path_patterns or any( + pattern.search(path) for pattern in self.path_patterns for path in paths + ) + + +def compile_cluster_suppression_rules( + payload: Sequence[Mapping[str, Any]], +) -> tuple[ClusterSuppressionRule, ...]: + rules: list[ClusterSuppressionRule] = [] + for index, raw_rule in enumerate(payload, start=1): + rule_id = str(raw_rule.get("id") or raw_rule.get("name") or f"rule-{index}").strip() + if not rule_id: + rule_id = f"rule-{index}" + rules.append( + ClusterSuppressionRule( + id=rule_id, + title_patterns=compile_casefold_patterns( + _string_list(raw_rule.get("title_patterns")) + ), + body_patterns=compile_casefold_patterns( + _string_list(raw_rule.get("body_patterns")) + ), + path_patterns=compile_casefold_patterns( + _string_list(raw_rule.get("path_patterns")) + ), + ) + ) + return tuple(rules) + + +def suppressed_pull_request_reasons( + pull_requests: Sequence[Mapping[str, Any]], + pr_files: Sequence[Mapping[str, Any]], + rules: Sequence[ClusterSuppressionRule], +) -> dict[int, list[str]]: + if not rules: + return {} + paths_by_pr: defaultdict[int, list[str]] = defaultdict(list) + for row in pr_files: + pr_number = row.get("pull_request_number") + filename = str(row.get("filename") or "").strip() + if pr_number is None or not filename: + continue + paths_by_pr[int(pr_number)].append(filename) + suppressed: dict[int, list[str]] = {} + for row in pull_requests: + number = row.get("number") + if number is None: + continue + pr_number = int(number) + title = str(row.get("title") or "") + body = str(row.get("body") or "") + matched = [ + rule.id + for rule in rules + if rule.matches(title=title, body=body, paths=paths_by_pr.get(pr_number, [])) + ] + if matched: + suppressed[pr_number] = matched + return suppressed + + +def _string_list(value: Any) -> tuple[str, ...]: + if not isinstance(value, list): + return () + return tuple(str(item) for item in value if str(item).strip()) diff --git a/src/slop_farmer/reports/pr_scope.py b/src/slop_farmer/reports/pr_scope.py new file mode 100644 index 0000000000000000000000000000000000000000..2d460b732a827da94197fbf26181f515ae0a4b4f 
--- /dev/null
+++ b/src/slop_farmer/reports/pr_scope.py
@@ -0,0 +1,793 @@
+"""Holistic pull-request scope clustering.
+
+This module answers a different question than duplicate-PR detection:
+
+    "Which open PRs look similar in code scope and touched areas?"
+
+The algorithm intentionally uses the whole open-PR queue at once instead of
+only raw pairwise overlap:
+
+1. Build a sparse "scope vector" for each open PR from:
+   - exact filenames
+   - directory prefixes
+   - coarse file chunks derived from unified-diff hunk ranges
+2. Weight every feature by inverse PR frequency so very common files and
+   directories contribute less than rare, discriminative ones.
+3. Blend content similarity with simple "shape" features:
+   - total changed-line size
+   - changed-file count
+   - touched-directory count
+   - concentration in the dominant leaf directory
+4. Build a mutual k-nearest-neighbor graph from those scores.
+5. Keep only strong or locally-supported edges so broad PRs do not bridge
+   unrelated groups.
+6. Split graph components greedily around local medoids to avoid weak chains.
+
+The result is a conservative "similar scope" clustering intended for queue
+analysis and review routing, not a strict duplicate classifier.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from collections import Counter, defaultdict
+from collections.abc import Mapping, Sequence
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from math import log, sqrt
+from pathlib import Path, PurePosixPath
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from slop_farmer.data.parquet_io import read_json, read_parquet_rows
+from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir
+from slop_farmer.reports.pr_heuristics import (
+    compile_cluster_suppression_rules,
+    suppressed_pull_request_reasons,
+)
+
+HUNK_HEADER_PATTERN = re.compile(r"^@@ -\d+(?:,\d+)? \+(?P<start>\d+)(?:,(?P<count>\d+))? @@")
+__all__ = [
+    "PrScopeCluster",
+    "PrScopeClusterOptions",
+    "PrScopePair",
+    "build_pr_scope_clusters",
+    "run_pr_scope_report",
+]
+
+
+@dataclass(slots=True)
+class PrScopeClusterOptions:
+    """Tuning knobs for holistic PR scope clustering.
+ + The defaults aim to be conservative: + - only open, non-draft PRs are clustered + - exact files matter most + - directory and chunk features let nearby-but-not-identical changes group + - cluster edges need either high similarity or mutual-neighbor support + """ + + include_closed: bool = False + include_drafts: bool = False + chunk_size: int = 80 + max_neighbors: int = 5 + min_similarity: float = 0.30 + pair_similarity: float = 0.40 + strong_similarity: float = 0.55 + min_shared_neighbors: int = 1 + expansion_similarity: float = 0.34 + min_cluster_average_similarity: float = 0.32 + min_cluster_size: int = 2 + max_feature_df_ratio: float = 0.90 + file_weight: float = 1.0 + directory_weight: float = 0.60 + chunk_weight: float = 0.75 + content_weight: float = 0.70 + size_weight: float = 0.15 + breadth_weight: float = 0.10 + concentration_weight: float = 0.05 + max_shared_features: int = 8 + + +class PrScopePair(BaseModel): + left_pr_number: int + right_pr_number: int + similarity: float + content_similarity: float + size_similarity: float + breadth_similarity: float + concentration_similarity: float + shared_filenames: list[str] = Field(default_factory=list) + shared_directories: list[str] = Field(default_factory=list) + + +class PrScopeCluster(BaseModel): + cluster_id: str + pr_numbers: list[int] + representative_pr_number: int + average_similarity: float + summary: str + shared_filenames: list[str] = Field(default_factory=list) + shared_directories: list[str] = Field(default_factory=list) + pairwise: list[PrScopePair] = Field(default_factory=list) + + +@dataclass(slots=True) +class _ScopeProfile: + number: int + total_changed_lines: int + file_count: int + directory_count: int + dominant_dir_share: float + filenames: set[str] + directories: set[str] + raw_vector: dict[str, float] + vector: dict[str, float] + + +@dataclass(slots=True) +class _ScopeComparison: + left: int + right: int + similarity: float + content_similarity: float + size_similarity: float + breadth_similarity: float + concentration_similarity: float + shared_filenames: list[str] + shared_directories: list[str] + + +def build_pr_scope_clusters( + pull_requests: Sequence[Mapping[str, Any]], + pr_files: Sequence[Mapping[str, Any]], + *, + options: PrScopeClusterOptions | None = None, + suppression_rules: Sequence[Mapping[str, Any]] = (), +) -> list[PrScopeCluster]: + """Cluster open PRs by weighted file-scope similarity. 
+ + This is intentionally holistic: + - feature weights depend on the full open-PR set + - similarity blends exact-file, directory, and chunk overlap + - graph edges depend on each PR's neighborhood, not only raw pair scores + """ + + settings = options or PrScopeClusterOptions() + suppressed_prs = suppressed_pull_request_reasons( + pull_requests, + pr_files, + compile_cluster_suppression_rules(suppression_rules), + ) + active_prs = [ + row + for row in pull_requests + if _include_pull_request(row, settings) and int(row["number"]) not in suppressed_prs + ] + if len(active_prs) < 2: + return [] + + files_by_pr: defaultdict[int, list[Mapping[str, Any]]] = defaultdict(list) + active_numbers = {int(row["number"]) for row in active_prs if row.get("number") is not None} + for row in pr_files: + pr_number = row.get("pull_request_number") + if pr_number is None: + continue + number = int(pr_number) + if number in active_numbers: + files_by_pr[number].append(row) + + profiles = [ + _build_scope_profile(row, files_by_pr.get(int(row["number"]), []), settings) + for row in active_prs + ] + profiles_by_number = {profile.number: profile for profile in profiles} + feature_idf = _feature_idf(profiles, settings) + for profile in profiles: + profile.vector = _normalize_vector( + { + feature: weight * feature_idf[feature] + for feature, weight in profile.raw_vector.items() + if feature in feature_idf + } + ) + + comparisons = _pairwise_comparisons(profiles, settings) + if not comparisons: + return [] + comparison_map = {(entry.left, entry.right): entry for entry in comparisons} + + top_neighbors = _top_neighbors(comparisons, settings) + edges = _cluster_edges(comparisons, top_neighbors, settings) + if not edges: + return [] + + clusters: list[PrScopeCluster] = [] + for component in _connected_components(sorted(active_numbers), edges): + if len(component) < settings.min_cluster_size: + continue + member_sets = _refine_component(component, comparison_map, settings) + for members in member_sets: + clusters.append( + _cluster_entry( + members=members, + profiles_by_number=profiles_by_number, + comparison_map=comparison_map, + feature_idf=feature_idf, + settings=settings, + ) + ) + return sorted( + clusters, + key=lambda cluster: ( + -len(cluster.pr_numbers), + -cluster.average_similarity, + cluster.cluster_id, + ), + ) + + +def run_pr_scope_report(options: Any) -> Path: + """Resolve a snapshot, cluster open PR scopes, and write a JSON report.""" + + snapshot_dir = _resolve_snapshot_dir(options) + snapshot = _load_snapshot_context(snapshot_dir) + clusters = build_pr_scope_clusters( + snapshot["pull_requests"], + snapshot["pr_files"], + suppression_rules=options.cluster_suppression_rules, + ) + output_path = (options.output or (snapshot_dir / "pr-scope-clusters.json")).resolve() + output_path.parent.mkdir(parents=True, exist_ok=True) + payload = { + "repo": snapshot["repo"], + "snapshot_id": snapshot["snapshot_id"], + "generated_at": datetime.now(tz=UTC) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z"), + "cluster_count": len(clusters), + "pr_scope_clusters": [cluster.model_dump(mode="json") for cluster in clusters], + } + output_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + return output_path + + +def _resolve_snapshot_dir(options: Any) -> Path: + return resolve_snapshot_source_dir( + snapshot_dir=options.snapshot_dir, + local_snapshots_root=options.output_dir.resolve() / "snapshots", + hf_repo_id=options.hf_repo_id, + hf_revision=options.hf_revision, + 
hf_materialize_dir=options.hf_materialize_dir, + hf_output_dir=options.output_dir, + ) + + +def _load_snapshot_context(snapshot_dir: Path) -> dict[str, Any]: + manifest_path = snapshot_dir / "manifest.json" + manifest = read_json(manifest_path) if manifest_path.exists() else {} + pull_requests = read_parquet_rows(snapshot_dir / "pull_requests.parquet") + pr_files = read_parquet_rows(snapshot_dir / "pr_files.parquet") + repo = manifest.get("repo") or (pull_requests[0]["repo"] if pull_requests else None) or "" + snapshot_id = manifest.get("snapshot_id") or snapshot_dir.name + return { + "repo": repo, + "snapshot_id": snapshot_id, + "pull_requests": pull_requests, + "pr_files": pr_files, + } + + +def _include_pull_request(row: Mapping[str, Any], options: PrScopeClusterOptions) -> bool: + if row.get("number") is None: + return False + if not options.include_closed and str(row.get("state") or "").lower() != "open": + return False + return options.include_drafts or not bool(row.get("draft")) + + +def _build_scope_profile( + pr_row: Mapping[str, Any], + file_rows: Sequence[Mapping[str, Any]], + options: PrScopeClusterOptions, +) -> _ScopeProfile: + number = int(pr_row["number"]) + file_lines: defaultdict[str, int] = defaultdict(int) + prefix_directory_lines: defaultdict[str, int] = defaultdict(int) + leaf_directory_lines: defaultdict[str, int] = defaultdict(int) + chunk_lines: defaultdict[str, int] = defaultdict(int) + + for row in file_rows: + filename = str(row.get("filename") or "").strip() + if not filename: + continue + changed_lines = _file_changed_lines(row) + file_lines[filename] += changed_lines + leaf_directory = _leaf_directory(filename) + leaf_directory_lines[leaf_directory] += changed_lines + for directory in _directory_prefixes(filename): + prefix_directory_lines[directory] += changed_lines + patch = row.get("patch") + if patch: + for chunk_key, chunk_size in _chunk_line_weights( + filename, str(patch), options.chunk_size + ).items(): + chunk_lines[chunk_key] += chunk_size + + if not file_lines: + fallback_total = max( + 1, int(pr_row.get("additions") or 0) + int(pr_row.get("deletions") or 0) + ) + file_count = max(1, int(pr_row.get("changed_files") or 0)) + return _ScopeProfile( + number=number, + total_changed_lines=fallback_total, + file_count=file_count, + directory_count=0, + dominant_dir_share=0.0, + filenames=set(), + directories=set(), + raw_vector={}, + vector={}, + ) + + raw_vector: dict[str, float] = {} + for filename, changed_lines in file_lines.items(): + raw_vector[f"file:{filename}"] = sqrt(changed_lines) * options.file_weight + for directory, changed_lines in prefix_directory_lines.items(): + raw_vector[f"dir:{directory}"] = ( + raw_vector.get(f"dir:{directory}", 0.0) + sqrt(changed_lines) * options.directory_weight + ) + for chunk_key, changed_lines in chunk_lines.items(): + raw_vector[chunk_key] = ( + raw_vector.get(chunk_key, 0.0) + sqrt(changed_lines) * options.chunk_weight + ) + + total_changed_lines = sum(file_lines.values()) + dominant_dir_share = 0.0 + if leaf_directory_lines and total_changed_lines > 0: + dominant_dir_share = max(leaf_directory_lines.values()) / total_changed_lines + return _ScopeProfile( + number=number, + total_changed_lines=total_changed_lines, + file_count=len(file_lines), + directory_count=len(leaf_directory_lines), + dominant_dir_share=dominant_dir_share, + filenames=set(file_lines), + directories=set(prefix_directory_lines), + raw_vector=raw_vector, + vector={}, + ) + + +def _file_changed_lines(row: Mapping[str, Any]) -> int: + 
additions = int(row.get("additions") or 0) + deletions = int(row.get("deletions") or 0) + changes = int(row.get("changes") or 0) + total = additions + deletions + if total > 0: + return total + if changes > 0: + return changes + return 1 + + +def _directory_prefixes(filename: str) -> list[str]: + parts = PurePosixPath(filename).parts[:-1] + prefixes: list[str] = [] + current: list[str] = [] + for part in parts: + current.append(part) + prefixes.append("/".join(current)) + return prefixes + + +def _leaf_directory(filename: str) -> str: + parts = PurePosixPath(filename).parts[:-1] + return "/".join(parts) if parts else "." + + +def _chunk_line_weights(filename: str, patch: str, chunk_size: int) -> dict[str, int]: + weights: defaultdict[str, int] = defaultdict(int) + for start, end in _patch_ranges(patch): + current = start + while current <= end: + chunk_index = (current - 1) // chunk_size + chunk_start = chunk_index * chunk_size + 1 + chunk_end = chunk_start + chunk_size - 1 + overlap_end = min(end, chunk_end) + weights[f"chunk:{filename}:{chunk_start}-{chunk_end}"] += overlap_end - current + 1 + current = overlap_end + 1 + return weights + + +def _patch_ranges(patch: str) -> list[tuple[int, int]]: + ranges: list[tuple[int, int]] = [] + for line in patch.splitlines(): + match = HUNK_HEADER_PATTERN.match(line) + if match is None: + continue + start = int(match.group("start")) + count = int(match.group("count") or "1") + if count <= 0: + continue + ranges.append((start, start + count - 1)) + return ranges + + +def _feature_idf( + profiles: Sequence[_ScopeProfile], options: PrScopeClusterOptions +) -> dict[str, float]: + document_frequency: Counter[str] = Counter() + profile_count = len(profiles) + for profile in profiles: + document_frequency.update(profile.raw_vector.keys()) + + idf: dict[str, float] = {} + for feature, frequency in document_frequency.items(): + if frequency / profile_count > options.max_feature_df_ratio: + continue + idf[feature] = log((profile_count + 1) / (frequency + 1)) + 1.0 + return idf + + +def _normalize_vector(vector: Mapping[str, float]) -> dict[str, float]: + norm = sqrt(sum(weight * weight for weight in vector.values())) + if norm <= 0.0: + return {} + return {feature: weight / norm for feature, weight in vector.items() if weight > 0.0} + + +def _pairwise_comparisons( + profiles: Sequence[_ScopeProfile], + options: PrScopeClusterOptions, +) -> list[_ScopeComparison]: + comparisons: list[_ScopeComparison] = [] + weight_total = ( + options.content_weight + + options.size_weight + + options.breadth_weight + + options.concentration_weight + ) + if weight_total <= 0: + raise ValueError("PR scope similarity weights must sum to a positive value.") + + ordered = sorted(profiles, key=lambda profile: profile.number) + for index, left in enumerate(ordered): + for right in ordered[index + 1 :]: + content_similarity = _cosine_similarity(left.vector, right.vector) + if ( + content_similarity <= 0.0 + and not left.filenames.intersection(right.filenames) + and not left.directories.intersection(right.directories) + ): + continue + size_similarity = _ratio_similarity(left.total_changed_lines, right.total_changed_lines) + breadth_similarity = ( + _ratio_similarity(left.file_count, right.file_count) + + _ratio_similarity(left.directory_count, right.directory_count) + ) / 2.0 + concentration_similarity = max( + 0.0, 1.0 - abs(left.dominant_dir_share - right.dominant_dir_share) + ) + similarity = ( + content_similarity * options.content_weight + + size_similarity * 
options.size_weight + + breadth_similarity * options.breadth_weight + + concentration_similarity * options.concentration_weight + ) / weight_total + comparisons.append( + _ScopeComparison( + left=left.number, + right=right.number, + similarity=similarity, + content_similarity=content_similarity, + size_similarity=size_similarity, + breadth_similarity=breadth_similarity, + concentration_similarity=concentration_similarity, + shared_filenames=sorted(left.filenames & right.filenames)[:10], + shared_directories=sorted( + left.directories & right.directories, + key=lambda value: (-value.count("/"), value), + )[:10], + ) + ) + return comparisons + + +def _cosine_similarity(left: Mapping[str, float], right: Mapping[str, float]) -> float: + if not left or not right: + return 0.0 + if len(left) > len(right): + left, right = right, left + return sum(weight * right.get(feature, 0.0) for feature, weight in left.items()) + + +def _ratio_similarity(left: int, right: int) -> float: + largest = max(left, right) + if largest <= 0: + return 1.0 + return min(left, right) / largest + + +def _top_neighbors( + comparisons: Sequence[_ScopeComparison], + options: PrScopeClusterOptions, +) -> dict[int, set[int]]: + ranked: defaultdict[int, list[tuple[float, int]]] = defaultdict(list) + for entry in comparisons: + if entry.similarity < options.min_similarity: + continue + ranked[entry.left].append((entry.similarity, entry.right)) + ranked[entry.right].append((entry.similarity, entry.left)) + + neighbors: dict[int, set[int]] = {} + for number, items in ranked.items(): + sorted_items = sorted(items, key=lambda item: (-item[0], item[1])) + neighbors[number] = {other for _, other in sorted_items[: options.max_neighbors]} + return neighbors + + +def _cluster_edges( + comparisons: Sequence[_ScopeComparison], + top_neighbors: Mapping[int, set[int]], + options: PrScopeClusterOptions, +) -> set[tuple[int, int]]: + edges: set[tuple[int, int]] = set() + for entry in comparisons: + if entry.similarity < options.min_similarity: + continue + left_neighbors = top_neighbors.get(entry.left, set()) + right_neighbors = top_neighbors.get(entry.right, set()) + if entry.similarity >= options.strong_similarity: + edges.add((entry.left, entry.right)) + continue + if entry.right not in left_neighbors or entry.left not in right_neighbors: + continue + shared_neighbor_count = len(left_neighbors & right_neighbors) + if ( + shared_neighbor_count >= options.min_shared_neighbors + or entry.similarity >= options.pair_similarity + ): + edges.add((entry.left, entry.right)) + return edges + + +def _connected_components(numbers: Sequence[int], edges: set[tuple[int, int]]) -> list[list[int]]: + adjacency: defaultdict[int, set[int]] = defaultdict(set) + for left, right in edges: + adjacency[left].add(right) + adjacency[right].add(left) + + components: list[list[int]] = [] + seen: set[int] = set() + for number in sorted(numbers): + if number in seen or number not in adjacency: + continue + stack = [number] + component: list[int] = [] + while stack: + current = stack.pop() + if current in seen: + continue + seen.add(current) + component.append(current) + stack.extend(sorted(adjacency[current] - seen, reverse=True)) + if component: + components.append(sorted(component)) + return components + + +def _refine_component( + members: Sequence[int], + comparison_map: Mapping[tuple[int, int], _ScopeComparison], + options: PrScopeClusterOptions, +) -> list[list[int]]: + if len(members) <= options.min_cluster_size: + average_similarity = 
_cluster_average_similarity(members, comparison_map) + if average_similarity >= options.min_cluster_average_similarity: + return [sorted(members)] + return [] + + remaining = set(members) + refined: list[list[int]] = [] + while len(remaining) >= options.min_cluster_size: + seed = max( + sorted(remaining), + key=lambda number: ( + _mean_similarity(number, remaining - {number}, comparison_map), + -number, + ), + ) + cluster = [seed] + candidates = sorted( + remaining - {seed}, + key=lambda number: (_similarity(seed, number, comparison_map), -number), + reverse=True, + ) + for candidate in candidates: + mean_to_cluster = _mean_similarity(candidate, set(cluster), comparison_map) + if mean_to_cluster >= options.expansion_similarity: + cluster.append(candidate) + cluster = sorted(cluster) + average_similarity = _cluster_average_similarity(cluster, comparison_map) + if ( + len(cluster) >= options.min_cluster_size + and average_similarity >= options.min_cluster_average_similarity + ): + refined.append(cluster) + remaining.difference_update(cluster) + else: + remaining.remove(seed) + return refined + + +def _mean_similarity( + number: int, + others: set[int], + comparison_map: Mapping[tuple[int, int], _ScopeComparison], +) -> float: + if not others: + return 0.0 + return sum(_similarity(number, other, comparison_map) for other in others) / len(others) + + +def _cluster_average_similarity( + members: Sequence[int], + comparison_map: Mapping[tuple[int, int], _ScopeComparison], +) -> float: + if len(members) < 2: + return 0.0 + total = 0.0 + comparisons = 0 + ordered = sorted(members) + for index, left in enumerate(ordered): + for right in ordered[index + 1 :]: + total += _similarity(left, right, comparison_map) + comparisons += 1 + if comparisons == 0: + return 0.0 + return total / comparisons + + +def _similarity( + left: int, + right: int, + comparison_map: Mapping[tuple[int, int], _ScopeComparison], +) -> float: + entry = comparison_map.get((left, right)) or comparison_map.get((right, left)) + return entry.similarity if entry is not None else 0.0 + + +def _cluster_entry( + *, + members: Sequence[int], + profiles_by_number: Mapping[int, _ScopeProfile], + comparison_map: Mapping[tuple[int, int], _ScopeComparison], + feature_idf: Mapping[str, float], + settings: PrScopeClusterOptions, +) -> PrScopeCluster: + ordered = sorted(members) + pairwise: list[PrScopePair] = [] + for index, left in enumerate(ordered): + for right in ordered[index + 1 :]: + entry = comparison_map.get((left, right)) or comparison_map.get((right, left)) + if entry is None: + continue + pairwise.append( + PrScopePair( + left_pr_number=entry.left, + right_pr_number=entry.right, + similarity=round(entry.similarity, 3), + content_similarity=round(entry.content_similarity, 3), + size_similarity=round(entry.size_similarity, 3), + breadth_similarity=round(entry.breadth_similarity, 3), + concentration_similarity=round(entry.concentration_similarity, 3), + shared_filenames=entry.shared_filenames, + shared_directories=entry.shared_directories, + ) + ) + pairwise.sort( + key=lambda entry: (-entry.similarity, entry.left_pr_number, entry.right_pr_number) + ) + average_similarity = _cluster_average_similarity(ordered, comparison_map) + representative_pr_number = _representative_pr_number(ordered, comparison_map) + shared_filenames = _shared_exact_features( + ordered, + profiles_by_number, + feature_idf, + prefix="file:", + limit=settings.max_shared_features, + ) + shared_directories = _shared_exact_features( + ordered, + profiles_by_number, + 
feature_idf, + prefix="dir:", + limit=settings.max_shared_features, + ) + return PrScopeCluster( + cluster_id=f"pr-scope-{ordered[0]}-{len(ordered)}", + pr_numbers=ordered, + representative_pr_number=representative_pr_number, + average_similarity=round(average_similarity, 3), + summary=_cluster_summary( + ordered, representative_pr_number, shared_filenames, shared_directories + ), + shared_filenames=shared_filenames, + shared_directories=shared_directories, + pairwise=pairwise, + ) + + +def _representative_pr_number( + members: Sequence[int], + comparison_map: Mapping[tuple[int, int], _ScopeComparison], +) -> int: + return max( + sorted(members), + key=lambda number: ( + _mean_similarity(number, set(members) - {number}, comparison_map), + -number, + ), + ) + + +def _shared_exact_features( + members: Sequence[int], + profiles_by_number: Mapping[int, _ScopeProfile], + feature_idf: Mapping[str, float], + *, + prefix: str, + limit: int, +) -> list[str]: + counts: Counter[str] = Counter() + for number in members: + profile = profiles_by_number[number] + values = profile.filenames if prefix == "file:" else profile.directories + counts.update(values) + minimum_count = 2 if len(members) > 1 else 1 + shared = [ + value + for value, count in counts.items() + if count >= minimum_count and f"{prefix}{value}" in feature_idf + ] + if prefix == "dir:": + shared.sort( + key=lambda value: ( + -counts[value], + -value.count("/"), + -feature_idf.get(f"{prefix}{value}", 0.0), + value, + ) + ) + else: + shared.sort( + key=lambda value: (-counts[value], -feature_idf.get(f"{prefix}{value}", 0.0), value) + ) + return shared[:limit] + + +def _cluster_summary( + members: Sequence[int], + representative_pr_number: int, + shared_filenames: Sequence[str], + shared_directories: Sequence[str], +) -> str: + count = len(members) + if shared_filenames: + preview = ", ".join(f"`{value}`" for value in shared_filenames[:3]) + return ( + f"{count} open PRs share weighted file overlap around {preview}; " + f"representative PR #{representative_pr_number}." + ) + if shared_directories: + preview = ", ".join(f"`{value}`" for value in shared_directories[:3]) + return ( + f"{count} open PRs cluster in {preview} with similar change breadth; " + f"representative PR #{representative_pr_number}." + ) + return f"{count} open PRs have similar weighted scope; representative PR #{representative_pr_number}." 
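The module docstring for `pr_scope.py` above describes the clustering steps in prose. The short sketch below exercises the same public entry point, `build_pr_scope_clusters`, on hand-made rows so the expected output shape is easier to see. It is illustrative only: the PR numbers, file paths, and line counts are invented, and it assumes the `slop_farmer` package from this diff is importable (for example after `uv sync`).

```python
# Minimal sketch: two open PRs touch the same file, a third touches an
# unrelated doc file. Expect one cluster containing PRs 101 and 102.
from slop_farmer.reports.pr_scope import PrScopeClusterOptions, build_pr_scope_clusters

pull_requests = [
    {"number": 101, "state": "open", "draft": False},
    {"number": 102, "state": "open", "draft": False},
    {"number": 103, "state": "open", "draft": False},
]
pr_files = [
    # PRs 101 and 102 edit the same file; PR 103 edits an unrelated doc.
    # "additions"/"deletions" feed the sqrt(changed lines) feature weights.
    {"pull_request_number": 101, "filename": "src/pipelines/foo.py",
     "additions": 8, "deletions": 2, "changes": 10},
    {"pull_request_number": 102, "filename": "src/pipelines/foo.py",
     "additions": 10, "deletions": 2, "changes": 12},
    {"pull_request_number": 103, "filename": "docs/quicktour.md",
     "additions": 5, "deletions": 0, "changes": 5},
]

clusters = build_pr_scope_clusters(pull_requests, pr_files, options=PrScopeClusterOptions())
for cluster in clusters:
    print(cluster.cluster_id, cluster.pr_numbers, cluster.average_similarity)
    print(cluster.summary)
```

With the default options, the two PRs that edit the same file end up with near-identical weighted scope vectors, so their blended similarity clears the strong-edge threshold and they form one cluster; the documentation-only PR shares no weighted features and is left unclustered.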
diff --git a/src/slop_farmer/reports/pr_search_scope.py b/src/slop_farmer/reports/pr_search_scope.py new file mode 100644 index 0000000000000000000000000000000000000000..bcbe31af39b85454fa9e221fdc9751befeafda57 --- /dev/null +++ b/src/slop_farmer/reports/pr_search_scope.py @@ -0,0 +1,703 @@ +from __future__ import annotations + +import json +from collections import defaultdict +from collections.abc import Mapping, Sequence +from dataclasses import asdict +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from slop_farmer.config import PrSearchRefreshOptions +from slop_farmer.data.parquet_io import read_json, read_parquet_rows +from slop_farmer.data.snapshot_source import resolve_snapshot_source_dir +from slop_farmer.reports.pr_heuristics import ( + compile_cluster_suppression_rules, + suppressed_pull_request_reasons, +) +from slop_farmer.reports.pr_scope import ( + PrScopeCluster, + PrScopeClusterOptions, + _build_scope_profile, + _feature_idf, + _include_pull_request, + _normalize_vector, + _pairwise_comparisons, + build_pr_scope_clusters, +) + +FEATURE_VERSION = "pr_scope_v1" +CANDIDATE_FORMULA_VERSION = "scope_cluster_candidate_v1" +DEFAULT_CANDIDATE_LIMIT = 5 + + +def resolve_pr_search_snapshot_dir(options: PrSearchRefreshOptions) -> Path: + return resolve_snapshot_source_dir( + snapshot_dir=options.snapshot_dir, + local_snapshots_root=options.output_dir.resolve() / "snapshots", + hf_repo_id=options.hf_repo_id, + hf_revision=options.hf_revision, + hf_materialize_dir=options.hf_materialize_dir, + hf_output_dir=options.output_dir, + ) + + +def load_pr_search_snapshot(snapshot_dir: Path) -> dict[str, Any]: + manifest_path = snapshot_dir / "manifest.json" + manifest = read_json(manifest_path) if manifest_path.exists() else {} + pull_requests = read_parquet_rows(snapshot_dir / "pull_requests.parquet") + pr_files = read_parquet_rows(snapshot_dir / "pr_files.parquet") + contributors = read_parquet_rows(snapshot_dir / "new_contributors.parquet") + repo = manifest.get("repo") or (pull_requests[0].get("repo") if pull_requests else None) or "" + snapshot_id = manifest.get("snapshot_id") or snapshot_dir.name + return { + "repo": repo, + "snapshot_id": snapshot_id, + "manifest": manifest, + "pull_requests": pull_requests, + "pr_files": pr_files, + "contributors": contributors, + } + + +def build_pr_scope_search_artifacts( + pull_requests: Sequence[Mapping[str, Any]], + pr_files: Sequence[Mapping[str, Any]], + *, + options: PrScopeClusterOptions | None = None, + suppression_rules: Sequence[Mapping[str, Any]] = (), + limit_prs: int | None = None, +) -> dict[str, Any]: + settings = options or PrScopeClusterOptions() + suppressed_prs = suppressed_pull_request_reasons( + pull_requests, + pr_files, + compile_cluster_suppression_rules(suppression_rules), + ) + active_prs = [ + row + for row in pull_requests + if _include_pull_request(row, settings) and int(row["number"]) not in suppressed_prs + ] + active_prs.sort(key=lambda row: int(row["number"])) + if limit_prs is not None: + if limit_prs < 1: + raise ValueError("--limit-prs must be at least 1") + active_prs = active_prs[:limit_prs] + + active_numbers = {int(row["number"]) for row in active_prs if row.get("number") is not None} + filtered_pr_files = [ + row + for row in pr_files + if row.get("pull_request_number") is not None + and int(row["pull_request_number"]) in active_numbers + ] + files_by_pr: defaultdict[int, list[Mapping[str, Any]]] = defaultdict(list) + for row in filtered_pr_files: + 
files_by_pr[int(row["pull_request_number"])].append(row) + + profiles = [ + _build_scope_profile(row, files_by_pr.get(int(row["number"]), []), settings) + for row in active_prs + ] + feature_idf = _feature_idf(profiles, settings) if profiles else {} + for profile in profiles: + profile.vector = _normalize_vector( + { + feature: weight * feature_idf[feature] + for feature, weight in profile.raw_vector.items() + if feature in feature_idf + } + ) + + comparisons = _pairwise_comparisons(profiles, settings) if len(profiles) > 1 else [] + comparison_rows = {_pair_key(entry.left, entry.right): entry for entry in comparisons} + neighbor_rankings = _neighbor_rankings(comparisons, settings) + clusters = build_pr_scope_clusters( + active_prs, + filtered_pr_files, + options=settings, + suppression_rules=suppression_rules, + ) + + documents = [_document_row(row) for row in active_prs] + features = [_feature_row(profile) for profile in profiles] + neighbors = _neighbor_rows(neighbor_rankings) + cluster_rows = [_cluster_row(cluster) for cluster in clusters] + cluster_members = _cluster_member_rows(clusters) + cluster_candidates = _cluster_candidate_rows( + profiles=profiles, + comparison_rows=comparison_rows, + clusters=clusters, + ) + settings_json = { + **asdict(settings), + "feature_version": FEATURE_VERSION, + "candidate_formula_version": CANDIDATE_FORMULA_VERSION, + } + return { + "documents": documents, + "features": features, + "run_artifact": { + "feature_version": FEATURE_VERSION, + "idf_json": feature_idf, + }, + "neighbors": neighbors, + "clusters": cluster_rows, + "cluster_members": cluster_members, + "cluster_candidates": cluster_candidates, + "settings_json": settings_json, + } + + +def build_scope_feature_idf_for_indexed_documents( + indexed_documents: Sequence[Mapping[str, Any]], + pr_files: Sequence[Mapping[str, Any]], + *, + options: PrScopeClusterOptions | None = None, +) -> dict[str, float]: + settings = options or PrScopeClusterOptions() + indexed_numbers = { + int(row["pr_number"]) for row in indexed_documents if row.get("pr_number") is not None + } + files_by_pr: defaultdict[int, list[Mapping[str, Any]]] = defaultdict(list) + for row in pr_files: + pr_number = row.get("pull_request_number") + if pr_number is None: + continue + number = int(pr_number) + if number in indexed_numbers: + files_by_pr[number].append(row) + profiles = [ + _build_scope_profile( + _document_to_profile_row(row), + files_by_pr.get(int(row["pr_number"]), []), + settings, + ) + for row in indexed_documents + if row.get("pr_number") is not None + ] + return _feature_idf(profiles, settings) + + +def build_scope_feature_for_pull_request( + pr_row: Mapping[str, Any], + pr_files: Sequence[Mapping[str, Any]], + *, + feature_idf: Mapping[str, float], + options: PrScopeClusterOptions | None = None, +) -> dict[str, Any]: + settings = options or PrScopeClusterOptions() + profile = _build_scope_profile(pr_row, pr_files, settings) + profile.vector = _normalize_vector( + { + feature: weight * feature_idf[feature] + for feature, weight in profile.raw_vector.items() + if feature in feature_idf + } + ) + return _feature_row(profile) + + +def rank_scope_feature_matches( + query_feature: Mapping[str, Any], + indexed_features: Sequence[Mapping[str, Any]], + *, + options: PrScopeClusterOptions | None = None, + limit: int = 10, +) -> list[dict[str, Any]]: + settings = options or PrScopeClusterOptions() + rows: list[dict[str, Any]] = [] + query_pr_number = int(query_feature["pr_number"]) + for feature in indexed_features: + if 
int(feature["pr_number"]) == query_pr_number: + continue + pair = scope_feature_pair_explanation(query_feature, feature, options=settings) + if pair["similarity"] < settings.min_similarity: + continue + rows.append(pair) + rows.sort( + key=lambda row: ( + -float(row["similarity"]), + -float(row["content_similarity"]), + int(row["right_pr_number"]), + ) + ) + return rows[:limit] + + +def rank_scope_cluster_candidates( + *, + similarity_rows: Sequence[Mapping[str, Any]], + clusters: Sequence[Mapping[str, Any]], + cluster_members: Mapping[str, Sequence[int]], + assigned_cluster_ids: set[str] | None = None, + limit: int = DEFAULT_CANDIDATE_LIMIT, +) -> list[dict[str, Any]]: + similarities_by_pr = { + int(row["right_pr_number"]): row + for row in similarity_rows + if row.get("right_pr_number") is not None + } + candidate_rows: list[dict[str, Any]] = [] + assigned = assigned_cluster_ids or set() + for cluster in clusters: + cluster_id = str(cluster["cluster_id"]) + member_rows = [ + (member_pr_number, similarities_by_pr.get(member_pr_number)) + for member_pr_number in cluster_members.get(cluster_id, ()) + ] + member_similarities = [ + (member_pr_number, similarity_row) + for member_pr_number, similarity_row in member_rows + if similarity_row is not None and float(similarity_row["similarity"]) > 0.0 + ] + if not member_similarities and cluster_id not in assigned: + continue + member_similarities.sort(key=lambda item: (-float(item[1]["similarity"]), item[0])) + top_similarities = [float(entry["similarity"]) for _, entry in member_similarities[:3]] + max_member_similarity = top_similarities[0] if top_similarities else 0.0 + avg_top_member_similarity = ( + sum(top_similarities) / len(top_similarities) if top_similarities else 0.0 + ) + matched_member_count = len(member_similarities) + best_member_pr_number = member_similarities[0][0] if member_similarities else None + best_match = member_similarities[0][1] if member_similarities else None + candidate_score = ( + max_member_similarity * 0.60 + + avg_top_member_similarity * 0.30 + + min(matched_member_count, 3) / 3.0 * 0.10 + ) + evidence = { + "matched_member_pr_numbers": [member for member, _ in member_similarities[:5]], + "best_member_pr_number": best_member_pr_number, + "best_shared_filenames": ( + list(best_match["shared_filenames"][:5]) if best_match is not None else [] + ), + "best_shared_directories": ( + list(best_match["shared_directories"][:5]) if best_match is not None else [] + ), + "reason": _candidate_reason( + matched_member_count=matched_member_count, + best_comparison=best_match, + ), + } + candidate_rows.append( + { + "cluster_id": cluster_id, + "candidate_score": candidate_score, + "matched_member_count": matched_member_count, + "best_member_pr_number": best_member_pr_number, + "max_member_similarity": max_member_similarity, + "avg_top_member_similarity": avg_top_member_similarity, + "evidence": evidence, + "assigned": cluster_id in assigned, + } + ) + candidate_rows.sort( + key=lambda row: ( + -float(row["candidate_score"]), + -int(row["matched_member_count"]), + str(row["cluster_id"]), + ) + ) + for rank, row in enumerate(candidate_rows[:limit], start=1): + row["candidate_rank"] = rank + return candidate_rows[:limit] + + +def scope_feature_pair_explanation( + left_feature: Mapping[str, Any], + right_feature: Mapping[str, Any], + *, + options: PrScopeClusterOptions | None = None, +) -> dict[str, Any]: + settings = options or PrScopeClusterOptions() + weight_total = ( + settings.content_weight + + settings.size_weight + + 
settings.breadth_weight
+        + settings.concentration_weight
+    )
+    if weight_total <= 0.0:
+        raise ValueError("PR scope similarity weights must sum to a positive value.")
+
+    left_vector = _json_dict(left_feature.get("vector_json"))
+    right_vector = _json_dict(right_feature.get("vector_json"))
+    left_filenames = set(_json_list(left_feature.get("filenames_json")))
+    right_filenames = set(_json_list(right_feature.get("filenames_json")))
+    left_directories = set(_json_list(left_feature.get("directories_json")))
+    right_directories = set(_json_list(right_feature.get("directories_json")))
+    content_similarity = _cosine_similarity(left_vector, right_vector)
+    if (
+        content_similarity <= 0.0
+        and not left_filenames.intersection(right_filenames)
+        and not left_directories.intersection(right_directories)
+    ):
+        return {
+            "left_pr_number": int(left_feature["pr_number"]),
+            "right_pr_number": int(right_feature["pr_number"]),
+            "similarity": 0.0,
+            "content_similarity": content_similarity,
+            "size_similarity": 0.0,
+            "breadth_similarity": 0.0,
+            "concentration_similarity": 0.0,
+            "shared_filenames": [],
+            "shared_directories": [],
+        }
+    size_similarity = _ratio_similarity(
+        int(left_feature.get("total_changed_lines") or 0),
+        int(right_feature.get("total_changed_lines") or 0),
+    )
+    breadth_similarity = (
+        _ratio_similarity(
+            int(left_feature.get("file_count") or 0),
+            int(right_feature.get("file_count") or 0),
+        )
+        + _ratio_similarity(
+            int(left_feature.get("directory_count") or 0),
+            int(right_feature.get("directory_count") or 0),
+        )
+    ) / 2.0
+    concentration_similarity = max(
+        0.0,
+        1.0
+        - abs(
+            float(left_feature.get("dominant_dir_share") or 0.0)
+            - float(right_feature.get("dominant_dir_share") or 0.0)
+        ),
+    )
+    similarity = (
+        content_similarity * settings.content_weight
+        + size_similarity * settings.size_weight
+        + breadth_similarity * settings.breadth_weight
+        + concentration_similarity * settings.concentration_weight
+    ) / weight_total
+    return {
+        "left_pr_number": int(left_feature["pr_number"]),
+        "right_pr_number": int(right_feature["pr_number"]),
+        "similarity": similarity,
+        "content_similarity": content_similarity,
+        "size_similarity": size_similarity,
+        "breadth_similarity": breadth_similarity,
+        "concentration_similarity": concentration_similarity,
+        "shared_filenames": sorted(left_filenames & right_filenames)[:10],
+        "shared_directories": sorted(
+            left_directories & right_directories,
+            key=lambda value: (-value.count("/"), value),
+        )[:10],
+    }
+
+
+def scope_options_from_settings(settings_json: Mapping[str, Any] | None) -> PrScopeClusterOptions:
+    if not settings_json:
+        return PrScopeClusterOptions()
+    defaults = asdict(PrScopeClusterOptions())
+    values = {key: settings_json[key] for key in defaults if key in settings_json}
+    return PrScopeClusterOptions(**values)
+
+
+def iso_timestamp() -> str:
+    return datetime.now(tz=UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+
+
+def _document_row(row: Mapping[str, Any]) -> dict[str, Any]:
+    return {
+        "pr_number": int(row["number"]),
+        "github_id": row.get("github_id"),
+        "author_login": row.get("author_login"),
+        "state": row.get("state"),
+        "draft": bool(row.get("draft")),
+        "merged": bool(row.get("merged")),
+        "title": row.get("title") or "",
+        "base_ref": row.get("base_ref"),
+        "created_at": row.get("created_at"),
+        "updated_at": row.get("updated_at"),
+        "merged_at": row.get("merged_at"),
+        "additions": int(row.get("additions") or 0),
+        "deletions":
int(row.get("deletions") or 0), + "changed_files": int(row.get("changed_files") or 0), + "comments_count": int(row.get("comments_count") or 0), + "review_comments_count": int(row.get("review_comments_count") or 0), + "html_url": row.get("html_url"), + } + + +def _document_to_profile_row(row: Mapping[str, Any]) -> dict[str, Any]: + return { + "number": int(row["pr_number"]), + "additions": int(row.get("additions") or 0), + "deletions": int(row.get("deletions") or 0), + "changed_files": int(row.get("changed_files") or 0), + } + + +def _feature_row(profile: Any) -> dict[str, Any]: + return { + "pr_number": profile.number, + "feature_version": FEATURE_VERSION, + "total_changed_lines": profile.total_changed_lines, + "file_count": profile.file_count, + "directory_count": profile.directory_count, + "dominant_dir_share": profile.dominant_dir_share, + "filenames_json": sorted(profile.filenames), + "directories_json": sorted(profile.directories), + "vector_json": profile.vector, + } + + +def _neighbor_rankings( + comparisons: Sequence[Any], options: PrScopeClusterOptions +) -> dict[int, list[dict[str, Any]]]: + ranked: defaultdict[int, list[tuple[float, int, Any]]] = defaultdict(list) + for entry in comparisons: + if entry.similarity < options.min_similarity: + continue + ranked[entry.left].append((entry.similarity, entry.right, entry)) + ranked[entry.right].append((entry.similarity, entry.left, entry)) + + results: dict[int, list[dict[str, Any]]] = {} + for pr_number, items in ranked.items(): + ordered = sorted(items, key=lambda item: (-item[0], item[1]))[: options.max_neighbors] + results[pr_number] = [ + { + "other_pr_number": other_pr_number, + "rank": rank, + "comparison": comparison, + } + for rank, (_, other_pr_number, comparison) in enumerate(ordered, start=1) + ] + return results + + +def _neighbor_rows( + neighbor_rankings: Mapping[int, Sequence[Mapping[str, Any]]], +) -> list[dict[str, Any]]: + rows: dict[tuple[int, int], dict[str, Any]] = {} + for pr_number, ranked_neighbors in neighbor_rankings.items(): + for ranked_neighbor in ranked_neighbors: + comparison = ranked_neighbor["comparison"] + left_pr = min(pr_number, int(ranked_neighbor["other_pr_number"])) + right_pr = max(pr_number, int(ranked_neighbor["other_pr_number"])) + pair_key = (left_pr, right_pr) + row = rows.get(pair_key) + if row is None: + row = { + "left_pr_number": left_pr, + "right_pr_number": right_pr, + "rank_from_left": None, + "rank_from_right": None, + "similarity": comparison.similarity, + "content_similarity": comparison.content_similarity, + "size_similarity": comparison.size_similarity, + "breadth_similarity": comparison.breadth_similarity, + "concentration_similarity": comparison.concentration_similarity, + "shared_filenames_json": comparison.shared_filenames, + "shared_directories_json": comparison.shared_directories, + } + rows[pair_key] = row + if pr_number == left_pr: + row["rank_from_left"] = int(ranked_neighbor["rank"]) + else: + row["rank_from_right"] = int(ranked_neighbor["rank"]) + return [rows[key] for key in sorted(rows)] + + +def _cluster_row(cluster: PrScopeCluster) -> dict[str, Any]: + return { + "cluster_id": cluster.cluster_id, + "representative_pr_number": cluster.representative_pr_number, + "cluster_size": len(cluster.pr_numbers), + "average_similarity": cluster.average_similarity, + "summary": cluster.summary, + "shared_filenames_json": cluster.shared_filenames, + "shared_directories_json": cluster.shared_directories, + } + + +def _cluster_member_rows(clusters: Sequence[PrScopeCluster]) -> 
list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for cluster in clusters: + for pr_number in cluster.pr_numbers: + rows.append( + { + "cluster_id": cluster.cluster_id, + "pr_number": pr_number, + "member_role": ( + "representative" + if pr_number == cluster.representative_pr_number + else "member" + ), + } + ) + rows.sort( + key=lambda row: ( + row["cluster_id"], + row["member_role"] != "representative", + row["pr_number"], + ) + ) + return rows + + +def _cluster_candidate_rows( + *, + profiles: Sequence[Any], + comparison_rows: Mapping[tuple[int, int], Any], + clusters: Sequence[PrScopeCluster], +) -> list[dict[str, Any]]: + cluster_ids_by_pr: defaultdict[int, set[str]] = defaultdict(set) + cluster_members: dict[str, list[int]] = {} + for cluster in clusters: + cluster_members[cluster.cluster_id] = list(cluster.pr_numbers) + for pr_number in cluster.pr_numbers: + cluster_ids_by_pr[pr_number].add(cluster.cluster_id) + + rows: list[dict[str, Any]] = [] + for profile in sorted(profiles, key=lambda item: item.number): + candidates = _cluster_candidates_for_pr( + pr_number=profile.number, + comparison_rows=comparison_rows, + clusters=clusters, + assigned_cluster_ids=cluster_ids_by_pr.get(profile.number, set()), + cluster_members=cluster_members, + ) + rows.extend(candidates) + return rows + + +def _cluster_candidates_for_pr( + *, + pr_number: int, + comparison_rows: Mapping[tuple[int, int], Any], + clusters: Sequence[PrScopeCluster], + assigned_cluster_ids: set[str], + cluster_members: Mapping[str, Sequence[int]], +) -> list[dict[str, Any]]: + candidate_rows: list[dict[str, Any]] = [] + for cluster in clusters: + member_similarities: list[tuple[int, Any]] = [] + for member_pr_number in cluster_members[cluster.cluster_id]: + if member_pr_number == pr_number: + continue + comparison = comparison_rows.get(_pair_key(pr_number, member_pr_number)) + if comparison is None or comparison.similarity <= 0.0: + continue + member_similarities.append((member_pr_number, comparison)) + if not member_similarities and cluster.cluster_id not in assigned_cluster_ids: + continue + member_similarities.sort(key=lambda item: (-item[1].similarity, item[0])) + top_similarities = [entry.similarity for _, entry in member_similarities[:3]] + max_member_similarity = top_similarities[0] if top_similarities else 0.0 + avg_top_member_similarity = ( + sum(top_similarities) / len(top_similarities) if top_similarities else 0.0 + ) + matched_member_count = len(member_similarities) + candidate_score = ( + max_member_similarity * 0.60 + + avg_top_member_similarity * 0.30 + + min(matched_member_count, 3) / 3.0 * 0.10 + ) + best_member_pr_number = member_similarities[0][0] if member_similarities else None + best_comparison = member_similarities[0][1] if member_similarities else None + evidence = { + "matched_member_pr_numbers": [member for member, _ in member_similarities[:5]], + "best_member_pr_number": best_member_pr_number, + "best_shared_filenames": ( + list(best_comparison.shared_filenames[:5]) if best_comparison is not None else [] + ), + "best_shared_directories": ( + list(best_comparison.shared_directories[:5]) if best_comparison is not None else [] + ), + "reason": _candidate_reason( + matched_member_count=matched_member_count, + best_comparison=best_comparison, + ), + } + candidate_rows.append( + { + "pr_number": pr_number, + "cluster_id": cluster.cluster_id, + "candidate_score": candidate_score, + "matched_member_count": matched_member_count, + "best_member_pr_number": best_member_pr_number, + 
"max_member_similarity": max_member_similarity, + "avg_top_member_similarity": avg_top_member_similarity, + "evidence_json": evidence, + "assigned": cluster.cluster_id in assigned_cluster_ids, + } + ) + + candidate_rows.sort( + key=lambda row: ( + -row["candidate_score"], + -row["matched_member_count"], + row["cluster_id"], + ) + ) + for rank, row in enumerate(candidate_rows[:DEFAULT_CANDIDATE_LIMIT], start=1): + row["candidate_rank"] = rank + return candidate_rows[:DEFAULT_CANDIDATE_LIMIT] + + +def _candidate_reason(*, matched_member_count: int, best_comparison: Any | None) -> str: + if best_comparison is None: + return "cluster membership matches existing scope assignment" + shared_filenames = ( + list(best_comparison.shared_filenames) + if hasattr(best_comparison, "shared_filenames") + else list(best_comparison.get("shared_filenames") or []) + ) + shared_directories = ( + list(best_comparison.shared_directories) + if hasattr(best_comparison, "shared_directories") + else list(best_comparison.get("shared_directories") or []) + ) + if matched_member_count >= 2: + return "overlapping files and directories with multiple cluster members" + if shared_filenames: + return "overlapping changed files with a cluster member" + if shared_directories: + return "overlapping directories with a cluster member" + return "similar change shape to a cluster member" + + +def _pair_key(left: int, right: int) -> tuple[int, int]: + return (left, right) if left <= right else (right, left) + + +def _json_dict(raw: Any) -> dict[str, float]: + if isinstance(raw, dict): + return {str(key): float(value) for key, value in raw.items()} + if isinstance(raw, str) and raw: + payload = json.loads(raw) + if isinstance(payload, dict): + return {str(key): float(value) for key, value in payload.items()} + return {} + + +def _json_list(raw: Any) -> list[str]: + if isinstance(raw, list): + return [str(item) for item in raw] + if isinstance(raw, str) and raw: + payload = json.loads(raw) + if isinstance(payload, list): + return [str(item) for item in payload] + return [] + + +def _cosine_similarity(left: Mapping[str, float], right: Mapping[str, float]) -> float: + if not left or not right: + return 0.0 + if len(left) > len(right): + left, right = right, left + return sum(weight * right.get(feature, 0.0) for feature, weight in left.items()) + + +def _ratio_similarity(left: int, right: int) -> float: + largest = max(left, right) + if largest <= 0: + return 1.0 + return min(left, right) / largest diff --git a/src/slop_farmer/reports/pr_search_service.py b/src/slop_farmer/reports/pr_search_service.py new file mode 100644 index 0000000000000000000000000000000000000000..acffef646d748e37a5d4a32d8671eb77a00ebac7 --- /dev/null +++ b/src/slop_farmer/reports/pr_search_service.py @@ -0,0 +1,1069 @@ +from __future__ import annotations + +import json +from collections.abc import Iterable, Mapping, Sequence +from contextlib import suppress +from pathlib import Path +from typing import Any, Protocol +from uuid import uuid4 + +from slop_farmer.config import PrSearchRefreshOptions, RepoRef, resolve_github_token +from slop_farmer.data.github_api import GitHubClient +from slop_farmer.data.normalize import normalize_pr_file, normalize_pull_request +from slop_farmer.data.search_duckdb import ( + connect_pr_search_db, + fetch_rows, + get_candidate_cluster_rows, + get_cluster, + get_cluster_ids_for_prs, + get_cluster_members, + get_contributor, + get_contributor_pulls, + get_document, + get_feature, + get_pair_neighbor_row, + get_run_counts, + 
get_scope_run_artifact, + get_shared_cluster_ids, + get_similar_pr_rows, + insert_rows, + replace_active_run, + resolve_active_run, + update_run_status, +) +from slop_farmer.reports.pr_scope import PrScopeClusterOptions +from slop_farmer.reports.pr_search_scope import ( + build_pr_scope_search_artifacts, + build_scope_feature_for_pull_request, + build_scope_feature_idf_for_indexed_documents, + iso_timestamp, + load_pr_search_snapshot, + rank_scope_cluster_candidates, + rank_scope_feature_matches, + resolve_pr_search_snapshot_dir, + scope_feature_pair_explanation, + scope_options_from_settings, +) + + +class ProbeClientLike(Protocol): + def get_pull_request(self, owner: str, repo: str, number: int) -> dict[str, Any]: ... + + def iter_pull_files(self, owner: str, repo: str, number: int) -> Iterable[dict[str, Any]]: ... + + +def run_pr_search_refresh(options: PrSearchRefreshOptions) -> dict[str, Any]: + snapshot_dir = resolve_pr_search_snapshot_dir(options) + snapshot = load_pr_search_snapshot(snapshot_dir) + repo = str(snapshot["repo"]) + db_path = resolve_pr_search_db_path(options.db, output_dir=options.output_dir) + started_at = iso_timestamp() + scope_options = PrScopeClusterOptions( + include_closed=options.include_closed, + include_drafts=options.include_drafts, + ) + artifacts = build_pr_scope_search_artifacts( + snapshot["pull_requests"], + snapshot["pr_files"], + options=scope_options, + suppression_rules=options.cluster_suppression_rules, + limit_prs=options.limit_prs, + ) + run_id = uuid4().hex + source_type = "hf_dataset_repo" if options.hf_repo_id else "local_snapshot" + + connection = connect_pr_search_db(db_path) + try: + insert_rows( + connection, + "pr_search_runs", + [ + { + "id": run_id, + "repo": repo, + "snapshot_id": snapshot["snapshot_id"], + "snapshot_dir": str(snapshot_dir), + "source_type": source_type, + "hf_repo_id": options.hf_repo_id, + "hf_revision": options.hf_revision, + "started_at": started_at, + "finished_at": None, + "status": "running", + "settings_json": artifacts["settings_json"], + "notes": None, + } + ], + ) + connection.execute("BEGIN") + created_at = iso_timestamp() + insert_rows( + connection, + "pr_search_documents", + _scoped_rows(artifacts["documents"], run_id=run_id, repo=repo), + ) + insert_rows( + connection, + "pr_search_contributors", + _contributor_rows( + snapshot["contributors"], + run_id=run_id, + repo=repo, + snapshot_id=str(snapshot["snapshot_id"]), + ), + ) + insert_rows( + connection, + "pr_scope_features", + _scoped_rows( + artifacts["features"], + run_id=run_id, + repo=repo, + computed_at=created_at, + ), + ) + insert_rows( + connection, + "pr_scope_run_artifacts", + _scoped_rows( + [artifacts["run_artifact"]], + run_id=run_id, + repo=repo, + computed_at=created_at, + ), + ) + insert_rows( + connection, + "pr_scope_neighbors", + _scoped_rows( + artifacts["neighbors"], + run_id=run_id, + repo=repo, + created_at=created_at, + ), + ) + insert_rows( + connection, + "pr_scope_clusters", + _scoped_rows( + artifacts["clusters"], + run_id=run_id, + repo=repo, + created_at=created_at, + ), + ) + insert_rows( + connection, + "pr_scope_cluster_members", + _scoped_rows(artifacts["cluster_members"], run_id=run_id, repo=repo), + ) + insert_rows( + connection, + "pr_scope_cluster_candidates", + _scoped_rows(artifacts["cluster_candidates"], run_id=run_id, repo=repo), + ) + finished_at = iso_timestamp() + update_run_status( + connection, + run_id=run_id, + status="succeeded", + finished_at=finished_at, + ) + if options.replace_active: + 
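+            # Repoint this repo's active-run marker at the freshly indexed run so
+            # the read endpoints (status/similar/clusters) serve it from now on.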
replace_active_run( + connection, + repo=repo, + run_id=run_id, + activated_at=finished_at, + ) + connection.execute("COMMIT") + counts = get_run_counts(connection, run_id=run_id) + return { + "db_path": str(db_path), + "run_id": run_id, + "repo": repo, + "snapshot_id": snapshot["snapshot_id"], + "snapshot_dir": str(snapshot_dir), + "source_type": source_type, + "active_updated": bool(options.replace_active), + "row_counts": counts, + } + except Exception as exc: + with suppress(Exception): + connection.execute("ROLLBACK") + update_run_status( + connection, + run_id=run_id, + status="failed", + finished_at=iso_timestamp(), + notes=str(exc), + ) + raise + finally: + connection.close() + + +def get_pr_search_status(db_path: Path, *, repo: str | None = None) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + return { + **_without_json_fields(active_run), + "settings": _json_dict(active_run.get("settings_json")), + "row_counts": get_run_counts(connection, run_id=str(active_run["id"])), + } + finally: + connection.close() + + +def get_pr_search_similar( + db_path: Path, + *, + pr_number: int, + repo: str | None = None, + limit: int = 10, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + document = _require_document(connection, run_id=run_id, pr_number=pr_number) + similar_rows = get_similar_pr_rows( + connection, run_id=run_id, pr_number=pr_number, limit=limit + ) + cluster_ids_by_pr = get_cluster_ids_for_prs( + connection, + run_id=run_id, + pr_numbers=[int(row["neighbor_pr_number"]) for row in similar_rows], + ) + results = [] + for row in similar_rows: + results.append( + { + **_without_json_fields(row), + "neighbor_title": _require_document( + connection, + run_id=run_id, + pr_number=int(row["neighbor_pr_number"]), + )["title"], + "cluster_ids": cluster_ids_by_pr.get(int(row["neighbor_pr_number"]), []), + "shared_filenames": _json_list(row.get("shared_filenames_json")), + "shared_directories": _json_list(row.get("shared_directories_json")), + } + ) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "pr": document, + "similar_prs": results, + "similar_count": len(results), + } + finally: + connection.close() + + +def get_pr_search_candidate_clusters( + db_path: Path, + *, + pr_number: int, + repo: str | None = None, + limit: int = 5, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + document = _require_document(connection, run_id=run_id, pr_number=pr_number) + rows = get_candidate_cluster_rows( + connection, run_id=run_id, pr_number=pr_number, limit=limit + ) + candidates = [] + for row in rows: + evidence = _json_dict(row.get("evidence_json")) + candidates.append( + { + **_without_json_fields(row), + "shared_filenames": _json_list(row.get("shared_filenames_json")), + "shared_directories": _json_list(row.get("shared_directories_json")), + "evidence": evidence, + "matched_member_pr_numbers": evidence.get("matched_member_pr_numbers") or [], + "reason": evidence.get("reason") or "", + } + ) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "pr": document, + "candidate_clusters": candidates, + "candidate_cluster_count": 
len(candidates), + } + finally: + connection.close() + + +def get_pr_search_contributor( + db_path: Path, + *, + author_login: str, + repo: str | None = None, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + contributor = _require_contributor(connection, run_id=run_id, author_login=author_login) + pulls = _document_rows( + get_contributor_pulls(connection, run_id=run_id, author_login=author_login, limit=20) + ) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "contributor": contributor, + "pulls": pulls, + "pull_count": len(pulls), + } + finally: + connection.close() + + +def get_pr_search_contributor_pulls( + db_path: Path, + *, + author_login: str, + repo: str | None = None, + limit: int = 20, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + contributor = _require_contributor(connection, run_id=run_id, author_login=author_login) + pulls = _document_rows( + get_contributor_pulls(connection, run_id=run_id, author_login=author_login, limit=limit) + ) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "contributor": contributor, + "pulls": pulls, + "pull_count": len(pulls), + } + finally: + connection.close() + + +def get_pr_search_pull_contributor( + db_path: Path, + *, + pr_number: int, + repo: str | None = None, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + document = _require_document(connection, run_id=run_id, pr_number=pr_number) + author_login = str(document.get("author_login") or "").strip() + if not author_login: + raise ValueError(f"PR #{pr_number} does not have an indexed author_login.") + contributor = _require_contributor(connection, run_id=run_id, author_login=author_login) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "pr": _without_json_fields(document), + "contributor": contributor, + } + finally: + connection.close() + + +def get_pr_search_similar_lookup( + db_path: Path, + *, + pr_number: int, + repo: str | None = None, + limit: int = 10, + mode: str = "auto", + client: ProbeClientLike | None = None, +) -> dict[str, Any]: + resolved_mode = _normalize_lookup_mode(mode) + if resolved_mode != "live": + try: + result = get_pr_search_similar(db_path, pr_number=pr_number, repo=repo, limit=limit) + except ValueError as exc: + if resolved_mode == "indexed" or not _is_index_miss(exc): + raise + else: + result["query"] = { + "pr_number": pr_number, + "mode_requested": resolved_mode, + "mode_used": "indexed", + "source": "active_index", + } + return result + + live_result = probe_pr_search_live( + db_path, + pr_number=pr_number, + repo=repo, + limit=limit, + client=client, + ) + return { + "repo": live_result["repo"], + "snapshot_id": live_result["snapshot_id"], + "run_id": live_result["run_id"], + "query": { + "pr_number": pr_number, + "mode_requested": resolved_mode, + "mode_used": "live", + "source": live_result["probe_source"]["provider"], + }, + "pr": live_result["probe_pr"], + "probe_source": live_result["probe_source"], + "similar_prs": live_result["similar_prs"], + "similar_count": 
len(live_result["similar_prs"]), + } + + +def get_pr_search_clusters( + db_path: Path, + *, + pr_number: int, + repo: str | None = None, + limit: int = 5, + mode: str = "auto", + client: ProbeClientLike | None = None, +) -> dict[str, Any]: + resolved_mode = _normalize_lookup_mode(mode) + if resolved_mode != "live": + try: + result = _get_pr_search_clusters_indexed( + db_path, + pr_number=pr_number, + repo=repo, + limit=limit, + ) + except ValueError as exc: + if resolved_mode == "indexed" or not _is_index_miss(exc): + raise + else: + result["query"] = { + "pr_number": pr_number, + "mode_requested": resolved_mode, + "mode_used": "indexed", + "source": "active_index", + } + return result + + live_result = probe_pr_search_live( + db_path, + pr_number=pr_number, + repo=repo, + limit=limit, + client=client, + ) + return { + "repo": live_result["repo"], + "snapshot_id": live_result["snapshot_id"], + "run_id": live_result["run_id"], + "query": { + "pr_number": pr_number, + "mode_requested": resolved_mode, + "mode_used": "live", + "source": live_result["probe_source"]["provider"], + }, + "pr": live_result["probe_pr"], + "probe_source": live_result["probe_source"], + "assigned_clusters": [], + "candidate_clusters": live_result["candidate_clusters"], + "assigned_cluster_count": 0, + "candidate_cluster_count": len(live_result["candidate_clusters"]), + } + + +def get_pr_search_cluster( + db_path: Path, + *, + cluster_id: str, + repo: str | None = None, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + cluster = get_cluster(connection, run_id=run_id, cluster_id=cluster_id) + if cluster is None: + raise ValueError(f"Cluster {cluster_id!r} was not found in the active run.") + members = get_cluster_members(connection, run_id=run_id, cluster_id=cluster_id) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "cluster": _cluster_summary(cluster), + "members": members, + "member_count": len(members), + } + finally: + connection.close() + + +def list_pr_search_clusters( + db_path: Path, + *, + repo: str | None = None, + limit: int = 50, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + rows = fetch_rows( + connection, + """ + SELECT + cl.*, + d.title AS representative_title, + d.html_url AS representative_html_url, + d.state AS representative_state, + d.draft AS representative_draft + FROM pr_scope_clusters AS cl + LEFT JOIN pr_search_documents AS d + ON d.run_id = cl.run_id AND d.pr_number = cl.representative_pr_number + WHERE cl.run_id = ? + ORDER BY cl.cluster_size DESC, cl.average_similarity DESC, cl.cluster_id + LIMIT ? 
+ """, + [run_id, limit], + ) + clusters = [] + for index, row in enumerate(rows, start=1): + clusters.append({"rank": index, **_cluster_summary(row)}) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "clusters": clusters, + "cluster_count": len(clusters), + } + finally: + connection.close() + + +def explain_pr_search_pair( + db_path: Path, + *, + left_pr_number: int, + right_pr_number: int, + repo: str | None = None, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + left_document = _require_document(connection, run_id=run_id, pr_number=left_pr_number) + right_document = _require_document(connection, run_id=run_id, pr_number=right_pr_number) + neighbor_row = get_pair_neighbor_row( + connection, + run_id=run_id, + left_pr_number=left_pr_number, + right_pr_number=right_pr_number, + ) + shared_cluster_ids = get_shared_cluster_ids( + connection, + run_id=run_id, + left_pr_number=left_pr_number, + right_pr_number=right_pr_number, + ) + if neighbor_row is not None: + shared_filenames = _json_list(neighbor_row.get("shared_filenames_json")) + shared_directories = _json_list(neighbor_row.get("shared_directories_json")) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "materialized": True, + "left_pr": left_document, + "right_pr": right_document, + "pair": { + "similarity": neighbor_row["similarity"], + "content_similarity": neighbor_row["content_similarity"], + "size_similarity": neighbor_row["size_similarity"], + "breadth_similarity": neighbor_row["breadth_similarity"], + "concentration_similarity": neighbor_row["concentration_similarity"], + "shared_filenames": shared_filenames, + "shared_directories": shared_directories, + }, + "shared_cluster_ids": shared_cluster_ids, + } + + left_feature = _require_feature(connection, run_id=run_id, pr_number=left_pr_number) + right_feature = _require_feature(connection, run_id=run_id, pr_number=right_pr_number) + pair = scope_feature_pair_explanation( + left_feature, + right_feature, + options=scope_options_from_settings(_json_dict(active_run.get("settings_json"))), + ) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "materialized": False, + "left_pr": left_document, + "right_pr": right_document, + "pair": pair, + "shared_cluster_ids": shared_cluster_ids, + } + finally: + connection.close() + + +def probe_pr_search_live( + db_path: Path, + *, + pr_number: int, + repo: str | None = None, + limit: int = 10, + client: ProbeClientLike | None = None, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + repo_slug = repo or str(active_run["repo"]) + repo_ref = RepoRef.parse(repo_slug) + settings = scope_options_from_settings(_json_dict(active_run.get("settings_json"))) + indexed_documents = fetch_rows( + connection, + """ + SELECT * + FROM pr_search_documents + WHERE run_id = ? + ORDER BY pr_number + """, + [run_id], + ) + indexed_features = fetch_rows( + connection, + """ + SELECT * + FROM pr_scope_features + WHERE run_id = ? + ORDER BY pr_number + """, + [run_id], + ) + run_artifact = get_scope_run_artifact(connection, run_id=run_id) + cluster_rows = fetch_rows( + connection, + """ + SELECT * + FROM pr_scope_clusters + WHERE run_id = ? 
+ ORDER BY cluster_id + """, + [run_id], + ) + cluster_member_rows = fetch_rows( + connection, + """ + SELECT cluster_id, pr_number + FROM pr_scope_cluster_members + WHERE run_id = ? + ORDER BY cluster_id, pr_number + """, + [run_id], + ) + cluster_members: dict[str, list[int]] = {} + for row in cluster_member_rows: + cluster_members.setdefault(str(row["cluster_id"]), []).append(int(row["pr_number"])) + + probe_client = client or GitHubClient(token=resolve_github_token()) + extracted_at = iso_timestamp() + pr_detail = probe_client.get_pull_request(repo_ref.owner, repo_ref.name, pr_number) + pr_row = normalize_pull_request( + repo_ref.slug, + pr_detail, + pr_detail, + str(active_run["snapshot_id"]), + extracted_at, + ) + pr_files = [ + normalize_pr_file( + repo_ref.slug, + pr_number, + item, + str(active_run["snapshot_id"]), + extracted_at, + ) + for item in probe_client.iter_pull_files(repo_ref.owner, repo_ref.name, pr_number) + ] + feature_idf = ( + _json_float_dict(run_artifact.get("idf_json")) if run_artifact is not None else {} + ) + if not feature_idf: + snapshot = load_pr_search_snapshot(Path(str(active_run["snapshot_dir"]))) + feature_idf = build_scope_feature_idf_for_indexed_documents( + indexed_documents, + snapshot["pr_files"], + options=settings, + ) + query_feature = build_scope_feature_for_pull_request( + pr_row, + pr_files, + feature_idf=feature_idf, + options=settings, + ) + similarity_rows = rank_scope_feature_matches( + query_feature, + indexed_features, + options=settings, + limit=limit, + ) + cluster_ids_by_pr = get_cluster_ids_for_prs( + connection, + run_id=run_id, + pr_numbers=[int(row["right_pr_number"]) for row in similarity_rows], + ) + live_similar_prs = [] + for row in similarity_rows: + indexed_document = _require_document( + connection, + run_id=run_id, + pr_number=int(row["right_pr_number"]), + ) + live_similar_prs.append( + { + **row, + "neighbor_pr_number": int(row["right_pr_number"]), + "neighbor_title": indexed_document["title"], + "cluster_ids": cluster_ids_by_pr.get(int(row["right_pr_number"]), []), + } + ) + assigned_cluster_ids = set( + get_cluster_ids_for_prs(connection, run_id=run_id, pr_numbers=[pr_number]).get( + pr_number, [] + ) + ) + candidate_clusters = rank_scope_cluster_candidates( + similarity_rows=similarity_rows, + clusters=cluster_rows, + cluster_members=cluster_members, + assigned_cluster_ids=assigned_cluster_ids, + limit=min(5, max(limit, 1)), + ) + cluster_by_id = {str(row["cluster_id"]): row for row in cluster_rows} + for row in candidate_clusters: + cluster = cluster_by_id[row["cluster_id"]] + row.update( + { + "representative_pr_number": cluster["representative_pr_number"], + "cluster_size": cluster["cluster_size"], + "average_similarity": cluster["average_similarity"], + "summary": cluster["summary"], + "shared_filenames": _json_list(cluster.get("shared_filenames_json")), + "shared_directories": _json_list(cluster.get("shared_directories_json")), + "matched_member_pr_numbers": row["evidence"].get("matched_member_pr_numbers") + or [], + "reason": row["evidence"].get("reason") or "", + } + ) + + return { + "repo": repo_slug, + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "probe_pr": { + "pr_number": pr_number, + "title": pr_row.get("title") or "", + "html_url": pr_row.get("html_url"), + "base_ref": pr_row.get("base_ref"), + "changed_files": int(pr_row.get("changed_files") or 0), + }, + "probe_source": _probe_source_metadata( + probe_client, + owner=repo_ref.owner, + repo=repo_ref.name, + number=pr_number, + ), + 
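+            # Matches are ranked live against the indexed scope features; the
+            # candidate clusters below are derived from those same similarity rows.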
"similar_prs": live_similar_prs, + "candidate_clusters": candidate_clusters, + } + finally: + connection.close() + + +def probe_pr_search_github( + db_path: Path, + *, + pr_number: int, + repo: str | None = None, + limit: int = 10, + client: ProbeClientLike | None = None, +) -> dict[str, Any]: + return probe_pr_search_live( + db_path, + pr_number=pr_number, + repo=repo, + limit=limit, + client=client, + ) + + +def resolve_pr_search_db_path(db_path: Path | None, *, output_dir: Path) -> Path: + return (db_path or output_dir / "state" / "pr-search.duckdb").resolve() + + +def _scoped_rows(rows: list[dict[str, Any]], **extra: Any) -> list[dict[str, Any]]: + return [{**extra, **row} for row in rows] + + +def _get_pr_search_clusters_indexed( + db_path: Path, + *, + pr_number: int, + repo: str | None = None, + limit: int = 5, +) -> dict[str, Any]: + connection = connect_pr_search_db(db_path, read_only=True) + try: + active_run = resolve_active_run(connection, repo=repo) + run_id = str(active_run["id"]) + document = _require_document(connection, run_id=run_id, pr_number=pr_number) + candidate_rows = get_candidate_cluster_rows( + connection, + run_id=run_id, + pr_number=pr_number, + limit=limit, + ) + assigned_cluster_ids = get_cluster_ids_for_prs( + connection, + run_id=run_id, + pr_numbers=[pr_number], + ).get(pr_number, []) + assigned_clusters = [] + for cluster_id in assigned_cluster_ids: + cluster = get_cluster(connection, run_id=run_id, cluster_id=cluster_id) + if cluster is None: + continue + assigned_clusters.append(_cluster_summary(cluster)) + candidates = [] + for row in candidate_rows: + evidence = _json_dict(row.get("evidence_json")) + candidates.append( + { + **_without_json_fields(row), + "shared_filenames": _json_list(row.get("shared_filenames_json")), + "shared_directories": _json_list(row.get("shared_directories_json")), + "evidence": evidence, + "matched_member_pr_numbers": evidence.get("matched_member_pr_numbers") or [], + "reason": evidence.get("reason") or "", + } + ) + return { + "repo": active_run["repo"], + "snapshot_id": active_run["snapshot_id"], + "run_id": run_id, + "pr": document, + "assigned_clusters": assigned_clusters, + "candidate_clusters": candidates, + "assigned_cluster_count": len(assigned_clusters), + "candidate_cluster_count": len(candidates), + } + finally: + connection.close() + + +def _require_document(connection: Any, *, run_id: str, pr_number: int) -> dict[str, Any]: + document = get_document(connection, run_id=run_id, pr_number=pr_number) + if document is None: + raise ValueError(f"PR #{pr_number} was not found in the active indexed universe.") + return document + + +def _require_feature(connection: Any, *, run_id: str, pr_number: int) -> dict[str, Any]: + feature = get_feature(connection, run_id=run_id, pr_number=pr_number) + if feature is None: + raise ValueError(f"No scope feature row was found for PR #{pr_number}.") + return feature + + +def _require_contributor(connection: Any, *, run_id: str, author_login: str) -> dict[str, Any]: + contributor = get_contributor(connection, run_id=run_id, author_login=author_login) + if contributor is None: + raise ValueError( + f"Contributor {author_login!r} was not found in the active indexed universe." 
+ ) + return _contributor_row(contributor) + + +def _json_list(raw: Any) -> list[str]: + if isinstance(raw, list): + return [str(item) for item in raw] + if isinstance(raw, str) and raw: + payload = json.loads(raw) + if isinstance(payload, list): + return [str(item) for item in payload] + return [] + + +def _json_dict(raw: Any) -> dict[str, Any]: + if isinstance(raw, dict): + return dict(raw) + if isinstance(raw, str) and raw: + payload = json.loads(raw) + if isinstance(payload, dict): + return payload + return {} + + +def _json_float_dict(raw: Any) -> dict[str, float]: + payload = _json_dict(raw) + return {str(key): float(value) for key, value in payload.items()} + + +def _cluster_summary(cluster: dict[str, Any]) -> dict[str, Any]: + return { + **_without_json_fields(cluster), + "shared_filenames": _json_list(cluster.get("shared_filenames_json")), + "shared_directories": _json_list(cluster.get("shared_directories_json")), + } + + +def _without_json_fields(row: Mapping[str, Any]) -> dict[str, Any]: + return {str(key): value for key, value in row.items() if not str(key).endswith("_json")} + + +def _document_rows(rows: Sequence[Mapping[str, Any]]) -> list[dict[str, Any]]: + return [_without_json_fields(row) for row in rows] + + +def _contributor_rows( + rows: list[Mapping[str, Any]], + *, + run_id: str, + repo: str, + snapshot_id: str, +) -> list[dict[str, Any]]: + return [ + { + "run_id": run_id, + "repo": repo, + "snapshot_id": snapshot_id, + "report_generated_at": row.get("report_generated_at"), + "window_days": row.get("window_days"), + "author_login": row.get("author_login"), + "name": row.get("name"), + "profile_url": row.get("profile_url"), + "repo_pull_requests_url": row.get("repo_pull_requests_url"), + "repo_issues_url": row.get("repo_issues_url"), + "repo_first_seen_at": row.get("repo_first_seen_at"), + "repo_last_seen_at": row.get("repo_last_seen_at"), + "repo_primary_artifact_count": row.get("repo_primary_artifact_count"), + "repo_artifact_count": row.get("repo_artifact_count"), + "snapshot_issue_count": row.get("snapshot_issue_count"), + "snapshot_pr_count": row.get("snapshot_pr_count"), + "snapshot_comment_count": row.get("snapshot_comment_count"), + "snapshot_review_count": row.get("snapshot_review_count"), + "snapshot_review_comment_count": row.get("snapshot_review_comment_count"), + "repo_association": row.get("repo_association"), + "new_to_repo": row.get("new_to_repo"), + "first_seen_in_snapshot": row.get("first_seen_in_snapshot"), + "report_reason": row.get("report_reason"), + "account_age_days": row.get("account_age_days"), + "young_account": row.get("young_account"), + "follow_through_score": row.get("follow_through_score"), + "breadth_score": row.get("breadth_score"), + "automation_risk_signal": row.get("automation_risk_signal"), + "heuristic_note": row.get("heuristic_note"), + "public_orgs_json": row.get("public_orgs"), + "visible_authored_pr_count": row.get("visible_authored_pr_count"), + "merged_pr_count": row.get("merged_pr_count"), + "closed_unmerged_pr_count": row.get("closed_unmerged_pr_count"), + "open_pr_count": row.get("open_pr_count"), + "merged_pr_rate": row.get("merged_pr_rate"), + "closed_unmerged_pr_rate": row.get("closed_unmerged_pr_rate"), + "still_open_pr_rate": row.get("still_open_pr_rate"), + "distinct_repos_with_authored_prs": row.get("distinct_repos_with_authored_prs"), + "distinct_repos_with_open_prs": row.get("distinct_repos_with_open_prs"), + "fetch_error": row.get("fetch_error"), + } + for row in rows + ] + + +def _contributor_row(row: 
Mapping[str, Any]) -> dict[str, Any]: + return { + **_without_json_fields(row), + "public_orgs": _json_list(row.get("public_orgs_json")), + } + + +def _normalize_lookup_mode(mode: str) -> str: + normalized = mode.strip().lower() + if normalized not in {"auto", "indexed", "live"}: + raise ValueError(f"Unsupported mode {mode!r}; expected auto, indexed, or live.") + return normalized + + +def _is_index_miss(exc: ValueError) -> bool: + return "active indexed universe" in str(exc) + + +def _probe_source_metadata( + client: Any, + *, + owner: str, + repo: str, + number: int, +) -> dict[str, Any]: + metadata: dict[str, Any] = {"provider": _probe_provider(client)} + base_url = getattr(client, "base_url", None) + if isinstance(base_url, str) and base_url: + metadata["base_url"] = base_url + status_method = getattr(client, "get_pull_request_status", None) + if not callable(status_method): + return metadata + try: + status = status_method(owner, repo, number) + except Exception as exc: + metadata["status_error"] = str(exc) + return metadata + metadata.update(_normalize_probe_status(status)) + return metadata + + +def _normalize_probe_status(raw: Any) -> dict[str, Any]: + if not isinstance(raw, dict): + return {} + indexed = raw.get("indexed") + if indexed is None: + indexed = raw.get("is_indexed") + freshness = raw.get("index_freshness") + if freshness is None: + freshness = raw.get("freshness") + last_indexed_at = raw.get("last_indexed_at") + if last_indexed_at is None: + last_indexed_at = raw.get("indexed_at") + + normalized: dict[str, Any] = {} + if indexed is not None: + normalized["indexed"] = bool(indexed) + if freshness is not None: + normalized["index_freshness"] = str(freshness) + if last_indexed_at is not None: + normalized["last_indexed_at"] = str(last_indexed_at) + return normalized + + +def _probe_provider(client: Any) -> str: + provider = getattr(client, "provider", None) + if isinstance(provider, str) and provider: + return provider + if isinstance(client, GitHubClient): + return "github" + return "live" diff --git a/src/slop_farmer/reports/prompts/canonical_duplicate_pr.md b/src/slop_farmer/reports/prompts/canonical_duplicate_pr.md new file mode 100644 index 0000000000000000000000000000000000000000..1897954d123d550ff6ad84d1fd55e9fb2fc02f75 --- /dev/null +++ b/src/slop_farmer/reports/prompts/canonical_duplicate_pr.md @@ -0,0 +1,35 @@ +You are synthesizing a canonical pull request for duplicate open pull requests in `{{REPO}}`. + +Read these staged context files before making any edits: + +- Selected cluster metadata: `{{SELECTED_CLUSTER_PATH}}` +- Cluster context summary: `{{CLUSTER_CONTEXT_PATH}}` +- Per-PR context directory: `{{PR_CONTEXT_DIR}}` +- Target issue context: `{{ISSUE_CONTEXT_PATH}}` + +Open source PR numbers for this synthesis: `{{SOURCE_PR_NUMBERS}}` +Cluster id: `{{CLUSTER_ID}}` + +Constraints: + +1. Work only from the open source PRs listed above. Ignore closed or merged PRs even if they appear in the broader cluster metadata. +2. Synthesize a fresh patch against the current checked-out `{{REPO}}:{{DEFAULT_BRANCH}}` worktree state. Do not cherry-pick source branches blindly. +3. Use the staged PR diffs, file lists, comments, reviews, and review comments as evidence. Read files from disk as needed. +4. Produce one clean commit in this worktree if you can produce a valid canonical patch. +5. Keep the patch as small as possible. Prefer implementation changes first, and add only the minimum tests required to validate the fix. +6. {{FILE_POLICY_INSTRUCTION}} +7. 
Keep comments to the bare minimum required for readability. Remove explanatory churn rather than adding more. +8. Run targeted validation for the files you change. If the most relevant tests fail, do not hide that fact. +9. Do not push, create a fork, or open a pull request. The surrounding Python workflow handles publication deterministically. + +When you are done, return JSON that matches the provided output schema. Requirements for the JSON: + +- `status` must be `success` or `failed` +- `cluster_id` must be `{{CLUSTER_ID}}` +- `source_pr_numbers` must be a subset of the listed open source PRs and should include only the PRs whose concrete changes are reflected in the synthesized patch +- `commit_message` must match the commit you created when `status=success` +- `pr_title` must be suitable for a ready-for-review pull request against `{{REPO}}:{{DEFAULT_BRANCH}}` +- `tests_run` must list the exact validation commands you executed +- `summary` should be brief and concrete + +If you cannot produce a valid canonical patch, return `status="failed"` with a concise explanation in `summary`. diff --git a/src/slop_farmer/reports/read_views.py b/src/slop_farmer/reports/read_views.py new file mode 100644 index 0000000000000000000000000000000000000000..606a2f1a51fb22f7def936dd0a0352b33167c0e8 --- /dev/null +++ b/src/slop_farmer/reports/read_views.py @@ -0,0 +1,791 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Literal + +from slop_farmer.data.parquet_io import read_json, read_parquet_rows +from slop_farmer.data.snapshot_paths import ( + CURRENT_ANALYSIS_MANIFEST_PATH, + load_current_analysis_manifest, + repo_relative_path_to_local, +) + +AnalysisVariant = Literal["auto", "hybrid", "deterministic"] + + +@dataclass(slots=True, frozen=True) +class _SnapshotMetadata: + repo: str + snapshot_id: str + + +@dataclass(slots=True, frozen=True) +class _AnalysisSelection: + path: Path + payload: dict[str, Any] + variant_used: str + llm_enrichment: bool + + +def get_snapshot_surfaces(snapshot_dir: Path) -> dict[str, Any]: + issue_status = get_issue_cluster_status(snapshot_dir, variant="auto") + contributor_status = get_contributor_status(snapshot_dir) + return { + "issues": { + "available": issue_status["available"], + "variant_used": issue_status.get("variant_used"), + "llm_enrichment": issue_status.get("llm_enrichment"), + "generated_at": issue_status.get("generated_at"), + "cluster_count": (issue_status.get("counts") or {}).get("meta_bugs", 0), + "duplicate_pr_count": (issue_status.get("counts") or {}).get("duplicate_prs", 0), + "available_variants": issue_status.get("available_variants") or [], + }, + "contributors": { + "available": contributor_status["available"], + "generated_at": contributor_status.get("generated_at"), + "contributor_count": contributor_status.get("contributor_count", 0), + }, + } + + +def get_issue_cluster_status(snapshot_dir: Path, *, variant: AnalysisVariant) -> dict[str, Any]: + metadata = _snapshot_metadata(snapshot_dir) + candidates = _analysis_candidates(snapshot_dir) + selection = _select_analysis_report(candidates, variant=variant) + status = { + "repo": metadata.repo, + "snapshot_id": metadata.snapshot_id, + "variant_requested": variant, + "available": selection is not None, + "available_variants": sorted({candidate["variant"] for candidate in candidates}), + } + if selection is None: + return { + **status, + "variant_used": None, + "llm_enrichment": False, + "generated_at": None, + "report_path": 
None, + "counts": {"meta_bugs": 0, "duplicate_issues": 0, "duplicate_prs": 0}, + } + payload = selection.payload + return { + **status, + "variant_used": selection.variant_used, + "llm_enrichment": selection.llm_enrichment, + "generated_at": payload.get("generated_at"), + "report_path": selection.path.name, + "counts": _analysis_counts(payload), + } + + +def list_issue_clusters( + snapshot_dir: Path, + *, + limit: int | None, + variant: AnalysisVariant, +) -> dict[str, Any]: + metadata, selection, issue_map, pr_map = _analysis_context(snapshot_dir, variant=variant) + base = _analysis_base_payload(metadata, selection, variant=variant) + if selection is None: + return {**base, "clusters": [], "cluster_count": 0} + clusters = [ + _issue_cluster_summary(cluster, issue_map, pr_map, rank=index) + for index, cluster in enumerate(selection.payload.get("meta_bugs") or [], start=1) + ] + total = len(clusters) + return { + **base, + "clusters": clusters[:limit] if limit is not None else clusters, + "cluster_count": total, + } + + +def get_issue_cluster( + snapshot_dir: Path, + *, + cluster_id: str, + variant: AnalysisVariant, +) -> dict[str, Any]: + metadata, selection, issue_map, pr_map = _analysis_context(snapshot_dir, variant=variant) + base = _analysis_base_payload(metadata, selection, variant=variant) + if selection is None: + return { + **base, + "cluster_id": cluster_id, + "found": False, + "cluster": None, + "issues": [], + "pull_requests": [], + } + cluster = next( + ( + row + for row in selection.payload.get("meta_bugs") or [] + if str(row.get("cluster_id") or "") == cluster_id + ), + None, + ) + if cluster is None: + return { + **base, + "cluster_id": cluster_id, + "found": False, + "cluster": None, + "issues": [], + "pull_requests": [], + } + issue_numbers = _ordered_ints(cluster.get("issue_numbers")) + pr_numbers = _ordered_ints(cluster.get("pr_numbers")) + canonical_pr_number = _coerce_int(cluster.get("canonical_pr_number")) + return { + **base, + "cluster_id": cluster_id, + "found": True, + "cluster": { + **_issue_cluster_summary(cluster, issue_map, pr_map), + "canonical_issue_reason": cluster.get("canonical_issue_reason"), + "canonical_pr_reason": cluster.get("canonical_pr_reason"), + "best_issue_reason": cluster.get("best_issue_reason"), + "best_pr_reason": cluster.get("best_pr_reason"), + }, + "issues": [_issue_member_row(number, issue_map.get(number)) for number in issue_numbers], + "pull_requests": [ + _pr_member_row( + number, + pr_map.get(number), + role="canonical" if canonical_pr_number == number else "member", + ) + for number in pr_numbers + ], + } + + +def get_issue_clusters_for_pr( + snapshot_dir: Path, + *, + pr_number: int, + variant: AnalysisVariant, +) -> dict[str, Any]: + metadata, selection, issue_map, pr_map = _analysis_context(snapshot_dir, variant=variant) + base = _analysis_base_payload(metadata, selection, variant=variant) + if selection is None: + return {**base, "pr_number": pr_number, "found": False, "clusters": [], "cluster_count": 0} + matches = [] + for index, cluster in enumerate(selection.payload.get("meta_bugs") or [], start=1): + pr_numbers = _ordered_ints(cluster.get("pr_numbers")) + if pr_number not in pr_numbers: + continue + canonical_pr_number = _coerce_int(cluster.get("canonical_pr_number")) + matches.append( + { + **_issue_cluster_summary(cluster, issue_map, pr_map, rank=index), + "membership_role": "canonical" if canonical_pr_number == pr_number else "member", + } + ) + return { + **base, + "pr_number": pr_number, + "found": bool(matches), + 
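+        # One entry per cluster that lists this PR, tagged canonical vs. member.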
"clusters": matches, + "cluster_count": len(matches), + } + + +def check_issue_cluster_membership( + snapshot_dir: Path, + *, + pr_number: int, + cluster_id: str | None, + variant: AnalysisVariant, +) -> dict[str, Any]: + lookup = get_issue_clusters_for_pr(snapshot_dir, pr_number=pr_number, variant=variant) + matches = list(lookup.get("clusters") or []) + matching_cluster_ids = [str(row.get("cluster_id")) for row in matches if row.get("cluster_id")] + if cluster_id is None: + return { + **lookup, + "cluster_id": None, + "matched": bool(matching_cluster_ids), + "matching_cluster_ids": matching_cluster_ids, + } + match = next((row for row in matches if row.get("cluster_id") == cluster_id), None) + return { + **lookup, + "cluster_id": cluster_id, + "matched": match is not None, + "matching_cluster_ids": matching_cluster_ids, + "membership": match, + } + + +def list_issue_duplicate_prs( + snapshot_dir: Path, + *, + limit: int | None, + variant: AnalysisVariant, +) -> dict[str, Any]: + metadata, selection, issue_map, pr_map = _analysis_context(snapshot_dir, variant=variant) + base = _analysis_base_payload(metadata, selection, variant=variant) + if selection is None: + return {**base, "duplicate_prs": [], "duplicate_pr_count": 0} + rows = [ + _duplicate_pr_summary(entry, issue_map, pr_map, rank=index) + for index, entry in enumerate(selection.payload.get("duplicate_prs") or [], start=1) + ] + total = len(rows) + return { + **base, + "duplicate_prs": rows[:limit] if limit is not None else rows, + "duplicate_pr_count": total, + } + + +def get_issue_best(snapshot_dir: Path, *, variant: AnalysisVariant) -> dict[str, Any]: + metadata, selection, issue_map, pr_map = _analysis_context(snapshot_dir, variant=variant) + base = _analysis_base_payload(metadata, selection, variant=variant) + if selection is None: + return {**base, "best_issue": None, "best_pr": None} + return { + **base, + "best_issue": _best_issue_summary(selection.payload.get("best_issue"), issue_map), + "best_pr": _best_pr_summary(selection.payload.get("best_pr"), pr_map), + } + + +def get_contributor_status(snapshot_dir: Path) -> dict[str, Any]: + metadata = _snapshot_metadata(snapshot_dir) + report = _read_optional_json(snapshot_dir / "new-contributors-report.json") + raw_contributors = report.get("contributors") + contributors: list[Any] = raw_contributors if isinstance(raw_contributors, list) else [] + return { + "repo": str(report.get("repo") or metadata.repo), + "snapshot_id": str(report.get("snapshot_id") or metadata.snapshot_id), + "available": bool(report), + "generated_at": report.get("generated_at"), + "window_days": _coerce_int(report.get("window_days")), + "contributor_count": len(contributors), + } + + +def list_contributors(snapshot_dir: Path, *, limit: int | None) -> dict[str, Any]: + status = get_contributor_status(snapshot_dir) + report = _read_optional_json(snapshot_dir / "new-contributors-report.json") + rows = [ + _contributor_summary(entry, rank=index) + for index, entry in enumerate(report.get("contributors") or [], start=1) + if isinstance(entry, dict) + ] + total = len(rows) + return { + **status, + "contributors": rows[:limit] if limit is not None else rows, + "contributor_count": total, + } + + +def get_contributor(snapshot_dir: Path, *, author_login: str) -> dict[str, Any]: + status = get_contributor_status(snapshot_dir) + report = _read_optional_json(snapshot_dir / "new-contributors-report.json") + contributor = _find_contributor(report.get("contributors") or [], author_login) + if contributor is None: + 
return { + **status, + "author_login": author_login, + "found": False, + "summary": None, + "risk": None, + "contributor": None, + } + return { + **status, + "author_login": str(contributor.get("author_login") or author_login), + "found": True, + "summary": _contributor_summary(contributor), + "risk": _contributor_risk(contributor), + "contributor": contributor, + } + + +def get_contributor_risk(snapshot_dir: Path, *, author_login: str) -> dict[str, Any]: + contributor = get_contributor(snapshot_dir, author_login=author_login) + risk = contributor.get("risk") + return { + "repo": contributor.get("repo"), + "snapshot_id": contributor.get("snapshot_id"), + "available": contributor.get("available"), + "generated_at": contributor.get("generated_at"), + "author_login": contributor.get("author_login"), + "found": contributor.get("found"), + "risk_available": risk is not None, + "risk": risk, + } + + +def _analysis_context( + snapshot_dir: Path, + *, + variant: AnalysisVariant, +) -> tuple[ + _SnapshotMetadata, + _AnalysisSelection | None, + dict[int, dict[str, Any]], + dict[int, dict[str, Any]], +]: + metadata = _snapshot_metadata(snapshot_dir) + selection = _select_analysis_report(_analysis_candidates(snapshot_dir), variant=variant) + issue_map, pr_map = _artifact_maps(snapshot_dir) + return metadata, selection, issue_map, pr_map + + +def _analysis_base_payload( + metadata: _SnapshotMetadata, + selection: _AnalysisSelection | None, + *, + variant: AnalysisVariant, +) -> dict[str, Any]: + base = { + "repo": metadata.repo, + "snapshot_id": metadata.snapshot_id, + "variant_requested": variant, + "available": selection is not None, + "variant_used": None, + "llm_enrichment": False, + "generated_at": None, + } + if selection is None: + return base + return { + **base, + "variant_used": selection.variant_used, + "llm_enrichment": selection.llm_enrichment, + "generated_at": selection.payload.get("generated_at"), + } + + +def _analysis_candidates(snapshot_dir: Path) -> list[dict[str, Any]]: + candidates: list[dict[str, Any]] = [] + for path in _analysis_report_paths(snapshot_dir): + payload = _read_optional_json(path) + if not payload: + continue + llm_enrichment = bool(payload.get("llm_enrichment")) + candidates.append( + { + "path": path, + "payload": payload, + "variant": _analysis_variant(path.name, payload, llm_enrichment=llm_enrichment), + "llm_enrichment": llm_enrichment, + } + ) + return candidates + + +def _select_analysis_report( + candidates: list[dict[str, Any]], + *, + variant: AnalysisVariant, +) -> _AnalysisSelection | None: + if not candidates: + return None + if variant == "auto": + ordered = sorted(candidates, key=_analysis_auto_priority) + else: + ordered = [candidate for candidate in candidates if candidate["variant"] == variant] + ordered.sort(key=_analysis_specific_priority) + if not ordered: + return None + winner = ordered[0] + return _AnalysisSelection( + path=Path(winner["path"]), + payload=dict(winner["payload"]), + variant_used=str(winner["variant"]), + llm_enrichment=bool(winner["llm_enrichment"]), + ) + + +def _analysis_report_paths(snapshot_dir: Path) -> list[Path]: + ordered: list[Path] = [] + current_manifest_path = repo_relative_path_to_local( + snapshot_dir, CURRENT_ANALYSIS_MANIFEST_PATH + ) + if current_manifest_path.exists(): + try: + current_manifest = load_current_analysis_manifest(current_manifest_path) + except ValueError: + current_manifest = None + if current_manifest is not None: + for artifact_path in (current_manifest.get("artifacts") or {}).values(): + if 
not isinstance(artifact_path, str): + continue + ordered.append(repo_relative_path_to_local(snapshot_dir, artifact_path)) + ordered.extend( + [ + snapshot_dir / "analysis-report-hybrid.json", + snapshot_dir / "analysis-report-deterministic.json", + snapshot_dir / "analysis-report.json", + ] + ) + seen: set[Path] = set() + deduped: list[Path] = [] + for path in ordered: + if path in seen: + continue + seen.add(path) + deduped.append(path) + deduped.extend( + path for path in sorted(snapshot_dir.glob("analysis-report*.json")) if path not in seen + ) + return [path for path in deduped if path.exists()] + + +def _analysis_auto_priority(candidate: dict[str, Any]) -> tuple[int, str]: + path = Path(candidate["path"]) + if path.name == "analysis-report-hybrid.json": + return (0, path.name) + if bool(candidate.get("llm_enrichment")): + return (1, path.name) + if path.name == "analysis-report.json": + return (2, path.name) + return (3, path.name) + + +def _analysis_specific_priority(candidate: dict[str, Any]) -> tuple[int, str]: + path = Path(candidate["path"]) + if path.name.endswith(f"-{candidate['variant']}.json"): + return (0, path.name) + if path.name == "analysis-report.json": + return (1, path.name) + return (2, path.name) + + +def _analysis_variant(path_name: str, payload: dict[str, Any], *, llm_enrichment: bool) -> str: + lowered = path_name.lower() + if "hybrid" in lowered: + return "hybrid" + if "deterministic" in lowered: + return "deterministic" + if isinstance(payload.get("variant_used"), str): + variant_used = str(payload["variant_used"]).strip().lower() + if variant_used in {"hybrid", "deterministic"}: + return variant_used + return "hybrid" if llm_enrichment else "deterministic" + + +def _analysis_counts(payload: dict[str, Any]) -> dict[str, int]: + return { + "meta_bugs": len(payload.get("meta_bugs") or []), + "duplicate_issues": len(payload.get("duplicate_issues") or []), + "duplicate_prs": len(payload.get("duplicate_prs") or []), + } + + +def _artifact_maps( + snapshot_dir: Path, +) -> tuple[dict[int, dict[str, Any]], dict[int, dict[str, Any]]]: + issue_rows = ( + read_parquet_rows(snapshot_dir / "issues.parquet") + if (snapshot_dir / "issues.parquet").exists() + else [] + ) + pr_rows = ( + read_parquet_rows(snapshot_dir / "pull_requests.parquet") + if (snapshot_dir / "pull_requests.parquet").exists() + else [] + ) + issue_map = { + int(row["number"]): row for row in issue_rows if _coerce_int(row.get("number")) is not None + } + pr_map = { + int(row["number"]): row for row in pr_rows if _coerce_int(row.get("number")) is not None + } + return issue_map, pr_map + + +def _issue_cluster_summary( + cluster: dict[str, Any], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + *, + rank: int | None = None, +) -> dict[str, Any]: + canonical_issue_number = _coerce_int(cluster.get("canonical_issue_number")) + canonical_pr_number = _coerce_int(cluster.get("canonical_pr_number")) + issue_numbers = _ordered_ints(cluster.get("issue_numbers")) + pr_numbers = _ordered_ints(cluster.get("pr_numbers")) + return { + "rank": rank, + "cluster_id": str(cluster.get("cluster_id") or f"cluster-{rank or 0}"), + "title": _cluster_title( + cluster, issue_map, pr_map, canonical_issue_number, canonical_pr_number + ), + "summary": cluster.get("summary"), + "status": cluster.get("status"), + "confidence": _coerce_float(cluster.get("confidence")), + "canonical_issue_number": canonical_issue_number, + "canonical_issue_title": _title_for_issue(canonical_issue_number, issue_map), + 
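+        # Canonical titles/URLs come from the snapshot parquet maps and are None
+        # when the referenced issue or PR is absent from the snapshot.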
"canonical_issue_url": _url_for_issue(canonical_issue_number, issue_map), + "canonical_pr_number": canonical_pr_number, + "canonical_pr_title": _title_for_pr(canonical_pr_number, pr_map), + "canonical_pr_url": _url_for_pr(canonical_pr_number, pr_map), + "issue_numbers": issue_numbers, + "issue_count": len(issue_numbers), + "pr_numbers": pr_numbers, + "pr_count": len(pr_numbers), + "evidence_types": [str(value) for value in (cluster.get("evidence_types") or []) if value], + "github_url": _cluster_url(canonical_issue_number, canonical_pr_number, issue_map, pr_map), + } + + +def _cluster_title( + cluster: dict[str, Any], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + canonical_issue_number: int | None, + canonical_pr_number: int | None, +) -> str: + issue_title = _title_for_issue(canonical_issue_number, issue_map) + if issue_title: + return issue_title + pr_title = _title_for_pr(canonical_pr_number, pr_map) + if pr_title: + return pr_title + summary = str(cluster.get("summary") or "").strip() + if summary: + return summary + return str(cluster.get("cluster_id") or "cluster") + + +def _cluster_url( + canonical_issue_number: int | None, + canonical_pr_number: int | None, + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], +) -> str | None: + return _url_for_issue(canonical_issue_number, issue_map) or _url_for_pr( + canonical_pr_number, pr_map + ) + + +def _duplicate_pr_summary( + entry: dict[str, Any], + issue_map: dict[int, dict[str, Any]], + pr_map: dict[int, dict[str, Any]], + *, + rank: int, +) -> dict[str, Any]: + canonical_pr_number = _coerce_int(entry.get("canonical_pr_number")) + target_issue_number = _coerce_int(entry.get("target_issue_number")) + duplicates = _ordered_ints(entry.get("duplicate_pr_numbers")) + return { + "rank": rank, + "cluster_id": str(entry.get("cluster_id") or f"duplicate-pr-{rank}"), + "canonical_pr_number": canonical_pr_number, + "canonical_pr_title": _title_for_pr(canonical_pr_number, pr_map), + "canonical_pr_url": _url_for_pr(canonical_pr_number, pr_map), + "target_issue_number": target_issue_number, + "target_issue_title": _title_for_issue(target_issue_number, issue_map), + "target_issue_url": _url_for_issue(target_issue_number, issue_map), + "duplicate_pr_numbers": duplicates, + "duplicate_pr_count": len(duplicates), + "reason": entry.get("reason"), + } + + +def _best_issue_summary(entry: Any, issue_map: dict[int, dict[str, Any]]) -> dict[str, Any] | None: + if not isinstance(entry, dict): + return None + issue_number = _coerce_int(entry.get("issue_number")) + return { + "cluster_id": entry.get("cluster_id"), + "issue_number": issue_number, + "title": _title_for_issue(issue_number, issue_map), + "url": _url_for_issue(issue_number, issue_map), + "reason": entry.get("reason"), + "score": _coerce_float(entry.get("score")), + } + + +def _best_pr_summary(entry: Any, pr_map: dict[int, dict[str, Any]]) -> dict[str, Any] | None: + if not isinstance(entry, dict): + return None + pr_number = _coerce_int(entry.get("pr_number")) + return { + "cluster_id": entry.get("cluster_id"), + "pr_number": pr_number, + "title": _title_for_pr(pr_number, pr_map), + "url": _url_for_pr(pr_number, pr_map), + "reason": entry.get("reason"), + "score": _coerce_float(entry.get("score")), + } + + +def _issue_member_row(number: int, row: dict[str, Any] | None) -> dict[str, Any]: + row = row or {} + return { + "number": number, + "title": row.get("title"), + "state": row.get("state"), + "author_login": row.get("author_login"), + 
"created_at": row.get("created_at"), + "updated_at": row.get("updated_at"), + "html_url": row.get("html_url"), + } + + +def _pr_member_row(number: int, row: dict[str, Any] | None, *, role: str) -> dict[str, Any]: + row = row or {} + return { + "number": number, + "role": role, + "title": row.get("title"), + "author_login": row.get("author_login"), + "state": row.get("state"), + "draft": bool(row.get("draft")), + "merged": bool(row.get("merged")), + "author_association": row.get("author_association"), + "created_at": row.get("created_at"), + "updated_at": row.get("updated_at"), + "html_url": row.get("html_url"), + } + + +def _contributor_summary(contributor: dict[str, Any], *, rank: int | None = None) -> dict[str, Any]: + raw_activity = contributor.get("activity") + activity: dict[str, Any] = raw_activity if isinstance(raw_activity, dict) else {} + return { + "rank": rank, + "author_login": contributor.get("author_login"), + "name": contributor.get("name"), + "profile_url": contributor.get("profile_url"), + "repo_association": contributor.get("repo_association"), + "first_seen_in_snapshot": contributor.get("first_seen_in_snapshot"), + "new_to_repo": contributor.get("new_to_repo"), + "snapshot_pr_count": _coerce_int(contributor.get("snapshot_pr_count")) or 0, + "snapshot_issue_count": _coerce_int(contributor.get("snapshot_issue_count")) or 0, + "follow_through_score": contributor.get("follow_through_score"), + "breadth_score": contributor.get("breadth_score"), + "automation_risk_signal": contributor.get("automation_risk_signal"), + "heuristic_note": contributor.get("heuristic_note"), + "account_age_days": _coerce_int(contributor.get("account_age_days")), + "public_pr_count_42d": _coerce_int(activity.get("visible_authored_pr_count")), + "public_repo_count_42d": _coerce_int(activity.get("distinct_repos_with_authored_prs")), + "repo_pull_requests_url": contributor.get("repo_pull_requests_url"), + "repo_issues_url": contributor.get("repo_issues_url"), + } + + +def _contributor_risk(contributor: dict[str, Any]) -> dict[str, Any]: + raw_activity = contributor.get("activity") + activity: dict[str, Any] = raw_activity if isinstance(raw_activity, dict) else {} + return { + "automation_risk_signal": contributor.get("automation_risk_signal"), + "heuristic_note": contributor.get("heuristic_note"), + "follow_through_score": contributor.get("follow_through_score"), + "breadth_score": contributor.get("breadth_score"), + "account_age_days": _coerce_int(contributor.get("account_age_days")), + "public_pr_count_42d": _coerce_int(activity.get("visible_authored_pr_count")), + "public_repo_count_42d": _coerce_int(activity.get("distinct_repos_with_authored_prs")), + "report_reason": contributor.get("report_reason"), + } + + +def _find_contributor(entries: list[Any], author_login: str) -> dict[str, Any] | None: + lowered = author_login.casefold() + for entry in entries: + if not isinstance(entry, dict): + continue + login = str(entry.get("author_login") or "") + if login.casefold() == lowered: + return entry + return None + + +def _snapshot_metadata(snapshot_dir: Path) -> _SnapshotMetadata: + manifest = _read_optional_json(snapshot_dir / "manifest.json") + repo = str(manifest.get("repo") or _infer_repo(snapshot_dir) or "") + snapshot_id = str(manifest.get("snapshot_id") or snapshot_dir.name) + return _SnapshotMetadata(repo=repo, snapshot_id=snapshot_id) + + +def _infer_repo(snapshot_dir: Path) -> str | None: + for filename in ("pull_requests.parquet", "issues.parquet"): + path = snapshot_dir / filename + if not 
path.exists(): + continue + rows = read_parquet_rows(path) + if rows and rows[0].get("repo"): + return str(rows[0]["repo"]) + for filename in _analysis_report_paths(snapshot_dir): + payload = _read_optional_json(filename) + if payload.get("repo"): + return str(payload["repo"]) + report = _read_optional_json(snapshot_dir / "new-contributors-report.json") + if report.get("repo"): + return str(report["repo"]) + return None + + +def _title_for_issue(number: int | None, issue_map: dict[int, dict[str, Any]]) -> str | None: + if number is None or number not in issue_map: + return None + title = issue_map[number].get("title") + return str(title) if title else None + + +def _url_for_issue(number: int | None, issue_map: dict[int, dict[str, Any]]) -> str | None: + if number is None or number not in issue_map: + return None + value = issue_map[number].get("html_url") + return str(value) if value else None + + +def _title_for_pr(number: int | None, pr_map: dict[int, dict[str, Any]]) -> str | None: + if number is None or number not in pr_map: + return None + title = pr_map[number].get("title") + return str(title) if title else None + + +def _url_for_pr(number: int | None, pr_map: dict[int, dict[str, Any]]) -> str | None: + if number is None or number not in pr_map: + return None + value = pr_map[number].get("html_url") + return str(value) if value else None + + +def _ordered_ints(values: Any) -> list[int]: + if not isinstance(values, list): + return [] + ordered: list[int] = [] + for value in values: + number = _coerce_int(value) + if number is not None: + ordered.append(number) + return ordered + + +def _coerce_int(value: Any) -> int | None: + if value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _coerce_float(value: Any) -> float | None: + if value is None: + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + +def _read_optional_json(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + payload = read_json(path) + return payload if isinstance(payload, dict) else {} diff --git a/src/slop_farmer/reports/schemas/canonical_duplicate_pr_output.schema.json b/src/slop_farmer/reports/schemas/canonical_duplicate_pr_output.schema.json new file mode 100644 index 0000000000000000000000000000000000000000..b902a66535255f93e4503073689b452727eaeeac --- /dev/null +++ b/src/slop_farmer/reports/schemas/canonical_duplicate_pr_output.schema.json @@ -0,0 +1,48 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "additionalProperties": false, + "required": [ + "status", + "cluster_id", + "source_pr_numbers", + "tests_run", + "commit_message", + "pr_title", + "summary" + ], + "properties": { + "status": { + "type": "string", + "enum": [ + "success", + "failed" + ] + }, + "cluster_id": { + "type": "string", + "minLength": 1 + }, + "source_pr_numbers": { + "type": "array", + "items": { + "type": "integer" + } + }, + "tests_run": { + "type": "array", + "items": { + "type": "string" + } + }, + "commit_message": { + "type": "string" + }, + "pr_title": { + "type": "string" + }, + "summary": { + "type": "string" + } + } +} diff --git a/src/slop_farmer/reports/user_activity.py b/src/slop_farmer/reports/user_activity.py new file mode 100644 index 0000000000000000000000000000000000000000..05f0bd573d5f5d7070616f3b67cf9a7d783eea5a --- /dev/null +++ b/src/slop_farmer/reports/user_activity.py @@ -0,0 +1,491 @@ +from __future__ import annotations + +import json +import urllib.error 
+import urllib.request +from datetime import UTC, datetime, timedelta +from typing import Any + +from slop_farmer.config import resolve_github_token +from slop_farmer.data.http import urlopen_with_retry + +GRAPHQL_URL = "https://api.github.com/graphql" + +PROFILE_QUERY = """ +query UserActivityProfile($login: String!, $from: DateTime!, $to: DateTime!) { + rateLimit { cost remaining resetAt } + viewer { + login + organizations(first: 100) { + totalCount + nodes { + login + name + } + } + } + user(login: $login) { + login + name + createdAt + organizations(first: 100) { + totalCount + nodes { + login + name + } + } + starredRepositories(first: 100, orderBy: {field: STARRED_AT, direction: DESC}) { + totalCount + nodes { + nameWithOwner + stargazerCount + owner { + login + } + } + } + contributionsCollection(from: $from, to: $to) { + contributionCalendar { + totalContributions + weeks { + contributionDays { + date + contributionCount + } + } + } + totalIssueContributions + totalPullRequestContributions + pullRequestContributionsByRepository(maxRepositories: 10) { + repository { nameWithOwner } + contributions { totalCount } + } + issueContributionsByRepository(maxRepositories: 10) { + repository { nameWithOwner } + contributions { totalCount } + } + } + } +} +""".strip() + +SEARCH_PRS_QUERY = """ +query SearchPullRequests($query: String!, $cursor: String) { + rateLimit { cost remaining resetAt } + search(type: ISSUE, query: $query, first: 100, after: $cursor) { + issueCount + pageInfo { hasNextPage endCursor } + nodes { + ... on PullRequest { + number + state + merged + createdAt + updatedAt + repository { nameWithOwner stargazerCount } + } + } + } +} +""".strip() + +SEARCH_ISSUES_QUERY = """ +query SearchIssues($query: String!, $cursor: String) { + rateLimit { cost remaining resetAt } + search(type: ISSUE, query: $query, first: 100, after: $cursor) { + issueCount + pageInfo { hasNextPage endCursor } + nodes { + ... 
on Issue { + number + state + createdAt + updatedAt + repository { nameWithOwner } + } + } + } +} +""".strip() + + +def summarize_user(login: str, days: int, repo: str | None) -> dict[str, Any]: + now = datetime.now(tz=UTC) + start = (now - timedelta(days=days)).replace(microsecond=0) + from_iso = start.isoformat().replace("+00:00", "Z") + from_date = start.date().isoformat() + to_iso = now.replace(microsecond=0).isoformat().replace("+00:00", "Z") + + _user_activity_log(f"user enrichment {login}: fetching profile") + profile, _errors = _post_graphql( + PROFILE_QUERY, {"login": login, "from": from_iso, "to": to_iso} + ) + viewer = profile["viewer"] + user = profile["user"] + if user is None: + raise RuntimeError(f"unknown user {login!r}") + + contributions = user["contributionsCollection"] + calendar = contributions["contributionCalendar"] + repo_term = f" repo:{repo}" if repo else "" + pr_query = f"author:{login} is:pr created:>={from_date}{repo_term} sort:created-desc" + open_pr_query = ( + f"author:{login} is:pr is:open created:>={from_date}{repo_term} sort:created-desc" + ) + issue_query = f"author:{login} is:issue created:>={from_date}{repo_term} sort:created-desc" + + pr_count, prs, inaccessible_pr_nodes = _search_all(pr_query, prs=True, label=f"{login} prs") + open_pr_count, open_prs, inaccessible_open_pr_nodes = _search_all( + open_pr_query, prs=True, label=f"{login} open-prs" + ) + issue_count, issues, inaccessible_issue_nodes = _search_all( + issue_query, prs=False, label=f"{login} issues" + ) + + merged_prs = [pr for pr in prs if pr.get("merged")] + closed_unmerged_prs = [pr for pr in prs if pr.get("state") == "CLOSED" and not pr.get("merged")] + still_open_prs = [pr for pr in prs if pr.get("state") == "OPEN"] + open_pr_repos = sorted( + {pr["repository"]["nameWithOwner"] for pr in open_prs if pr.get("repository")} + ) + pr_repos = sorted({pr["repository"]["nameWithOwner"] for pr in prs if pr.get("repository")}) + issue_repos = sorted( + {issue["repository"]["nameWithOwner"] for issue in issues if issue.get("repository")} + ) + starred = user.get("starredRepositories") or {} + starred_nodes = [row for row in starred.get("nodes") or [] if isinstance(row, dict)] + non_self_starred = [ + row + for row in starred_nodes + if ((row.get("owner") or {}).get("login") or "").casefold() != login.casefold() + ] + + recent_pr_repo_stars = [ + int((pr.get("repository") or {}).get("stargazerCount") or 0) for pr in prs + ] + merged_pr_repo_stars = [ + int((pr.get("repository") or {}).get("stargazerCount") or 0) for pr in merged_prs + ] + closed_unmerged_pr_repo_stars = [ + int((pr.get("repository") or {}).get("stargazerCount") or 0) for pr in closed_unmerged_prs + ] + open_pr_repo_stars = [ + int((pr.get("repository") or {}).get("stargazerCount") or 0) for pr in open_prs + ] + repo_owners = sorted( + { + repo_name.split("/", 1)[0] + for repo_name in pr_repos + issue_repos + open_pr_repos + if "/" in repo_name and repo_name.split("/", 1)[0].casefold() != login.casefold() + } + ) + + public_org_rows = _public_orgs(login) + target_listed_org_rows = [ + {"login": row["login"], "name": row.get("name")} + for row in (user.get("organizations") or {}).get("nodes") or [] + if isinstance(row, dict) and row.get("login") + ] + viewer_listed_org_rows = [ + {"login": row["login"], "name": row.get("name")} + for row in (viewer.get("organizations") or {}).get("nodes") or [] + if isinstance(row, dict) and row.get("login") + ] + viewer_is_target = viewer.get("login", "").casefold() == login.casefold() + 
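+    # Note (descriptive comment, not in the original diff): the two lookups below read
+    # membership data that is only visible to the authenticated viewer (REST
+    # /user/memberships/orgs and the GraphQL viewerIsAMember field), so both are gated
+    # on viewer_is_target and return empty lists when the token does not belong to the
+    # profiled login.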
auth_membership_rows = _authenticated_memberships() if viewer_is_target else [] + direct_membership_checks = _check_viewer_org_membership(repo_owners) if viewer_is_target else [] + + public_org_logins = {row["login"] for row in public_org_rows} + listed_org_logins = {row["login"] for row in target_listed_org_rows} + auth_membership_logins = {row["login"] for row in auth_membership_rows} + directly_confirmed_orgs = sorted( + row["login"] for row in direct_membership_checks if row.get("viewer_is_a_member") + ) + listing_endpoints_partial = any( + org not in public_org_logins | listed_org_logins | auth_membership_logins + for org in directly_confirmed_orgs + ) + + created_at = datetime.fromisoformat(user["createdAt"].replace("Z", "+00:00")) + account_age_days = (now - created_at).days + + return { + "login": user["login"], + "name": user.get("name"), + "repo_scope": repo, + "window": {"days": days, "from": from_iso, "to": to_iso}, + "account": {"created_at": user["createdAt"], "age_days": account_age_days}, + "activity": { + "visible_contributions_total": calendar["totalContributions"], + **_contribution_calendar_summary(calendar["weeks"]), + "authored_issues": contributions["totalIssueContributions"], + "authored_pull_requests": contributions["totalPullRequestContributions"], + "visible_authored_issue_count": len(issues), + "visible_authored_pr_count": len(prs), + "visible_open_pr_count": len(open_prs), + "search_authored_issue_count": issue_count, + "search_authored_pr_count": pr_count, + "search_open_pr_count": open_pr_count, + "inaccessible_issue_nodes": inaccessible_issue_nodes, + "inaccessible_pr_nodes": inaccessible_pr_nodes, + "inaccessible_open_pr_nodes": inaccessible_open_pr_nodes, + "distinct_repos_with_authored_prs": len(pr_repos), + "distinct_repos_with_authored_issues": len(issue_repos), + "distinct_repos_with_open_prs": len(open_pr_repos), + "open_pr_count": len(open_prs), + "merged_pr_count": len(merged_prs), + "closed_unmerged_pr_count": len(closed_unmerged_prs), + "still_open_pr_count": len(still_open_prs), + "merged_pr_rate": _rate(len(merged_prs), len(prs)), + "closed_unmerged_pr_rate": _rate(len(closed_unmerged_prs), len(prs)), + "still_open_pr_rate": _rate(len(still_open_prs), len(prs)), + }, + "stars": { + "starred_repositories_total": starred.get("totalCount"), + "visible_starred_repositories_returned": len(starred_nodes), + "visible_non_self_starred_repositories": len(non_self_starred), + "visible_non_self_starred_examples": [ + row["nameWithOwner"] for row in non_self_starred[:10] + ], + "recent_pr_repo_star_distribution": _star_distribution(recent_pr_repo_stars), + "recent_merged_pr_repo_star_distribution": _star_distribution(merged_pr_repo_stars), + "recent_closed_unmerged_pr_repo_star_distribution": _star_distribution( + closed_unmerged_pr_repo_stars + ), + "recent_open_pr_repo_star_distribution": _star_distribution(open_pr_repo_stars), + }, + "top_repositories": { + "pull_requests": [ + { + "repo": row["repository"]["nameWithOwner"], + "count": row["contributions"]["totalCount"], + } + for row in contributions["pullRequestContributionsByRepository"] + ], + "issues": [ + { + "repo": row["repository"]["nameWithOwner"], + "count": row["contributions"]["totalCount"], + } + for row in contributions["issueContributionsByRepository"] + ], + "open_pr_repositories": open_pr_repos, + }, + "organization_membership": { + "target_is_viewer": viewer_is_target, + "public_orgs": public_org_rows, + "graphql_target_listed_orgs": target_listed_org_rows, + 
"graphql_viewer_listed_orgs": viewer_listed_org_rows, + "authenticated_memberships": auth_membership_rows, + "checked_recent_repo_owner_orgs": direct_membership_checks, + "visibility": { + "listing_endpoints_partial": listing_endpoints_partial, + "public_org_count": len(public_org_rows), + "graphql_target_listed_org_count": len(target_listed_org_rows), + "authenticated_membership_count": len(auth_membership_rows), + "directly_confirmed_membership_count": len(directly_confirmed_orgs), + "directly_confirmed_memberships": directly_confirmed_orgs, + }, + }, + } + + +def _post_graphql( + query: str, + variables: dict[str, Any], + *, + allow_partial: bool = False, +) -> tuple[dict[str, Any], list[dict[str, Any]]]: + token = resolve_github_token() + if not token: + raise RuntimeError("missing GITHUB_TOKEN/GRAPHQL_TOKEN/GH_TOKEN") + body = json.dumps({"query": query, "variables": variables}).encode() + request = urllib.request.Request( + GRAPHQL_URL, + data=body, + headers={ + "Authorization": f"bearer {token}", + "User-Agent": "slop-farmer", + "Content-Type": "application/json", + "Accept": "application/json", + }, + method="POST", + ) + try: + with urlopen_with_retry( + request, + timeout=120, + log=_user_activity_log, + label="GitHub GraphQL user activity", + ) as response: + payload = json.load(response) + except urllib.error.HTTPError as exc: # pragma: no cover - live network only + detail = exc.read().decode("utf-8", "replace") + raise RuntimeError(f"graphql request failed: {exc.code} {detail}") from exc + errors = payload.get("errors") or [] + if errors and not allow_partial: + raise RuntimeError(json.dumps(errors)) + return payload["data"], errors + + +def _search_all( + query: str, *, prs: bool, label: str | None = None +) -> tuple[int, list[dict[str, Any]], int]: + nodes: list[dict[str, Any]] = [] + cursor: str | None = None + issue_count = 0 + inaccessible = 0 + search_query = SEARCH_PRS_QUERY if prs else SEARCH_ISSUES_QUERY + page = 0 + while True: + page += 1 + data, errors = _post_graphql( + search_query, {"query": query, "cursor": cursor}, allow_partial=True + ) + search = data["search"] + issue_count = search["issueCount"] + inaccessible += len(errors) + nodes.extend(node for node in search["nodes"] if isinstance(node, dict)) + if label and ( + page == 1 + or page % 5 == 0 + or not search["pageInfo"]["hasNextPage"] + or len(nodes) >= 1000 + ): + _user_activity_log( + f"user enrichment {label}: page {page}, loaded {len(nodes)} visible rows, " + f"search_count={issue_count}, inaccessible={inaccessible}" + ) + if not search["pageInfo"]["hasNextPage"] or len(nodes) >= 1000: + break + cursor = search["pageInfo"]["endCursor"] + return issue_count, nodes, inaccessible + + +def _public_orgs(login: str) -> list[dict[str, Any]]: + payload = _get_json(f"https://api.github.com/users/{login}/orgs") + if not isinstance(payload, list): + return [] + return [ + {"login": row.get("login"), "name": row.get("name")} + for row in payload + if isinstance(row, dict) and row.get("login") + ] + + +def _authenticated_memberships() -> list[dict[str, Any]]: + payload = _get_json("https://api.github.com/user/memberships/orgs") + if not isinstance(payload, list): + return [] + rows = [] + for row in payload: + if not isinstance(row, dict): + continue + org = row.get("organization") or {} + if not org.get("login"): + continue + rows.append( + { + "login": org.get("login"), + "name": org.get("name"), + "state": row.get("state"), + "role": row.get("role"), + } + ) + return rows + + +def 
_check_viewer_org_membership(logins: list[str]) -> list[dict[str, Any]]: + if not logins: + return [] + aliases = {f"org{i}": login for i, login in enumerate(logins)} + fields = "\n".join( + f'{alias}: organization(login: "{login}") {{ login name viewerIsAMember viewerCanAdminister }}' + for alias, login in aliases.items() + ) + query = f"query ViewerOrgChecks {{ rateLimit {{ cost remaining resetAt }}\n{fields}\n}}" + data, _errors = _post_graphql(query, {}) + rows = [] + for alias in aliases: + org = data.get(alias) + if isinstance(org, dict) and org.get("login"): + rows.append( + { + "login": org["login"], + "name": org.get("name"), + "viewer_is_a_member": bool(org.get("viewerIsAMember")), + "viewer_can_administer": bool(org.get("viewerCanAdminister")), + } + ) + return rows + + +def _get_json(url: str) -> Any: + token = resolve_github_token() + if not token: + raise RuntimeError("missing GITHUB_TOKEN/GRAPHQL_TOKEN/GH_TOKEN") + request = urllib.request.Request( + url, + headers={ + "Authorization": f"bearer {token}", + "User-Agent": "slop-farmer", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }, + ) + try: + with urlopen_with_retry( + request, + timeout=120, + log=_user_activity_log, + label=url, + ) as response: + return json.load(response) + except urllib.error.HTTPError as exc: # pragma: no cover - live network only + detail = exc.read().decode("utf-8", "replace") + raise RuntimeError(f"rest request failed: {exc.code} {url} {detail}") from exc + + +def _contribution_calendar_summary(weeks: list[dict[str, Any]]) -> dict[str, Any]: + daily = [day for week in weeks for day in week["contributionDays"]] + active_days = sum(1 for day in daily if day["contributionCount"] > 0) + weekly = [sum(day["contributionCount"] for day in week["contributionDays"]) for week in weeks] + return {"active_days": active_days, "weekly_totals": weekly} + + +def _star_distribution(values: list[int]) -> dict[str, Any]: + if not values: + return { + "count": 0, + "min": None, + "median": None, + "max": None, + "buckets": {"lt_100": 0, "100_to_999": 0, "1k_to_9k": 0, "10k_plus": 0}, + } + ordered = sorted(values) + mid = len(ordered) // 2 + median = ordered[mid] if len(ordered) % 2 else round((ordered[mid - 1] + ordered[mid]) / 2, 1) + return { + "count": len(values), + "min": min(values), + "median": median, + "max": max(values), + "buckets": { + "lt_100": sum(1 for value in values if value < 100), + "100_to_999": sum(1 for value in values if 100 <= value < 1000), + "1k_to_9k": sum(1 for value in values if 1000 <= value < 10000), + "10k_plus": sum(1 for value in values if value >= 10000), + }, + } + + +def _rate(numerator: int, denominator: int) -> float | None: + if denominator == 0: + return None + return round(numerator / denominator, 4) + + +def _user_activity_log(message: str) -> None: + stamp = datetime.now(tz=UTC).strftime("%H:%M:%SZ") + print(f"[{stamp}] {message}", flush=True) diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000000000000000000000000000000000000..9771f09da266a48a15654e77fc5970209a9be7df --- /dev/null +++ b/uv.lock @@ -0,0 +1,2859 @@ +version = 1 +revision = 3 +requires-python = ">=3.13.5" + +[[package]] +name = "a2a-sdk" +version = "0.3.26" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "protobuf" }, + { name = "pydantic" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/be/97/a6840e01795b182ce751ca165430d46459927cde9bfab838087cbb24aef7/a2a_sdk-0.3.26.tar.gz", hash = "sha256:44068e2d037afbb07ab899267439e9bc7eaa7ac2af94f1e8b239933c993ad52d", size = 274598, upload-time = "2026-04-09T15:21:13.902Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/d5/51f4ee1bf3b736add42a542d3c8a3fd3fa85f3d36c17972127defc46c26f/a2a_sdk-0.3.26-py3-none-any.whl", hash = "sha256:754e0573f6d33b225c1d8d51f640efa69cbbed7bdfb06ce9c3540ea9f58d4a91", size = 151016, upload-time = "2026-04-09T15:21:12.35Z" }, +] + +[[package]] +name = "agent-client-protocol" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/13/3b893421369767e7043cc115d6ef0df417c298b84563be3a12df0416158d/agent_client_protocol-0.9.0.tar.gz", hash = "sha256:f744c48ab9af0f0b4452e5ab5498d61bcab97c26dbe7d6feec5fd36de49be30b", size = 71853, upload-time = "2026-03-26T01:21:00.379Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/ed/c284543c08aa443a4ef2c8bd120be51da8433dd174c01749b5d87c333f22/agent_client_protocol-0.9.0-py3-none-any.whl", hash = "sha256:06911500b51d8cb69112544e2be01fc5e7db39ef88fecbc3848c5c6f194798ee", size = 56850, upload-time = "2026-03-26T01:20:59.252Z" }, +] + +[[package]] +name = "aiofile" +version = "3.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "caio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/67/e2/d7cb819de8df6b5c1968a2756c3cb4122d4fa2b8fc768b53b7c9e5edb646/aiofile-3.9.0.tar.gz", hash = "sha256:e5ad718bb148b265b6df1b3752c4d1d83024b93da9bd599df74b9d9ffcf7919b", size = 17943, upload-time = "2024-10-08T10:39:35.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/25/da1f0b4dd970e52bf5a36c204c107e11a0c6d3ed195eba0bfbc664c312b2/aiofile-3.9.0-py3-none-any.whl", hash = "sha256:ce2f6c1571538cbdfa0143b04e16b208ecb0e9cb4148e528af8a640ed51cc8aa", size = 19539, upload-time = "2024-10-08T10:39:32.955Z" }, +] + +[[package]] +name = "aiohappyeyeballs" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload-time = "2025-03-12T01:42:48.764Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload-time = "2025-03-12T01:42:47.083Z" }, +] + +[[package]] +name = "aiohttp" +version = "3.13.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohappyeyeballs" }, + { name = "aiosignal" }, + { name = "attrs" }, + { name = "frozenlist" }, + { name = "multidict" }, + { name = "propcache" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/9a/152096d4808df8e4268befa55fba462f440f14beab85e8ad9bf990516918/aiohttp-3.13.5.tar.gz", hash = "sha256:9d98cc980ecc96be6eb4c1994ce35d28d8b1f5e5208a23b421187d1209dbb7d1", size = 7858271, upload-time = "2026-03-31T22:01:03.343Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/78/e9/d76bf503005709e390122d34e15256b88f7008e246c4bdbe915cd4f1adce/aiohttp-3.13.5-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5029cc80718bbd545123cd8fe5d15025eccaaaace5d0eeec6bd556ad6163d61", size = 742930, upload-time = "2026-03-31T21:58:13.155Z" }, + { url = "https://files.pythonhosted.org/packages/57/00/4b7b70223deaebd9bb85984d01a764b0d7bd6526fcdc73cca83bcbe7243e/aiohttp-3.13.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4bb6bf5811620003614076bdc807ef3b5e38244f9d25ca5fe888eaccea2a9832", size = 496927, upload-time = "2026-03-31T21:58:15.073Z" }, + { url = "https://files.pythonhosted.org/packages/9c/f5/0fb20fb49f8efdcdce6cd8127604ad2c503e754a8f139f5e02b01626523f/aiohttp-3.13.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a84792f8631bf5a94e52d9cc881c0b824ab42717165a5579c760b830d9392ac9", size = 497141, upload-time = "2026-03-31T21:58:17.009Z" }, + { url = "https://files.pythonhosted.org/packages/3b/86/b7c870053e36a94e8951b803cb5b909bfbc9b90ca941527f5fcafbf6b0fa/aiohttp-3.13.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57653eac22c6a4c13eb22ecf4d673d64a12f266e72785ab1c8b8e5940d0e8090", size = 1732476, upload-time = "2026-03-31T21:58:18.925Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e5/4e161f84f98d80c03a238671b4136e6530453d65262867d989bbe78244d0/aiohttp-3.13.5-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5e5f7debc7a57af53fdf5c5009f9391d9f4c12867049d509bf7bb164a6e295b", size = 1706507, upload-time = "2026-03-31T21:58:21.094Z" }, + { url = "https://files.pythonhosted.org/packages/d4/56/ea11a9f01518bd5a2a2fcee869d248c4b8a0cfa0bb13401574fa31adf4d4/aiohttp-3.13.5-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c719f65bebcdf6716f10e9eff80d27567f7892d8988c06de12bbbd39307c6e3a", size = 1773465, upload-time = "2026-03-31T21:58:23.159Z" }, + { url = "https://files.pythonhosted.org/packages/eb/40/333ca27fb74b0383f17c90570c748f7582501507307350a79d9f9f3c6eb1/aiohttp-3.13.5-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d97f93fdae594d886c5a866636397e2bcab146fd7a132fd6bb9ce182224452f8", size = 1873523, upload-time = "2026-03-31T21:58:25.59Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d2/e2f77eef1acb7111405433c707dc735e63f67a56e176e72e9e7a2cd3f493/aiohttp-3.13.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3df334e39d4c2f899a914f1dba283c1aadc311790733f705182998c6f7cae665", size = 1754113, upload-time = "2026-03-31T21:58:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/fb/56/3f653d7f53c89669301ec9e42c95233e2a0c0a6dd051269e6e678db4fdb0/aiohttp-3.13.5-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fe6970addfea9e5e081401bcbadf865d2b6da045472f58af08427e108d618540", size = 1562351, upload-time = "2026-03-31T21:58:29.918Z" }, + { url = "https://files.pythonhosted.org/packages/ec/a6/9b3e91eb8ae791cce4ee736da02211c85c6f835f1bdfac0594a8a3b7018c/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7becdf835feff2f4f335d7477f121af787e3504b48b449ff737afb35869ba7bb", size = 1693205, upload-time = "2026-03-31T21:58:32.214Z" }, + { url = "https://files.pythonhosted.org/packages/98/fc/bfb437a99a2fcebd6b6eaec609571954de2ed424f01c352f4b5504371dd3/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = 
"sha256:676e5651705ad5d8a70aeb8eb6936c436d8ebbd56e63436cb7dd9bb36d2a9a46", size = 1730618, upload-time = "2026-03-31T21:58:34.728Z" }, + { url = "https://files.pythonhosted.org/packages/e4/b6/c8534862126191a034f68153194c389addc285a0f1347d85096d349bbc15/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9b16c653d38eb1a611cc898c41e76859ca27f119d25b53c12875fd0474ae31a8", size = 1745185, upload-time = "2026-03-31T21:58:36.909Z" }, + { url = "https://files.pythonhosted.org/packages/0b/93/4ca8ee2ef5236e2707e0fd5fecb10ce214aee1ff4ab307af9c558bda3b37/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:999802d5fa0389f58decd24b537c54aa63c01c3219ce17d1214cbda3c2b22d2d", size = 1557311, upload-time = "2026-03-31T21:58:39.38Z" }, + { url = "https://files.pythonhosted.org/packages/57/ae/76177b15f18c5f5d094f19901d284025db28eccc5ae374d1d254181d33f4/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ec707059ee75732b1ba130ed5f9580fe10ff75180c812bc267ded039db5128c6", size = 1773147, upload-time = "2026-03-31T21:58:41.476Z" }, + { url = "https://files.pythonhosted.org/packages/01/a4/62f05a0a98d88af59d93b7fcac564e5f18f513cb7471696ac286db970d6a/aiohttp-3.13.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2d6d44a5b48132053c2f6cd5c8cb14bc67e99a63594e336b0f2af81e94d5530c", size = 1730356, upload-time = "2026-03-31T21:58:44.049Z" }, + { url = "https://files.pythonhosted.org/packages/e4/85/fc8601f59dfa8c9523808281f2da571f8b4699685f9809a228adcc90838d/aiohttp-3.13.5-cp313-cp313-win32.whl", hash = "sha256:329f292ed14d38a6c4c435e465f48bebb47479fd676a0411936cc371643225cc", size = 432637, upload-time = "2026-03-31T21:58:46.167Z" }, + { url = "https://files.pythonhosted.org/packages/c0/1b/ac685a8882896acf0f6b31d689e3792199cfe7aba37969fa91da63a7fa27/aiohttp-3.13.5-cp313-cp313-win_amd64.whl", hash = "sha256:69f571de7500e0557801c0b51f4780482c0ec5fe2ac851af5a92cfce1af1cb83", size = 458896, upload-time = "2026-03-31T21:58:48.119Z" }, + { url = "https://files.pythonhosted.org/packages/5d/ce/46572759afc859e867a5bc8ec3487315869013f59281ce61764f76d879de/aiohttp-3.13.5-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:eb4639f32fd4a9904ab8fb45bf3383ba71137f3d9d4ba25b3b3f3109977c5b8c", size = 745721, upload-time = "2026-03-31T21:58:50.229Z" }, + { url = "https://files.pythonhosted.org/packages/13/fe/8a2efd7626dbe6049b2ef8ace18ffda8a4dfcbe1bcff3ac30c0c7575c20b/aiohttp-3.13.5-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:7e5dc4311bd5ac493886c63cbf76ab579dbe4641268e7c74e48e774c74b6f2be", size = 497663, upload-time = "2026-03-31T21:58:52.232Z" }, + { url = "https://files.pythonhosted.org/packages/9b/91/cc8cc78a111826c54743d88651e1687008133c37e5ee615fee9b57990fac/aiohttp-3.13.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:756c3c304d394977519824449600adaf2be0ccee76d206ee339c5e76b70ded25", size = 499094, upload-time = "2026-03-31T21:58:54.566Z" }, + { url = "https://files.pythonhosted.org/packages/0a/33/a8362cb15cf16a3af7e86ed11962d5cd7d59b449202dc576cdc731310bde/aiohttp-3.13.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecc26751323224cf8186efcf7fbcbc30f4e1d8c7970659daf25ad995e4032a56", size = 1726701, upload-time = "2026-03-31T21:58:56.864Z" }, + { url = "https://files.pythonhosted.org/packages/45/0c/c091ac5c3a17114bd76cbf85d674650969ddf93387876cf67f754204bd77/aiohttp-3.13.5-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = 
"sha256:10a75acfcf794edf9d8db50e5a7ec5fc818b2a8d3f591ce93bc7b1210df016d2", size = 1683360, upload-time = "2026-03-31T21:58:59.072Z" }, + { url = "https://files.pythonhosted.org/packages/23/73/bcee1c2b79bc275e964d1446c55c54441a461938e70267c86afaae6fba27/aiohttp-3.13.5-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0f7a18f258d124cd678c5fe072fe4432a4d5232b0657fca7c1847f599233c83a", size = 1773023, upload-time = "2026-03-31T21:59:01.776Z" }, + { url = "https://files.pythonhosted.org/packages/c7/ef/720e639df03004fee2d869f771799d8c23046dec47d5b81e396c7cda583a/aiohttp-3.13.5-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:df6104c009713d3a89621096f3e3e88cc323fd269dbd7c20afe18535094320be", size = 1853795, upload-time = "2026-03-31T21:59:04.568Z" }, + { url = "https://files.pythonhosted.org/packages/bd/c9/989f4034fb46841208de7aeeac2c6d8300745ab4f28c42f629ba77c2d916/aiohttp-3.13.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:241a94f7de7c0c3b616627aaad530fe2cb620084a8b144d3be7b6ecfe95bae3b", size = 1730405, upload-time = "2026-03-31T21:59:07.221Z" }, + { url = "https://files.pythonhosted.org/packages/ce/75/ee1fd286ca7dc599d824b5651dad7b3be7ff8d9a7e7b3fe9820d9180f7db/aiohttp-3.13.5-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c974fb66180e58709b6fc402846f13791240d180b74de81d23913abe48e96d94", size = 1558082, upload-time = "2026-03-31T21:59:09.484Z" }, + { url = "https://files.pythonhosted.org/packages/c3/20/1e9e6650dfc436340116b7aa89ff8cb2bbdf0abc11dfaceaad8f74273a10/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6e27ea05d184afac78aabbac667450c75e54e35f62238d44463131bd3f96753d", size = 1692346, upload-time = "2026-03-31T21:59:12.068Z" }, + { url = "https://files.pythonhosted.org/packages/d8/40/8ebc6658d48ea630ac7903912fe0dd4e262f0e16825aa4c833c56c9f1f56/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a79a6d399cef33a11b6f004c67bb07741d91f2be01b8d712d52c75711b1e07c7", size = 1698891, upload-time = "2026-03-31T21:59:14.552Z" }, + { url = "https://files.pythonhosted.org/packages/d8/78/ea0ae5ec8ba7a5c10bdd6e318f1ba5e76fcde17db8275188772afc7917a4/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c632ce9c0b534fbe25b52c974515ed674937c5b99f549a92127c85f771a78772", size = 1742113, upload-time = "2026-03-31T21:59:17.068Z" }, + { url = "https://files.pythonhosted.org/packages/8a/66/9d308ed71e3f2491be1acb8769d96c6f0c47d92099f3bc9119cada27b357/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:fceedde51fbd67ee2bcc8c0b33d0126cc8b51ef3bbde2f86662bd6d5a6f10ec5", size = 1553088, upload-time = "2026-03-31T21:59:19.541Z" }, + { url = "https://files.pythonhosted.org/packages/da/a6/6cc25ed8dfc6e00c90f5c6d126a98e2cf28957ad06fa1036bd34b6f24a2c/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f92995dfec9420bb69ae629abf422e516923ba79ba4403bc750d94fb4a6c68c1", size = 1757976, upload-time = "2026-03-31T21:59:22.311Z" }, + { url = "https://files.pythonhosted.org/packages/c1/2b/cce5b0ffe0de99c83e5e36d8f828e4161e415660a9f3e58339d07cce3006/aiohttp-3.13.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20ae0ff08b1f2c8788d6fb85afcb798654ae6ba0b747575f8562de738078457b", size = 1712444, upload-time = "2026-03-31T21:59:24.635Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/cf/9e1795b4160c58d29421eafd1a69c6ce351e2f7c8d3c6b7e4ca44aea1a5b/aiohttp-3.13.5-cp314-cp314-win32.whl", hash = "sha256:b20df693de16f42b2472a9c485e1c948ee55524786a0a34345511afdd22246f3", size = 438128, upload-time = "2026-03-31T21:59:27.291Z" }, + { url = "https://files.pythonhosted.org/packages/22/4d/eaedff67fc805aeba4ba746aec891b4b24cebb1a7d078084b6300f79d063/aiohttp-3.13.5-cp314-cp314-win_amd64.whl", hash = "sha256:f85c6f327bf0b8c29da7d93b1cabb6363fb5e4e160a32fa241ed2dce21b73162", size = 464029, upload-time = "2026-03-31T21:59:29.429Z" }, + { url = "https://files.pythonhosted.org/packages/79/11/c27d9332ee20d68dd164dc12a6ecdef2e2e35ecc97ed6cf0d2442844624b/aiohttp-3.13.5-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:1efb06900858bb618ff5cee184ae2de5828896c448403d51fb633f09e109be0a", size = 778758, upload-time = "2026-03-31T21:59:31.547Z" }, + { url = "https://files.pythonhosted.org/packages/04/fb/377aead2e0a3ba5f09b7624f702a964bdf4f08b5b6728a9799830c80041e/aiohttp-3.13.5-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:fee86b7c4bd29bdaf0d53d14739b08a106fdda809ca5fe032a15f52fae5fe254", size = 512883, upload-time = "2026-03-31T21:59:34.098Z" }, + { url = "https://files.pythonhosted.org/packages/bb/a6/aa109a33671f7a5d3bd78b46da9d852797c5e665bfda7d6b373f56bff2ec/aiohttp-3.13.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:20058e23909b9e65f9da62b396b77dfa95965cbe840f8def6e572538b1d32e36", size = 516668, upload-time = "2026-03-31T21:59:36.497Z" }, + { url = "https://files.pythonhosted.org/packages/79/b3/ca078f9f2fa9563c36fb8ef89053ea2bb146d6f792c5104574d49d8acb63/aiohttp-3.13.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cf20a8d6868cb15a73cab329ffc07291ba8c22b1b88176026106ae39aa6df0f", size = 1883461, upload-time = "2026-03-31T21:59:38.723Z" }, + { url = "https://files.pythonhosted.org/packages/b7/e3/a7ad633ca1ca497b852233a3cce6906a56c3225fb6d9217b5e5e60b7419d/aiohttp-3.13.5-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:330f5da04c987f1d5bdb8ae189137c77139f36bd1cb23779ca1a354a4b027800", size = 1747661, upload-time = "2026-03-31T21:59:41.187Z" }, + { url = "https://files.pythonhosted.org/packages/33/b9/cd6fe579bed34a906d3d783fe60f2fa297ef55b27bb4538438ee49d4dc41/aiohttp-3.13.5-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6f1cbf0c7926d315c3c26c2da41fd2b5d2fe01ac0e157b78caefc51a782196cf", size = 1863800, upload-time = "2026-03-31T21:59:43.84Z" }, + { url = "https://files.pythonhosted.org/packages/c0/3f/2c1e2f5144cefa889c8afd5cf431994c32f3b29da9961698ff4e3811b79a/aiohttp-3.13.5-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:53fc049ed6390d05423ba33103ded7281fe897cf97878f369a527070bd95795b", size = 1958382, upload-time = "2026-03-31T21:59:46.187Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/f31ec3f1013723b3babe3609e7f119c2c2fb6ef33da90061a705ef3e1bc8/aiohttp-3.13.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:898703aa2667e3c5ca4c54ca36cd73f58b7a38ef87a5606414799ebce4d3fd3a", size = 1803724, upload-time = "2026-03-31T21:59:48.656Z" }, + { url = "https://files.pythonhosted.org/packages/0e/b4/57712dfc6f1542f067daa81eb61da282fab3e6f1966fca25db06c4fc62d5/aiohttp-3.13.5-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:0494a01ca9584eea1e5fbd6d748e61ecff218c51b576ee1999c23db7066417d8", size = 1640027, upload-time = "2026-03-31T21:59:51.284Z" }, + { url = "https://files.pythonhosted.org/packages/25/3c/734c878fb43ec083d8e31bf029daae1beafeae582d1b35da234739e82ee7/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6cf81fe010b8c17b09495cbd15c1d35afbc8fb405c0c9cf4738e5ae3af1d65be", size = 1806644, upload-time = "2026-03-31T21:59:53.753Z" }, + { url = "https://files.pythonhosted.org/packages/20/a5/f671e5cbec1c21d044ff3078223f949748f3a7f86b14e34a365d74a5d21f/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:c564dd5f09ddc9d8f2c2d0a301cd30a79a2cc1b46dd1a73bef8f0038863d016b", size = 1791630, upload-time = "2026-03-31T21:59:56.239Z" }, + { url = "https://files.pythonhosted.org/packages/0b/63/fb8d0ad63a0b8a99be97deac8c04dacf0785721c158bdf23d679a87aa99e/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:2994be9f6e51046c4f864598fd9abeb4fba6e88f0b2152422c9666dcd4aea9c6", size = 1809403, upload-time = "2026-03-31T21:59:59.103Z" }, + { url = "https://files.pythonhosted.org/packages/59/0c/bfed7f30662fcf12206481c2aac57dedee43fe1c49275e85b3a1e1742294/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:157826e2fa245d2ef46c83ea8a5faf77ca19355d278d425c29fda0beb3318037", size = 1634924, upload-time = "2026-03-31T22:00:02.116Z" }, + { url = "https://files.pythonhosted.org/packages/17/d6/fd518d668a09fd5a3319ae5e984d4d80b9a4b3df4e21c52f02251ef5a32e/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:a8aca50daa9493e9e13c0f566201a9006f080e7c50e5e90d0b06f53146a54500", size = 1836119, upload-time = "2026-03-31T22:00:04.756Z" }, + { url = "https://files.pythonhosted.org/packages/78/b7/15fb7a9d52e112a25b621c67b69c167805cb1f2ab8f1708a5c490d1b52fe/aiohttp-3.13.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3b13560160d07e047a93f23aaa30718606493036253d5430887514715b67c9d9", size = 1772072, upload-time = "2026-03-31T22:00:07.494Z" }, + { url = "https://files.pythonhosted.org/packages/7e/df/57ba7f0c4a553fc2bd8b6321df236870ec6fd64a2a473a8a13d4f733214e/aiohttp-3.13.5-cp314-cp314t-win32.whl", hash = "sha256:9a0f4474b6ea6818b41f82172d799e4b3d29e22c2c520ce4357856fced9af2f8", size = 471819, upload-time = "2026-03-31T22:00:10.277Z" }, + { url = "https://files.pythonhosted.org/packages/62/29/2f8418269e46454a26171bfdd6a055d74febf32234e474930f2f60a17145/aiohttp-3.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:18a2f6c1182c51baa1d28d68fea51513cb2a76612f038853c0ad3c145423d3d9", size = 505441, upload-time = "2026-03-31T22:00:12.791Z" }, +] + +[[package]] +name = "aiosignal" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "frozenlist" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, +] + +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anthropic" +version = "0.96.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "docstring-parser" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/7e/672f533dee813028d2c699bfd2a7f52c9118d7353680d9aa44b9e23f717f/anthropic-0.96.0.tar.gz", hash = "sha256:9de947b737f39452f68aa520f1c2239d44119c9b73b0fb6d4e6ca80f00279ee6", size = 658210, upload-time = "2026-04-16T14:28:02.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/5a/72f33204064b6e87601a71a6baf8d855769f8a0c1eaae8d06a1094872371/anthropic-0.96.0-py3-none-any.whl", hash = "sha256:9a6e335a354602a521cd9e777e92bfd46ba6e115bf9bbfe6135311e8fb2015b2", size = 635930, upload-time = "2026-04-16T14:28:01.436Z" }, +] + +[package.optional-dependencies] +vertex = [ + { name = "google-auth", extra = ["requests"] }, +] + +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, +] + +[[package]] +name = "attrs" +version = "26.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9a/8e/82a0fe20a541c03148528be8cac2408564a6c9a0cc7e9171802bc1d26985/attrs-26.1.0.tar.gz", hash = "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32", size = 952055, upload-time = "2026-03-19T14:22:25.026Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, +] + +[[package]] +name = "authlib" +version = "1.6.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/98/00d3dd826d46959ad8e32af2dbb2398868fd9fd0683c26e56d0789bd0e68/authlib-1.6.9.tar.gz", hash = "sha256:d8f2421e7e5980cc1ddb4e32d3f5fa659cfaf60d8eaf3281ebed192e4ab74f04", size = 165134, upload-time = "2026-03-02T07:44:01.998Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/23/b65f568ed0c22f1efacb744d2db1a33c8068f384b8c9b482b52ebdbc3ef6/authlib-1.6.9-py2.py3-none-any.whl", hash = "sha256:f08b4c14e08f0861dc18a32357b33fbcfd2ea86cfe3fe149484b4d764c4a0ac3", size = 244197, upload-time = "2026-03-02T07:44:00.307Z" }, +] + +[[package]] +name = "beartype" +version = "0.22.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/94/1009e248bbfbab11397abca7193bea6626806be9a327d399810d523a07cb/beartype-0.22.9.tar.gz", hash = "sha256:8f82b54aa723a2848a56008d18875f91c1db02c32ef6a62319a002e3e25a975f", size = 1608866, upload-time = "2025-12-13T06:50:30.72Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/cc/18245721fa7747065ab478316c7fea7c74777d07f37ae60db2e84f8172e8/beartype-0.22.9-py3-none-any.whl", hash = "sha256:d16c9bbc61ea14637596c5f6fbff2ee99cbe3573e46a716401734ef50c3060c2", size = 1333658, upload-time = "2025-12-13T06:50:28.266Z" }, +] + +[[package]] +name = "cachetools" +version = "7.0.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/dd/57fe3fdb6e65b25a5987fd2cdc7e22db0aef508b91634d2e57d22928d41b/cachetools-7.0.5.tar.gz", hash = "sha256:0cd042c24377200c1dcd225f8b7b12b0ca53cc2c961b43757e774ebe190fd990", size = 37367, upload-time = "2026-03-09T20:51:29.451Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/f3/39cf3367b8107baa44f861dc802cbf16263c945b62d8265d36034fc07bea/cachetools-7.0.5-py3-none-any.whl", hash = "sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114", size = 13918, upload-time = "2026-03-09T20:51:27.33Z" }, +] + +[[package]] +name = "caio" +version = "0.9.25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/92/88/b8527e1b00c1811db339a1df8bd1ae49d146fcea9d6a5c40e3a80aaeb38d/caio-0.9.25.tar.gz", hash = "sha256:16498e7f81d1d0f5a4c0ad3f2540e65fe25691376e0a5bd367f558067113ed10", size = 26781, upload-time = "2025-12-26T15:21:36.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/57/5e6ff127e6f62c9f15d989560435c642144aa4210882f9494204bc892305/caio-0.9.25-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d6c2a3411af97762a2b03840c3cec2f7f728921ff8adda53d7ea2315a8563451", size = 36979, upload-time = "2025-12-26T15:21:35.484Z" }, + { url = "https://files.pythonhosted.org/packages/a3/9f/f21af50e72117eb528c422d4276cbac11fb941b1b812b182e0a9c70d19c5/caio-0.9.25-cp313-cp313-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0998210a4d5cd5cb565b32ccfe4e53d67303f868a76f212e002a8554692870e6", size = 81900, upload-time = "2025-12-26T15:22:21.919Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/12/c39ae2a4037cb10ad5eb3578eb4d5f8c1a2575c62bba675f3406b7ef0824/caio-0.9.25-cp313-cp313-manylinux_2_34_aarch64.whl", hash = "sha256:1a177d4777141b96f175fe2c37a3d96dec7911ed9ad5f02bac38aaa1c936611f", size = 81523, upload-time = "2026-03-04T22:08:25.187Z" }, + { url = "https://files.pythonhosted.org/packages/22/59/f8f2e950eb4f1a5a3883e198dca514b9d475415cb6cd7b78b9213a0dd45a/caio-0.9.25-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:9ed3cfb28c0e99fec5e208c934e5c157d0866aa9c32aa4dc5e9b6034af6286b7", size = 80243, upload-time = "2026-03-04T22:08:26.449Z" }, + { url = "https://files.pythonhosted.org/packages/69/ca/a08fdc7efdcc24e6a6131a93c85be1f204d41c58f474c42b0670af8c016b/caio-0.9.25-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fab6078b9348e883c80a5e14b382e6ad6aabbc4429ca034e76e730cf464269db", size = 36978, upload-time = "2025-12-26T15:21:41.055Z" }, + { url = "https://files.pythonhosted.org/packages/5e/6c/d4d24f65e690213c097174d26eda6831f45f4734d9d036d81790a27e7b78/caio-0.9.25-cp314-cp314-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:44a6b58e52d488c75cfaa5ecaa404b2b41cc965e6c417e03251e868ecd5b6d77", size = 81832, upload-time = "2025-12-26T15:22:22.757Z" }, + { url = "https://files.pythonhosted.org/packages/87/a4/e534cf7d2d0e8d880e25dd61e8d921ffcfe15bd696734589826f5a2df727/caio-0.9.25-cp314-cp314-manylinux_2_34_aarch64.whl", hash = "sha256:628a630eb7fb22381dd8e3c8ab7f59e854b9c806639811fc3f4310c6bd711d79", size = 81565, upload-time = "2026-03-04T22:08:27.483Z" }, + { url = "https://files.pythonhosted.org/packages/3f/ed/bf81aeac1d290017e5e5ac3e880fd56ee15e50a6d0353986799d1bc5cfd5/caio-0.9.25-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:0ba16aa605ccb174665357fc729cf500679c2d94d5f1458a6f0d5ca48f2060a7", size = 80071, upload-time = "2026-03-04T22:08:28.751Z" }, + { url = "https://files.pythonhosted.org/packages/86/93/1f76c8d1bafe3b0614e06b2195784a3765bbf7b0a067661af9e2dd47fc33/caio-0.9.25-py3-none-any.whl", hash = "sha256:06c0bb02d6b929119b1cfbe1ca403c768b2013a369e2db46bfa2a5761cf82e40", size = 19087, upload-time = "2025-12-26T15:22:00.221Z" }, +] + +[[package]] +name = "cattrs" +version = "26.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a0/ec/ba18945e7d6e55a58364d9fb2e46049c1c2998b3d805f19b703f14e81057/cattrs-26.1.0.tar.gz", hash = "sha256:fa239e0f0ec0715ba34852ce813986dfed1e12117e209b816ab87401271cdd40", size = 495672, upload-time = "2026-02-18T22:15:19.406Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/80/56/60547f7801b97c67e97491dc3d9ade9fbccbd0325058fd3dfcb2f5d98d90/cattrs-26.1.0-py3-none-any.whl", hash = "sha256:d1e0804c42639494d469d08d4f26d6b9de9b8ab26b446db7b5f8c2e97f7c3096", size = 73054, upload-time = "2026-02-18T22:15:17.958Z" }, +] + +[[package]] +name = "certifi" +version = "2026.2.25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash 
= "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/60/e3bec1881450851b087e301bedc3daa9377a4d45f1c26aa90b0b235e38aa/charset_normalizer-3.4.6.tar.gz", hash = "sha256:1ae6b62897110aa7c79ea2f5dd38d1abca6db663687c0b1ad9aed6f6bae3d9d6", size = 143363, upload-time = "2026-03-15T18:53:25.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/1d/4fdabeef4e231153b6ed7567602f3b68265ec4e5b76d6024cf647d43d981/charset_normalizer-3.4.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:11afb56037cbc4b1555a34dd69151e8e069bee82e613a73bef6e714ce733585f", size = 294823, upload-time = "2026-03-15T18:51:15.755Z" }, + { url = "https://files.pythonhosted.org/packages/47/7b/20e809b89c69d37be748d98e84dce6820bf663cf19cf6b942c951a3e8f41/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423fb7e748a08f854a08a222b983f4df1912b1daedce51a72bd24fe8f26a1843", size = 198527, upload-time = "2026-03-15T18:51:17.177Z" }, + { url = "https://files.pythonhosted.org/packages/37/a6/4f8d27527d59c039dce6f7622593cdcd3d70a8504d87d09eb11e9fdc6062/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d73beaac5e90173ac3deb9928a74763a6d230f494e4bfb422c217a0ad8e629bf", size = 218388, upload-time = "2026-03-15T18:51:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/f6/9b/4770ccb3e491a9bacf1c46cc8b812214fe367c86a96353ccc6daf87b01ec/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d60377dce4511655582e300dc1e5a5f24ba0cb229005a1d5c8d0cb72bb758ab8", size = 214563, upload-time = "2026-03-15T18:51:20.374Z" }, + { url = "https://files.pythonhosted.org/packages/2b/58/a199d245894b12db0b957d627516c78e055adc3a0d978bc7f65ddaf7c399/charset_normalizer-3.4.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:530e8cebeea0d76bdcf93357aa5e41336f48c3dc709ac52da2bb167c5b8271d9", size = 206587, upload-time = "2026-03-15T18:51:21.807Z" }, + { url = "https://files.pythonhosted.org/packages/7e/70/3def227f1ec56f5c69dfc8392b8bd63b11a18ca8178d9211d7cc5e5e4f27/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:a26611d9987b230566f24a0a125f17fe0de6a6aff9f25c9f564aaa2721a5fb88", size = 194724, upload-time = "2026-03-15T18:51:23.508Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/ab/9318352e220c05efd31c2779a23b50969dc94b985a2efa643ed9077bfca5/charset_normalizer-3.4.6-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:34315ff4fc374b285ad7f4a0bf7dcbfe769e1b104230d40f49f700d4ab6bbd84", size = 202956, upload-time = "2026-03-15T18:51:25.239Z" }, + { url = "https://files.pythonhosted.org/packages/75/13/f3550a3ac25b70f87ac98c40d3199a8503676c2f1620efbf8d42095cfc40/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5f8ddd609f9e1af8c7bd6e2aca279c931aefecd148a14402d4e368f3171769fd", size = 201923, upload-time = "2026-03-15T18:51:26.682Z" }, + { url = "https://files.pythonhosted.org/packages/1b/db/c5c643b912740b45e8eec21de1bbab8e7fc085944d37e1e709d3dcd9d72f/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:80d0a5615143c0b3225e5e3ef22c8d5d51f3f72ce0ea6fb84c943546c7b25b6c", size = 195366, upload-time = "2026-03-15T18:51:28.129Z" }, + { url = "https://files.pythonhosted.org/packages/5a/67/3b1c62744f9b2448443e0eb160d8b001c849ec3fef591e012eda6484787c/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:92734d4d8d187a354a556626c221cd1a892a4e0802ccb2af432a1d85ec012194", size = 219752, upload-time = "2026-03-15T18:51:29.556Z" }, + { url = "https://files.pythonhosted.org/packages/f6/98/32ffbaf7f0366ffb0445930b87d103f6b406bc2c271563644bde8a2b1093/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:613f19aa6e082cf96e17e3ffd89383343d0d589abda756b7764cf78361fd41dc", size = 203296, upload-time = "2026-03-15T18:51:30.921Z" }, + { url = "https://files.pythonhosted.org/packages/41/12/5d308c1bbe60cabb0c5ef511574a647067e2a1f631bc8634fcafaccd8293/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2b1a63e8224e401cafe7739f77efd3f9e7f5f2026bda4aead8e59afab537784f", size = 215956, upload-time = "2026-03-15T18:51:32.399Z" }, + { url = "https://files.pythonhosted.org/packages/53/e9/5f85f6c5e20669dbe56b165c67b0260547dea97dba7e187938833d791687/charset_normalizer-3.4.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6cceb5473417d28edd20c6c984ab6fee6c6267d38d906823ebfe20b03d607dc2", size = 208652, upload-time = "2026-03-15T18:51:34.214Z" }, + { url = "https://files.pythonhosted.org/packages/f1/11/897052ea6af56df3eef3ca94edafee410ca699ca0c7b87960ad19932c55e/charset_normalizer-3.4.6-cp313-cp313-win32.whl", hash = "sha256:d7de2637729c67d67cf87614b566626057e95c303bc0a55ffe391f5205e7003d", size = 143940, upload-time = "2026-03-15T18:51:36.15Z" }, + { url = "https://files.pythonhosted.org/packages/a1/5c/724b6b363603e419829f561c854b87ed7c7e31231a7908708ac086cdf3e2/charset_normalizer-3.4.6-cp313-cp313-win_amd64.whl", hash = "sha256:572d7c822caf521f0525ba1bce1a622a0b85cf47ffbdae6c9c19e3b5ac3c4389", size = 154101, upload-time = "2026-03-15T18:51:37.876Z" }, + { url = "https://files.pythonhosted.org/packages/01/a5/7abf15b4c0968e47020f9ca0935fb3274deb87cb288cd187cad92e8cdffd/charset_normalizer-3.4.6-cp313-cp313-win_arm64.whl", hash = "sha256:a4474d924a47185a06411e0064b803c68be044be2d60e50e8bddcc2649957c1f", size = 143109, upload-time = "2026-03-15T18:51:39.565Z" }, + { url = "https://files.pythonhosted.org/packages/25/6f/ffe1e1259f384594063ea1869bfb6be5cdb8bc81020fc36c3636bc8302a1/charset_normalizer-3.4.6-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9cc6e6d9e571d2f863fa77700701dae73ed5f78881efc8b3f9a4398772ff53e8", size = 294458, upload-time = "2026-03-15T18:51:41.134Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/60/09bb6c13a8c1016c2ed5c6a6488e4ffef506461aa5161662bd7636936fb1/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef5960d965e67165d75b7c7ffc60a83ec5abfc5c11b764ec13ea54fbef8b4421", size = 199277, upload-time = "2026-03-15T18:51:42.953Z" }, + { url = "https://files.pythonhosted.org/packages/00/50/dcfbb72a5138bbefdc3332e8d81a23494bf67998b4b100703fd15fa52d81/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b3694e3f87f8ac7ce279d4355645b3c878d24d1424581b46282f24b92f5a4ae2", size = 218758, upload-time = "2026-03-15T18:51:44.339Z" }, + { url = "https://files.pythonhosted.org/packages/03/b3/d79a9a191bb75f5aa81f3aaaa387ef29ce7cb7a9e5074ba8ea095cc073c2/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d11595abf8dd942a77883a39d81433739b287b6aa71620f15164f8096221b30", size = 215299, upload-time = "2026-03-15T18:51:45.871Z" }, + { url = "https://files.pythonhosted.org/packages/76/7e/bc8911719f7084f72fd545f647601ea3532363927f807d296a8c88a62c0d/charset_normalizer-3.4.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7bda6eebafd42133efdca535b04ccb338ab29467b3f7bf79569883676fc628db", size = 206811, upload-time = "2026-03-15T18:51:47.308Z" }, + { url = "https://files.pythonhosted.org/packages/e2/40/c430b969d41dda0c465aa36cc7c2c068afb67177bef50905ac371b28ccc7/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:bbc8c8650c6e51041ad1be191742b8b421d05bbd3410f43fa2a00c8db87678e8", size = 193706, upload-time = "2026-03-15T18:51:48.849Z" }, + { url = "https://files.pythonhosted.org/packages/48/15/e35e0590af254f7df984de1323640ef375df5761f615b6225ba8deb9799a/charset_normalizer-3.4.6-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:22c6f0c2fbc31e76c3b8a86fba1a56eda6166e238c29cdd3d14befdb4a4e4815", size = 202706, upload-time = "2026-03-15T18:51:50.257Z" }, + { url = "https://files.pythonhosted.org/packages/5e/bd/f736f7b9cc5e93a18b794a50346bb16fbfd6b37f99e8f306f7951d27c17c/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7edbed096e4a4798710ed6bc75dcaa2a21b68b6c356553ac4823c3658d53743a", size = 202497, upload-time = "2026-03-15T18:51:52.012Z" }, + { url = "https://files.pythonhosted.org/packages/9d/ba/2cc9e3e7dfdf7760a6ed8da7446d22536f3d0ce114ac63dee2a5a3599e62/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:7f9019c9cb613f084481bd6a100b12e1547cf2efe362d873c2e31e4035a6fa43", size = 193511, upload-time = "2026-03-15T18:51:53.723Z" }, + { url = "https://files.pythonhosted.org/packages/9e/cb/5be49b5f776e5613be07298c80e1b02a2d900f7a7de807230595c85a8b2e/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:58c948d0d086229efc484fe2f30c2d382c86720f55cd9bc33591774348ad44e0", size = 220133, upload-time = "2026-03-15T18:51:55.333Z" }, + { url = "https://files.pythonhosted.org/packages/83/43/99f1b5dad345accb322c80c7821071554f791a95ee50c1c90041c157ae99/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:419a9d91bd238052642a51938af8ac05da5b3343becde08d5cdeab9046df9ee1", size = 203035, upload-time = "2026-03-15T18:51:56.736Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/9a/62c2cb6a531483b55dddff1a68b3d891a8b498f3ca555fbcf2978e804d9d/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5273b9f0b5835ff0350c0828faea623c68bfa65b792720c453e22b25cc72930f", size = 216321, upload-time = "2026-03-15T18:51:58.17Z" }, + { url = "https://files.pythonhosted.org/packages/6e/79/94a010ff81e3aec7c293eb82c28f930918e517bc144c9906a060844462eb/charset_normalizer-3.4.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:0e901eb1049fdb80f5bd11ed5ea1e498ec423102f7a9b9e4645d5b8204ff2815", size = 208973, upload-time = "2026-03-15T18:51:59.998Z" }, + { url = "https://files.pythonhosted.org/packages/2a/57/4ecff6d4ec8585342f0c71bc03efaa99cb7468f7c91a57b105bcd561cea8/charset_normalizer-3.4.6-cp314-cp314-win32.whl", hash = "sha256:b4ff1d35e8c5bd078be89349b6f3a845128e685e751b6ea1169cf2160b344c4d", size = 144610, upload-time = "2026-03-15T18:52:02.213Z" }, + { url = "https://files.pythonhosted.org/packages/80/94/8434a02d9d7f168c25767c64671fead8d599744a05d6a6c877144c754246/charset_normalizer-3.4.6-cp314-cp314-win_amd64.whl", hash = "sha256:74119174722c4349af9708993118581686f343adc1c8c9c007d59be90d077f3f", size = 154962, upload-time = "2026-03-15T18:52:03.658Z" }, + { url = "https://files.pythonhosted.org/packages/46/4c/48f2cdbfd923026503dfd67ccea45c94fd8fe988d9056b468579c66ed62b/charset_normalizer-3.4.6-cp314-cp314-win_arm64.whl", hash = "sha256:e5bcc1a1ae744e0bb59641171ae53743760130600da8db48cbb6e4918e186e4e", size = 143595, upload-time = "2026-03-15T18:52:05.123Z" }, + { url = "https://files.pythonhosted.org/packages/31/93/8878be7569f87b14f1d52032946131bcb6ebbd8af3e20446bc04053dc3f1/charset_normalizer-3.4.6-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:ad8faf8df23f0378c6d527d8b0b15ea4a2e23c89376877c598c4870d1b2c7866", size = 314828, upload-time = "2026-03-15T18:52:06.831Z" }, + { url = "https://files.pythonhosted.org/packages/06/b6/fae511ca98aac69ecc35cde828b0a3d146325dd03d99655ad38fc2cc3293/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f5ea69428fa1b49573eef0cc44a1d43bebd45ad0c611eb7d7eac760c7ae771bc", size = 208138, upload-time = "2026-03-15T18:52:08.239Z" }, + { url = "https://files.pythonhosted.org/packages/54/57/64caf6e1bf07274a1e0b7c160a55ee9e8c9ec32c46846ce59b9c333f7008/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:06a7e86163334edfc5d20fe104db92fcd666e5a5df0977cb5680a506fe26cc8e", size = 224679, upload-time = "2026-03-15T18:52:10.043Z" }, + { url = "https://files.pythonhosted.org/packages/aa/cb/9ff5a25b9273ef160861b41f6937f86fae18b0792fe0a8e75e06acb08f1d/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e1f6e2f00a6b8edb562826e4632e26d063ac10307e80f7461f7de3ad8ef3f077", size = 223475, upload-time = "2026-03-15T18:52:11.854Z" }, + { url = "https://files.pythonhosted.org/packages/fc/97/440635fc093b8d7347502a377031f9605a1039c958f3cd18dcacffb37743/charset_normalizer-3.4.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95b52c68d64c1878818687a473a10547b3292e82b6f6fe483808fb1468e2f52f", size = 215230, upload-time = "2026-03-15T18:52:13.325Z" }, + { url = "https://files.pythonhosted.org/packages/cd/24/afff630feb571a13f07c8539fbb502d2ab494019492aaffc78ef41f1d1d0/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_armv7l.whl", 
hash = "sha256:7504e9b7dc05f99a9bbb4525c67a2c155073b44d720470a148b34166a69c054e", size = 199045, upload-time = "2026-03-15T18:52:14.752Z" }, + { url = "https://files.pythonhosted.org/packages/e5/17/d1399ecdaf7e0498c327433e7eefdd862b41236a7e484355b8e0e5ebd64b/charset_normalizer-3.4.6-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:172985e4ff804a7ad08eebec0a1640ece87ba5041d565fff23c8f99c1f389484", size = 211658, upload-time = "2026-03-15T18:52:16.278Z" }, + { url = "https://files.pythonhosted.org/packages/b5/38/16baa0affb957b3d880e5ac2144caf3f9d7de7bc4a91842e447fbb5e8b67/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4be9f4830ba8741527693848403e2c457c16e499100963ec711b1c6f2049b7c7", size = 210769, upload-time = "2026-03-15T18:52:17.782Z" }, + { url = "https://files.pythonhosted.org/packages/05/34/c531bc6ac4c21da9ddfddb3107be2287188b3ea4b53b70fc58f2a77ac8d8/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:79090741d842f564b1b2827c0b82d846405b744d31e84f18d7a7b41c20e473ff", size = 201328, upload-time = "2026-03-15T18:52:19.553Z" }, + { url = "https://files.pythonhosted.org/packages/fa/73/a5a1e9ca5f234519c1953608a03fe109c306b97fdfb25f09182babad51a7/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:87725cfb1a4f1f8c2fc9890ae2f42094120f4b44db9360be5d99a4c6b0e03a9e", size = 225302, upload-time = "2026-03-15T18:52:21.043Z" }, + { url = "https://files.pythonhosted.org/packages/ba/f6/cd782923d112d296294dea4bcc7af5a7ae0f86ab79f8fefbda5526b6cfc0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:fcce033e4021347d80ed9c66dcf1e7b1546319834b74445f561d2e2221de5659", size = 211127, upload-time = "2026-03-15T18:52:22.491Z" }, + { url = "https://files.pythonhosted.org/packages/0e/c5/0b6898950627af7d6103a449b22320372c24c6feda91aa24e201a478d161/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:ca0276464d148c72defa8bb4390cce01b4a0e425f3b50d1435aa6d7a18107602", size = 222840, upload-time = "2026-03-15T18:52:24.113Z" }, + { url = "https://files.pythonhosted.org/packages/7d/25/c4bba773bef442cbdc06111d40daa3de5050a676fa26e85090fc54dd12f0/charset_normalizer-3.4.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:197c1a244a274bb016dd8b79204850144ef77fe81c5b797dc389327adb552407", size = 216890, upload-time = "2026-03-15T18:52:25.541Z" }, + { url = "https://files.pythonhosted.org/packages/35/1a/05dacadb0978da72ee287b0143097db12f2e7e8d3ffc4647da07a383b0b7/charset_normalizer-3.4.6-cp314-cp314t-win32.whl", hash = "sha256:2a24157fa36980478dd1770b585c0f30d19e18f4fb0c47c13aa568f871718579", size = 155379, upload-time = "2026-03-15T18:52:27.05Z" }, + { url = "https://files.pythonhosted.org/packages/5d/7a/d269d834cb3a76291651256f3b9a5945e81d0a49ab9f4a498964e83c0416/charset_normalizer-3.4.6-cp314-cp314t-win_amd64.whl", hash = "sha256:cd5e2801c89992ed8c0a3f0293ae83c159a60d9a5d685005383ef4caca77f2c4", size = 169043, upload-time = "2026-03-15T18:52:28.502Z" }, + { url = "https://files.pythonhosted.org/packages/23/06/28b29fba521a37a8932c6a84192175c34d49f84a6d4773fa63d05f9aff22/charset_normalizer-3.4.6-cp314-cp314t-win_arm64.whl", hash = "sha256:47955475ac79cc504ef2704b192364e51d0d473ad452caedd0002605f780101c", size = 148523, upload-time = "2026-03-15T18:52:29.956Z" }, + { url = "https://files.pythonhosted.org/packages/2a/68/687187c7e26cb24ccbd88e5069f5ef00eba804d36dde11d99aad0838ab45/charset_normalizer-3.4.6-py3-none-any.whl", hash = 
"sha256:947cf925bc916d90adba35a64c82aace04fa39b46b52d4630ece166655905a69", size = 61455, upload-time = "2026-03-15T18:53:23.833Z" }, +] + +[[package]] +name = "click" +version = "8.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "cryptography" +version = "46.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/81/b0bb27f2ba931a65409c6b8a8b358a7f03c0e46eceacddff55f7c84b1f3b/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad", size = 7176289, upload-time = "2026-02-10T19:17:08.274Z" }, + { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" }, + { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = "2026-02-10T19:17:12.388Z" }, + { url = "https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" }, + { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" }, + { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" }, + { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" }, + { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" }, + { url = "https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" }, + { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/ea/ed/325d2a490c5e94038cdb0117da9397ece1f11201f425c4e9c57fe5b9f08b/cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48", size = 3028230, upload-time = "2026-02-10T19:17:30.518Z" }, + { url = "https://files.pythonhosted.org/packages/e9/5a/ac0f49e48063ab4255d9e3b79f5def51697fce1a95ea1370f03dc9db76f6/cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4", size = 3480909, upload-time = "2026-02-10T19:17:32.083Z" }, + { url = "https://files.pythonhosted.org/packages/00/13/3d278bfa7a15a96b9dc22db5a12ad1e48a9eb3d40e1827ef66a5df75d0d0/cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2", size = 7119287, upload-time = "2026-02-10T19:17:33.801Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/c8/581a6702e14f0898a0848105cbefd20c058099e2c2d22ef4e476dfec75d7/cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", size = 4265728, upload-time = "2026-02-10T19:17:35.569Z" }, + { url = "https://files.pythonhosted.org/packages/dd/4a/ba1a65ce8fc65435e5a849558379896c957870dd64fecea97b1ad5f46a37/cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87", size = 4408287, upload-time = "2026-02-10T19:17:36.938Z" }, + { url = "https://files.pythonhosted.org/packages/f8/67/8ffdbf7b65ed1ac224d1c2df3943553766914a8ca718747ee3871da6107e/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", size = 4270291, upload-time = "2026-02-10T19:17:38.748Z" }, + { url = "https://files.pythonhosted.org/packages/f8/e5/f52377ee93bc2f2bba55a41a886fd208c15276ffbd2569f2ddc89d50e2c5/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", size = 4927539, upload-time = "2026-02-10T19:17:40.241Z" }, + { url = "https://files.pythonhosted.org/packages/3b/02/cfe39181b02419bbbbcf3abdd16c1c5c8541f03ca8bda240debc467d5a12/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", size = 4442199, upload-time = "2026-02-10T19:17:41.789Z" }, + { url = "https://files.pythonhosted.org/packages/c0/96/2fcaeb4873e536cf71421a388a6c11b5bc846e986b2b069c79363dc1648e/cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", size = 3960131, upload-time = "2026-02-10T19:17:43.379Z" }, + { url = "https://files.pythonhosted.org/packages/d8/d2/b27631f401ddd644e94c5cf33c9a4069f72011821cf3dc7309546b0642a0/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", size = 4270072, upload-time = "2026-02-10T19:17:45.481Z" }, + { url = "https://files.pythonhosted.org/packages/f4/a7/60d32b0370dae0b4ebe55ffa10e8599a2a59935b5ece1b9f06edb73abdeb/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", size = 4892170, upload-time = "2026-02-10T19:17:46.997Z" }, + { url = "https://files.pythonhosted.org/packages/d2/b9/cf73ddf8ef1164330eb0b199a589103c363afa0cf794218c24d524a58eab/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", size = 4441741, upload-time = "2026-02-10T19:17:48.661Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/eee00b28c84c726fe8fa0158c65afe312d9c3b78d9d01daf700f1f6e37ff/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", size = 4396728, upload-time = "2026-02-10T19:17:50.058Z" }, + { url = "https://files.pythonhosted.org/packages/65/f4/6bc1a9ed5aef7145045114b75b77c2a8261b4d38717bd8dea111a63c3442/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", size = 4652001, upload-time = 
"2026-02-10T19:17:51.54Z" }, + { url = "https://files.pythonhosted.org/packages/86/ef/5d00ef966ddd71ac2e6951d278884a84a40ffbd88948ef0e294b214ae9e4/cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a", size = 3003637, upload-time = "2026-02-10T19:17:52.997Z" }, + { url = "https://files.pythonhosted.org/packages/b7/57/f3f4160123da6d098db78350fdfd9705057aad21de7388eacb2401dceab9/cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4", size = 3469487, upload-time = "2026-02-10T19:17:54.549Z" }, + { url = "https://files.pythonhosted.org/packages/e2/fa/a66aa722105ad6a458bebd64086ca2b72cdd361fed31763d20390f6f1389/cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31", size = 7170514, upload-time = "2026-02-10T19:17:56.267Z" }, + { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" }, + { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" }, + { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" }, + { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" }, + { url = "https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" }, + { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" }, + { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" }, + { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" }, 
+ { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" }, + { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" }, + { url = "https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" }, + { url = "https://files.pythonhosted.org/packages/45/2d/9c5f2926cb5300a8eefc3f4f0b3f3df39db7f7ce40c8365444c49363cbda/cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72", size = 3010220, upload-time = "2026-02-10T19:18:17.361Z" }, + { url = "https://files.pythonhosted.org/packages/48/ef/0c2f4a8e31018a986949d34a01115dd057bf536905dca38897bacd21fac3/cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", size = 3467050, upload-time = "2026-02-10T19:18:18.899Z" }, +] + +[[package]] +name = "cyclopts" +version = "4.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "docstring-parser" }, + { name = "rich" }, + { name = "rich-rst" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6c/c4/2ce2ca1451487dc7d59f09334c3fa1182c46cfcf0a2d5f19f9b26d53ac74/cyclopts-4.10.1.tar.gz", hash = "sha256:ad4e4bb90576412d32276b14a76f55d43353753d16217f2c3cd5bdceba7f15a0", size = 166623, upload-time = "2026-03-23T14:43:01.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/2261922126b2e50c601fe22d7ff5194e0a4d50e654836260c0665e24d862/cyclopts-4.10.1-py3-none-any.whl", hash = "sha256:35f37257139380a386d9fe4475e1e7c87ca7795765ef4f31abba579fcfcb6ecd", size = 204331, upload-time = "2026-03-23T14:43:02.625Z" }, +] + +[[package]] +name = "deprecated" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/85/12f0a49a7c4ffb70572b6c2ef13c90c88fd190debda93b23f026b25f9634/deprecated-1.3.1.tar.gz", hash = "sha256:b1b50e0ff0c1fddaa5708a2c6b0a6588bb09b892825ab2b214ac9ea9d92a5223", size = 2932523, upload-time = "2025-10-30T08:19:02.757Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/d0/205d54408c08b13550c733c4b85429e7ead111c7f0014309637425520a9a/deprecated-1.3.1-py2.py3-none-any.whl", hash = "sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f", size = 11298, upload-time = "2025-10-30T08:19:00.758Z" }, +] + +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, +] + +[[package]] +name = "dnspython" +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, +] + +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, +] + +[[package]] +name = "docstring-to-markdown" +version = "0.17" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/d8/8abe80d62c5dce1075578031bcfde07e735bcf0afe2886dd48b470162ab4/docstring_to_markdown-0.17.tar.gz", hash = "sha256:df72a112294c7492487c9da2451cae0faeee06e86008245c188c5761c9590ca3", size = 32260, upload-time = "2025-05-02T15:09:07.932Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/7b/af3d0da15bed3a8665419bb3a630585756920f4ad67abfdfef26240ebcc0/docstring_to_markdown-0.17-py3-none-any.whl", hash = "sha256:fd7d5094aa83943bf5f9e1a13701866b7c452eac19765380dead666e36d3711c", size = 23479, upload-time = "2025-05-02T15:09:06.676Z" }, +] + +[[package]] +name = "docutils" +version = "0.22.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, +] + +[[package]] +name = "duckdb" +version = "1.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0c/66/744b4931b799a42f8cb9bc7a6f169e7b8e51195b62b246db407fd90bf15f/duckdb-1.5.2.tar.gz", hash = 
"sha256:638da0d5102b6cb6f7d47f83d0600708ac1d3cb46c5e9aaabc845f9ba4d69246", size = 18017166, upload-time = "2026-04-13T11:30:09.065Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/f2/e3d742808f138d374be4bb516fade3d1f33749b813650810ab7885cdc363/duckdb-1.5.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:4420b3f47027a7849d0e1815532007f377fa95ee5810b47ea717d35525c12f79", size = 30064879, upload-time = "2026-04-13T11:29:30.763Z" }, + { url = "https://files.pythonhosted.org/packages/72/0d/f3dc1cf97e1267ca15e4307d456f96ce583961f0703fd75e62b2ad8d64fa/duckdb-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bb42e6ed543902e14eae647850da24103a89f0bc2587dec5601b1c1f213bd2ed", size = 15969327, upload-time = "2026-04-13T11:29:33.481Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e0/d5418def53ae4e05a63075705ff44ed5af5a1a5932627eb2b600c5df1c93/duckdb-1.5.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:98c0535cd6d901f61a5ea3c2e26a1fd28482953d794deb183daf568e3aa5dda6", size = 14225107, upload-time = "2026-04-13T11:29:35.882Z" }, + { url = "https://files.pythonhosted.org/packages/16/a7/15aaa59dbecc35e9711980fcdbf525b32a52470b32d18ef678193a146213/duckdb-1.5.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:486c862bf7f163c0110b6d85b3e5c031d224a671cca468f12ebb1d3a348f6b39", size = 19313433, upload-time = "2026-04-13T11:29:38.367Z" }, + { url = "https://files.pythonhosted.org/packages/bd/21/d903cc63a5140c822b7b62b373a87dc557e60c29b321dfb435061c5e67cf/duckdb-1.5.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:70631c847ca918ee710ec874241b00cf9d2e5be90762cbb2a0389f17823c08f7", size = 21429837, upload-time = "2026-04-13T11:29:41.135Z" }, + { url = "https://files.pythonhosted.org/packages/e3/0a/b770d1f60c70597302130d6247f418549b7094251a02348fbaf1c7e147ae/duckdb-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:52a21823f3fbb52f0f0e5425e20b07391ad882464b955879499b5ff0b45a376b", size = 13107699, upload-time = "2026-04-13T11:29:43.905Z" }, + { url = "https://files.pythonhosted.org/packages/d9/cf/e200fe431d700962d1a908d2ce89f53ccee1cc8db260174ae663ba09686b/duckdb-1.5.2-cp313-cp313-win_arm64.whl", hash = "sha256:411ad438bd4140f189a10e7f515781335962c5d18bd07837dc6d202e3985253d", size = 13927646, upload-time = "2026-04-13T11:29:46.598Z" }, + { url = "https://files.pythonhosted.org/packages/83/a1/f6286c67726cc1ea60a6e3c0d9fbc66527dde24ae089a51bbe298b13ca78/duckdb-1.5.2-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:6b0fe75c148000f060aa1a27b293cacc0ea08cc1cad724fbf2143d56070a3785", size = 30078598, upload-time = "2026-04-13T11:29:49.828Z" }, + { url = "https://files.pythonhosted.org/packages/de/6a/59febb02f21a4a5c6b0b0099ef7c965fdd5e61e4904cf813809bb792e35f/duckdb-1.5.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:35579b8e3a064b5eaf15b0eafc558056a13f79a0a62e34cc4baf57119daecfec", size = 15975120, upload-time = "2026-04-13T11:29:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/09/70/ce750854d37bb5a45cccbb2c3cb04df4af56aea8fc30a2499bb643b4a9c0/duckdb-1.5.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ea58ff5b0880593a280cf5511734b17711b32ee1f58b47d726e8600848358160", size = 14227762, upload-time = "2026-04-13T11:29:55.564Z" }, + { url = "https://files.pythonhosted.org/packages/28/dc/ad45ac3c0b6c4687dc649e8f6cf01af1c8b0443932a39b2abb4ebcb3babd/duckdb-1.5.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:ef461bca07313412dc09961c4a4757a851f56b95ac01c58fac6007632b7b94f2", size = 19315668, upload-time = "2026-04-13T11:29:58.427Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b1/1464f468d2e5813f5808de95df9d3113a645a5bfa2ffcaecbc542ddae272/duckdb-1.5.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be37680ddb380015cb37318e378c53511c45c4f0d8fac5599d22b7d092b9217a", size = 21434056, upload-time = "2026-04-13T11:30:01.238Z" }, + { url = "https://files.pythonhosted.org/packages/ce/32/6673607e024722473fa7aafdd29c0e3dd231dd528f6cd8b5797fbeeb229d/duckdb-1.5.2-cp314-cp314-win_amd64.whl", hash = "sha256:0b291786014df1133f8f18b9df4d004484613146e858d71a21791e0fcca16cf4", size = 13633667, upload-time = "2026-04-13T11:30:04.05Z" }, + { url = "https://files.pythonhosted.org/packages/7a/e3/9d34173ec068631faea3ea6e73050700729363e7e33306a9a3218e5cdc61/duckdb-1.5.2-cp314-cp314-win_arm64.whl", hash = "sha256:c9f3e0b71b8a50fccfb42794899285d9d318ce2503782b9dd54868e5ecd0ad31", size = 14402513, upload-time = "2026-04-13T11:30:06.609Z" }, +] + +[[package]] +name = "email-validator" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/ce/13508a1ec3f8bb981ae4ca79ea40384becc868bfae97fd1c942bb3a001b1/email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7", size = 48967, upload-time = "2024-06-20T11:30:30.034Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521, upload-time = "2024-06-20T11:30:28.248Z" }, +] + +[[package]] +name = "exceptiongroup" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, +] + +[[package]] +name = "fast-agent-mcp" +version = "0.6.17" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "a2a-sdk" }, + { name = "agent-client-protocol" }, + { name = "aiohttp" }, + { name = "anthropic", extra = ["vertex"] }, + { name = "deprecated" }, + { name = "email-validator" }, + { name = "fastapi" }, + { name = "fastmcp" }, + { name = "google-genai" }, + { name = "jsonschema" }, + { name = "keyring" }, + { name = "mcp" }, + { name = "mslex" }, + { name = "multilspy" }, + { name = "openai", extra = ["aiohttp"] }, + { name = "opentelemetry-distro" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-instrumentation-anthropic", marker = "python_full_version < '4'" }, + { name = "opentelemetry-instrumentation-google-genai" }, + { name = "opentelemetry-instrumentation-mcp", marker = "python_full_version < '4'" }, + { name = "opentelemetry-instrumentation-openai", marker = 
"python_full_version < '4'" }, + { name = "prompt-toolkit" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyperclip" }, + { name = "python-frontmatter" }, + { name = "pyyaml" }, + { name = "rich" }, + { name = "ruamel-yaml" }, + { name = "tiktoken" }, + { name = "typer" }, + { name = "uvloop", marker = "sys_platform != 'win32'" }, + { name = "watchfiles" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8c/a1/b6b1045345d38b342da3def7723a2dc6a44faff9c01fee6d81afbd272d62/fast_agent_mcp-0.6.17.tar.gz", hash = "sha256:a920113d47ef2ab82be1bd63b77d3bf78f8f862a5a6e91f1fd0aa931850fb25f", size = 2091401, upload-time = "2026-04-16T21:48:43.334Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/ef/47e05d6fa95e04ed8ad60afac3ae29d8205894fb220ffde193bd33578f3a/fast_agent_mcp-0.6.17-py3-none-any.whl", hash = "sha256:a23c5a5ed8924e38809dabd31f994e5cc81b8c084e84632bb1eb246b257c4752", size = 1573794, upload-time = "2026-04-16T21:48:38.999Z" }, +] + +[[package]] +name = "fastapi" +version = "0.136.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4e/d9/e66315807e41e69e7f6a1b42a162dada2f249c5f06ad3f1a95f84ab336ef/fastapi-0.136.0.tar.gz", hash = "sha256:cf08e067cc66e106e102d9ba659463abfac245200752f8a5b7b1e813de4ff73e", size = 396607, upload-time = "2026-04-16T11:47:13.623Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/a3/0bd5f0cdb0bbc92650e8dc457e9250358411ee5d1b65e42b6632387daf81/fastapi-0.136.0-py3-none-any.whl", hash = "sha256:8793d44ec7378e2be07f8a013cf7f7aa47d6327d0dfe9804862688ec4541a6b4", size = 117556, upload-time = "2026-04-16T11:47:11.922Z" }, +] + +[[package]] +name = "fastmcp" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "authlib" }, + { name = "cyclopts" }, + { name = "exceptiongroup" }, + { name = "httpx" }, + { name = "jsonref" }, + { name = "jsonschema-path" }, + { name = "mcp" }, + { name = "openapi-pydantic" }, + { name = "opentelemetry-api" }, + { name = "packaging" }, + { name = "platformdirs" }, + { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"] }, + { name = "pydantic", extra = ["email"] }, + { name = "pyperclip" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "rich" }, + { name = "uncalled-for" }, + { name = "uvicorn" }, + { name = "watchfiles" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/42/7eed0a38e3b7a386805fecacf8a5a9353a2b3040395ef9e30e585d8549ac/fastmcp-3.2.3.tar.gz", hash = "sha256:4f02ae8b00227285a0cf6544dea1db29b022c8cdd8d3dfdec7118540210ae60a", size = 26328743, upload-time = "2026-04-09T22:05:03.402Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/48/84b6dcba793178a44b9d99b4def6cd62f870dcfc5bb7b9153ac390135812/fastmcp-3.2.3-py3-none-any.whl", hash = "sha256:cc50af6eed1f62ed8b6ebf4987286d8d1d006f08d5bec739d5c7fb76160e0911", size = 707260, upload-time = "2026-04-09T22:05:01.225Z" }, +] + +[[package]] +name = "filelock" +version = "3.25.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/b8/00651a0f559862f3bb7d6f7477b192afe3f583cc5e26403b44e59a55ab34/filelock-3.25.2.tar.gz", hash = 
"sha256:b64ece2b38f4ca29dd3e810287aa8c48182bbecd1ae6e9ae126c9b35f1382694", size = 40480, upload-time = "2026-03-11T20:45:38.487Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl", hash = "sha256:ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70", size = 26759, upload-time = "2026-03-11T20:45:37.437Z" }, +] + +[[package]] +name = "frozenlist" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875, upload-time = "2025-10-06T05:38:17.865Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/40/0832c31a37d60f60ed79e9dfb5a92e1e2af4f40a16a29abcc7992af9edff/frozenlist-1.8.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8d92f1a84bb12d9e56f818b3a746f3efba93c1b63c8387a73dde655e1e42282a", size = 85717, upload-time = "2025-10-06T05:36:27.341Z" }, + { url = "https://files.pythonhosted.org/packages/30/ba/b0b3de23f40bc55a7057bd38434e25c34fa48e17f20ee273bbde5e0650f3/frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96153e77a591c8adc2ee805756c61f59fef4cf4073a9275ee86fe8cba41241f7", size = 49651, upload-time = "2025-10-06T05:36:28.855Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ab/6e5080ee374f875296c4243c381bbdef97a9ac39c6e3ce1d5f7d42cb78d6/frozenlist-1.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f21f00a91358803399890ab167098c131ec2ddd5f8f5fd5fe9c9f2c6fcd91e40", size = 49417, upload-time = "2025-10-06T05:36:29.877Z" }, + { url = "https://files.pythonhosted.org/packages/d5/4e/e4691508f9477ce67da2015d8c00acd751e6287739123113a9fca6f1604e/frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb30f9626572a76dfe4293c7194a09fb1fe93ba94c7d4f720dfae3b646b45027", size = 234391, upload-time = "2025-10-06T05:36:31.301Z" }, + { url = "https://files.pythonhosted.org/packages/40/76/c202df58e3acdf12969a7895fd6f3bc016c642e6726aa63bd3025e0fc71c/frozenlist-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaa352d7047a31d87dafcacbabe89df0aa506abb5b1b85a2fb91bc3faa02d822", size = 233048, upload-time = "2025-10-06T05:36:32.531Z" }, + { url = "https://files.pythonhosted.org/packages/f9/c0/8746afb90f17b73ca5979c7a3958116e105ff796e718575175319b5bb4ce/frozenlist-1.8.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:03ae967b4e297f58f8c774c7eabcce57fe3c2434817d4385c50661845a058121", size = 226549, upload-time = "2025-10-06T05:36:33.706Z" }, + { url = "https://files.pythonhosted.org/packages/7e/eb/4c7eefc718ff72f9b6c4893291abaae5fbc0c82226a32dcd8ef4f7a5dbef/frozenlist-1.8.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6292f1de555ffcc675941d65fffffb0a5bcd992905015f85d0592201793e0e5", size = 239833, upload-time = "2025-10-06T05:36:34.947Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4e/e5c02187cf704224f8b21bee886f3d713ca379535f16893233b9d672ea71/frozenlist-1.8.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29548f9b5b5e3460ce7378144c3010363d8035cea44bc0bf02d57f5a685e084e", size = 245363, upload-time = 
"2025-10-06T05:36:36.534Z" }, + { url = "https://files.pythonhosted.org/packages/1f/96/cb85ec608464472e82ad37a17f844889c36100eed57bea094518bf270692/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ec3cc8c5d4084591b4237c0a272cc4f50a5b03396a47d9caaf76f5d7b38a4f11", size = 229314, upload-time = "2025-10-06T05:36:38.582Z" }, + { url = "https://files.pythonhosted.org/packages/5d/6f/4ae69c550e4cee66b57887daeebe006fe985917c01d0fff9caab9883f6d0/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:517279f58009d0b1f2e7c1b130b377a349405da3f7621ed6bfae50b10adf20c1", size = 243365, upload-time = "2025-10-06T05:36:40.152Z" }, + { url = "https://files.pythonhosted.org/packages/7a/58/afd56de246cf11780a40a2c28dc7cbabbf06337cc8ddb1c780a2d97e88d8/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1", size = 237763, upload-time = "2025-10-06T05:36:41.355Z" }, + { url = "https://files.pythonhosted.org/packages/cb/36/cdfaf6ed42e2644740d4a10452d8e97fa1c062e2a8006e4b09f1b5fd7d63/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8", size = 240110, upload-time = "2025-10-06T05:36:42.716Z" }, + { url = "https://files.pythonhosted.org/packages/03/a8/9ea226fbefad669f11b52e864c55f0bd57d3c8d7eb07e9f2e9a0b39502e1/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed", size = 233717, upload-time = "2025-10-06T05:36:44.251Z" }, + { url = "https://files.pythonhosted.org/packages/1e/0b/1b5531611e83ba7d13ccc9988967ea1b51186af64c42b7a7af465dcc9568/frozenlist-1.8.0-cp313-cp313-win32.whl", hash = "sha256:8b7b94a067d1c504ee0b16def57ad5738701e4ba10cec90529f13fa03c833496", size = 39628, upload-time = "2025-10-06T05:36:45.423Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cf/174c91dbc9cc49bc7b7aab74d8b734e974d1faa8f191c74af9b7e80848e6/frozenlist-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:878be833caa6a3821caf85eb39c5ba92d28e85df26d57afb06b35b2efd937231", size = 43882, upload-time = "2025-10-06T05:36:46.796Z" }, + { url = "https://files.pythonhosted.org/packages/c1/17/502cd212cbfa96eb1388614fe39a3fc9ab87dbbe042b66f97acb57474834/frozenlist-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:44389d135b3ff43ba8cc89ff7f51f5a0bb6b63d829c8300f79a2fe4fe61bcc62", size = 39676, upload-time = "2025-10-06T05:36:47.8Z" }, + { url = "https://files.pythonhosted.org/packages/d2/5c/3bbfaa920dfab09e76946a5d2833a7cbdf7b9b4a91c714666ac4855b88b4/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94", size = 89235, upload-time = "2025-10-06T05:36:48.78Z" }, + { url = "https://files.pythonhosted.org/packages/d2/d6/f03961ef72166cec1687e84e8925838442b615bd0b8854b54923ce5b7b8a/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07cdca25a91a4386d2e76ad992916a85038a9b97561bf7a3fd12d5d9ce31870c", size = 50742, upload-time = "2025-10-06T05:36:49.837Z" }, + { url = "https://files.pythonhosted.org/packages/1e/bb/a6d12b7ba4c3337667d0e421f7181c82dda448ce4e7ad7ecd249a16fa806/frozenlist-1.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52", size = 51725, upload-time = "2025-10-06T05:36:50.851Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/71/d1fed0ffe2c2ccd70b43714c6cab0f4188f09f8a67a7914a6b46ee30f274/frozenlist-1.8.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b3210649ee28062ea6099cfda39e147fa1bc039583c8ee4481cb7811e2448c51", size = 284533, upload-time = "2025-10-06T05:36:51.898Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/fb1685a7b009d89f9bf78a42d94461bc06581f6e718c39344754a5d9bada/frozenlist-1.8.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:581ef5194c48035a7de2aefc72ac6539823bb71508189e5de01d60c9dcd5fa65", size = 292506, upload-time = "2025-10-06T05:36:53.101Z" }, + { url = "https://files.pythonhosted.org/packages/e6/3b/b991fe1612703f7e0d05c0cf734c1b77aaf7c7d321df4572e8d36e7048c8/frozenlist-1.8.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3ef2d026f16a2b1866e1d86fc4e1291e1ed8a387b2c333809419a2f8b3a77b82", size = 274161, upload-time = "2025-10-06T05:36:54.309Z" }, + { url = "https://files.pythonhosted.org/packages/ca/ec/c5c618767bcdf66e88945ec0157d7f6c4a1322f1473392319b7a2501ded7/frozenlist-1.8.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5500ef82073f599ac84d888e3a8c1f77ac831183244bfd7f11eaa0289fb30714", size = 294676, upload-time = "2025-10-06T05:36:55.566Z" }, + { url = "https://files.pythonhosted.org/packages/7c/ce/3934758637d8f8a88d11f0585d6495ef54b2044ed6ec84492a91fa3b27aa/frozenlist-1.8.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50066c3997d0091c411a66e710f4e11752251e6d2d73d70d8d5d4c76442a199d", size = 300638, upload-time = "2025-10-06T05:36:56.758Z" }, + { url = "https://files.pythonhosted.org/packages/fc/4f/a7e4d0d467298f42de4b41cbc7ddaf19d3cfeabaf9ff97c20c6c7ee409f9/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5c1c8e78426e59b3f8005e9b19f6ff46e5845895adbde20ece9218319eca6506", size = 283067, upload-time = "2025-10-06T05:36:57.965Z" }, + { url = "https://files.pythonhosted.org/packages/dc/48/c7b163063d55a83772b268e6d1affb960771b0e203b632cfe09522d67ea5/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:eefdba20de0d938cec6a89bd4d70f346a03108a19b9df4248d3cf0d88f1b0f51", size = 292101, upload-time = "2025-10-06T05:36:59.237Z" }, + { url = "https://files.pythonhosted.org/packages/9f/d0/2366d3c4ecdc2fd391e0afa6e11500bfba0ea772764d631bbf82f0136c9d/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e", size = 289901, upload-time = "2025-10-06T05:37:00.811Z" }, + { url = "https://files.pythonhosted.org/packages/b8/94/daff920e82c1b70e3618a2ac39fbc01ae3e2ff6124e80739ce5d71c9b920/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0", size = 289395, upload-time = "2025-10-06T05:37:02.115Z" }, + { url = "https://files.pythonhosted.org/packages/e3/20/bba307ab4235a09fdcd3cc5508dbabd17c4634a1af4b96e0f69bfe551ebd/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41", size = 283659, upload-time = "2025-10-06T05:37:03.711Z" }, + { url = "https://files.pythonhosted.org/packages/fd/00/04ca1c3a7a124b6de4f8a9a17cc2fcad138b4608e7a3fc5877804b8715d7/frozenlist-1.8.0-cp313-cp313t-win32.whl", hash = 
"sha256:0f96534f8bfebc1a394209427d0f8a63d343c9779cda6fc25e8e121b5fd8555b", size = 43492, upload-time = "2025-10-06T05:37:04.915Z" }, + { url = "https://files.pythonhosted.org/packages/59/5e/c69f733a86a94ab10f68e496dc6b7e8bc078ebb415281d5698313e3af3a1/frozenlist-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5d63a068f978fc69421fb0e6eb91a9603187527c86b7cd3f534a5b77a592b888", size = 48034, upload-time = "2025-10-06T05:37:06.343Z" }, + { url = "https://files.pythonhosted.org/packages/16/6c/be9d79775d8abe79b05fa6d23da99ad6e7763a1d080fbae7290b286093fd/frozenlist-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf0a7e10b077bf5fb9380ad3ae8ce20ef919a6ad93b4552896419ac7e1d8e042", size = 41749, upload-time = "2025-10-06T05:37:07.431Z" }, + { url = "https://files.pythonhosted.org/packages/f1/c8/85da824b7e7b9b6e7f7705b2ecaf9591ba6f79c1177f324c2735e41d36a2/frozenlist-1.8.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cee686f1f4cadeb2136007ddedd0aaf928ab95216e7691c63e50a8ec066336d0", size = 86127, upload-time = "2025-10-06T05:37:08.438Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e8/a1185e236ec66c20afd72399522f142c3724c785789255202d27ae992818/frozenlist-1.8.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:119fb2a1bd47307e899c2fac7f28e85b9a543864df47aa7ec9d3c1b4545f096f", size = 49698, upload-time = "2025-10-06T05:37:09.48Z" }, + { url = "https://files.pythonhosted.org/packages/a1/93/72b1736d68f03fda5fdf0f2180fb6caaae3894f1b854d006ac61ecc727ee/frozenlist-1.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4970ece02dbc8c3a92fcc5228e36a3e933a01a999f7094ff7c23fbd2beeaa67c", size = 49749, upload-time = "2025-10-06T05:37:10.569Z" }, + { url = "https://files.pythonhosted.org/packages/a7/b2/fabede9fafd976b991e9f1b9c8c873ed86f202889b864756f240ce6dd855/frozenlist-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:cba69cb73723c3f329622e34bdbf5ce1f80c21c290ff04256cff1cd3c2036ed2", size = 231298, upload-time = "2025-10-06T05:37:11.993Z" }, + { url = "https://files.pythonhosted.org/packages/3a/3b/d9b1e0b0eed36e70477ffb8360c49c85c8ca8ef9700a4e6711f39a6e8b45/frozenlist-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:778a11b15673f6f1df23d9586f83c4846c471a8af693a22e066508b77d201ec8", size = 232015, upload-time = "2025-10-06T05:37:13.194Z" }, + { url = "https://files.pythonhosted.org/packages/dc/94/be719d2766c1138148564a3960fc2c06eb688da592bdc25adcf856101be7/frozenlist-1.8.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0325024fe97f94c41c08872db482cf8ac4800d80e79222c6b0b7b162d5b13686", size = 225038, upload-time = "2025-10-06T05:37:14.577Z" }, + { url = "https://files.pythonhosted.org/packages/e4/09/6712b6c5465f083f52f50cf74167b92d4ea2f50e46a9eea0523d658454ae/frozenlist-1.8.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:97260ff46b207a82a7567b581ab4190bd4dfa09f4db8a8b49d1a958f6aa4940e", size = 240130, upload-time = "2025-10-06T05:37:15.781Z" }, + { url = "https://files.pythonhosted.org/packages/f8/d4/cd065cdcf21550b54f3ce6a22e143ac9e4836ca42a0de1022da8498eac89/frozenlist-1.8.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54b2077180eb7f83dd52c40b2750d0a9f175e06a42e3213ce047219de902717a", size = 242845, upload-time = "2025-10-06T05:37:17.037Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/c3/f57a5c8c70cd1ead3d5d5f776f89d33110b1addae0ab010ad774d9a44fb9/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f05983daecab868a31e1da44462873306d3cbfd76d1f0b5b69c473d21dbb128", size = 229131, upload-time = "2025-10-06T05:37:18.221Z" }, + { url = "https://files.pythonhosted.org/packages/6c/52/232476fe9cb64f0742f3fde2b7d26c1dac18b6d62071c74d4ded55e0ef94/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:33f48f51a446114bc5d251fb2954ab0164d5be02ad3382abcbfe07e2531d650f", size = 240542, upload-time = "2025-10-06T05:37:19.771Z" }, + { url = "https://files.pythonhosted.org/packages/5f/85/07bf3f5d0fb5414aee5f47d33c6f5c77bfe49aac680bfece33d4fdf6a246/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:154e55ec0655291b5dd1b8731c637ecdb50975a2ae70c606d100750a540082f7", size = 237308, upload-time = "2025-10-06T05:37:20.969Z" }, + { url = "https://files.pythonhosted.org/packages/11/99/ae3a33d5befd41ac0ca2cc7fd3aa707c9c324de2e89db0e0f45db9a64c26/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:4314debad13beb564b708b4a496020e5306c7333fa9a3ab90374169a20ffab30", size = 238210, upload-time = "2025-10-06T05:37:22.252Z" }, + { url = "https://files.pythonhosted.org/packages/b2/60/b1d2da22f4970e7a155f0adde9b1435712ece01b3cd45ba63702aea33938/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:073f8bf8becba60aa931eb3bc420b217bb7d5b8f4750e6f8b3be7f3da85d38b7", size = 231972, upload-time = "2025-10-06T05:37:23.5Z" }, + { url = "https://files.pythonhosted.org/packages/3f/ab/945b2f32de889993b9c9133216c068b7fcf257d8595a0ac420ac8677cab0/frozenlist-1.8.0-cp314-cp314-win32.whl", hash = "sha256:bac9c42ba2ac65ddc115d930c78d24ab8d4f465fd3fc473cdedfccadb9429806", size = 40536, upload-time = "2025-10-06T05:37:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/59/ad/9caa9b9c836d9ad6f067157a531ac48b7d36499f5036d4141ce78c230b1b/frozenlist-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:3e0761f4d1a44f1d1a47996511752cf3dcec5bbdd9cc2b4fe595caf97754b7a0", size = 44330, upload-time = "2025-10-06T05:37:26.928Z" }, + { url = "https://files.pythonhosted.org/packages/82/13/e6950121764f2676f43534c555249f57030150260aee9dcf7d64efda11dd/frozenlist-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:d1eaff1d00c7751b7c6662e9c5ba6eb2c17a2306ba5e2a37f24ddf3cc953402b", size = 40627, upload-time = "2025-10-06T05:37:28.075Z" }, + { url = "https://files.pythonhosted.org/packages/c0/c7/43200656ecc4e02d3f8bc248df68256cd9572b3f0017f0a0c4e93440ae23/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d3bb933317c52d7ea5004a1c442eef86f426886fba134ef8cf4226ea6ee1821d", size = 89238, upload-time = "2025-10-06T05:37:29.373Z" }, + { url = "https://files.pythonhosted.org/packages/d1/29/55c5f0689b9c0fb765055629f472c0de484dcaf0acee2f7707266ae3583c/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8009897cdef112072f93a0efdce29cd819e717fd2f649ee3016efd3cd885a7ed", size = 50738, upload-time = "2025-10-06T05:37:30.792Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7d/b7282a445956506fa11da8c2db7d276adcbf2b17d8bb8407a47685263f90/frozenlist-1.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2c5dcbbc55383e5883246d11fd179782a9d07a986c40f49abe89ddf865913930", size = 51739, upload-time = "2025-10-06T05:37:32.127Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/1c/3d8622e60d0b767a5510d1d3cf21065b9db874696a51ea6d7a43180a259c/frozenlist-1.8.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:39ecbc32f1390387d2aa4f5a995e465e9e2f79ba3adcac92d68e3e0afae6657c", size = 284186, upload-time = "2025-10-06T05:37:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/2d/14/aa36d5f85a89679a85a1d44cd7a6657e0b1c75f61e7cad987b203d2daca8/frozenlist-1.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92db2bf818d5cc8d9c1f1fc56b897662e24ea5adb36ad1f1d82875bd64e03c24", size = 292196, upload-time = "2025-10-06T05:37:36.107Z" }, + { url = "https://files.pythonhosted.org/packages/05/23/6bde59eb55abd407d34f77d39a5126fb7b4f109a3f611d3929f14b700c66/frozenlist-1.8.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2dc43a022e555de94c3b68a4ef0b11c4f747d12c024a520c7101709a2144fb37", size = 273830, upload-time = "2025-10-06T05:37:37.663Z" }, + { url = "https://files.pythonhosted.org/packages/d2/3f/22cff331bfad7a8afa616289000ba793347fcd7bc275f3b28ecea2a27909/frozenlist-1.8.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb89a7f2de3602cfed448095bab3f178399646ab7c61454315089787df07733a", size = 294289, upload-time = "2025-10-06T05:37:39.261Z" }, + { url = "https://files.pythonhosted.org/packages/a4/89/5b057c799de4838b6c69aa82b79705f2027615e01be996d2486a69ca99c4/frozenlist-1.8.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:33139dc858c580ea50e7e60a1b0ea003efa1fd42e6ec7fdbad78fff65fad2fd2", size = 300318, upload-time = "2025-10-06T05:37:43.213Z" }, + { url = "https://files.pythonhosted.org/packages/30/de/2c22ab3eb2a8af6d69dc799e48455813bab3690c760de58e1bf43b36da3e/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:168c0969a329b416119507ba30b9ea13688fafffac1b7822802537569a1cb0ef", size = 282814, upload-time = "2025-10-06T05:37:45.337Z" }, + { url = "https://files.pythonhosted.org/packages/59/f7/970141a6a8dbd7f556d94977858cfb36fa9b66e0892c6dd780d2219d8cd8/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:28bd570e8e189d7f7b001966435f9dac6718324b5be2990ac496cf1ea9ddb7fe", size = 291762, upload-time = "2025-10-06T05:37:46.657Z" }, + { url = "https://files.pythonhosted.org/packages/c1/15/ca1adae83a719f82df9116d66f5bb28bb95557b3951903d39135620ef157/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b2a095d45c5d46e5e79ba1e5b9cb787f541a8dee0433836cea4b96a2c439dcd8", size = 289470, upload-time = "2025-10-06T05:37:47.946Z" }, + { url = "https://files.pythonhosted.org/packages/ac/83/dca6dc53bf657d371fbc88ddeb21b79891e747189c5de990b9dfff2ccba1/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:eab8145831a0d56ec9c4139b6c3e594c7a83c2c8be25d5bcf2d86136a532287a", size = 289042, upload-time = "2025-10-06T05:37:49.499Z" }, + { url = "https://files.pythonhosted.org/packages/96/52/abddd34ca99be142f354398700536c5bd315880ed0a213812bc491cff5e4/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:974b28cf63cc99dfb2188d8d222bc6843656188164848c4f679e63dae4b0708e", size = 283148, upload-time = "2025-10-06T05:37:50.745Z" }, + { url = "https://files.pythonhosted.org/packages/af/d3/76bd4ed4317e7119c2b7f57c3f6934aba26d277acc6309f873341640e21f/frozenlist-1.8.0-cp314-cp314t-win32.whl", hash = 
"sha256:342c97bf697ac5480c0a7ec73cd700ecfa5a8a40ac923bd035484616efecc2df", size = 44676, upload-time = "2025-10-06T05:37:52.222Z" }, + { url = "https://files.pythonhosted.org/packages/89/76/c615883b7b521ead2944bb3480398cbb07e12b7b4e4d073d3752eb721558/frozenlist-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:06be8f67f39c8b1dc671f5d83aaefd3358ae5cdcf8314552c57e7ed3e6475bdd", size = 49451, upload-time = "2025-10-06T05:37:53.425Z" }, + { url = "https://files.pythonhosted.org/packages/e0/a3/5982da14e113d07b325230f95060e2169f5311b1017ea8af2a29b374c289/frozenlist-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:102e6314ca4da683dca92e3b1355490fed5f313b768500084fbe6371fddfdb79", size = 42507, upload-time = "2025-10-06T05:37:54.513Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" }, +] + +[[package]] +name = "fsspec" +version = "2026.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/51/7c/f60c259dcbf4f0c47cc4ddb8f7720d2dcdc8888c8e5ad84c73ea4531cc5b/fsspec-2026.2.0.tar.gz", hash = "sha256:6544e34b16869f5aacd5b90bdf1a71acb37792ea3ddf6125ee69a22a53fb8bff", size = 313441, upload-time = "2026-02-05T21:50:53.743Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" }, +] + +[[package]] +name = "google-api-core" +version = "2.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "googleapis-common-protos" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/98/586ec94553b569080caef635f98a3723db36a38eac0e3d7eb3ea9d2e4b9a/google_api_core-2.30.0.tar.gz", hash = "sha256:02edfa9fab31e17fc0befb5f161b3bf93c9096d99aed584625f38065c511ad9b", size = 176959, upload-time = "2026-02-18T20:28:11.926Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/27/09c33d67f7e0dcf06d7ac17d196594e66989299374bfb0d4331d1038e76b/google_api_core-2.30.0-py3-none-any.whl", hash = "sha256:80be49ee937ff9aba0fd79a6eddfde35fe658b9953ab9b79c57dd7061afa8df5", size = 173288, upload-time = "2026-02-18T20:28:10.367Z" }, +] + +[[package]] +name = "google-auth" +version = "2.49.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "pyasn1-modules" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ea/80/6a696a07d3d3b0a92488933532f03dbefa4a24ab80fb231395b9a2a1be77/google_auth-2.49.1.tar.gz", hash = "sha256:16d40da1c3c5a0533f57d268fe72e0ebb0ae1cc3b567024122651c045d879b64", size = 333825, upload-time = "2026-03-12T19:30:58.135Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/eb/c6c2478d8a8d633460be40e2a8a6f8f429171997a35a96f81d3b680dec83/google_auth-2.49.1-py3-none-any.whl", hash = "sha256:195ebe3dca18eddd1b3db5edc5189b76c13e96f29e73043b923ebcf3f1a860f7", size = 240737, upload-time = "2026-03-12T19:30:53.159Z" }, +] + +[package.optional-dependencies] +requests = [ + { name = "requests" }, +] + +[[package]] +name = "google-genai" +version = 
"1.66.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "google-auth", extra = ["requests"] }, + { name = "httpx" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "sniffio" }, + { name = "tenacity" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9b/ba/0b343b0770d4710ad2979fd9301d7caa56c940174d5361ed4a7cc4979241/google_genai-1.66.0.tar.gz", hash = "sha256:ffc01647b65046bca6387320057aa51db0ad64bcc72c8e3e914062acfa5f7c49", size = 504386, upload-time = "2026-03-04T22:15:28.156Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/dd/403949d922d4e261b08b64aaa132af4e456c3b15c8e2a2d9e6ef693f66e2/google_genai-1.66.0-py3-none-any.whl", hash = "sha256:7f127a39cf695277104ce4091bb26e417c59bb46e952ff3699c3a982d9c474ee", size = 732174, upload-time = "2026-03-04T22:15:26.63Z" }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.73.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/96/a0205167fa0154f4a542fd6925bdc63d039d88dab3588b875078107e6f06/googleapis_common_protos-1.73.0.tar.gz", hash = "sha256:778d07cd4fbeff84c6f7c72102f0daf98fa2bfd3fa8bea426edc545588da0b5a", size = 147323, upload-time = "2026-03-06T21:53:09.727Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/28/23eea8acd65972bbfe295ce3666b28ac510dfcb115fac089d3edb0feb00a/googleapis_common_protos-1.73.0-py3-none-any.whl", hash = "sha256:dfdaaa2e860f242046be561e6d6cb5c5f1541ae02cfbcb034371aadb2942b4e8", size = 297578, upload-time = "2026-03-06T21:52:33.933Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "hf-xet" +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" }, + { url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" }, + { url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" }, + { url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" }, + { url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" }, + { url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" }, + { url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" }, + { url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" }, + { url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, 
+] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "httpx-aiohttp" +version = "0.1.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "httpx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/2c/b894861cecf030fb45675ea24aa55b5722e97c602a163d872fca66c5a6d8/httpx_aiohttp-0.1.12.tar.gz", hash = "sha256:81feec51fd82c0ecfa0e9aaf1b1a6c2591260d5e2bcbeb7eb0277a78e610df2c", size = 275945, upload-time = "2025-12-12T10:12:15.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/8d/85c9701e9af72ca132a1783e2a54364a90c6da832304416a30fc11196ab2/httpx_aiohttp-0.1.12-py3-none-any.whl", hash = "sha256:5b0eac39a7f360fa7867a60bcb46bb1024eada9c01cbfecdb54dc1edb3fb7141", size = 6367, upload-time = "2025-12-12T10:12:14.018Z" }, +] + +[[package]] +name = "httpx-sse" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, +] + +[[package]] +name = "huggingface-hub" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/dc/89/e7aa12d8a6b9259bed10671abb25ae6fa437c0f88a86ecbf59617bae7759/huggingface_hub-1.11.0.tar.gz", hash = "sha256:15fb3713c7f9cdff7b808a94fd91664f661ab142796bb48c9cd9493e8d166278", size = 761749, upload-time = "2026-04-16T13:07:39.73Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/02/4f3f8997d1ea7fe0146b343e5e14bd065fa87af790d07e5576d31b31cc18/huggingface_hub-1.11.0-py3-none-any.whl", hash = "sha256:42a6de0afbfeb5e022222d36398f029679db4eb4778801aafda32257ae9131ab", size = 645499, upload-time = "2026-04-16T13:07:37.716Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "importlib-metadata" +version = "8.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "jaraco-classes" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/66/b15ce62552d84bbfcec9a4873ab79d993a1dd4edb922cbfccae192bd5b5f/jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790", size = 6777, upload-time = "2024-03-31T07:27:34.792Z" }, +] + +[[package]] +name = 
"jaraco-context" +version = "6.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/50/4763cd07e722bb6285316d390a164bc7e479db9d90daa769f22578f698b4/jaraco_context-6.1.2.tar.gz", hash = "sha256:f1a6c9d391e661cc5b8d39861ff077a7dc24dc23833ccee564b234b81c82dfe3", size = 16801, upload-time = "2026-03-20T22:13:33.922Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/58/bc8954bda5fcda97bd7c19be11b85f91973d67a706ed4a3aec33e7de22db/jaraco_context-6.1.2-py3-none-any.whl", hash = "sha256:bf8150b79a2d5d91ae48629d8b427a8f7ba0e1097dd6202a9059f29a36379535", size = 7871, upload-time = "2026-03-20T22:13:32.808Z" }, +] + +[[package]] +name = "jaraco-functools" +version = "4.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/27/056e0638a86749374d6f57d0b0db39f29509cce9313cf91bdc0ac4d91084/jaraco_functools-4.4.0.tar.gz", hash = "sha256:da21933b0417b89515562656547a77b4931f98176eb173644c0d35032a33d6bb", size = 19943, upload-time = "2025-12-21T09:29:43.6Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/c4/813bb09f0985cb21e959f21f2464169eca882656849adf727ac7bb7e1767/jaraco_functools-4.4.0-py3-none-any.whl", hash = "sha256:9eec1e36f45c818d9bf307c8948eb03b2b56cd44087b3cdc989abca1f20b9176", size = 10481, upload-time = "2025-12-21T09:29:42.27Z" }, +] + +[[package]] +name = "jedi" +version = "0.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "parso" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" }, +] + +[[package]] +name = "jedi-language-server" +version = "0.41.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cattrs" }, + { name = "docstring-to-markdown" }, + { name = "jedi" }, + { name = "lsprotocol" }, + { name = "pygls" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/34/4a35094c680040c8dd598b1ee9153a701289351c1dcbad1a0f2d196c524b/jedi_language_server-0.41.3.tar.gz", hash = "sha256:113ec22b95fadaceefbb704b5f365384bed296b82ede59026be375ecc97a9f8a", size = 29113, upload-time = "2024-02-26T04:28:05.521Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/67/2cf4419a8c418b0e5cba0b43dc1ea33a0bb42907694d6a786a3644889f32/jedi_language_server-0.41.3-py3-none-any.whl", hash = "sha256:7411f7479cdc9e9ea495f91e20b182a5d00170c0a8a4a87d3a147462282c06af", size = 27615, upload-time = "2024-02-26T04:28:02.084Z" }, +] + +[[package]] +name = "jeepney" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/6f/357efd7602486741aa73ffc0617fb310a29b588ed0fd69c2399acbb85b0c/jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732", size = 106758, upload-time = "2025-02-27T18:51:01.684Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, +] + +[[package]] +name = "jiter" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = "sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = "2026-02-02T12:37:56.441Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/9c/7ee5a6ff4b9991e1a45263bfc46731634c4a2bde27dfda6c8251df2d958c/jiter-0.13.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1f8a55b848cbabf97d861495cd65f1e5c590246fabca8b48e1747c4dfc8f85bf", size = 306897, upload-time = "2026-02-02T12:36:16.748Z" }, + { url = "https://files.pythonhosted.org/packages/7c/02/be5b870d1d2be5dd6a91bdfb90f248fbb7dcbd21338f092c6b89817c3dbf/jiter-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f556aa591c00f2c45eb1b89f68f52441a016034d18b65da60e2d2875bbbf344a", size = 317507, upload-time = "2026-02-02T12:36:18.351Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/b25d2ec333615f5f284f3a4024f7ce68cfa0604c322c6808b2344c7f5d2b/jiter-0.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7e1d61da332ec412350463891923f960c3073cf1aae93b538f0bb4c8cd46efb", size = 350560, upload-time = "2026-02-02T12:36:19.746Z" }, + { url = "https://files.pythonhosted.org/packages/be/ec/74dcb99fef0aca9fbe56b303bf79f6bd839010cb18ad41000bf6cc71eec0/jiter-0.13.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3097d665a27bc96fd9bbf7f86178037db139f319f785e4757ce7ccbf390db6c2", size = 363232, upload-time = "2026-02-02T12:36:21.243Z" }, + { url = "https://files.pythonhosted.org/packages/1b/37/f17375e0bb2f6a812d4dd92d7616e41917f740f3e71343627da9db2824ce/jiter-0.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d01ecc3a8cbdb6f25a37bd500510550b64ddf9f7d64a107d92f3ccb25035d0f", size = 483727, upload-time = "2026-02-02T12:36:22.688Z" }, + { url = "https://files.pythonhosted.org/packages/77/d2/a71160a5ae1a1e66c1395b37ef77da67513b0adba73b993a27fbe47eb048/jiter-0.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed9bbc30f5d60a3bdf63ae76beb3f9db280d7f195dfcfa61af792d6ce912d159", size = 370799, upload-time = "2026-02-02T12:36:24.106Z" }, + { url = "https://files.pythonhosted.org/packages/01/99/ed5e478ff0eb4e8aa5fd998f9d69603c9fd3f32de3bd16c2b1194f68361c/jiter-0.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98fbafb6e88256f4454de33c1f40203d09fc33ed19162a68b3b257b29ca7f663", size = 359120, upload-time = "2026-02-02T12:36:25.519Z" }, + { url = "https://files.pythonhosted.org/packages/16/be/7ffd08203277a813f732ba897352797fa9493faf8dc7995b31f3d9cb9488/jiter-0.13.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5467696f6b827f1116556cb0db620440380434591e93ecee7fd14d1a491b6daa", size = 390664, upload-time = "2026-02-02T12:36:26.866Z" }, + { url = "https://files.pythonhosted.org/packages/d1/84/e0787856196d6d346264d6dcccb01f741e5f0bd014c1d9a2ebe149caf4f3/jiter-0.13.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2d08c9475d48b92892583df9da592a0e2ac49bcd41fae1fec4f39ba6cf107820", size = 
513543, upload-time = "2026-02-02T12:36:28.217Z" }, + { url = "https://files.pythonhosted.org/packages/65/50/ecbd258181c4313cf79bca6c88fb63207d04d5bf5e4f65174114d072aa55/jiter-0.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:aed40e099404721d7fcaf5b89bd3b4568a4666358bcac7b6b15c09fb6252ab68", size = 547262, upload-time = "2026-02-02T12:36:29.678Z" }, + { url = "https://files.pythonhosted.org/packages/27/da/68f38d12e7111d2016cd198161b36e1f042bd115c169255bcb7ec823a3bf/jiter-0.13.0-cp313-cp313-win32.whl", hash = "sha256:36ebfbcffafb146d0e6ffb3e74d51e03d9c35ce7c625c8066cdbfc7b953bdc72", size = 200630, upload-time = "2026-02-02T12:36:31.808Z" }, + { url = "https://files.pythonhosted.org/packages/25/65/3bd1a972c9a08ecd22eb3b08a95d1941ebe6938aea620c246cf426ae09c2/jiter-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:8d76029f077379374cf0dbc78dbe45b38dec4a2eb78b08b5194ce836b2517afc", size = 202602, upload-time = "2026-02-02T12:36:33.679Z" }, + { url = "https://files.pythonhosted.org/packages/15/fe/13bd3678a311aa67686bb303654792c48206a112068f8b0b21426eb6851e/jiter-0.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:bb7613e1a427cfcb6ea4544f9ac566b93d5bf67e0d48c787eca673ff9c9dff2b", size = 185939, upload-time = "2026-02-02T12:36:35.065Z" }, + { url = "https://files.pythonhosted.org/packages/49/19/a929ec002ad3228bc97ca01dbb14f7632fffdc84a95ec92ceaf4145688ae/jiter-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fa476ab5dd49f3bf3a168e05f89358c75a17608dbabb080ef65f96b27c19ab10", size = 316616, upload-time = "2026-02-02T12:36:36.579Z" }, + { url = "https://files.pythonhosted.org/packages/52/56/d19a9a194afa37c1728831e5fb81b7722c3de18a3109e8f282bfc23e587a/jiter-0.13.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade8cb6ff5632a62b7dbd4757d8c5573f7a2e9ae285d6b5b841707d8363205ef", size = 346850, upload-time = "2026-02-02T12:36:38.058Z" }, + { url = "https://files.pythonhosted.org/packages/36/4a/94e831c6bf287754a8a019cb966ed39ff8be6ab78cadecf08df3bb02d505/jiter-0.13.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9950290340acc1adaded363edd94baebcee7dabdfa8bee4790794cd5cfad2af6", size = 358551, upload-time = "2026-02-02T12:36:39.417Z" }, + { url = "https://files.pythonhosted.org/packages/a2/ec/a4c72c822695fa80e55d2b4142b73f0012035d9fcf90eccc56bc060db37c/jiter-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2b4972c6df33731aac0742b64fd0d18e0a69bc7d6e03108ce7d40c85fd9e3e6d", size = 201950, upload-time = "2026-02-02T12:36:40.791Z" }, + { url = "https://files.pythonhosted.org/packages/b6/00/393553ec27b824fbc29047e9c7cd4a3951d7fbe4a76743f17e44034fa4e4/jiter-0.13.0-cp313-cp313t-win_arm64.whl", hash = "sha256:701a1e77d1e593c1b435315ff625fd071f0998c5f02792038a5ca98899261b7d", size = 185852, upload-time = "2026-02-02T12:36:42.077Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f5/f1997e987211f6f9bd71b8083047b316208b4aca0b529bb5f8c96c89ef3e/jiter-0.13.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:cc5223ab19fe25e2f0bf2643204ad7318896fe3729bf12fde41b77bfc4fafff0", size = 308804, upload-time = "2026-02-02T12:36:43.496Z" }, + { url = "https://files.pythonhosted.org/packages/cd/8f/5482a7677731fd44881f0204981ce2d7175db271f82cba2085dd2212e095/jiter-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9776ebe51713acf438fd9b4405fcd86893ae5d03487546dae7f34993217f8a91", size = 318787, upload-time = "2026-02-02T12:36:45.071Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/b9/7257ac59778f1cd025b26a23c5520a36a424f7f1b068f2442a5b499b7464/jiter-0.13.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879e768938e7b49b5e90b7e3fecc0dbec01b8cb89595861fb39a8967c5220d09", size = 353880, upload-time = "2026-02-02T12:36:47.365Z" }, + { url = "https://files.pythonhosted.org/packages/c3/87/719eec4a3f0841dad99e3d3604ee4cba36af4419a76f3cb0b8e2e691ad67/jiter-0.13.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:682161a67adea11e3aae9038c06c8b4a9a71023228767477d683f69903ebc607", size = 366702, upload-time = "2026-02-02T12:36:48.871Z" }, + { url = "https://files.pythonhosted.org/packages/d2/65/415f0a75cf6921e43365a1bc227c565cb949caca8b7532776e430cbaa530/jiter-0.13.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a13b68cd1cd8cc9de8f244ebae18ccb3e4067ad205220ef324c39181e23bbf66", size = 486319, upload-time = "2026-02-02T12:36:53.006Z" }, + { url = "https://files.pythonhosted.org/packages/54/a2/9e12b48e82c6bbc6081fd81abf915e1443add1b13d8fc586e1d90bb02bb8/jiter-0.13.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87ce0f14c6c08892b610686ae8be350bf368467b6acd5085a5b65441e2bf36d2", size = 372289, upload-time = "2026-02-02T12:36:54.593Z" }, + { url = "https://files.pythonhosted.org/packages/4e/c1/e4693f107a1789a239c759a432e9afc592366f04e901470c2af89cfd28e1/jiter-0.13.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c365005b05505a90d1c47856420980d0237adf82f70c4aff7aebd3c1cc143ad", size = 360165, upload-time = "2026-02-02T12:36:56.112Z" }, + { url = "https://files.pythonhosted.org/packages/17/08/91b9ea976c1c758240614bd88442681a87672eebc3d9a6dde476874e706b/jiter-0.13.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1317fdffd16f5873e46ce27d0e0f7f4f90f0cdf1d86bf6abeaea9f63ca2c401d", size = 389634, upload-time = "2026-02-02T12:36:57.495Z" }, + { url = "https://files.pythonhosted.org/packages/18/23/58325ef99390d6d40427ed6005bf1ad54f2577866594bcf13ce55675f87d/jiter-0.13.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:c05b450d37ba0c9e21c77fef1f205f56bcee2330bddca68d344baebfc55ae0df", size = 514933, upload-time = "2026-02-02T12:36:58.909Z" }, + { url = "https://files.pythonhosted.org/packages/5b/25/69f1120c7c395fd276c3996bb8adefa9c6b84c12bb7111e5c6ccdcd8526d/jiter-0.13.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:775e10de3849d0631a97c603f996f518159272db00fdda0a780f81752255ee9d", size = 548842, upload-time = "2026-02-02T12:37:00.433Z" }, + { url = "https://files.pythonhosted.org/packages/18/05/981c9669d86850c5fbb0d9e62bba144787f9fba84546ba43d624ee27ef29/jiter-0.13.0-cp314-cp314-win32.whl", hash = "sha256:632bf7c1d28421c00dd8bbb8a3bac5663e1f57d5cd5ed962bce3c73bf62608e6", size = 202108, upload-time = "2026-02-02T12:37:01.718Z" }, + { url = "https://files.pythonhosted.org/packages/8d/96/cdcf54dd0b0341db7d25413229888a346c7130bd20820530905fdb65727b/jiter-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:f22ef501c3f87ede88f23f9b11e608581c14f04db59b6a801f354397ae13739f", size = 204027, upload-time = "2026-02-02T12:37:03.075Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f9/724bcaaab7a3cd727031fe4f6995cb86c4bd344909177c186699c8dec51a/jiter-0.13.0-cp314-cp314-win_arm64.whl", hash = "sha256:07b75fe09a4ee8e0c606200622e571e44943f47254f95e2436c8bdcaceb36d7d", size = 187199, upload-time = "2026-02-02T12:37:04.414Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/92/1661d8b9fd6a3d7a2d89831db26fe3c1509a287d83ad7838831c7b7a5c7e/jiter-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:964538479359059a35fb400e769295d4b315ae61e4105396d355a12f7fef09f0", size = 318423, upload-time = "2026-02-02T12:37:05.806Z" }, + { url = "https://files.pythonhosted.org/packages/4f/3b/f77d342a54d4ebcd128e520fc58ec2f5b30a423b0fd26acdfc0c6fef8e26/jiter-0.13.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e104da1db1c0991b3eaed391ccd650ae8d947eab1480c733e5a3fb28d4313e40", size = 351438, upload-time = "2026-02-02T12:37:07.189Z" }, + { url = "https://files.pythonhosted.org/packages/76/b3/ba9a69f0e4209bd3331470c723c2f5509e6f0482e416b612431a5061ed71/jiter-0.13.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e3a5f0cde8ff433b8e88e41aa40131455420fb3649a3c7abdda6145f8cb7202", size = 364774, upload-time = "2026-02-02T12:37:08.579Z" }, + { url = "https://files.pythonhosted.org/packages/b3/16/6cdb31fa342932602458dbb631bfbd47f601e03d2e4950740e0b2100b570/jiter-0.13.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57aab48f40be1db920a582b30b116fe2435d184f77f0e4226f546794cedd9cf0", size = 487238, upload-time = "2026-02-02T12:37:10.066Z" }, + { url = "https://files.pythonhosted.org/packages/ed/b1/956cc7abaca8d95c13aa8d6c9b3f3797241c246cd6e792934cc4c8b250d2/jiter-0.13.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7772115877c53f62beeb8fd853cab692dbc04374ef623b30f997959a4c0e7e95", size = 372892, upload-time = "2026-02-02T12:37:11.656Z" }, + { url = "https://files.pythonhosted.org/packages/26/c4/97ecde8b1e74f67b8598c57c6fccf6df86ea7861ed29da84629cdbba76c4/jiter-0.13.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1211427574b17b633cfceba5040de8081e5abf114f7a7602f73d2e16f9fdaa59", size = 360309, upload-time = "2026-02-02T12:37:13.244Z" }, + { url = "https://files.pythonhosted.org/packages/4b/d7/eabe3cf46715854ccc80be2cd78dd4c36aedeb30751dbf85a1d08c14373c/jiter-0.13.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7beae3a3d3b5212d3a55d2961db3c292e02e302feb43fce6a3f7a31b90ea6dfe", size = 389607, upload-time = "2026-02-02T12:37:14.881Z" }, + { url = "https://files.pythonhosted.org/packages/df/2d/03963fc0804e6109b82decfb9974eb92df3797fe7222428cae12f8ccaa0c/jiter-0.13.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e5562a0f0e90a6223b704163ea28e831bd3a9faa3512a711f031611e6b06c939", size = 514986, upload-time = "2026-02-02T12:37:16.326Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/8c83b45eb3eb1c1e18d841fe30b4b5bc5619d781267ca9bc03e005d8fd0a/jiter-0.13.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:6c26a424569a59140fb51160a56df13f438a2b0967365e987889186d5fc2f6f9", size = 548756, upload-time = "2026-02-02T12:37:17.736Z" }, + { url = "https://files.pythonhosted.org/packages/47/66/eea81dfff765ed66c68fd2ed8c96245109e13c896c2a5015c7839c92367e/jiter-0.13.0-cp314-cp314t-win32.whl", hash = "sha256:24dc96eca9f84da4131cdf87a95e6ce36765c3b156fc9ae33280873b1c32d5f6", size = 201196, upload-time = "2026-02-02T12:37:19.101Z" }, + { url = "https://files.pythonhosted.org/packages/ff/32/4ac9c7a76402f8f00d00842a7f6b83b284d0cf7c1e9d4227bc95aa6d17fa/jiter-0.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0a8d76c7524087272c8ae913f5d9d608bd839154b62c4322ef65723d2e5bb0b8", size = 204215, upload-time = "2026-02-02T12:37:20.495Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/8e/7def204fea9f9be8b3c21a6f2dd6c020cf56c7d5ff753e0e23ed7f9ea57e/jiter-0.13.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2c26cf47e2cad140fa23b6d58d435a7c0161f5c514284802f25e87fddfe11024", size = 187152, upload-time = "2026-02-02T12:37:22.124Z" }, +] + +[[package]] +name = "jsonref" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814, upload-time = "2023-01-16T16:10:04.455Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425, upload-time = "2023-01-16T16:10:02.255Z" }, +] + +[[package]] +name = "jsonschema" +version = "4.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, +] + +[[package]] +name = "jsonschema-path" +version = "0.4.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pathable" }, + { name = "pyyaml" }, + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/8a/7e6102f2b8bdc6705a9eb5294f8f6f9ccd3a8420e8e8e19671d1dd773251/jsonschema_path-0.4.5.tar.gz", hash = "sha256:c6cd7d577ae290c7defd4f4029e86fdb248ca1bd41a07557795b3c95e5144918", size = 15113, upload-time = "2026-03-03T09:56:46.87Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/d5/4e96c44f6c1ea3d812cf5391d81a4f5abaa540abf8d04ecd7f66e0ed11df/jsonschema_path-0.4.5-py3-none-any.whl", hash = "sha256:7d77a2c3f3ec569a40efe5c5f942c44c1af2a6f96fe0866794c9ef5b8f87fd65", size = 19368, upload-time = "2026-03-03T09:56:45.39Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + +[[package]] +name = "keyring" +version = "25.7.0" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "jaraco-classes" }, + { name = "jaraco-context" }, + { name = "jaraco-functools" }, + { name = "jeepney", marker = "sys_platform == 'linux'" }, + { name = "pywin32-ctypes", marker = "sys_platform == 'win32'" }, + { name = "secretstorage", marker = "sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" }, +] + +[[package]] +name = "lsprotocol" +version = "2023.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "cattrs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/f6/6e80484ec078d0b50699ceb1833597b792a6c695f90c645fbaf54b947e6f/lsprotocol-2023.0.1.tar.gz", hash = "sha256:cc5c15130d2403c18b734304339e51242d3018a05c4f7d0f198ad6e0cd21861d", size = 69434, upload-time = "2024-01-09T17:21:12.625Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/37/2351e48cb3309673492d3a8c59d407b75fb6630e560eb27ecd4da03adc9a/lsprotocol-2023.0.1-py3-none-any.whl", hash = "sha256:c75223c9e4af2f24272b14c6375787438279369236cd568f596d4951052a60f2", size = 70826, upload-time = "2024-01-09T17:21:14.491Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "mcp" +version = "1.27.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/eb/c0cfc62075dc6e1ec1c64d352ae09ac051d9334311ed226f1f425312848a/mcp-1.27.0.tar.gz", hash = "sha256:d3dc35a7eec0d458c1da4976a48f982097ddaab87e278c5511d5a4a56e852b83", size = 607509, upload-time = "2026-04-02T14:48:08.88Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/46/f6b4ad632c67ef35209a66127e4bddc95759649dd595f71f13fba11bdf9a/mcp-1.27.0-py3-none-any.whl", hash = 
"sha256:5ce1fa81614958e267b21fb2aa34e0aea8e2c6ede60d52aba45fd47246b4d741", size = 215967, upload-time = "2026-04-02T14:48:07.24Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "more-itertools" +version = "10.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431, upload-time = "2025-09-02T15:23:11.018Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" }, +] + +[[package]] +name = "mslex" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/97/7022667073c99a0fe028f2e34b9bf76b49a611afd21b02527fbfd92d4cd5/mslex-1.3.0.tar.gz", hash = "sha256:641c887d1d3db610eee2af37a8e5abda3f70b3006cdfd2d0d29dc0d1ae28a85d", size = 11583, upload-time = "2024-10-16T13:16:18.523Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/f2/66bd65ca0139675a0d7b18f0bada6e12b51a984e41a76dbe44761bf1b3ee/mslex-1.3.0-py3-none-any.whl", hash = "sha256:c7074b347201b3466fc077c5692fbce9b5f62a63a51f537a53fbbd02eff2eea4", size = 7820, upload-time = "2024-10-16T13:16:17.566Z" }, +] + +[[package]] +name = "multidict" +version = "6.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/22/929c141d6c0dba87d3e1d38fbdf1ba8baba86b7776469f2bc2d3227a1e67/multidict-6.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2b41f5fed0ed563624f1c17630cb9941cf2309d4df00e494b551b5f3e3d67a23", size = 76174, upload-time = "2026-01-26T02:44:18.509Z" }, + { url = "https://files.pythonhosted.org/packages/c7/75/bc704ae15fee974f8fccd871305e254754167dce5f9e42d88a2def741a1d/multidict-6.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84e61e3af5463c19b67ced91f6c634effb89ef8bfc5ca0267f954451ed4bb6a2", size = 45116, upload-time = "2026-01-26T02:44:19.745Z" }, + { url = "https://files.pythonhosted.org/packages/79/76/55cd7186f498ed080a18440c9013011eb548f77ae1b297206d030eb1180a/multidict-6.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:935434b9853c7c112eee7ac891bc4cb86455aa631269ae35442cb316790c1445", size = 43524, upload-time = 
"2026-01-26T02:44:21.571Z" }, + { url = "https://files.pythonhosted.org/packages/e9/3c/414842ef8d5a1628d68edee29ba0e5bcf235dbfb3ccd3ea303a7fe8c72ff/multidict-6.7.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432feb25a1cb67fe82a9680b4d65fb542e4635cb3166cd9c01560651ad60f177", size = 249368, upload-time = "2026-01-26T02:44:22.803Z" }, + { url = "https://files.pythonhosted.org/packages/f6/32/befed7f74c458b4a525e60519fe8d87eef72bb1e99924fa2b0f9d97a221e/multidict-6.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e82d14e3c948952a1a85503817e038cba5905a3352de76b9a465075d072fba23", size = 256952, upload-time = "2026-01-26T02:44:24.306Z" }, + { url = "https://files.pythonhosted.org/packages/03/d6/c878a44ba877f366630c860fdf74bfb203c33778f12b6ac274936853c451/multidict-6.7.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4cfb48c6ea66c83bcaaf7e4dfa7ec1b6bbcf751b7db85a328902796dfde4c060", size = 240317, upload-time = "2026-01-26T02:44:25.772Z" }, + { url = "https://files.pythonhosted.org/packages/68/49/57421b4d7ad2e9e60e25922b08ceb37e077b90444bde6ead629095327a6f/multidict-6.7.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1d540e51b7e8e170174555edecddbd5538105443754539193e3e1061864d444d", size = 267132, upload-time = "2026-01-26T02:44:27.648Z" }, + { url = "https://files.pythonhosted.org/packages/b7/fe/ec0edd52ddbcea2a2e89e174f0206444a61440b40f39704e64dc807a70bd/multidict-6.7.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:273d23f4b40f3dce4d6c8a821c741a86dec62cded82e1175ba3d99be128147ed", size = 268140, upload-time = "2026-01-26T02:44:29.588Z" }, + { url = "https://files.pythonhosted.org/packages/b0/73/6e1b01cbeb458807aa0831742232dbdd1fa92bfa33f52a3f176b4ff3dc11/multidict-6.7.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d624335fd4fa1c08a53f8b4be7676ebde19cd092b3895c421045ca87895b429", size = 254277, upload-time = "2026-01-26T02:44:30.902Z" }, + { url = "https://files.pythonhosted.org/packages/6a/b2/5fb8c124d7561a4974c342bc8c778b471ebbeb3cc17df696f034a7e9afe7/multidict-6.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:12fad252f8b267cc75b66e8fc51b3079604e8d43a75428ffe193cd9e2195dfd6", size = 252291, upload-time = "2026-01-26T02:44:32.31Z" }, + { url = "https://files.pythonhosted.org/packages/5a/96/51d4e4e06bcce92577fcd488e22600bd38e4fd59c20cb49434d054903bd2/multidict-6.7.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:03ede2a6ffbe8ef936b92cb4529f27f42be7f56afcdab5ab739cd5f27fb1cbf9", size = 250156, upload-time = "2026-01-26T02:44:33.734Z" }, + { url = "https://files.pythonhosted.org/packages/db/6b/420e173eec5fba721a50e2a9f89eda89d9c98fded1124f8d5c675f7a0c0f/multidict-6.7.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:90efbcf47dbe33dcf643a1e400d67d59abeac5db07dc3f27d6bdeae497a2198c", size = 249742, upload-time = "2026-01-26T02:44:35.222Z" }, + { url = "https://files.pythonhosted.org/packages/44/a3/ec5b5bd98f306bc2aa297b8c6f11a46714a56b1e6ef5ebda50a4f5d7c5fb/multidict-6.7.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5c4b9bfc148f5a91be9244d6264c53035c8a0dcd2f51f1c3c6e30e30ebaa1c84", size = 262221, upload-time = "2026-01-26T02:44:36.604Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/f7/e8c0d0da0cd1e28d10e624604e1a36bcc3353aaebdfdc3a43c72bc683a12/multidict-6.7.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:401c5a650f3add2472d1d288c26deebc540f99e2fb83e9525007a74cd2116f1d", size = 258664, upload-time = "2026-01-26T02:44:38.008Z" }, + { url = "https://files.pythonhosted.org/packages/52/da/151a44e8016dd33feed44f730bd856a66257c1ee7aed4f44b649fb7edeb3/multidict-6.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:97891f3b1b3ffbded884e2916cacf3c6fc87b66bb0dde46f7357404750559f33", size = 249490, upload-time = "2026-01-26T02:44:39.386Z" }, + { url = "https://files.pythonhosted.org/packages/87/af/a3b86bf9630b732897f6fc3f4c4714b90aa4361983ccbdcd6c0339b21b0c/multidict-6.7.1-cp313-cp313-win32.whl", hash = "sha256:e1c5988359516095535c4301af38d8a8838534158f649c05dd1050222321bcb3", size = 41695, upload-time = "2026-01-26T02:44:41.318Z" }, + { url = "https://files.pythonhosted.org/packages/b2/35/e994121b0e90e46134673422dd564623f93304614f5d11886b1b3e06f503/multidict-6.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:960c83bf01a95b12b08fd54324a4eb1d5b52c88932b5cba5d6e712bb3ed12eb5", size = 45884, upload-time = "2026-01-26T02:44:42.488Z" }, + { url = "https://files.pythonhosted.org/packages/ca/61/42d3e5dbf661242a69c97ea363f2d7b46c567da8eadef8890022be6e2ab0/multidict-6.7.1-cp313-cp313-win_arm64.whl", hash = "sha256:563fe25c678aaba333d5399408f5ec3c383ca5b663e7f774dd179a520b8144df", size = 43122, upload-time = "2026-01-26T02:44:43.664Z" }, + { url = "https://files.pythonhosted.org/packages/6d/b3/e6b21c6c4f314bb956016b0b3ef2162590a529b84cb831c257519e7fde44/multidict-6.7.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c76c4bec1538375dad9d452d246ca5368ad6e1c9039dadcf007ae59c70619ea1", size = 83175, upload-time = "2026-01-26T02:44:44.894Z" }, + { url = "https://files.pythonhosted.org/packages/fb/76/23ecd2abfe0957b234f6c960f4ade497f55f2c16aeb684d4ecdbf1c95791/multidict-6.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:57b46b24b5d5ebcc978da4ec23a819a9402b4228b8a90d9c656422b4bdd8a963", size = 48460, upload-time = "2026-01-26T02:44:46.106Z" }, + { url = "https://files.pythonhosted.org/packages/c4/57/a0ed92b23f3a042c36bc4227b72b97eca803f5f1801c1ab77c8a212d455e/multidict-6.7.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e954b24433c768ce78ab7929e84ccf3422e46deb45a4dc9f93438f8217fa2d34", size = 46930, upload-time = "2026-01-26T02:44:47.278Z" }, + { url = "https://files.pythonhosted.org/packages/b5/66/02ec7ace29162e447f6382c495dc95826bf931d3818799bbef11e8f7df1a/multidict-6.7.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3bd231490fa7217cc832528e1cd8752a96f0125ddd2b5749390f7c3ec8721b65", size = 242582, upload-time = "2026-01-26T02:44:48.604Z" }, + { url = "https://files.pythonhosted.org/packages/58/18/64f5a795e7677670e872673aca234162514696274597b3708b2c0d276cce/multidict-6.7.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:253282d70d67885a15c8a7716f3a73edf2d635793ceda8173b9ecc21f2fb8292", size = 250031, upload-time = "2026-01-26T02:44:50.544Z" }, + { url = "https://files.pythonhosted.org/packages/c8/ed/e192291dbbe51a8290c5686f482084d31bcd9d09af24f63358c3d42fd284/multidict-6.7.1-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0b4c48648d7649c9335cf1927a8b87fa692de3dcb15faa676c6a6f1f1aabda43", size = 228596, upload-time = "2026-01-26T02:44:51.951Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/7e/3562a15a60cf747397e7f2180b0a11dc0c38d9175a650e75fa1b4d325e15/multidict-6.7.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:98bc624954ec4d2c7cb074b8eefc2b5d0ce7d482e410df446414355d158fe4ca", size = 257492, upload-time = "2026-01-26T02:44:53.902Z" }, + { url = "https://files.pythonhosted.org/packages/24/02/7d0f9eae92b5249bb50ac1595b295f10e263dd0078ebb55115c31e0eaccd/multidict-6.7.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1b99af4d9eec0b49927b4402bcbb58dea89d3e0db8806a4086117019939ad3dd", size = 255899, upload-time = "2026-01-26T02:44:55.316Z" }, + { url = "https://files.pythonhosted.org/packages/00/e3/9b60ed9e23e64c73a5cde95269ef1330678e9c6e34dd4eb6b431b85b5a10/multidict-6.7.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6aac4f16b472d5b7dc6f66a0d49dd57b0e0902090be16594dc9ebfd3d17c47e7", size = 247970, upload-time = "2026-01-26T02:44:56.783Z" }, + { url = "https://files.pythonhosted.org/packages/3e/06/538e58a63ed5cfb0bd4517e346b91da32fde409d839720f664e9a4ae4f9d/multidict-6.7.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:21f830fe223215dffd51f538e78c172ed7c7f60c9b96a2bf05c4848ad49921c3", size = 245060, upload-time = "2026-01-26T02:44:58.195Z" }, + { url = "https://files.pythonhosted.org/packages/b2/2f/d743a3045a97c895d401e9bd29aaa09b94f5cbdf1bd561609e5a6c431c70/multidict-6.7.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f5dd81c45b05518b9aa4da4aa74e1c93d715efa234fd3e8a179df611cc85e5f4", size = 235888, upload-time = "2026-01-26T02:44:59.57Z" }, + { url = "https://files.pythonhosted.org/packages/38/83/5a325cac191ab28b63c52f14f1131f3b0a55ba3b9aa65a6d0bf2a9b921a0/multidict-6.7.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:eb304767bca2bb92fb9c5bd33cedc95baee5bb5f6c88e63706533a1c06ad08c8", size = 243554, upload-time = "2026-01-26T02:45:01.054Z" }, + { url = "https://files.pythonhosted.org/packages/20/1f/9d2327086bd15da2725ef6aae624208e2ef828ed99892b17f60c344e57ed/multidict-6.7.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:c9035dde0f916702850ef66460bc4239d89d08df4d02023a5926e7446724212c", size = 252341, upload-time = "2026-01-26T02:45:02.484Z" }, + { url = "https://files.pythonhosted.org/packages/e8/2c/2a1aa0280cf579d0f6eed8ee5211c4f1730bd7e06c636ba2ee6aafda302e/multidict-6.7.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:af959b9beeb66c822380f222f0e0a1889331597e81f1ded7f374f3ecb0fd6c52", size = 246391, upload-time = "2026-01-26T02:45:03.862Z" }, + { url = "https://files.pythonhosted.org/packages/e5/03/7ca022ffc36c5a3f6e03b179a5ceb829be9da5783e6fe395f347c0794680/multidict-6.7.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:41f2952231456154ee479651491e94118229844dd7226541788be783be2b5108", size = 243422, upload-time = "2026-01-26T02:45:05.296Z" }, + { url = "https://files.pythonhosted.org/packages/dc/1d/b31650eab6c5778aceed46ba735bd97f7c7d2f54b319fa916c0f96e7805b/multidict-6.7.1-cp313-cp313t-win32.whl", hash = "sha256:df9f19c28adcb40b6aae30bbaa1478c389efd50c28d541d76760199fc1037c32", size = 47770, upload-time = "2026-01-26T02:45:06.754Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/2d2d1d522e51285bd61b1e20df8f47ae1a9d80839db0b24ea783b3832832/multidict-6.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:d54ecf9f301853f2c5e802da559604b3e95bb7a3b01a9c295c6ee591b9882de8", size = 53109, upload-time = "2026-01-26T02:45:08.044Z" 
}, + { url = "https://files.pythonhosted.org/packages/3d/a3/cc409ba012c83ca024a308516703cf339bdc4b696195644a7215a5164a24/multidict-6.7.1-cp313-cp313t-win_arm64.whl", hash = "sha256:5a37ca18e360377cfda1d62f5f382ff41f2b8c4ccb329ed974cc2e1643440118", size = 45573, upload-time = "2026-01-26T02:45:09.349Z" }, + { url = "https://files.pythonhosted.org/packages/91/cc/db74228a8be41884a567e88a62fd589a913708fcf180d029898c17a9a371/multidict-6.7.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8f333ec9c5eb1b7105e3b84b53141e66ca05a19a605368c55450b6ba208cb9ee", size = 75190, upload-time = "2026-01-26T02:45:10.651Z" }, + { url = "https://files.pythonhosted.org/packages/d5/22/492f2246bb5b534abd44804292e81eeaf835388901f0c574bac4eeec73c5/multidict-6.7.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:a407f13c188f804c759fc6a9f88286a565c242a76b27626594c133b82883b5c2", size = 44486, upload-time = "2026-01-26T02:45:11.938Z" }, + { url = "https://files.pythonhosted.org/packages/f1/4f/733c48f270565d78b4544f2baddc2fb2a245e5a8640254b12c36ac7ac68e/multidict-6.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0e161ddf326db5577c3a4cc2d8648f81456e8a20d40415541587a71620d7a7d1", size = 43219, upload-time = "2026-01-26T02:45:14.346Z" }, + { url = "https://files.pythonhosted.org/packages/24/bb/2c0c2287963f4259c85e8bcbba9182ced8d7fca65c780c38e99e61629d11/multidict-6.7.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1e3a8bb24342a8201d178c3b4984c26ba81a577c80d4d525727427460a50c22d", size = 245132, upload-time = "2026-01-26T02:45:15.712Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f9/44d4b3064c65079d2467888794dea218d1601898ac50222ab8a9a8094460/multidict-6.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97231140a50f5d447d3164f994b86a0bed7cd016e2682f8650d6a9158e14fd31", size = 252420, upload-time = "2026-01-26T02:45:17.293Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/78f7275e73fa17b24c9a51b0bd9d73ba64bb32d0ed51b02a746eb876abe7/multidict-6.7.1-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6b10359683bd8806a200fd2909e7c8ca3a7b24ec1d8132e483d58e791d881048", size = 233510, upload-time = "2026-01-26T02:45:19.356Z" }, + { url = "https://files.pythonhosted.org/packages/4b/25/8167187f62ae3cbd52da7893f58cb036b47ea3fb67138787c76800158982/multidict-6.7.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:283ddac99f7ac25a4acadbf004cb5ae34480bbeb063520f70ce397b281859362", size = 264094, upload-time = "2026-01-26T02:45:20.834Z" }, + { url = "https://files.pythonhosted.org/packages/a1/e7/69a3a83b7b030cf283fb06ce074a05a02322359783424d7edf0f15fe5022/multidict-6.7.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:538cec1e18c067d0e6103aa9a74f9e832904c957adc260e61cd9d8cf0c3b3d37", size = 260786, upload-time = "2026-01-26T02:45:22.818Z" }, + { url = "https://files.pythonhosted.org/packages/fe/3b/8ec5074bcfc450fe84273713b4b0a0dd47c0249358f5d82eb8104ffe2520/multidict-6.7.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7eee46ccb30ff48a1e35bb818cc90846c6be2b68240e42a78599166722cea709", size = 248483, upload-time = "2026-01-26T02:45:24.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/5a/d5a99e3acbca0e29c5d9cba8f92ceb15dce78bab963b308ae692981e3a5d/multidict-6.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa263a02f4f2dd2d11a7b1bb4362aa7cb1049f84a9235d31adf63f30143469a0", size = 248403, upload-time = "2026-01-26T02:45:25.982Z" }, + { url = "https://files.pythonhosted.org/packages/35/48/e58cd31f6c7d5102f2a4bf89f96b9cf7e00b6c6f3d04ecc44417c00a5a3c/multidict-6.7.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:2e1425e2f99ec5bd36c15a01b690a1a2456209c5deed58f95469ffb46039ccbb", size = 240315, upload-time = "2026-01-26T02:45:27.487Z" }, + { url = "https://files.pythonhosted.org/packages/94/33/1cd210229559cb90b6786c30676bb0c58249ff42f942765f88793b41fdce/multidict-6.7.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:497394b3239fc6f0e13a78a3e1b61296e72bf1c5f94b4c4eb80b265c37a131cd", size = 245528, upload-time = "2026-01-26T02:45:28.991Z" }, + { url = "https://files.pythonhosted.org/packages/64/f2/6e1107d226278c876c783056b7db43d800bb64c6131cec9c8dfb6903698e/multidict-6.7.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:233b398c29d3f1b9676b4b6f75c518a06fcb2ea0b925119fb2c1bc35c05e1601", size = 258784, upload-time = "2026-01-26T02:45:30.503Z" }, + { url = "https://files.pythonhosted.org/packages/4d/c1/11f664f14d525e4a1b5327a82d4de61a1db604ab34c6603bb3c2cc63ad34/multidict-6.7.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:93b1818e4a6e0930454f0f2af7dfce69307ca03cdcfb3739bf4d91241967b6c1", size = 251980, upload-time = "2026-01-26T02:45:32.603Z" }, + { url = "https://files.pythonhosted.org/packages/e1/9f/75a9ac888121d0c5bbd4ecf4eead45668b1766f6baabfb3b7f66a410e231/multidict-6.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f33dc2a3abe9249ea5d8360f969ec7f4142e7ac45ee7014d8f8d5acddf178b7b", size = 243602, upload-time = "2026-01-26T02:45:34.043Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e7/50bf7b004cc8525d80dbbbedfdc7aed3e4c323810890be4413e589074032/multidict-6.7.1-cp314-cp314-win32.whl", hash = "sha256:3ab8b9d8b75aef9df299595d5388b14530839f6422333357af1339443cff777d", size = 40930, upload-time = "2026-01-26T02:45:36.278Z" }, + { url = "https://files.pythonhosted.org/packages/e0/bf/52f25716bbe93745595800f36fb17b73711f14da59ed0bb2eba141bc9f0f/multidict-6.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:5e01429a929600e7dab7b166062d9bb54a5eed752384c7384c968c2afab8f50f", size = 45074, upload-time = "2026-01-26T02:45:37.546Z" }, + { url = "https://files.pythonhosted.org/packages/97/ab/22803b03285fa3a525f48217963da3a65ae40f6a1b6f6cf2768879e208f9/multidict-6.7.1-cp314-cp314-win_arm64.whl", hash = "sha256:4885cb0e817aef5d00a2e8451d4665c1808378dc27c2705f1bf4ef8505c0d2e5", size = 42471, upload-time = "2026-01-26T02:45:38.889Z" }, + { url = "https://files.pythonhosted.org/packages/e0/6d/f9293baa6146ba9507e360ea0292b6422b016907c393e2f63fc40ab7b7b5/multidict-6.7.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:0458c978acd8e6ea53c81eefaddbbee9c6c5e591f41b3f5e8e194780fe026581", size = 82401, upload-time = "2026-01-26T02:45:40.254Z" }, + { url = "https://files.pythonhosted.org/packages/7a/68/53b5494738d83558d87c3c71a486504d8373421c3e0dbb6d0db48ad42ee0/multidict-6.7.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:c0abd12629b0af3cf590982c0b413b1e7395cd4ec026f30986818ab95bfaa94a", size = 48143, upload-time = "2026-01-26T02:45:41.635Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/e8/5284c53310dcdc99ce5d66563f6e5773531a9b9fe9ec7a615e9bc306b05f/multidict-6.7.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:14525a5f61d7d0c94b368a42cff4c9a4e7ba2d52e2672a7b23d84dc86fb02b0c", size = 46507, upload-time = "2026-01-26T02:45:42.99Z" }, + { url = "https://files.pythonhosted.org/packages/e4/fc/6800d0e5b3875568b4083ecf5f310dcf91d86d52573160834fb4bfcf5e4f/multidict-6.7.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:17307b22c217b4cf05033dabefe68255a534d637c6c9b0cc8382718f87be4262", size = 239358, upload-time = "2026-01-26T02:45:44.376Z" }, + { url = "https://files.pythonhosted.org/packages/41/75/4ad0973179361cdf3a113905e6e088173198349131be2b390f9fa4da5fc6/multidict-6.7.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a7e590ff876a3eaf1c02a4dfe0724b6e69a9e9de6d8f556816f29c496046e59", size = 246884, upload-time = "2026-01-26T02:45:47.167Z" }, + { url = "https://files.pythonhosted.org/packages/c3/9c/095bb28b5da139bd41fb9a5d5caff412584f377914bd8787c2aa98717130/multidict-6.7.1-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:5fa6a95dfee63893d80a34758cd0e0c118a30b8dcb46372bf75106c591b77889", size = 225878, upload-time = "2026-01-26T02:45:48.698Z" }, + { url = "https://files.pythonhosted.org/packages/07/d0/c0a72000243756e8f5a277b6b514fa005f2c73d481b7d9e47cd4568aa2e4/multidict-6.7.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a0543217a6a017692aa6ae5cc39adb75e587af0f3a82288b1492eb73dd6cc2a4", size = 253542, upload-time = "2026-01-26T02:45:50.164Z" }, + { url = "https://files.pythonhosted.org/packages/c0/6b/f69da15289e384ecf2a68837ec8b5ad8c33e973aa18b266f50fe55f24b8c/multidict-6.7.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f99fe611c312b3c1c0ace793f92464d8cd263cc3b26b5721950d977b006b6c4d", size = 252403, upload-time = "2026-01-26T02:45:51.779Z" }, + { url = "https://files.pythonhosted.org/packages/a2/76/b9669547afa5a1a25cd93eaca91c0da1c095b06b6d2d8ec25b713588d3a1/multidict-6.7.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9004d8386d133b7e6135679424c91b0b854d2d164af6ea3f289f8f2761064609", size = 244889, upload-time = "2026-01-26T02:45:53.27Z" }, + { url = "https://files.pythonhosted.org/packages/7e/a9/a50d2669e506dad33cfc45b5d574a205587b7b8a5f426f2fbb2e90882588/multidict-6.7.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e628ef0e6859ffd8273c69412a2465c4be4a9517d07261b33334b5ec6f3c7489", size = 241982, upload-time = "2026-01-26T02:45:54.919Z" }, + { url = "https://files.pythonhosted.org/packages/c5/bb/1609558ad8b456b4827d3c5a5b775c93b87878fd3117ed3db3423dfbce1b/multidict-6.7.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:841189848ba629c3552035a6a7f5bf3b02eb304e9fea7492ca220a8eda6b0e5c", size = 232415, upload-time = "2026-01-26T02:45:56.981Z" }, + { url = "https://files.pythonhosted.org/packages/d8/59/6f61039d2aa9261871e03ab9dc058a550d240f25859b05b67fd70f80d4b3/multidict-6.7.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ce1bbd7d780bb5a0da032e095c951f7014d6b0a205f8318308140f1a6aba159e", size = 240337, upload-time = "2026-01-26T02:45:58.698Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/29/fdc6a43c203890dc2ae9249971ecd0c41deaedfe00d25cb6564b2edd99eb/multidict-6.7.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b26684587228afed0d50cf804cc71062cc9c1cdf55051c4c6345d372947b268c", size = 248788, upload-time = "2026-01-26T02:46:00.862Z" }, + { url = "https://files.pythonhosted.org/packages/a9/14/a153a06101323e4cf086ecee3faadba52ff71633d471f9685c42e3736163/multidict-6.7.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9f9af11306994335398293f9958071019e3ab95e9a707dc1383a35613f6abcb9", size = 242842, upload-time = "2026-01-26T02:46:02.824Z" }, + { url = "https://files.pythonhosted.org/packages/41/5f/604ae839e64a4a6efc80db94465348d3b328ee955e37acb24badbcd24d83/multidict-6.7.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b4938326284c4f1224178a560987b6cf8b4d38458b113d9b8c1db1a836e640a2", size = 240237, upload-time = "2026-01-26T02:46:05.898Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/c3a5187bf66f6fb546ff4ab8fb5a077cbdd832d7b1908d4365c7f74a1917/multidict-6.7.1-cp314-cp314t-win32.whl", hash = "sha256:98655c737850c064a65e006a3df7c997cd3b220be4ec8fe26215760b9697d4d7", size = 48008, upload-time = "2026-01-26T02:46:07.468Z" }, + { url = "https://files.pythonhosted.org/packages/0c/f7/addf1087b860ac60e6f382240f64fb99f8bfb532bb06f7c542b83c29ca61/multidict-6.7.1-cp314-cp314t-win_amd64.whl", hash = "sha256:497bde6223c212ba11d462853cfa4f0ae6ef97465033e7dc9940cdb3ab5b48e5", size = 53542, upload-time = "2026-01-26T02:46:08.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/81/4629d0aa32302ef7b2ec65c75a728cc5ff4fa410c50096174c1632e70b3e/multidict-6.7.1-cp314-cp314t-win_arm64.whl", hash = "sha256:2bbd113e0d4af5db41d5ebfe9ccaff89de2120578164f86a5d17d5a576d1e5b2", size = 44719, upload-time = "2026-01-26T02:46:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" }, +] + +[[package]] +name = "multilspy" +version = "0.0.15" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jedi-language-server" }, + { name = "psutil" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d8/a8/4d6ab48e624f911eb5229aa01b3524b916470c9d036a9e8cc96d6fb81673/multilspy-0.0.15.tar.gz", hash = "sha256:b27a0b7c5c5306216b31fe1df9b4a42d2797735d0a78928e0df9ef8dfbcc97c5", size = 120639, upload-time = "2025-04-03T07:01:27.216Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/4d/b9d3492d6a7a2536498fc7fd49c1cc7bc86a41acf93b0ad967d75dbe5cd6/multilspy-0.0.15-py3-none-any.whl", hash = "sha256:3fa88939b953ed5d39aba4688a34105ec1e5cf2b2f778167fee2b78b3c0e1427", size = 137361, upload-time = "2025-04-03T07:01:25.492Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/10/8b/c265f4823726ab832de836cdd184d0986dcf94480f81e8739692a7ac7af2/numpy-2.4.3.tar.gz", hash = "sha256:483a201202b73495f00dbc83796c6ae63137a9bdade074f7648b3e32613412dd", size = 20727743, upload-time = "2026-03-09T07:58:53.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/d0/1fe47a98ce0df229238b77611340aff92d52691bcbc10583303181abf7fc/numpy-2.4.3-cp313-cp313-macosx_10_13_x86_64.whl", 
hash = "sha256:b346845443716c8e542d54112966383b448f4a3ba5c66409771b8c0889485dd3", size = 16665297, upload-time = "2026-03-09T07:56:52.296Z" }, + { url = "https://files.pythonhosted.org/packages/27/d9/4e7c3f0e68dfa91f21c6fb6cf839bc829ec920688b1ce7ec722b1a6202fb/numpy-2.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2629289168f4897a3c4e23dc98d6f1731f0fc0fe52fb9db19f974041e4cc12b9", size = 14691853, upload-time = "2026-03-09T07:56:54.992Z" }, + { url = "https://files.pythonhosted.org/packages/3a/66/bd096b13a87549683812b53ab211e6d413497f84e794fb3c39191948da97/numpy-2.4.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bb2e3cf95854233799013779216c57e153c1ee67a0bf92138acca0e429aefaee", size = 5198435, upload-time = "2026-03-09T07:56:57.184Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2f/687722910b5a5601de2135c891108f51dfc873d8e43c8ed9f4ebb440b4a2/numpy-2.4.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:7f3408ff897f8ab07a07fbe2823d7aee6ff644c097cc1f90382511fe982f647f", size = 6546347, upload-time = "2026-03-09T07:56:59.531Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ec/7971c4e98d86c564750393fab8d7d83d0a9432a9d78bb8a163a6dc59967a/numpy-2.4.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:decb0eb8a53c3b009b0962378065589685d66b23467ef5dac16cbe818afde27f", size = 15664626, upload-time = "2026-03-09T07:57:01.385Z" }, + { url = "https://files.pythonhosted.org/packages/7e/eb/7daecbea84ec935b7fc732e18f532073064a3816f0932a40a17f3349185f/numpy-2.4.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5f51900414fc9204a0e0da158ba2ac52b75656e7dce7e77fb9f84bfa343b4cc", size = 16608916, upload-time = "2026-03-09T07:57:04.008Z" }, + { url = "https://files.pythonhosted.org/packages/df/58/2a2b4a817ffd7472dca4421d9f0776898b364154e30c95f42195041dc03b/numpy-2.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6bd06731541f89cdc01b261ba2c9e037f1543df7472517836b78dfb15bd6e476", size = 17015824, upload-time = "2026-03-09T07:57:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/4a/ca/627a828d44e78a418c55f82dd4caea8ea4a8ef24e5144d9e71016e52fb40/numpy-2.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22654fe6be0e5206f553a9250762c653d3698e46686eee53b399ab90da59bd92", size = 18334581, upload-time = "2026-03-09T07:57:09.114Z" }, + { url = "https://files.pythonhosted.org/packages/cd/c0/76f93962fc79955fcba30a429b62304332345f22d4daec1cb33653425643/numpy-2.4.3-cp313-cp313-win32.whl", hash = "sha256:d71e379452a2f670ccb689ec801b1218cd3983e253105d6e83780967e899d687", size = 5958618, upload-time = "2026-03-09T07:57:11.432Z" }, + { url = "https://files.pythonhosted.org/packages/b1/3c/88af0040119209b9b5cb59485fa48b76f372c73068dbf9254784b975ac53/numpy-2.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:0a60e17a14d640f49146cb38e3f105f571318db7826d9b6fef7e4dce758faecd", size = 12312824, upload-time = "2026-03-09T07:57:13.586Z" }, + { url = "https://files.pythonhosted.org/packages/58/ce/3d07743aced3d173f877c3ef6a454c2174ba42b584ab0b7e6d99374f51ed/numpy-2.4.3-cp313-cp313-win_arm64.whl", hash = "sha256:c9619741e9da2059cd9c3f206110b97583c7152c1dc9f8aafd4beb450ac1c89d", size = 10221218, upload-time = "2026-03-09T07:57:16.183Z" }, + { url = "https://files.pythonhosted.org/packages/62/09/d96b02a91d09e9d97862f4fc8bfebf5400f567d8eb1fe4b0cc4795679c15/numpy-2.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7aa4e54f6469300ebca1d9eb80acd5253cdfa36f2c03d79a35883687da430875", size = 14819570, upload-time = 
"2026-03-09T07:57:18.564Z" }, + { url = "https://files.pythonhosted.org/packages/b5/ca/0b1aba3905fdfa3373d523b2b15b19029f4f3031c87f4066bd9d20ef6c6b/numpy-2.4.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d1b90d840b25874cf5cd20c219af10bac3667db3876d9a495609273ebe679070", size = 5326113, upload-time = "2026-03-09T07:57:21.052Z" }, + { url = "https://files.pythonhosted.org/packages/c0/63/406e0fd32fcaeb94180fd6a4c41e55736d676c54346b7efbce548b94a914/numpy-2.4.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a749547700de0a20a6718293396ec237bb38218049cfce788e08fcb716e8cf73", size = 6646370, upload-time = "2026-03-09T07:57:22.804Z" }, + { url = "https://files.pythonhosted.org/packages/b6/d0/10f7dc157d4b37af92720a196be6f54f889e90dcd30dce9dc657ed92c257/numpy-2.4.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f3c4a151a2e529adf49c1d54f0f57ff8f9b233ee4d44af623a81553ab86368", size = 15723499, upload-time = "2026-03-09T07:57:24.693Z" }, + { url = "https://files.pythonhosted.org/packages/66/f1/d1c2bf1161396629701bc284d958dc1efa3a5a542aab83cf11ee6eb4cba5/numpy-2.4.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22c31dc07025123aedf7f2db9e91783df13f1776dc52c6b22c620870dc0fab22", size = 16657164, upload-time = "2026-03-09T07:57:27.676Z" }, + { url = "https://files.pythonhosted.org/packages/1a/be/cca19230b740af199ac47331a21c71e7a3d0ba59661350483c1600d28c37/numpy-2.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:148d59127ac95979d6f07e4d460f934ebdd6eed641db9c0db6c73026f2b2101a", size = 17081544, upload-time = "2026-03-09T07:57:30.664Z" }, + { url = "https://files.pythonhosted.org/packages/b9/c5/9602b0cbb703a0936fb40f8a95407e8171935b15846de2f0776e08af04c7/numpy-2.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a97cbf7e905c435865c2d939af3d93f99d18eaaa3cabe4256f4304fb51604349", size = 18380290, upload-time = "2026-03-09T07:57:33.763Z" }, + { url = "https://files.pythonhosted.org/packages/ed/81/9f24708953cd30be9ee36ec4778f4b112b45165812f2ada4cc5ea1c1f254/numpy-2.4.3-cp313-cp313t-win32.whl", hash = "sha256:be3b8487d725a77acccc9924f65fd8bce9af7fac8c9820df1049424a2115af6c", size = 6082814, upload-time = "2026-03-09T07:57:36.491Z" }, + { url = "https://files.pythonhosted.org/packages/e2/9e/52f6eaa13e1a799f0ab79066c17f7016a4a8ae0c1aefa58c82b4dab690b4/numpy-2.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1ec84fd7c8e652b0f4aaaf2e6e9cc8eaa9b1b80a537e06b2e3a2fb176eedcb26", size = 12452673, upload-time = "2026-03-09T07:57:38.281Z" }, + { url = "https://files.pythonhosted.org/packages/c4/04/b8cece6ead0b30c9fbd99bb835ad7ea0112ac5f39f069788c5558e3b1ab2/numpy-2.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:120df8c0a81ebbf5b9020c91439fccd85f5e018a927a39f624845be194a2be02", size = 10290907, upload-time = "2026-03-09T07:57:40.747Z" }, + { url = "https://files.pythonhosted.org/packages/70/ae/3936f79adebf8caf81bd7a599b90a561334a658be4dcc7b6329ebf4ee8de/numpy-2.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:5884ce5c7acfae1e4e1b6fde43797d10aa506074d25b531b4f54bde33c0c31d4", size = 16664563, upload-time = "2026-03-09T07:57:43.817Z" }, + { url = "https://files.pythonhosted.org/packages/9b/62/760f2b55866b496bb1fa7da2a6db076bef908110e568b02fcfc1422e2a3a/numpy-2.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:297837823f5bc572c5f9379b0c9f3a3365f08492cbdc33bcc3af174372ebb168", size = 14702161, upload-time = "2026-03-09T07:57:46.169Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/af/a7a39464e2c0a21526fb4fb76e346fb172ebc92f6d1c7a07c2c139cc17b1/numpy-2.4.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:a111698b4a3f8dcbe54c64a7708f049355abd603e619013c346553c1fd4ca90b", size = 5208738, upload-time = "2026-03-09T07:57:48.506Z" }, + { url = "https://files.pythonhosted.org/packages/29/8c/2a0cf86a59558fa078d83805589c2de490f29ed4fb336c14313a161d358a/numpy-2.4.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:4bd4741a6a676770e0e97fe9ab2e51de01183df3dcbcec591d26d331a40de950", size = 6543618, upload-time = "2026-03-09T07:57:50.591Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b8/612ce010c0728b1c363fa4ea3aa4c22fe1c5da1de008486f8c2f5cb92fae/numpy-2.4.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54f29b877279d51e210e0c80709ee14ccbbad647810e8f3d375561c45ef613dd", size = 15680676, upload-time = "2026-03-09T07:57:52.34Z" }, + { url = "https://files.pythonhosted.org/packages/a9/7e/4f120ecc54ba26ddf3dc348eeb9eb063f421de65c05fc961941798feea18/numpy-2.4.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:679f2a834bae9020f81534671c56fd0cc76dd7e5182f57131478e23d0dc59e24", size = 16613492, upload-time = "2026-03-09T07:57:54.91Z" }, + { url = "https://files.pythonhosted.org/packages/2c/86/1b6020db73be330c4b45d5c6ee4295d59cfeef0e3ea323959d053e5a6909/numpy-2.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d84f0f881cb2225c2dfd7f78a10a5645d487a496c6668d6cc39f0f114164f3d0", size = 17031789, upload-time = "2026-03-09T07:57:57.641Z" }, + { url = "https://files.pythonhosted.org/packages/07/3a/3b90463bf41ebc21d1b7e06079f03070334374208c0f9a1f05e4ae8455e7/numpy-2.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d213c7e6e8d211888cc359bab7199670a00f5b82c0978b9d1c75baf1eddbeac0", size = 18339941, upload-time = "2026-03-09T07:58:00.577Z" }, + { url = "https://files.pythonhosted.org/packages/a8/74/6d736c4cd962259fd8bae9be27363eb4883a2f9069763747347544c2a487/numpy-2.4.3-cp314-cp314-win32.whl", hash = "sha256:52077feedeff7c76ed7c9f1a0428558e50825347b7545bbb8523da2cd55c547a", size = 6007503, upload-time = "2026-03-09T07:58:03.331Z" }, + { url = "https://files.pythonhosted.org/packages/48/39/c56ef87af669364356bb011922ef0734fc49dad51964568634c72a009488/numpy-2.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:0448e7f9caefb34b4b7dd2b77f21e8906e5d6f0365ad525f9f4f530b13df2afc", size = 12444915, upload-time = "2026-03-09T07:58:06.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1f/ab8528e38d295fd349310807496fabb7cf9fe2e1f70b97bc20a483ea9d4a/numpy-2.4.3-cp314-cp314-win_arm64.whl", hash = "sha256:b44fd60341c4d9783039598efadd03617fa28d041fc37d22b62d08f2027fa0e7", size = 10494875, upload-time = "2026-03-09T07:58:08.734Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ef/b7c35e4d5ef141b836658ab21a66d1a573e15b335b1d111d31f26c8ef80f/numpy-2.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0a195f4216be9305a73c0e91c9b026a35f2161237cf1c6de9b681637772ea657", size = 14822225, upload-time = "2026-03-09T07:58:11.034Z" }, + { url = "https://files.pythonhosted.org/packages/cd/8d/7730fa9278cf6648639946cc816e7cc89f0d891602584697923375f801ed/numpy-2.4.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:cd32fbacb9fd1bf041bf8e89e4576b6f00b895f06d00914820ae06a616bdfef7", size = 5328769, upload-time = "2026-03-09T07:58:13.67Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/01/d2a137317c958b074d338807c1b6a383406cdf8b8e53b075d804cc3d211d/numpy-2.4.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:2e03c05abaee1f672e9d67bc858f300b5ccba1c21397211e8d77d98350972093", size = 6649461, upload-time = "2026-03-09T07:58:15.912Z" }, + { url = "https://files.pythonhosted.org/packages/5c/34/812ce12bc0f00272a4b0ec0d713cd237cb390666eb6206323d1cc9cedbb2/numpy-2.4.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d1ce23cce91fcea443320a9d0ece9b9305d4368875bab09538f7a5b4131938a", size = 15725809, upload-time = "2026-03-09T07:58:17.787Z" }, + { url = "https://files.pythonhosted.org/packages/25/c0/2aed473a4823e905e765fee3dc2cbf504bd3e68ccb1150fbdabd5c39f527/numpy-2.4.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c59020932feb24ed49ffd03704fbab89f22aa9c0d4b180ff45542fe8918f5611", size = 16655242, upload-time = "2026-03-09T07:58:20.476Z" }, + { url = "https://files.pythonhosted.org/packages/f2/c8/7e052b2fc87aa0e86de23f20e2c42bd261c624748aa8efd2c78f7bb8d8c6/numpy-2.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9684823a78a6cd6ad7511fc5e25b07947d1d5b5e2812c93fe99d7d4195130720", size = 17080660, upload-time = "2026-03-09T07:58:23.067Z" }, + { url = "https://files.pythonhosted.org/packages/f3/3d/0876746044db2adcb11549f214d104f2e1be00f07a67edbb4e2812094847/numpy-2.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0200b25c687033316fb39f0ff4e3e690e8957a2c3c8d22499891ec58c37a3eb5", size = 18380384, upload-time = "2026-03-09T07:58:25.839Z" }, + { url = "https://files.pythonhosted.org/packages/07/12/8160bea39da3335737b10308df4f484235fd297f556745f13092aa039d3b/numpy-2.4.3-cp314-cp314t-win32.whl", hash = "sha256:5e10da9e93247e554bb1d22f8edc51847ddd7dde52d85ce31024c1b4312bfba0", size = 6154547, upload-time = "2026-03-09T07:58:28.289Z" }, + { url = "https://files.pythonhosted.org/packages/42/f3/76534f61f80d74cc9cdf2e570d3d4eeb92c2280a27c39b0aaf471eda7b48/numpy-2.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:45f003dbdffb997a03da2d1d0cb41fbd24a87507fb41605c0420a3db5bd4667b", size = 12633645, upload-time = "2026-03-09T07:58:30.384Z" }, + { url = "https://files.pythonhosted.org/packages/1f/b6/7c0d4334c15983cec7f92a69e8ce9b1e6f31857e5ee3a413ac424e6bd63d/numpy-2.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:4d382735cecd7bcf090172489a525cd7d4087bc331f7df9f60ddc9a296cf208e", size = 10565454, upload-time = "2026-03-09T07:58:33.031Z" }, +] + +[[package]] +name = "openai" +version = "2.31.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/fe/64b3d035780b3188f86c4f6f1bc202e7bb74757ef028802112273b9dcacf/openai-2.31.0.tar.gz", hash = "sha256:43ca59a88fc973ad1848d86b98d7fac207e265ebbd1828b5e4bdfc85f79427a5", size = 684772, upload-time = "2026-04-08T21:01:41.797Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/bc/a8f7c3aa03452fedbb9af8be83e959adba96a6b4a35e416faffcc959c568/openai-2.31.0-py3-none-any.whl", hash = "sha256:44e1344d87e56a493d649b17e2fac519d1368cbb0745f59f1957c4c26de50a0a", size = 1153479, upload-time = "2026-04-08T21:01:39.217Z" }, +] + +[package.optional-dependencies] +aiohttp = [ + { name = "aiohttp" }, + { name = "httpx-aiohttp" }, +] + +[[package]] +name = 
"openapi-pydantic" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/2e/58d83848dd1a79cb92ed8e63f6ba901ca282c5f09d04af9423ec26c56fd7/openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d", size = 60892, upload-time = "2025-01-08T19:29:27.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/cf/03675d8bd8ecbf4445504d8071adab19f5f993676795708e36402ab38263/openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146", size = 96381, upload-time = "2025-01-08T19:29:25.275Z" }, +] + +[[package]] +name = "opentelemetry-api" +version = "1.39.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" }, +] + +[[package]] +name = "opentelemetry-distro" +version = "0.60b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/15/77/f0b1f2bcf451ec5bc443d53bc7437577c3fc8444b3eb0d416ac5f7558b7b/opentelemetry_distro-0.60b1.tar.gz", hash = "sha256:8b7326b83a55ff7b17bb92225a86e2736a004f6af7aff00cb5d87b2d8e5bc283", size = 2584, upload-time = "2025-12-11T13:36:39.522Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/70/78a86531495040fcad9569d7daa630eca06d27d37c825a8aad448b7c1c5b/opentelemetry_distro-0.60b1-py3-none-any.whl", hash = "sha256:581104a786f5df252f4dfe725e0ae16337a26da902acb92d8b3e7aee29f0c76e", size = 3343, upload-time = "2025-12-11T13:35:28.462Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.39.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-proto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/9d/22d241b66f7bbde88a3bfa6847a351d2c46b84de23e71222c6aae25c7050/opentelemetry_exporter_otlp_proto_common-1.39.1.tar.gz", hash = "sha256:763370d4737a59741c89a67b50f9e39271639ee4afc999dadfe768541c027464", size = 20409, upload-time = "2025-12-11T13:32:40.885Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8c/02/ffc3e143d89a27ac21fd557365b98bd0653b98de8a101151d5805b5d4c33/opentelemetry_exporter_otlp_proto_common-1.39.1-py3-none-any.whl", hash = "sha256:08f8a5862d64cc3435105686d0216c1365dc5701f86844a8cd56597d0c764fde", size = 18366, upload-time = "2025-12-11T13:32:20.2Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.39.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "opentelemetry-api" }, + { name = 
"opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/80/04/2a08fa9c0214ae38880df01e8bfae12b067ec0793446578575e5080d6545/opentelemetry_exporter_otlp_proto_http-1.39.1.tar.gz", hash = "sha256:31bdab9745c709ce90a49a0624c2bd445d31a28ba34275951a6a362d16a0b9cb", size = 17288, upload-time = "2025-12-11T13:32:42.029Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/f1/b27d3e2e003cd9a3592c43d099d2ed8d0a947c15281bf8463a256db0b46c/opentelemetry_exporter_otlp_proto_http-1.39.1-py3-none-any.whl", hash = "sha256:d9f5207183dd752a412c4cd564ca8875ececba13be6e9c6c370ffb752fd59985", size = 19641, upload-time = "2025-12-11T13:32:22.248Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation" +version = "0.60b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/41/0f/7e6b713ac117c1f5e4e3300748af699b9902a2e5e34c9cf443dde25a01fa/opentelemetry_instrumentation-0.60b1.tar.gz", hash = "sha256:57ddc7974c6eb35865af0426d1a17132b88b2ed8586897fee187fd5b8944bd6a", size = 31706, upload-time = "2025-12-11T13:36:42.515Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/d2/6788e83c5c86a2690101681aeef27eeb2a6bf22df52d3f263a22cee20915/opentelemetry_instrumentation-0.60b1-py3-none-any.whl", hash = "sha256:04480db952b48fb1ed0073f822f0ee26012b7be7c3eac1a3793122737c78632d", size = 33096, upload-time = "2025-12-11T13:35:33.067Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-anthropic" +version = "0.52.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-semantic-conventions-ai" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/0d/cd59fb2475dfb245af493873cb4c5afa568cf66ad4e0de832c6513541267/opentelemetry_instrumentation_anthropic-0.52.1.tar.gz", hash = "sha256:e3462adc0956c95575ff845be78c3ab51113cf9372d3f64ef7119896ab304fbb", size = 682764, upload-time = "2026-02-02T09:23:02.275Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/b8/87380b52b436d4d4683425038d8ab28987f070ff806dc0561ed0bcae69b6/opentelemetry_instrumentation_anthropic-0.52.1-py3-none-any.whl", hash = "sha256:9e902e4ae14b5ca2a5a60c22a1a2d6fe245ff45f4d7037e5705468c21ea431fa", size = 18428, upload-time = "2026-02-02T09:22:23.45Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-google-genai" +version = "0.6b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-genai" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/59/a0d3da1679c056db45fa6338b332f9add8544c5afe9f7643d37062617c9a/opentelemetry_instrumentation_google_genai-0.6b0.tar.gz", hash = "sha256:76229c51a166d53e58e0376487f420562f1ab155511fe932110b4ea9c5718aad", size = 48433, upload-time = "2026-01-27T22:15:13.906Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/71/fd/5da48a5efef82034b6c0e20d5d293f4ade223bb2ec54101c1f7c81206577/opentelemetry_instrumentation_google_genai-0.6b0-py3-none-any.whl", hash = "sha256:bc5cf5957b697f05ffb765f59bb9e08aae457f1d08967753966e9d8a49b1b79f", size = 29861, upload-time = "2026-01-27T22:15:12.912Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-mcp" +version = "0.52.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-semantic-conventions-ai" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/69/9b9413172a630e61426772699d828a94f3354f647d5b0266b579df3a18ac/opentelemetry_instrumentation_mcp-0.52.1.tar.gz", hash = "sha256:b159190a9a93ccf8c39259f250d62f187b720b4a3844d7e4f655ccfaa25bc1f8", size = 120368, upload-time = "2026-02-02T09:23:14.785Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/70/dc3a34e55da4c015aa2c879fb317d23fedf20175ba30a6f68e21d73da778/opentelemetry_instrumentation_mcp-0.52.1-py3-none-any.whl", hash = "sha256:39b8c3c0f841b694ae2301075633fbd8ae30d6ee06671d2f878d2742ef88c7d3", size = 10463, upload-time = "2026-02-02T09:22:39.167Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-openai" +version = "0.52.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-semantic-conventions-ai" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/71/b178325571b504cd5709a61f3c205ddcdb0471eab945c564354f64ac2de9/opentelemetry_instrumentation_openai-0.52.1.tar.gz", hash = "sha256:444a60163856c52a1a620197ae6e3bb6d4492da94969737b0abe9a4388a108fc", size = 6978373, upload-time = "2026-02-02T09:23:18.838Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/c3/f4e62f185d9f55286cfaf1dbf89cf87f48f17d39d7b1602c55cf2dcb2dd8/opentelemetry_instrumentation_openai-0.52.1-py3-none-any.whl", hash = "sha256:1f3818fbdd6ac4b038a099b99cd8afd5a959a396ad26baf422ad2002d8209ddd", size = 43085, upload-time = "2026-02-02T09:22:43.274Z" }, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.39.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" }, +] + +[[package]] +name = "opentelemetry-sdk" +version = "1.39.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/fb/c76080c9ba07e1e8235d24cdcc4d125ef7aa3edf23eb4e497c2e50889adc/opentelemetry_sdk-1.39.1.tar.gz", hash = 
"sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6", size = 171460, upload-time = "2025-12-11T13:32:49.369Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/98/e91cf858f203d86f4eccdf763dcf01cf03f1dae80c3750f7e635bfa206b6/opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", size = 132565, upload-time = "2025-12-11T13:32:35.069Z" }, +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.60b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/df/553f93ed38bf22f4b999d9be9c185adb558982214f33eae539d3b5cd0858/opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", size = 137935, upload-time = "2025-12-11T13:32:50.487Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/5e/5958555e09635d09b75de3c4f8b9cae7335ca545d77392ffe7331534c402/opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb", size = 219982, upload-time = "2025-12-11T13:32:36.955Z" }, +] + +[[package]] +name = "opentelemetry-semantic-conventions-ai" +version = "0.4.16" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-sdk" }, + { name = "opentelemetry-semantic-conventions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/37/44/fda3c60e77548224ffd86b62aab5a58534e1d32f74d2ccd50ef58aade8d3/opentelemetry_semantic_conventions_ai-0.4.16.tar.gz", hash = "sha256:572eb878d8b81e50f1e53d2a5c1b441e7d34918ee01c846ff62485204d660c22", size = 19071, upload-time = "2026-03-19T15:29:35.357Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/98/e8bf804f2351603b508abaa624096ba279f1d62c3104e7020b45ae938d54/opentelemetry_semantic_conventions_ai-0.4.16-py3-none-any.whl", hash = "sha256:d5ddd0df387b969da82e3e0a8b7415e91d2fc7ce13de7efc2690a7939932b2e0", size = 6495, upload-time = "2026-03-19T15:29:33.974Z" }, +] + +[[package]] +name = "opentelemetry-util-genai" +version = "0.2b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/06/869c18f22fa32d6db9ebbc5bd82f18614c379b726aa9770d1b2d20f9178c/opentelemetry_util_genai-0.2b0.tar.gz", hash = "sha256:803d5d5e720f3e057c64d935dfd46dc013784820715d996980cb5b79bb5774a3", size = 20542, upload-time = "2025-10-15T20:07:31.873Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/0d/1dc4705e44540183ab603aa4e1b0e3f888e63156678192e8c64f42c782ed/opentelemetry_util_genai-0.2b0-py3-none-any.whl", hash = "sha256:06dc8664713f9cec216d7093585a1a5fbb5f6fb7a387f19430774a5028aaa30b", size = 22237, upload-time = "2025-10-15T20:07:30.991Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "parso" +version = "0.8.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/81/76/a1e769043c0c0c9fe391b702539d594731a4362334cdf4dc25d0c09761e7/parso-0.8.6.tar.gz", hash = "sha256:2b9a0332696df97d454fa67b81618fd69c35a7b90327cbe6ba5c92d2c68a7bfd", size = 401621, upload-time = "2026-02-09T15:45:24.425Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/61/fae042894f4296ec49e3f193aff5d7c18440da9e48102c3315e1bc4519a7/parso-0.8.6-py2.py3-none-any.whl", hash = "sha256:2c549f800b70a5c4952197248825584cb00f033b29c692671d3bf08bf380baff", size = 106894, upload-time = "2026-02-09T15:45:21.391Z" }, +] + +[[package]] +name = "pathable" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/55/b748445cb4ea6b125626f15379be7c96d1035d4fa3e8fee362fa92298abf/pathable-0.5.0.tar.gz", hash = "sha256:d81938348a1cacb525e7c75166270644782c0fb9c8cecc16be033e71427e0ef1", size = 16655, upload-time = "2026-02-20T08:47:00.748Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/96/5a770e5c461462575474468e5af931cff9de036e7c2b4fea23c1c58d2cbe/pathable-0.5.0-py3-none-any.whl", hash = "sha256:646e3d09491a6351a0c82632a09c02cdf70a252e73196b36d8a15ba0a114f0a6", size = 16867, upload-time = "2026-02-20T08:46:59.536Z" }, +] + +[[package]] +name = "platformdirs" +version = "4.9.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/56/8d4c30c8a1d07013911a8fdbd8f89440ef9f08d07a1b50ab8ca8be5a20f9/platformdirs-4.9.4.tar.gz", hash = "sha256:1ec356301b7dc906d83f371c8f487070e99d3ccf9e501686456394622a01a934", size = 28737, upload-time = "2026-03-05T18:34:13.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868", size = 21216, upload-time = "2026-03-05T18:34:12.172Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "prompt-toolkit" +version = "3.0.52" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, +] + +[[package]] +name = "propcache" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload-time = "2025-10-08T19:49:02.291Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/df/6d9c1b6ac12b003837dde8a10231a7344512186e87b36e855bef32241942/propcache-0.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:43eedf29202c08550aac1d14e0ee619b0430aaef78f85864c1a892294fbc28cf", size = 77750, upload-time = "2025-10-08T19:47:07.648Z" }, + { url = "https://files.pythonhosted.org/packages/8b/e8/677a0025e8a2acf07d3418a2e7ba529c9c33caf09d3c1f25513023c1db56/propcache-0.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d62cdfcfd89ccb8de04e0eda998535c406bf5e060ffd56be6c586cbcc05b3311", size = 44780, upload-time = "2025-10-08T19:47:08.851Z" }, + { url = "https://files.pythonhosted.org/packages/89/a4/92380f7ca60f99ebae761936bc48a72a639e8a47b29050615eef757cb2a7/propcache-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cae65ad55793da34db5f54e4029b89d3b9b9490d8abe1b4c7ab5d4b8ec7ebf74", size = 46308, upload-time = "2025-10-08T19:47:09.982Z" }, + { url = "https://files.pythonhosted.org/packages/2d/48/c5ac64dee5262044348d1d78a5f85dd1a57464a60d30daee946699963eb3/propcache-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:333ddb9031d2704a301ee3e506dc46b1fe5f294ec198ed6435ad5b6a085facfe", size = 208182, upload-time = "2025-10-08T19:47:11.319Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0c/cd762dd011a9287389a6a3eb43aa30207bde253610cca06824aeabfe9653/propcache-0.4.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd0858c20f078a32cf55f7e81473d96dcf3b93fd2ccdb3d40fdf54b8573df3af", size = 211215, upload-time = "2025-10-08T19:47:13.146Z" }, + { url = "https://files.pythonhosted.org/packages/30/3e/49861e90233ba36890ae0ca4c660e95df565b2cd15d4a68556ab5865974e/propcache-0.4.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:678ae89ebc632c5c204c794f8dab2837c5f159aeb59e6ed0539500400577298c", size = 218112, upload-time = "2025-10-08T19:47:14.913Z" }, + { url = "https://files.pythonhosted.org/packages/f1/8b/544bc867e24e1bd48f3118cecd3b05c694e160a168478fa28770f22fd094/propcache-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d472aeb4fbf9865e0c6d622d7f4d54a4e101a89715d8904282bb5f9a2f476c3f", size = 204442, upload-time = "2025-10-08T19:47:16.277Z" }, + { url = "https://files.pythonhosted.org/packages/50/a6/4282772fd016a76d3e5c0df58380a5ea64900afd836cec2c2f662d1b9bb3/propcache-0.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4d3df5fa7e36b3225954fba85589da77a0fe6a53e3976de39caf04a0db4c36f1", size = 199398, upload-time = "2025-10-08T19:47:17.962Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/ec/d8a7cd406ee1ddb705db2139f8a10a8a427100347bd698e7014351c7af09/propcache-0.4.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ee17f18d2498f2673e432faaa71698032b0127ebf23ae5974eeaf806c279df24", size = 196920, upload-time = "2025-10-08T19:47:19.355Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/f38ab64af3764f431e359f8baf9e0a21013e24329e8b85d2da32e8ed07ca/propcache-0.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:580e97762b950f993ae618e167e7be9256b8353c2dcd8b99ec100eb50f5286aa", size = 203748, upload-time = "2025-10-08T19:47:21.338Z" }, + { url = "https://files.pythonhosted.org/packages/d6/e3/fa846bd70f6534d647886621388f0a265254d30e3ce47e5c8e6e27dbf153/propcache-0.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:501d20b891688eb8e7aa903021f0b72d5a55db40ffaab27edefd1027caaafa61", size = 205877, upload-time = "2025-10-08T19:47:23.059Z" }, + { url = "https://files.pythonhosted.org/packages/e2/39/8163fc6f3133fea7b5f2827e8eba2029a0277ab2c5beee6c1db7b10fc23d/propcache-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a0bd56e5b100aef69bd8562b74b46254e7c8812918d3baa700c8a8009b0af66", size = 199437, upload-time = "2025-10-08T19:47:24.445Z" }, + { url = "https://files.pythonhosted.org/packages/93/89/caa9089970ca49c7c01662bd0eeedfe85494e863e8043565aeb6472ce8fe/propcache-0.4.1-cp313-cp313-win32.whl", hash = "sha256:bcc9aaa5d80322bc2fb24bb7accb4a30f81e90ab8d6ba187aec0744bc302ad81", size = 37586, upload-time = "2025-10-08T19:47:25.736Z" }, + { url = "https://files.pythonhosted.org/packages/f5/ab/f76ec3c3627c883215b5c8080debb4394ef5a7a29be811f786415fc1e6fd/propcache-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:381914df18634f5494334d201e98245c0596067504b9372d8cf93f4bb23e025e", size = 40790, upload-time = "2025-10-08T19:47:26.847Z" }, + { url = "https://files.pythonhosted.org/packages/59/1b/e71ae98235f8e2ba5004d8cb19765a74877abf189bc53fc0c80d799e56c3/propcache-0.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:8873eb4460fd55333ea49b7d189749ecf6e55bf85080f11b1c4530ed3034cba1", size = 37158, upload-time = "2025-10-08T19:47:27.961Z" }, + { url = "https://files.pythonhosted.org/packages/83/ce/a31bbdfc24ee0dcbba458c8175ed26089cf109a55bbe7b7640ed2470cfe9/propcache-0.4.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:92d1935ee1f8d7442da9c0c4fa7ac20d07e94064184811b685f5c4fada64553b", size = 81451, upload-time = "2025-10-08T19:47:29.445Z" }, + { url = "https://files.pythonhosted.org/packages/25/9c/442a45a470a68456e710d96cacd3573ef26a1d0a60067e6a7d5e655621ed/propcache-0.4.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:473c61b39e1460d386479b9b2f337da492042447c9b685f28be4f74d3529e566", size = 46374, upload-time = "2025-10-08T19:47:30.579Z" }, + { url = "https://files.pythonhosted.org/packages/f4/bf/b1d5e21dbc3b2e889ea4327044fb16312a736d97640fb8b6aa3f9c7b3b65/propcache-0.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c0ef0aaafc66fbd87842a3fe3902fd889825646bc21149eafe47be6072725835", size = 48396, upload-time = "2025-10-08T19:47:31.79Z" }, + { url = "https://files.pythonhosted.org/packages/f4/04/5b4c54a103d480e978d3c8a76073502b18db0c4bc17ab91b3cb5092ad949/propcache-0.4.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f95393b4d66bfae908c3ca8d169d5f79cd65636ae15b5e7a4f6e67af675adb0e", size = 275950, upload-time = "2025-10-08T19:47:33.481Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/c1/86f846827fb969c4b78b0af79bba1d1ea2156492e1b83dea8b8a6ae27395/propcache-0.4.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c07fda85708bc48578467e85099645167a955ba093be0a2dcba962195676e859", size = 273856, upload-time = "2025-10-08T19:47:34.906Z" }, + { url = "https://files.pythonhosted.org/packages/36/1d/fc272a63c8d3bbad6878c336c7a7dea15e8f2d23a544bda43205dfa83ada/propcache-0.4.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:af223b406d6d000830c6f65f1e6431783fc3f713ba3e6cc8c024d5ee96170a4b", size = 280420, upload-time = "2025-10-08T19:47:36.338Z" }, + { url = "https://files.pythonhosted.org/packages/07/0c/01f2219d39f7e53d52e5173bcb09c976609ba30209912a0680adfb8c593a/propcache-0.4.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a78372c932c90ee474559c5ddfffd718238e8673c340dc21fe45c5b8b54559a0", size = 263254, upload-time = "2025-10-08T19:47:37.692Z" }, + { url = "https://files.pythonhosted.org/packages/2d/18/cd28081658ce597898f0c4d174d4d0f3c5b6d4dc27ffafeef835c95eb359/propcache-0.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:564d9f0d4d9509e1a870c920a89b2fec951b44bf5ba7d537a9e7c1ccec2c18af", size = 261205, upload-time = "2025-10-08T19:47:39.659Z" }, + { url = "https://files.pythonhosted.org/packages/7a/71/1f9e22eb8b8316701c2a19fa1f388c8a3185082607da8e406a803c9b954e/propcache-0.4.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:17612831fda0138059cc5546f4d12a2aacfb9e47068c06af35c400ba58ba7393", size = 247873, upload-time = "2025-10-08T19:47:41.084Z" }, + { url = "https://files.pythonhosted.org/packages/4a/65/3d4b61f36af2b4eddba9def857959f1016a51066b4f1ce348e0cf7881f58/propcache-0.4.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:41a89040cb10bd345b3c1a873b2bf36413d48da1def52f268a055f7398514874", size = 262739, upload-time = "2025-10-08T19:47:42.51Z" }, + { url = "https://files.pythonhosted.org/packages/2a/42/26746ab087faa77c1c68079b228810436ccd9a5ce9ac85e2b7307195fd06/propcache-0.4.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e35b88984e7fa64aacecea39236cee32dd9bd8c55f57ba8a75cf2399553f9bd7", size = 263514, upload-time = "2025-10-08T19:47:43.927Z" }, + { url = "https://files.pythonhosted.org/packages/94/13/630690fe201f5502d2403dd3cfd451ed8858fe3c738ee88d095ad2ff407b/propcache-0.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f8b465489f927b0df505cbe26ffbeed4d6d8a2bbc61ce90eb074ff129ef0ab1", size = 257781, upload-time = "2025-10-08T19:47:45.448Z" }, + { url = "https://files.pythonhosted.org/packages/92/f7/1d4ec5841505f423469efbfc381d64b7b467438cd5a4bbcbb063f3b73d27/propcache-0.4.1-cp313-cp313t-win32.whl", hash = "sha256:2ad890caa1d928c7c2965b48f3a3815c853180831d0e5503d35cf00c472f4717", size = 41396, upload-time = "2025-10-08T19:47:47.202Z" }, + { url = "https://files.pythonhosted.org/packages/48/f0/615c30622316496d2cbbc29f5985f7777d3ada70f23370608c1d3e081c1f/propcache-0.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:f7ee0e597f495cf415bcbd3da3caa3bd7e816b74d0d52b8145954c5e6fd3ff37", size = 44897, upload-time = "2025-10-08T19:47:48.336Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ca/6002e46eccbe0e33dcd4069ef32f7f1c9e243736e07adca37ae8c4830ec3/propcache-0.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:929d7cbe1f01bb7baffb33dc14eb5691c95831450a26354cd210a8155170c93a", size = 39789, upload-time = "2025-10-08T19:47:49.876Z" }, + { url 
= "https://files.pythonhosted.org/packages/8e/5c/bca52d654a896f831b8256683457ceddd490ec18d9ec50e97dfd8fc726a8/propcache-0.4.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3f7124c9d820ba5548d431afb4632301acf965db49e666aa21c305cbe8c6de12", size = 78152, upload-time = "2025-10-08T19:47:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/65/9b/03b04e7d82a5f54fb16113d839f5ea1ede58a61e90edf515f6577c66fa8f/propcache-0.4.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c0d4b719b7da33599dfe3b22d3db1ef789210a0597bc650b7cee9c77c2be8c5c", size = 44869, upload-time = "2025-10-08T19:47:52.594Z" }, + { url = "https://files.pythonhosted.org/packages/b2/fa/89a8ef0468d5833a23fff277b143d0573897cf75bd56670a6d28126c7d68/propcache-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9f302f4783709a78240ebc311b793f123328716a60911d667e0c036bc5dcbded", size = 46596, upload-time = "2025-10-08T19:47:54.073Z" }, + { url = "https://files.pythonhosted.org/packages/86/bd/47816020d337f4a746edc42fe8d53669965138f39ee117414c7d7a340cfe/propcache-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c80ee5802e3fb9ea37938e7eecc307fb984837091d5fd262bb37238b1ae97641", size = 206981, upload-time = "2025-10-08T19:47:55.715Z" }, + { url = "https://files.pythonhosted.org/packages/df/f6/c5fa1357cc9748510ee55f37173eb31bfde6d94e98ccd9e6f033f2fc06e1/propcache-0.4.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ed5a841e8bb29a55fb8159ed526b26adc5bdd7e8bd7bf793ce647cb08656cdf4", size = 211490, upload-time = "2025-10-08T19:47:57.499Z" }, + { url = "https://files.pythonhosted.org/packages/80/1e/e5889652a7c4a3846683401a48f0f2e5083ce0ec1a8a5221d8058fbd1adf/propcache-0.4.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:55c72fd6ea2da4c318e74ffdf93c4fe4e926051133657459131a95c846d16d44", size = 215371, upload-time = "2025-10-08T19:47:59.317Z" }, + { url = "https://files.pythonhosted.org/packages/b2/f2/889ad4b2408f72fe1a4f6a19491177b30ea7bf1a0fd5f17050ca08cfc882/propcache-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8326e144341460402713f91df60ade3c999d601e7eb5ff8f6f7862d54de0610d", size = 201424, upload-time = "2025-10-08T19:48:00.67Z" }, + { url = "https://files.pythonhosted.org/packages/27/73/033d63069b57b0812c8bd19f311faebeceb6ba31b8f32b73432d12a0b826/propcache-0.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:060b16ae65bc098da7f6d25bf359f1f31f688384858204fe5d652979e0015e5b", size = 197566, upload-time = "2025-10-08T19:48:02.604Z" }, + { url = "https://files.pythonhosted.org/packages/dc/89/ce24f3dc182630b4e07aa6d15f0ff4b14ed4b9955fae95a0b54c58d66c05/propcache-0.4.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:89eb3fa9524f7bec9de6e83cf3faed9d79bffa560672c118a96a171a6f55831e", size = 193130, upload-time = "2025-10-08T19:48:04.499Z" }, + { url = "https://files.pythonhosted.org/packages/a9/24/ef0d5fd1a811fb5c609278d0209c9f10c35f20581fcc16f818da959fc5b4/propcache-0.4.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:dee69d7015dc235f526fe80a9c90d65eb0039103fe565776250881731f06349f", size = 202625, upload-time = "2025-10-08T19:48:06.213Z" }, + { url = "https://files.pythonhosted.org/packages/f5/02/98ec20ff5546f68d673df2f7a69e8c0d076b5abd05ca882dc7ee3a83653d/propcache-0.4.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = 
"sha256:5558992a00dfd54ccbc64a32726a3357ec93825a418a401f5cc67df0ac5d9e49", size = 204209, upload-time = "2025-10-08T19:48:08.432Z" }, + { url = "https://files.pythonhosted.org/packages/a0/87/492694f76759b15f0467a2a93ab68d32859672b646aa8a04ce4864e7932d/propcache-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c9b822a577f560fbd9554812526831712c1436d2c046cedee4c3796d3543b144", size = 197797, upload-time = "2025-10-08T19:48:09.968Z" }, + { url = "https://files.pythonhosted.org/packages/ee/36/66367de3575db1d2d3f3d177432bd14ee577a39d3f5d1b3d5df8afe3b6e2/propcache-0.4.1-cp314-cp314-win32.whl", hash = "sha256:ab4c29b49d560fe48b696cdcb127dd36e0bc2472548f3bf56cc5cb3da2b2984f", size = 38140, upload-time = "2025-10-08T19:48:11.232Z" }, + { url = "https://files.pythonhosted.org/packages/0c/2a/a758b47de253636e1b8aef181c0b4f4f204bf0dd964914fb2af90a95b49b/propcache-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:5a103c3eb905fcea0ab98be99c3a9a5ab2de60228aa5aceedc614c0281cf6153", size = 41257, upload-time = "2025-10-08T19:48:12.707Z" }, + { url = "https://files.pythonhosted.org/packages/34/5e/63bd5896c3fec12edcbd6f12508d4890d23c265df28c74b175e1ef9f4f3b/propcache-0.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:74c1fb26515153e482e00177a1ad654721bf9207da8a494a0c05e797ad27b992", size = 38097, upload-time = "2025-10-08T19:48:13.923Z" }, + { url = "https://files.pythonhosted.org/packages/99/85/9ff785d787ccf9bbb3f3106f79884a130951436f58392000231b4c737c80/propcache-0.4.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:824e908bce90fb2743bd6b59db36eb4f45cd350a39637c9f73b1c1ea66f5b75f", size = 81455, upload-time = "2025-10-08T19:48:15.16Z" }, + { url = "https://files.pythonhosted.org/packages/90/85/2431c10c8e7ddb1445c1f7c4b54d886e8ad20e3c6307e7218f05922cad67/propcache-0.4.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2b5e7db5328427c57c8e8831abda175421b709672f6cfc3d630c3b7e2146393", size = 46372, upload-time = "2025-10-08T19:48:16.424Z" }, + { url = "https://files.pythonhosted.org/packages/01/20/b0972d902472da9bcb683fa595099911f4d2e86e5683bcc45de60dd05dc3/propcache-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6f6ff873ed40292cd4969ef5310179afd5db59fdf055897e282485043fc80ad0", size = 48411, upload-time = "2025-10-08T19:48:17.577Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e3/7dc89f4f21e8f99bad3d5ddb3a3389afcf9da4ac69e3deb2dcdc96e74169/propcache-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49a2dc67c154db2c1463013594c458881a069fcf98940e61a0569016a583020a", size = 275712, upload-time = "2025-10-08T19:48:18.901Z" }, + { url = "https://files.pythonhosted.org/packages/20/67/89800c8352489b21a8047c773067644e3897f02ecbbd610f4d46b7f08612/propcache-0.4.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:005f08e6a0529984491e37d8dbc3dd86f84bd78a8ceb5fa9a021f4c48d4984be", size = 273557, upload-time = "2025-10-08T19:48:20.762Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a1/b52b055c766a54ce6d9c16d9aca0cad8059acd9637cdf8aa0222f4a026ef/propcache-0.4.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5c3310452e0d31390da9035c348633b43d7e7feb2e37be252be6da45abd1abcc", size = 280015, upload-time = "2025-10-08T19:48:22.592Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/c8/33cee30bd890672c63743049f3c9e4be087e6780906bfc3ec58528be59c1/propcache-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c3c70630930447f9ef1caac7728c8ad1c56bc5015338b20fed0d08ea2480b3a", size = 262880, upload-time = "2025-10-08T19:48:23.947Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b1/8f08a143b204b418285c88b83d00edbd61afbc2c6415ffafc8905da7038b/propcache-0.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8e57061305815dfc910a3634dcf584f08168a8836e6999983569f51a8544cd89", size = 260938, upload-time = "2025-10-08T19:48:25.656Z" }, + { url = "https://files.pythonhosted.org/packages/cf/12/96e4664c82ca2f31e1c8dff86afb867348979eb78d3cb8546a680287a1e9/propcache-0.4.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:521a463429ef54143092c11a77e04056dd00636f72e8c45b70aaa3140d639726", size = 247641, upload-time = "2025-10-08T19:48:27.207Z" }, + { url = "https://files.pythonhosted.org/packages/18/ed/e7a9cfca28133386ba52278136d42209d3125db08d0a6395f0cba0c0285c/propcache-0.4.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:120c964da3fdc75e3731aa392527136d4ad35868cc556fd09bb6d09172d9a367", size = 262510, upload-time = "2025-10-08T19:48:28.65Z" }, + { url = "https://files.pythonhosted.org/packages/f5/76/16d8bf65e8845dd62b4e2b57444ab81f07f40caa5652b8969b87ddcf2ef6/propcache-0.4.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:d8f353eb14ee3441ee844ade4277d560cdd68288838673273b978e3d6d2c8f36", size = 263161, upload-time = "2025-10-08T19:48:30.133Z" }, + { url = "https://files.pythonhosted.org/packages/e7/70/c99e9edb5d91d5ad8a49fa3c1e8285ba64f1476782fed10ab251ff413ba1/propcache-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ab2943be7c652f09638800905ee1bab2c544e537edb57d527997a24c13dc1455", size = 257393, upload-time = "2025-10-08T19:48:31.567Z" }, + { url = "https://files.pythonhosted.org/packages/08/02/87b25304249a35c0915d236575bc3574a323f60b47939a2262b77632a3ee/propcache-0.4.1-cp314-cp314t-win32.whl", hash = "sha256:05674a162469f31358c30bcaa8883cb7829fa3110bf9c0991fe27d7896c42d85", size = 42546, upload-time = "2025-10-08T19:48:32.872Z" }, + { url = "https://files.pythonhosted.org/packages/cb/ef/3c6ecf8b317aa982f309835e8f96987466123c6e596646d4e6a1dfcd080f/propcache-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:990f6b3e2a27d683cb7602ed6c86f15ee6b43b1194736f9baaeb93d0016633b1", size = 46259, upload-time = "2025-10-08T19:48:34.226Z" }, + { url = "https://files.pythonhosted.org/packages/c4/2d/346e946d4951f37eca1e4f55be0f0174c52cd70720f84029b02f296f4a38/propcache-0.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:ecef2343af4cc68e05131e45024ba34f6095821988a9d0a02aa7c73fcc448aa9", size = 40428, upload-time = "2025-10-08T19:48:35.441Z" }, + { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, +] + +[[package]] +name = "proto-plus" +version = "1.27.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/02/8832cde80e7380c600fbf55090b6ab7b62bd6825dbedde6d6657c15a1f8e/proto_plus-1.27.1.tar.gz", hash = "sha256:912a7460446625b792f6448bade9e55cd4e41e6ac10e27009ef71a7f317fa147", size = 56929, upload-time = 
"2026-02-02T17:34:49.035Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/79/ac273cbbf744691821a9cca88957257f41afe271637794975ca090b9588b/proto_plus-1.27.1-py3-none-any.whl", hash = "sha256:e4643061f3a4d0de092d62aa4ad09fa4756b2cbb89d4627f3985018216f9fefc", size = 50480, upload-time = "2026-02-02T17:34:47.339Z" }, +] + +[[package]] +name = "protobuf" +version = "6.33.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/70/e908e9c5e52ef7c3a6c7902c9dfbb34c7e29c25d2f81ade3856445fd5c94/protobuf-6.33.6.tar.gz", hash = "sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135", size = 444531, upload-time = "2026-03-18T19:05:00.988Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/9f/2f509339e89cfa6f6a4c4ff50438db9ca488dec341f7e454adad60150b00/protobuf-6.33.6-cp310-abi3-win32.whl", hash = "sha256:7d29d9b65f8afef196f8334e80d6bc1d5d4adedb449971fefd3723824e6e77d3", size = 425739, upload-time = "2026-03-18T19:04:48.373Z" }, + { url = "https://files.pythonhosted.org/packages/76/5d/683efcd4798e0030c1bab27374fd13a89f7c2515fb1f3123efdfaa5eab57/protobuf-6.33.6-cp310-abi3-win_amd64.whl", hash = "sha256:0cd27b587afca21b7cfa59a74dcbd48a50f0a6400cfb59391340ad729d91d326", size = 437089, upload-time = "2026-03-18T19:04:50.381Z" }, + { url = "https://files.pythonhosted.org/packages/5c/01/a3c3ed5cd186f39e7880f8303cc51385a198a81469d53d0fdecf1f64d929/protobuf-6.33.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a", size = 427737, upload-time = "2026-03-18T19:04:51.866Z" }, + { url = "https://files.pythonhosted.org/packages/ee/90/b3c01fdec7d2f627b3a6884243ba328c1217ed2d978def5c12dc50d328a3/protobuf-6.33.6-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2", size = 324610, upload-time = "2026-03-18T19:04:53.096Z" }, + { url = "https://files.pythonhosted.org/packages/9b/ca/25afc144934014700c52e05103c2421997482d561f3101ff352e1292fb81/protobuf-6.33.6-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3", size = 339381, upload-time = "2026-03-18T19:04:54.616Z" }, + { url = "https://files.pythonhosted.org/packages/16/92/d1e32e3e0d894fe00b15ce28ad4944ab692713f2e7f0a99787405e43533a/protobuf-6.33.6-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593", size = 323436, upload-time = "2026-03-18T19:04:55.768Z" }, + { url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656, upload-time = "2026-03-18T19:04:59.826Z" }, +] + +[[package]] +name = "psutil" +version = "7.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 
130595, upload-time = "2026-01-28T18:14:57.293Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" }, + { url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" }, + { url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" }, + { url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" }, + { url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" }, + { url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" }, + { url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" }, + { url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" }, + { url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" }, + { url = "https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", 
size = 136284, upload-time = "2026-01-28T18:15:19.912Z" }, + { url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" }, + { url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" }, + { url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" }, + { url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" }, + { url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" }, + { url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" }, + { url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" }, + { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, +] + +[[package]] +name = "py-key-value-aio" +version = "0.4.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beartype" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/3c/0397c072a38d4bc580994b42e0c90c5f44f679303489e4376289534735e5/py_key_value_aio-0.4.4.tar.gz", hash = "sha256:e3012e6243ed7cc09bb05457bd4d03b1ba5c2b1ca8700096b3927db79ffbbe55", size = 92300, upload-time = "2026-02-16T21:21:43.245Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/69/f1b537ee70b7def42d63124a539ed3026a11a3ffc3086947a1ca6e861868/py_key_value_aio-0.4.4-py3-none-any.whl", hash = "sha256:18e17564ecae61b987f909fc2cd41ee2012c84b4b1dcb8c055cf8b4bc1bf3f5d", size = 152291, upload-time = "2026-02-16T21:21:44.241Z" }, +] + +[package.optional-dependencies] +filetree = [ + { name = "aiofile" }, + { name = "anyio" }, +] +keyring = [ + { name = "keyring" }, +] +memory = [ + 
{ name = "cachetools" }, +] + +[[package]] +name = "pyarrow" +version = "23.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336, upload-time = "2026-02-16T10:14:12.39Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/10/2cbe4c6f0fb83d2de37249567373d64327a5e4d8db72f486db42875b08f6/pyarrow-23.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6b8fda694640b00e8af3c824f99f789e836720aa8c9379fb435d4c4953a756b8", size = 34210066, upload-time = "2026-02-16T10:10:45.487Z" }, + { url = "https://files.pythonhosted.org/packages/cb/4f/679fa7e84dadbaca7a65f7cdba8d6c83febbd93ca12fa4adf40ba3b6362b/pyarrow-23.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:8ff51b1addc469b9444b7c6f3548e19dc931b172ab234e995a60aea9f6e6025f", size = 35825526, upload-time = "2026-02-16T10:10:52.266Z" }, + { url = "https://files.pythonhosted.org/packages/f9/63/d2747d930882c9d661e9398eefc54f15696547b8983aaaf11d4a2e8b5426/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:71c5be5cbf1e1cb6169d2a0980850bccb558ddc9b747b6206435313c47c37677", size = 44473279, upload-time = "2026-02-16T10:11:01.557Z" }, + { url = "https://files.pythonhosted.org/packages/b3/93/10a48b5e238de6d562a411af6467e71e7aedbc9b87f8d3a35f1560ae30fb/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9b6f4f17b43bc39d56fec96e53fe89d94bac3eb134137964371b45352d40d0c2", size = 47585798, upload-time = "2026-02-16T10:11:09.401Z" }, + { url = "https://files.pythonhosted.org/packages/5c/20/476943001c54ef078dbf9542280e22741219a184a0632862bca4feccd666/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fc13fc6c403d1337acab46a2c4346ca6c9dec5780c3c697cf8abfd5e19b6b37", size = 48179446, upload-time = "2026-02-16T10:11:17.781Z" }, + { url = "https://files.pythonhosted.org/packages/4b/b6/5dd0c47b335fcd8edba9bfab78ad961bd0fd55ebe53468cc393f45e0be60/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5c16ed4f53247fa3ffb12a14d236de4213a4415d127fe9cebed33d51671113e2", size = 50623972, upload-time = "2026-02-16T10:11:26.185Z" }, + { url = "https://files.pythonhosted.org/packages/d5/09/a532297c9591a727d67760e2e756b83905dd89adb365a7f6e9c72578bcc1/pyarrow-23.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:cecfb12ef629cf6be0b1887f9f86463b0dd3dc3195ae6224e74006be4736035a", size = 27540749, upload-time = "2026-02-16T10:12:23.297Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8e/38749c4b1303e6ae76b3c80618f84861ae0c55dd3c2273842ea6f8258233/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:29f7f7419a0e30264ea261fdc0e5fe63ce5a6095003db2945d7cd78df391a7e1", size = 34471544, upload-time = "2026-02-16T10:11:32.535Z" }, + { url = "https://files.pythonhosted.org/packages/a3/73/f237b2bc8c669212f842bcfd842b04fc8d936bfc9d471630569132dc920d/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:33d648dc25b51fd8055c19e4261e813dfc4d2427f068bcecc8b53d01b81b0500", size = 35949911, upload-time = "2026-02-16T10:11:39.813Z" }, + { url = "https://files.pythonhosted.org/packages/0c/86/b912195eee0903b5611bf596833def7d146ab2d301afeb4b722c57ffc966/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd395abf8f91c673dd3589cadc8cc1ee4e8674fa61b2e923c8dd215d9c7d1f41", size = 44520337, upload-time = 
"2026-02-16T10:11:47.764Z" }, + { url = "https://files.pythonhosted.org/packages/69/c2/f2a717fb824f62d0be952ea724b4f6f9372a17eed6f704b5c9526f12f2f1/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:00be9576d970c31defb5c32eb72ef585bf600ef6d0a82d5eccaae96639cf9d07", size = 47548944, upload-time = "2026-02-16T10:11:56.607Z" }, + { url = "https://files.pythonhosted.org/packages/84/a7/90007d476b9f0dc308e3bc57b832d004f848fd6c0da601375d20d92d1519/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c2139549494445609f35a5cda4eb94e2c9e4d704ce60a095b342f82460c73a83", size = 48236269, upload-time = "2026-02-16T10:12:04.47Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3f/b16fab3e77709856eb6ac328ce35f57a6d4a18462c7ca5186ef31b45e0e0/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7044b442f184d84e2351e5084600f0d7343d6117aabcbc1ac78eb1ae11eb4125", size = 50604794, upload-time = "2026-02-16T10:12:11.797Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a1/22df0620a9fac31d68397a75465c344e83c3dfe521f7612aea33e27ab6c0/pyarrow-23.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a35581e856a2fafa12f3f54fce4331862b1cfb0bef5758347a858a4aa9d6bae8", size = 27660642, upload-time = "2026-02-16T10:12:17.746Z" }, + { url = "https://files.pythonhosted.org/packages/8d/1b/6da9a89583ce7b23ac611f183ae4843cd3a6cf54f079549b0e8c14031e73/pyarrow-23.0.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:5df1161da23636a70838099d4aaa65142777185cc0cdba4037a18cee7d8db9ca", size = 34238755, upload-time = "2026-02-16T10:12:32.819Z" }, + { url = "https://files.pythonhosted.org/packages/ae/b5/d58a241fbe324dbaeb8df07be6af8752c846192d78d2272e551098f74e88/pyarrow-23.0.1-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1", size = 35847826, upload-time = "2026-02-16T10:12:38.949Z" }, + { url = "https://files.pythonhosted.org/packages/54/a5/8cbc83f04aba433ca7b331b38f39e000efd9f0c7ce47128670e737542996/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b95a3994f015be13c63148fef8832e8a23938128c185ee951c98908a696e0eb", size = 44536859, upload-time = "2026-02-16T10:12:45.467Z" }, + { url = "https://files.pythonhosted.org/packages/36/2e/c0f017c405fcdc252dbccafbe05e36b0d0eb1ea9a958f081e01c6972927f/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4982d71350b1a6e5cfe1af742c53dfb759b11ce14141870d05d9e540d13bc5d1", size = 47614443, upload-time = "2026-02-16T10:12:55.525Z" }, + { url = "https://files.pythonhosted.org/packages/af/6b/2314a78057912f5627afa13ba43809d9d653e6630859618b0fd81a4e0759/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c250248f1fe266db627921c89b47b7c06fee0489ad95b04d50353537d74d6886", size = 48232991, upload-time = "2026-02-16T10:13:04.729Z" }, + { url = "https://files.pythonhosted.org/packages/40/f2/1bcb1d3be3460832ef3370d621142216e15a2c7c62602a4ea19ec240dd64/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f4763b83c11c16e5f4c15601ba6dfa849e20723b46aa2617cb4bffe8768479f", size = 50645077, upload-time = "2026-02-16T10:13:14.147Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3f/b1da7b61cd66566a4d4c8383d376c606d1c34a906c3f1cb35c479f59d1aa/pyarrow-23.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:3a4c85ef66c134161987c17b147d6bffdca4566f9a4c1d81a0a01cdf08414ea5", size = 28234271, upload-time = "2026-02-16T10:14:09.397Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/78/07f67434e910a0f7323269be7bfbf58699bd0c1d080b18a1ab49ba943fe8/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:17cd28e906c18af486a499422740298c52d7c6795344ea5002a7720b4eadf16d", size = 34488692, upload-time = "2026-02-16T10:13:21.541Z" }, + { url = "https://files.pythonhosted.org/packages/50/76/34cf7ae93ece1f740a04910d9f7e80ba166b9b4ab9596a953e9e62b90fe1/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:76e823d0e86b4fb5e1cf4a58d293036e678b5a4b03539be933d3b31f9406859f", size = 35964383, upload-time = "2026-02-16T10:13:28.63Z" }, + { url = "https://files.pythonhosted.org/packages/46/90/459b827238936d4244214be7c684e1b366a63f8c78c380807ae25ed92199/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a62e1899e3078bf65943078b3ad2a6ddcacf2373bc06379aac61b1e548a75814", size = 44538119, upload-time = "2026-02-16T10:13:35.506Z" }, + { url = "https://files.pythonhosted.org/packages/28/a1/93a71ae5881e99d1f9de1d4554a87be37da11cd6b152239fb5bd924fdc64/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:df088e8f640c9fae3b1f495b3c64755c4e719091caf250f3a74d095ddf3c836d", size = 47571199, upload-time = "2026-02-16T10:13:42.504Z" }, + { url = "https://files.pythonhosted.org/packages/88/a3/d2c462d4ef313521eaf2eff04d204ac60775263f1fb08c374b543f79f610/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:46718a220d64677c93bc243af1d44b55998255427588e400677d7192671845c7", size = 48259435, upload-time = "2026-02-16T10:13:49.226Z" }, + { url = "https://files.pythonhosted.org/packages/cc/f1/11a544b8c3d38a759eb3fbb022039117fd633e9a7b19e4841cc3da091915/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a09f3876e87f48bc2f13583ab551f0379e5dfb83210391e68ace404181a20690", size = 50629149, upload-time = "2026-02-16T10:13:57.238Z" }, + { url = "https://files.pythonhosted.org/packages/50/f2/c0e76a0b451ffdf0cf788932e182758eb7558953f4f27f1aff8e2518b653/pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce", size = 28365807, upload-time = "2026-02-16T10:14:03.892Z" }, +] + +[[package]] +name = "pyasn1" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = 
"sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, +] + +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + +[[package]] +name = "pydantic" +version = "2.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/6b/1353beb3d1cd5cf61cdec5b6f87a9872399de3bc5cae0b7ce07ff4de2ab0/pydantic-2.13.1.tar.gz", hash = "sha256:a0f829b279ddd1e39291133fe2539d2aa46cc6b150c1706a270ff0879e3774d2", size = 843746, upload-time = "2026-04-15T14:57:19.398Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/5a/2225f4c176dbfed0d809e848b50ef08f70e61daa667b7fa14b0d311ae44d/pydantic-2.13.1-py3-none-any.whl", hash = "sha256:9557ecc2806faaf6037f85b1fbd963d01e30511c48085f0d573650fdeaad378a", size = 471917, upload-time = "2026-04-15T14:57:17.277Z" }, +] + +[package.optional-dependencies] +email = [ + { name = "email-validator" }, +] + +[[package]] +name = "pydantic-core" +version = "2.46.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/93/f97a86a7eb28faa1d038af2fd5d6166418b4433659108a4c311b57128b2d/pydantic_core-2.46.1.tar.gz", hash = "sha256:d408153772d9f298098fb5d620f045bdf0f017af0d5cb6e309ef8c205540caa4", size = 471230, upload-time = "2026-04-15T14:49:34.52Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/d2/bda39bad2f426cb5078e6ad28076614d3926704196efe0d7a2a19a99025d/pydantic_core-2.46.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:cdc8a5762a9c4b9d86e204d555444e3227507c92daba06259ee66595834de47a", size = 2119092, upload-time = "2026-04-15T14:49:50.392Z" }, + { url = "https://files.pythonhosted.org/packages/ee/f3/69631e64d69cb3481494b2bddefe0ddd07771209f74e9106d066f9138c2a/pydantic_core-2.46.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ba381dfe9c85692c566ecb60fa5a77a697a2a8eebe274ec5e4d6ec15fafad799", size = 1951400, upload-time = "2026-04-15T14:51:06.588Z" }, + { url = "https://files.pythonhosted.org/packages/53/1c/21cb3db6ae997df31be8e91f213081f72ffa641cb45c89b8a1986832b1f9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1593d8de98207466dc070118322fef68307a0cc6a5625e7b386f6fdae57f9ab6", size = 1976864, upload-time = "2026-04-15T14:50:54.804Z" }, + { url = "https://files.pythonhosted.org/packages/91/9c/05c819f734318ce5a6ca24da300d93696c105af4adb90494ee571303afd8/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8262c74a1af5b0fdf795f5537f7145785a63f9fbf9e15405f547440c30017ed8", size = 2066669, upload-time = 
"2026-04-15T14:51:42.346Z" }, + { url = "https://files.pythonhosted.org/packages/cb/23/fadddf1c7f2f517f58731aea9b35c914e6005250f08dac9b8e53904cdbaa/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b88949a24182e83fbbb3f7ca9b7858d0d37b735700ea91081434b7d37b3b444", size = 2238737, upload-time = "2026-04-15T14:50:45.558Z" }, + { url = "https://files.pythonhosted.org/packages/23/07/0cd4f95cb0359c8b1ec71e89c3777e7932c8dfeb9cd54740289f310aaead/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8f3708cd55537aeaf3fd0ea55df0d68d0da51dcb07cbc8508745b34acc4c6e0", size = 2316258, upload-time = "2026-04-15T14:51:08.471Z" }, + { url = "https://files.pythonhosted.org/packages/0c/40/6fc24c3766a19c222a0d60d652b78f0283339d4cd4c173fab06b7ee76571/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f79292435fff1d4f0c18d9cfaf214025cc88e4f5104bfaed53f173621da1c743", size = 2097474, upload-time = "2026-04-15T14:49:56.543Z" }, + { url = "https://files.pythonhosted.org/packages/4b/af/f39795d1ce549e35d0841382b9c616ae211caffb88863147369a8d74fba9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:a2e607aeb59cf4575bb364470288db3b9a1f0e7415d053a322e3e154c1a0802e", size = 2168383, upload-time = "2026-04-15T14:51:29.269Z" }, + { url = "https://files.pythonhosted.org/packages/e6/32/0d563f74582795779df6cc270c3fc220f49f4daf7860d74a5a6cda8491ff/pydantic_core-2.46.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec5ca190b75878a9f6ae1fc8f5eb678497934475aef3d93204c9fa01e97370b6", size = 2186182, upload-time = "2026-04-15T14:50:19.097Z" }, + { url = "https://files.pythonhosted.org/packages/5c/07/1c10d5ce312fc4cf86d1e50bdcdbb8ef248409597b099cab1b4bb3a093f7/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:1f80535259dcdd517d7b8ca588d5ca24b4f337228e583bebedf7a3adcdf5f721", size = 2187859, upload-time = "2026-04-15T14:49:22.974Z" }, + { url = "https://files.pythonhosted.org/packages/92/01/e1f62d4cb39f0913dbf5c95b9b119ef30ddba9493dff8c2b012f0cdd67dc/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:24820b3c82c43df61eca30147e42853e6c127d8b868afdc0c162df829e011eb4", size = 2338372, upload-time = "2026-04-15T14:49:53.316Z" }, + { url = "https://files.pythonhosted.org/packages/44/ed/218dfeea6127fb1781a6ceca241ec6edf00e8a8933ff331af2215975a534/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f12794b1dd8ac9fb66619e0b3a0427189f5d5638e55a3de1385121a9b7bf9b39", size = 2384039, upload-time = "2026-04-15T14:53:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/6c/1e/011e763cd059238249fbd5780e0f8d0b04b47f86c8925e22784f3e5fc977/pydantic_core-2.46.1-cp313-cp313-win32.whl", hash = "sha256:9bc09aed935cdf50f09e908923f9efbcca54e9244bd14a5a0e2a6c8d2c21b4e9", size = 1977943, upload-time = "2026-04-15T14:52:17.969Z" }, + { url = "https://files.pythonhosted.org/packages/8c/06/b559a490d3ed106e9b1777b8d5c8112dd8d31716243cd662616f66c1f8ea/pydantic_core-2.46.1-cp313-cp313-win_amd64.whl", hash = "sha256:fac2d6c8615b8b42bee14677861ba09d56ee076ba4a65cfb9c3c3d0cc89042f2", size = 2068729, upload-time = "2026-04-15T14:53:07.288Z" }, + { url = "https://files.pythonhosted.org/packages/9f/52/32a198946e2e19508532aa9da02a61419eb15bd2d96bab57f810f2713e31/pydantic_core-2.46.1-cp313-cp313-win_arm64.whl", hash = "sha256:f978329f12ace9f3cb814a5e44d98bbeced2e36f633132bafa06d2d71332e33e", size = 2029550, 
upload-time = "2026-04-15T14:52:22.707Z" }, + { url = "https://files.pythonhosted.org/packages/bd/2b/6793fe89ab66cb2d3d6e5768044eab80bba1d0fae8fd904d0a1574712e17/pydantic_core-2.46.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:9917cb61effac7ec0f448ef491ec7584526d2193be84ff981e85cbf18b68c42a", size = 2118110, upload-time = "2026-04-15T14:50:52.947Z" }, + { url = "https://files.pythonhosted.org/packages/d2/87/e9a905ddfcc2fd7bd862b340c02be6ab1f827922822d425513635d0ac774/pydantic_core-2.46.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0e749679ca9f8a9d0bff95fb7f6b57bb53f2207fa42ffcc1ec86de7e0029ab89", size = 1948645, upload-time = "2026-04-15T14:51:55.577Z" }, + { url = "https://files.pythonhosted.org/packages/15/23/26e67f86ed62ac9d6f7f3091ee5220bf14b5ac36fb811851d601365ef896/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2ecacee70941e233a2dad23f7796a06f86cc10cc2fbd1c97c7dd5b5a79ffa4f", size = 1977576, upload-time = "2026-04-15T14:49:37.58Z" }, + { url = "https://files.pythonhosted.org/packages/b8/78/813c13c0de323d4de54ee2e6fdd69a0271c09ac8dd65a8a000931aa487a5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:647d0a2475b8ed471962eed92fa69145b864942f9c6daa10f95ac70676637ae7", size = 2060358, upload-time = "2026-04-15T14:51:40.087Z" }, + { url = "https://files.pythonhosted.org/packages/09/5e/4caf2a15149271fbd2b4d968899a450853c800b85152abcf54b11531417f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac9cde61965b0697fce6e6cc372df9e1ad93734828aac36e9c1c42a22ad02897", size = 2235980, upload-time = "2026-04-15T14:50:34.535Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c1/a2cdabb5da6f5cb63a3558bcafffc20f790fa14ccffbefbfb1370fadc93f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a2eb0864085f8b641fb3f54a2fb35c58aff24b175b80bc8a945050fcde03204", size = 2316800, upload-time = "2026-04-15T14:52:46.999Z" }, + { url = "https://files.pythonhosted.org/packages/76/fd/19d711e4e9331f9d77f222bffc202bf30ea0d74f6419046376bb82f244c8/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b83ce9fede4bc4fb649281d9857f06d30198b8f70168f18b987518d713111572", size = 2101762, upload-time = "2026-04-15T14:49:24.278Z" }, + { url = "https://files.pythonhosted.org/packages/dc/64/ce95625448e1a4e219390a2923fd594f3fa368599c6b42ac71a5df7238c9/pydantic_core-2.46.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:cb33192753c60f269d2f4a1db8253c95b0df6e04f2989631a8cc1b0f4f6e2e92", size = 2167737, upload-time = "2026-04-15T14:50:41.637Z" }, + { url = "https://files.pythonhosted.org/packages/ad/31/413572d03ca3e73b408f00f54418b91a8be6401451bc791eaeff210328e5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:96611d51f953f87e1ae97637c01ee596a08b7f494ea00a5afb67ea6547b9f53b", size = 2185658, upload-time = "2026-04-15T14:51:46.799Z" }, + { url = "https://files.pythonhosted.org/packages/36/09/e4f581353bdf3f0c7de8a8b27afd14fc761da29d78146376315a6fedc487/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9b176fa55f9107db5e6c86099aa5bfd934f1d3ba6a8b43f714ddeebaed3f42b7", size = 2184154, upload-time = "2026-04-15T14:52:49.629Z" }, + { url = "https://files.pythonhosted.org/packages/1a/a4/d0d52849933f5a4bf1ad9d8da612792f96469b37e286a269e3ee9c60bbb1/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_armv7l.whl", hash = 
"sha256:79a59f63a4ce4f3330e27e6f3ce281dd1099453b637350e97d7cf24c207cd120", size = 2332379, upload-time = "2026-04-15T14:49:55.009Z" }, + { url = "https://files.pythonhosted.org/packages/30/93/25bfb08fdbef419f73290e573899ce938a327628c34e8f3a4bafeea30126/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:f200fce071808a385a314b7343f5e3688d7c45746be3d64dc71ee2d3e2a13268", size = 2377964, upload-time = "2026-04-15T14:51:59.649Z" }, + { url = "https://files.pythonhosted.org/packages/15/36/b777766ff83fef1cf97473d64764cd44f38e0d8c269ed06faace9ae17666/pydantic_core-2.46.1-cp314-cp314-win32.whl", hash = "sha256:3a07eccc0559fb9acc26d55b16bf8ebecd7f237c74a9e2c5741367db4e6d8aff", size = 1976450, upload-time = "2026-04-15T14:51:57.665Z" }, + { url = "https://files.pythonhosted.org/packages/7b/4b/4cd19d2437acfc18ca166db5a2067040334991eb862c4ecf2db098c91fbf/pydantic_core-2.46.1-cp314-cp314-win_amd64.whl", hash = "sha256:1706d270309ac7d071ffe393988c471363705feb3d009186e55d17786ada9622", size = 2067750, upload-time = "2026-04-15T14:49:38.941Z" }, + { url = "https://files.pythonhosted.org/packages/7f/a0/490751c0ef8f5b27aae81731859aed1508e72c1a9b5774c6034269db773b/pydantic_core-2.46.1-cp314-cp314-win_arm64.whl", hash = "sha256:22d4e7457ade8af06528012f382bc994a97cc2ce6e119305a70b3deff1e409d6", size = 2021109, upload-time = "2026-04-15T14:50:27.728Z" }, + { url = "https://files.pythonhosted.org/packages/36/3a/2a018968245fffd25d5f1972714121ad309ff2de19d80019ad93494844f9/pydantic_core-2.46.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:607ff9db0b7e2012e7eef78465e69f9a0d7d1c3e7c6a84cf0c4011db0fcc3feb", size = 2111548, upload-time = "2026-04-15T14:52:08.273Z" }, + { url = "https://files.pythonhosted.org/packages/77/5b/4103b6192213217e874e764e5467d2ff10d8873c1147d01fa432ac281880/pydantic_core-2.46.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8cda3eacaea13bd02a1bea7e457cc9fc30b91c5a91245cef9b215140f80dd78c", size = 1926745, upload-time = "2026-04-15T14:50:03.045Z" }, + { url = "https://files.pythonhosted.org/packages/c3/70/602a667cf4be4bec6c3334512b12ae4ea79ce9bfe41dc51be1fd34434453/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9493279cdc7997fe19e5ed9b41f30cbc3806bd4722adb402fedb6f6d41bd72a", size = 1965922, upload-time = "2026-04-15T14:51:12.555Z" }, + { url = "https://files.pythonhosted.org/packages/a9/24/06a89ce5323e755b7d2812189f9706b87aaebe49b34d247b380502f7992c/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3644e5e10059999202355b6c6616e624909e23773717d8f76deb8a6e2a72328c", size = 2043221, upload-time = "2026-04-15T14:51:18.995Z" }, + { url = "https://files.pythonhosted.org/packages/2c/6e/b1d9ad907d9d76964903903349fd2e33c87db4b993cc44713edcad0fc488/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ad6c9de57683e26c92730991960c0c3571b8053263b042de2d3e105930b2767", size = 2243655, upload-time = "2026-04-15T14:50:10.718Z" }, + { url = "https://files.pythonhosted.org/packages/ef/73/787abfaad51174641abb04c8aa125322279b40ad7ce23c495f5a69f76554/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:557ebaa27c7617e7088002318c679a8ce685fa048523417cd1ca52b7f516d955", size = 2295976, upload-time = "2026-04-15T14:53:09.694Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/0b/b7c5a631b6d5153d4a1ea4923b139aea256dc3bd99c8e6c7b312c7733146/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cd37e39b22b796ba0298fe81e9421dd7b65f97acfbb0fb19b33ffdda7b9a7b4", size = 2103439, upload-time = "2026-04-15T14:50:08.32Z" }, + { url = "https://files.pythonhosted.org/packages/2a/3f/952ee470df69e5674cdec1cbde22331adf643b5cc2ff79f4292d80146ee4/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:6689443b59714992e67d62505cdd2f952d6cf1c14cc9fd9aeec6719befc6f23b", size = 2132871, upload-time = "2026-04-15T14:50:24.445Z" }, + { url = "https://files.pythonhosted.org/packages/e3/8b/1dea3b1e683c60c77a60f710215f90f486755962aa8939dbcb7c0f975ac3/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f32c41ca1e3456b5dd691827b7c1433c12d5f0058cc186afbb3615bc07d97b8", size = 2168658, upload-time = "2026-04-15T14:52:24.897Z" }, + { url = "https://files.pythonhosted.org/packages/67/97/32ae283810910d274d5ba9f48f856f5f2f612410b78b249f302d297816f5/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:88cd1355578852db83954dc36e4f58f299646916da976147c20cf6892ba5dc43", size = 2171184, upload-time = "2026-04-15T14:52:34.854Z" }, + { url = "https://files.pythonhosted.org/packages/a2/57/c9a855527fe56c2072070640221f53095b0b19eaf651f3c77643c9cabbe3/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:a170fefdb068279a473cc9d34848b85e61d68bfcc2668415b172c5dfc6f213bf", size = 2316573, upload-time = "2026-04-15T14:52:12.871Z" }, + { url = "https://files.pythonhosted.org/packages/37/b3/14c39ffc7399819c5448007c7bcb4e6da5669850cfb7dcbb727594290b48/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:556a63ff1006934dba4eed7ea31b58274c227e29298ec398e4275eda4b905e95", size = 2378340, upload-time = "2026-04-15T14:51:02.619Z" }, + { url = "https://files.pythonhosted.org/packages/01/55/a37461fbb29c053ea4e62cfc5c2d56425cb5efbef8316e63f6d84ae45718/pydantic_core-2.46.1-cp314-cp314t-win32.whl", hash = "sha256:3b146d8336a995f7d7da6d36e4a779b7e7dff2719ac00a1eb8bd3ded00bec87b", size = 1960843, upload-time = "2026-04-15T14:52:06.103Z" }, + { url = "https://files.pythonhosted.org/packages/22/d7/97e1221197d17a27f768363f87ec061519eeeed15bbd315d2e9d1429ff03/pydantic_core-2.46.1-cp314-cp314t-win_amd64.whl", hash = "sha256:f1bc856c958e6fe9ec071e210afe6feb695f2e2e81fd8d2b102f558d364c4c17", size = 2048696, upload-time = "2026-04-15T14:52:52.154Z" }, + { url = "https://files.pythonhosted.org/packages/19/d5/4eac95255c7d35094b46a32ec1e4d80eac94729c694726ee1d69948bd5f0/pydantic_core-2.46.1-cp314-cp314t-win_arm64.whl", hash = "sha256:21a5bfd8a1aa4de60494cdf66b0c912b1495f26a8899896040021fbd6038d989", size = 2022343, upload-time = "2026-04-15T14:49:49.036Z" }, +] + +[[package]] +name = "pydantic-settings" +version = "2.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/a1/ae859ffac5a3338a66b74c5e29e244fd3a3cc483c89feaf9f56c39898d75/pydantic_settings-2.13.0.tar.gz", hash = "sha256:95d875514610e8595672800a5c40b073e99e4aae467fa7c8f9c263061ea2e1fe", size = 222450, upload-time = "2026-02-15T12:11:23.476Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b0/1a/dd1b9d7e627486cf8e7523d09b70010e05a4bc41414f4ae6ce184cf0afb6/pydantic_settings-2.13.0-py3-none-any.whl", hash = "sha256:d67b576fff39cd086b595441bf9c75d4193ca9c0ed643b90360694d0f1240246", size = 58429, upload-time = "2026-02-15T12:11:22.133Z" }, +] + +[[package]] +name = "pygls" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cattrs" }, + { name = "lsprotocol" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/86/b9/41d173dad9eaa9db9c785a85671fc3d68961f08d67706dc2e79011e10b5c/pygls-1.3.1.tar.gz", hash = "sha256:140edceefa0da0e9b3c533547c892a42a7d2fd9217ae848c330c53d266a55018", size = 45527, upload-time = "2024-03-26T18:44:25.679Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/19/b74a10dd24548e96e8c80226cbacb28b021bc3a168a7d2709fb0d0185348/pygls-1.3.1-py3-none-any.whl", hash = "sha256:6e00f11efc56321bdeb6eac04f6d86131f654c7d49124344a9ebb968da3dd91e", size = 56031, upload-time = "2024-03-26T18:44:24.249Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pyjwt" +version = "2.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + +[[package]] +name = "pyperclip" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/23/2f0a3efc4d6a32f3b63cdff36cd398d9701d26cda58e3ab97ac79fb5e60d/pyperclip-1.9.0.tar.gz", hash = "sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310", size = 20961, upload-time = "2024-06-18T20:38:48.401Z" } + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + +[[package]] +name = "python-frontmatter" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/de/910fa208120314a12f9a88ea63e03707261692af782c99283f1a2c8a5e6f/python-frontmatter-1.1.0.tar.gz", hash = "sha256:7118d2bd56af9149625745c58c9b51fb67e8d1294a0c76796dafdc72c36e5f6d", size = 16256, upload-time = "2024-01-16T18:50:04.052Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/87/3c8da047b3ec5f99511d1b4d7a5bc72d4b98751c7e78492d14dc736319c5/python_frontmatter-1.1.0-py3-none-any.whl", hash = "sha256:335465556358d9d0e6c98bbeb69b1c969f2a4a21360587b9873bfc3b213407c1", size = 9834, upload-time = "2024-01-16T18:50:00.911Z" }, +] + +[[package]] +name = "python-multipart" +version = "0.0.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, +] + +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + +[[package]] +name = "pywin32-ctypes" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/9f/01a1a99704853cb63f253eea009390c88e7131c67e66a0a02099a8c917cb/pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755", size = 29471, upload-time = "2024-08-14T10:15:34.626Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/3d/8161f7711c017e01ac9f008dfddd9410dff3674334c233bde66e7ba65bbf/pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8", size = 30756, upload-time = "2024-08-14T10:15:33.187Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash 
= "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = 
"sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "rank-bm25" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/0a/f9579384aa017d8b4c15613f86954b92a95a93d641cc849182467cf0bb3b/rank_bm25-0.2.2.tar.gz", hash = "sha256:096ccef76f8188563419aaf384a02f0ea459503fdf77901378d4fd9d87e5e51d", size = 8347, upload-time = "2022-02-16T12:10:52.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/21/f691fb2613100a62b3fa91e9988c991e9ca5b89ea31c0d3152a3210344f9/rank_bm25-0.2.2-py3-none-any.whl", hash = "sha256:7bd4a95571adadfc271746fa146a4bcfd89c0cf731e49c3d1ad863290adbe8ae", size = 8584, upload-time = "2022-02-16T12:10:50.626Z" }, +] + +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + +[[package]] +name = "regex" +version = "2026.2.28" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/71/41455aa99a5a5ac1eaf311f5d8efd9ce6433c03ac1e0962de163350d0d97/regex-2026.2.28.tar.gz", hash = "sha256:a729e47d418ea11d03469f321aaf67cdee8954cde3ff2cf8403ab87951ad10f2", size = 415184, upload-time = "2026-02-28T02:19:42.792Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/f6/dc9ef48c61b79c8201585bf37fa70cd781977da86e466cd94e8e95d2443b/regex-2026.2.28-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6d63a07e5ec8ce7184452cb00c41c37b49e67dc4f73b2955b5b8e782ea970784", size = 489311, upload-time = "2026-02-28T02:17:22.591Z" }, + { url = "https://files.pythonhosted.org/packages/95/c8/c20390f2232d3f7956f420f4ef1852608ad57aa26c3dd78516cb9f3dc913/regex-2026.2.28-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e59bc8f30414d283ae8ee1617b13d8112e7135cb92830f0ec3688cb29152585a", size = 291285, upload-time = "2026-02-28T02:17:24.355Z" }, + { url = "https://files.pythonhosted.org/packages/d2/a6/ba1068a631ebd71a230e7d8013fcd284b7c89c35f46f34a7da02082141b1/regex-2026.2.28-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:de0cf053139f96219ccfabb4a8dd2d217c8c82cb206c91d9f109f3f552d6b43d", size = 289051, upload-time = "2026-02-28T02:17:26.722Z" }, + { url = "https://files.pythonhosted.org/packages/1d/1b/7cc3b7af4c244c204b7a80924bd3d85aecd9ba5bc82b485c5806ee8cda9e/regex-2026.2.28-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:fb4db2f17e6484904f986c5a657cec85574c76b5c5e61c7aae9ffa1bc6224f95", size = 796842, upload-time = "2026-02-28T02:17:29.064Z" }, + { url = "https://files.pythonhosted.org/packages/24/87/26bd03efc60e0d772ac1e7b60a2e6325af98d974e2358f659c507d3c76db/regex-2026.2.28-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:52b017b35ac2214d0db5f4f90e303634dc44e4aba4bd6235a27f97ecbe5b0472", size = 863083, upload-time = "2026-02-28T02:17:31.363Z" }, + { url = "https://files.pythonhosted.org/packages/ae/54/aeaf4afb1aa0a65e40de52a61dc2ac5b00a83c6cb081c8a1d0dda74f3010/regex-2026.2.28-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:69fc560ccbf08a09dc9b52ab69cacfae51e0ed80dc5693078bdc97db2f91ae96", size = 909412, upload-time = "2026-02-28T02:17:33.248Z" }, + { url = "https://files.pythonhosted.org/packages/12/2f/049901def913954e640d199bbc6a7ca2902b6aeda0e5da9d17f114100ec2/regex-2026.2.28-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e61eea47230eba62a31f3e8a0e3164d0f37ef9f40529fb2c79361bc6b53d2a92", size = 802101, upload-time = "2026-02-28T02:17:35.053Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/512fb9ff7f5b15ea204bb1967ebb649059446decacccb201381f9fa6aad4/regex-2026.2.28-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4f5c0b182ad4269e7381b7c27fdb0408399881f7a92a4624fd5487f2971dfc11", size = 775260, upload-time = "2026-02-28T02:17:37.692Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/9a92935878aba19bd72706b9db5646a6f993d99b3f6ed42c02ec8beb1d61/regex-2026.2.28-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:96f6269a2882fbb0ee76967116b83679dc628e68eaea44e90884b8d53d833881", size = 784311, upload-time = "2026-02-28T02:17:39.855Z" }, + { url = "https://files.pythonhosted.org/packages/09/d3/fc51a8a738a49a6b6499626580554c9466d3ea561f2b72cfdc72e4149773/regex-2026.2.28-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b5acd4b6a95f37c3c3828e5d053a7d4edaedb85de551db0153754924cb7c83e3", size = 856876, upload-time = "2026-02-28T02:17:42.317Z" }, + { url = "https://files.pythonhosted.org/packages/08/b7/2e641f3d084b120ca4c52e8c762a78da0b32bf03ef546330db3e2635dc5f/regex-2026.2.28-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2234059cfe33d9813a3677ef7667999caea9eeaa83fef98eb6ce15c6cf9e0215", size = 763632, upload-time = "2026-02-28T02:17:45.073Z" }, + { url = "https://files.pythonhosted.org/packages/fe/6d/0009021d97e79ee99f3d8641f0a8d001eed23479ade4c3125a5480bf3e2d/regex-2026.2.28-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c15af43c72a7fb0c97cbc66fa36a43546eddc5c06a662b64a0cbf30d6ac40944", size = 849320, upload-time = "2026-02-28T02:17:47.192Z" }, + { url = "https://files.pythonhosted.org/packages/05/7a/51cfbad5758f8edae430cb21961a9c8d04bce1dae4d2d18d4186eec7cfa1/regex-2026.2.28-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9185cc63359862a6e80fe97f696e04b0ad9a11c4ac0a4a927f979f611bfe3768", size = 790152, upload-time = "2026-02-28T02:17:49.067Z" }, + { url = "https://files.pythonhosted.org/packages/90/3d/a83e2b6b3daa142acb8c41d51de3876186307d5cb7490087031747662500/regex-2026.2.28-cp313-cp313-win32.whl", hash = "sha256:fb66e5245db9652abd7196ace599b04d9c0e4aa7c8f0e2803938377835780081", size = 266398, upload-time = "2026-02-28T02:17:50.744Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/4f/16e9ebb1fe5425e11b9596c8d57bf8877dcb32391da0bfd33742e3290637/regex-2026.2.28-cp313-cp313-win_amd64.whl", hash = "sha256:71a911098be38c859ceb3f9a9ce43f4ed9f4c6720ad8684a066ea246b76ad9ff", size = 277282, upload-time = "2026-02-28T02:17:53.074Z" }, + { url = "https://files.pythonhosted.org/packages/07/b4/92851335332810c5a89723bf7a7e35c7209f90b7d4160024501717b28cc9/regex-2026.2.28-cp313-cp313-win_arm64.whl", hash = "sha256:39bb5727650b9a0275c6a6690f9bb3fe693a7e6cc5c3155b1240aedf8926423e", size = 270382, upload-time = "2026-02-28T02:17:54.888Z" }, + { url = "https://files.pythonhosted.org/packages/24/07/6c7e4cec1e585959e96cbc24299d97e4437a81173217af54f1804994e911/regex-2026.2.28-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:97054c55db06ab020342cc0d35d6f62a465fa7662871190175f1ad6c655c028f", size = 492541, upload-time = "2026-02-28T02:17:56.813Z" }, + { url = "https://files.pythonhosted.org/packages/7c/13/55eb22ada7f43d4f4bb3815b6132183ebc331c81bd496e2d1f3b8d862e0d/regex-2026.2.28-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0d25a10811de831c2baa6aef3c0be91622f44dd8d31dd12e69f6398efb15e48b", size = 292984, upload-time = "2026-02-28T02:17:58.538Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/c301f8cb29ce9644a5ef85104c59244e6e7e90994a0f458da4d39baa8e17/regex-2026.2.28-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d6cfe798d8da41bb1862ed6e0cba14003d387c3c0c4a5d45591076ae9f0ce2f8", size = 291509, upload-time = "2026-02-28T02:18:00.208Z" }, + { url = "https://files.pythonhosted.org/packages/b5/43/aabe384ec1994b91796e903582427bc2ffaed9c4103819ed3c16d8e749f3/regex-2026.2.28-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd0ce43e71d825b7c0661f9c54d4d74bd97c56c3fd102a8985bcfea48236bacb", size = 809429, upload-time = "2026-02-28T02:18:02.328Z" }, + { url = "https://files.pythonhosted.org/packages/04/b8/8d2d987a816720c4f3109cee7c06a4b24ad0e02d4fc74919ab619e543737/regex-2026.2.28-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00945d007fd74a9084d2ab79b695b595c6b7ba3698972fadd43e23230c6979c1", size = 869422, upload-time = "2026-02-28T02:18:04.23Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ad/2c004509e763c0c3719f97c03eca26473bffb3868d54c5f280b8cd4f9e3d/regex-2026.2.28-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bec23c11cbbf09a4df32fe50d57cbdd777bc442269b6e39a1775654f1c95dee2", size = 915175, upload-time = "2026-02-28T02:18:06.791Z" }, + { url = "https://files.pythonhosted.org/packages/55/c2/fd429066da487ef555a9da73bf214894aec77fc8c66a261ee355a69871a8/regex-2026.2.28-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5cdcc17d935c8f9d3f4db5c2ebe2640c332e3822ad5d23c2f8e0228e6947943a", size = 812044, upload-time = "2026-02-28T02:18:08.736Z" }, + { url = "https://files.pythonhosted.org/packages/5b/ca/feedb7055c62a3f7f659971bf45f0e0a87544b6b0cf462884761453f97c5/regex-2026.2.28-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a448af01e3d8031c89c5d902040b124a5e921a25c4e5e07a861ca591ce429341", size = 782056, upload-time = "2026-02-28T02:18:10.777Z" }, + { url = "https://files.pythonhosted.org/packages/95/30/1aa959ed0d25c1dd7dd5047ea8ba482ceaef38ce363c401fd32a6b923e60/regex-2026.2.28-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:10d28e19bd4888e4abf43bd3925f3c134c52fdf7259219003588a42e24c2aa25", size = 798743, upload-time = "2026-02-28T02:18:13.025Z" }, + { url = "https://files.pythonhosted.org/packages/3b/1f/dadb9cf359004784051c897dcf4d5d79895f73a1bbb7b827abaa4814ae80/regex-2026.2.28-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:99985a2c277dcb9ccb63f937451af5d65177af1efdeb8173ac55b61095a0a05c", size = 864633, upload-time = "2026-02-28T02:18:16.84Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f1/b9a25eb24e1cf79890f09e6ec971ee5b511519f1851de3453bc04f6c902b/regex-2026.2.28-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:e1e7b24cb3ae9953a560c563045d1ba56ee4749fbd05cf21ba571069bd7be81b", size = 770862, upload-time = "2026-02-28T02:18:18.892Z" }, + { url = "https://files.pythonhosted.org/packages/02/9a/c5cb10b7aa6f182f9247a30cc9527e326601f46f4df864ac6db588d11fcd/regex-2026.2.28-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d8511a01d0e4ee1992eb3ba19e09bc1866fe03f05129c3aec3fdc4cbc77aad3f", size = 854788, upload-time = "2026-02-28T02:18:21.475Z" }, + { url = "https://files.pythonhosted.org/packages/0a/50/414ba0731c4bd40b011fa4703b2cc86879ec060c64f2a906e65a56452589/regex-2026.2.28-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:aaffaecffcd2479ce87aa1e74076c221700b7c804e48e98e62500ee748f0f550", size = 800184, upload-time = "2026-02-28T02:18:23.492Z" }, + { url = "https://files.pythonhosted.org/packages/69/50/0c7290987f97e7e6830b0d853f69dc4dc5852c934aae63e7fdcd76b4c383/regex-2026.2.28-cp313-cp313t-win32.whl", hash = "sha256:ef77bdde9c9eba3f7fa5b58084b29bbcc74bcf55fdbeaa67c102a35b5bd7e7cc", size = 269137, upload-time = "2026-02-28T02:18:25.375Z" }, + { url = "https://files.pythonhosted.org/packages/68/80/ef26ff90e74ceb4051ad6efcbbb8a4be965184a57e879ebcbdef327d18fa/regex-2026.2.28-cp313-cp313t-win_amd64.whl", hash = "sha256:98adf340100cbe6fbaf8e6dc75e28f2c191b1be50ffefe292fb0e6f6eefdb0d8", size = 280682, upload-time = "2026-02-28T02:18:27.205Z" }, + { url = "https://files.pythonhosted.org/packages/69/8b/fbad9c52e83ffe8f97e3ed1aa0516e6dff6bb633a41da9e64645bc7efdc5/regex-2026.2.28-cp313-cp313t-win_arm64.whl", hash = "sha256:2fb950ac1d88e6b6a9414381f403797b236f9fa17e1eee07683af72b1634207b", size = 271735, upload-time = "2026-02-28T02:18:29.015Z" }, + { url = "https://files.pythonhosted.org/packages/cf/03/691015f7a7cb1ed6dacb2ea5de5682e4858e05a4c5506b2839cd533bbcd6/regex-2026.2.28-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:78454178c7df31372ea737996fb7f36b3c2c92cccc641d251e072478afb4babc", size = 489497, upload-time = "2026-02-28T02:18:30.889Z" }, + { url = "https://files.pythonhosted.org/packages/c6/ba/8db8fd19afcbfa0e1036eaa70c05f20ca8405817d4ad7a38a6b4c2f031ac/regex-2026.2.28-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:5d10303dd18cedfd4d095543998404df656088240bcfd3cd20a8f95b861f74bd", size = 291295, upload-time = "2026-02-28T02:18:33.426Z" }, + { url = "https://files.pythonhosted.org/packages/5a/79/9aa0caf089e8defef9b857b52fc53801f62ff868e19e5c83d4a96612eba1/regex-2026.2.28-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:19a9c9e0a8f24f39d575a6a854d516b48ffe4cbdcb9de55cb0570a032556ecff", size = 289275, upload-time = "2026-02-28T02:18:35.247Z" }, + { url = "https://files.pythonhosted.org/packages/eb/26/ee53117066a30ef9c883bf1127eece08308ccf8ccd45c45a966e7a665385/regex-2026.2.28-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09500be324f49b470d907b3ef8af9afe857f5cca486f853853f7945ddbf75911", size = 
797176, upload-time = "2026-02-28T02:18:37.15Z" }, + { url = "https://files.pythonhosted.org/packages/05/1b/67fb0495a97259925f343ae78b5d24d4a6624356ae138b57f18bd43006e4/regex-2026.2.28-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fb1c4ff62277d87a7335f2c1ea4e0387b8f2b3ad88a64efd9943906aafad4f33", size = 863813, upload-time = "2026-02-28T02:18:39.478Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/93ac9bbafc53618091c685c7ed40239a90bf9f2a82c983f0baa97cb7ae07/regex-2026.2.28-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b8b3f1be1738feadc69f62daa250c933e85c6f34fa378f54a7ff43807c1b9117", size = 908678, upload-time = "2026-02-28T02:18:41.619Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7a/a8f5e0561702b25239846a16349feece59712ae20598ebb205580332a471/regex-2026.2.28-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc8ed8c3f41c27acb83f7b6a9eb727a73fc6663441890c5cb3426a5f6a91ce7d", size = 801528, upload-time = "2026-02-28T02:18:43.624Z" }, + { url = "https://files.pythonhosted.org/packages/96/5d/ed6d4cbde80309854b1b9f42d9062fee38ade15f7eb4909f6ef2440403b5/regex-2026.2.28-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa539be029844c0ce1114762d2952ab6cfdd7c7c9bd72e0db26b94c3c36dcc5a", size = 775373, upload-time = "2026-02-28T02:18:46.102Z" }, + { url = "https://files.pythonhosted.org/packages/6a/e9/6e53c34e8068b9deec3e87210086ecb5b9efebdefca6b0d3fa43d66dcecb/regex-2026.2.28-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7900157786428a79615a8264dac1f12c9b02957c473c8110c6b1f972dcecaddf", size = 784859, upload-time = "2026-02-28T02:18:48.269Z" }, + { url = "https://files.pythonhosted.org/packages/48/3c/736e1c7ca7f0dcd2ae33819888fdc69058a349b7e5e84bc3e2f296bbf794/regex-2026.2.28-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:0b1d2b07614d95fa2bf8a63fd1e98bd8fa2b4848dc91b1efbc8ba219fdd73952", size = 857813, upload-time = "2026-02-28T02:18:50.576Z" }, + { url = "https://files.pythonhosted.org/packages/6e/7c/48c4659ad9da61f58e79dbe8c05223e0006696b603c16eb6b5cbfbb52c27/regex-2026.2.28-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b389c61aa28a79c2e0527ac36da579869c2e235a5b208a12c5b5318cda2501d8", size = 763705, upload-time = "2026-02-28T02:18:52.59Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a1/bc1c261789283128165f71b71b4b221dd1b79c77023752a6074c102f18d8/regex-2026.2.28-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f467cb602f03fbd1ab1908f68b53c649ce393fde056628dc8c7e634dab6bfc07", size = 848734, upload-time = "2026-02-28T02:18:54.595Z" }, + { url = "https://files.pythonhosted.org/packages/10/d8/979407faf1397036e25a5ae778157366a911c0f382c62501009f4957cf86/regex-2026.2.28-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e8c8cb2deba42f5ec1ede46374e990f8adc5e6456a57ac1a261b19be6f28e4e6", size = 789871, upload-time = "2026-02-28T02:18:57.34Z" }, + { url = "https://files.pythonhosted.org/packages/03/23/da716821277115fcb1f4e3de1e5dc5023a1e6533598c486abf5448612579/regex-2026.2.28-cp314-cp314-win32.whl", hash = "sha256:9036b400b20e4858d56d117108d7813ed07bb7803e3eed766675862131135ca6", size = 271825, upload-time = "2026-02-28T02:18:59.202Z" }, + { url = "https://files.pythonhosted.org/packages/91/ff/90696f535d978d5f16a52a419be2770a8d8a0e7e0cfecdbfc31313df7fab/regex-2026.2.28-cp314-cp314-win_amd64.whl", hash = 
"sha256:1d367257cd86c1cbb97ea94e77b373a0bbc2224976e247f173d19e8f18b4afa7", size = 280548, upload-time = "2026-02-28T02:19:01.049Z" }, + { url = "https://files.pythonhosted.org/packages/69/f9/5e1b5652fc0af3fcdf7677e7df3ad2a0d47d669b34ac29a63bb177bb731b/regex-2026.2.28-cp314-cp314-win_arm64.whl", hash = "sha256:5e68192bb3a1d6fb2836da24aa494e413ea65853a21505e142e5b1064a595f3d", size = 273444, upload-time = "2026-02-28T02:19:03.255Z" }, + { url = "https://files.pythonhosted.org/packages/d3/eb/8389f9e940ac89bcf58d185e230a677b4fd07c5f9b917603ad5c0f8fa8fe/regex-2026.2.28-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a5dac14d0872eeb35260a8e30bac07ddf22adc1e3a0635b52b02e180d17c9c7e", size = 492546, upload-time = "2026-02-28T02:19:05.378Z" }, + { url = "https://files.pythonhosted.org/packages/7b/c7/09441d27ce2a6fa6a61ea3150ea4639c1dcda9b31b2ea07b80d6937b24dd/regex-2026.2.28-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ec0c608b7a7465ffadb344ed7c987ff2f11ee03f6a130b569aa74d8a70e8333c", size = 292986, upload-time = "2026-02-28T02:19:07.24Z" }, + { url = "https://files.pythonhosted.org/packages/fb/69/4144b60ed7760a6bd235e4087041f487aa4aa62b45618ce018b0c14833ea/regex-2026.2.28-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c7815afb0ca45456613fdaf60ea9c993715511c8d53a83bc468305cbc0ee23c7", size = 291518, upload-time = "2026-02-28T02:19:09.698Z" }, + { url = "https://files.pythonhosted.org/packages/2d/be/77e5426cf5948c82f98c53582009ca9e94938c71f73a8918474f2e2990bb/regex-2026.2.28-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b059e71ec363968671693a78c5053bd9cb2fe410f9b8e4657e88377ebd603a2e", size = 809464, upload-time = "2026-02-28T02:19:12.494Z" }, + { url = "https://files.pythonhosted.org/packages/45/99/2c8c5ac90dc7d05c6e7d8e72c6a3599dc08cd577ac476898e91ca787d7f1/regex-2026.2.28-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8cf76f1a29f0e99dcfd7aef1551a9827588aae5a737fe31442021165f1920dc", size = 869553, upload-time = "2026-02-28T02:19:15.151Z" }, + { url = "https://files.pythonhosted.org/packages/53/34/daa66a342f0271e7737003abf6c3097aa0498d58c668dbd88362ef94eb5d/regex-2026.2.28-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:180e08a435a0319e6a4821c3468da18dc7001987e1c17ae1335488dfe7518dd8", size = 915289, upload-time = "2026-02-28T02:19:17.331Z" }, + { url = "https://files.pythonhosted.org/packages/c5/c7/e22c2aaf0a12e7e22ab19b004bb78d32ca1ecc7ef245949935463c5567de/regex-2026.2.28-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e496956106fd59ba6322a8ea17141a27c5040e5ee8f9433ae92d4e5204462a0", size = 812156, upload-time = "2026-02-28T02:19:20.011Z" }, + { url = "https://files.pythonhosted.org/packages/7f/bb/2dc18c1efd9051cf389cd0d7a3a4d90f6804b9fff3a51b5dc3c85b935f71/regex-2026.2.28-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bba2b18d70eeb7b79950f12f633beeecd923f7c9ad6f6bae28e59b4cb3ab046b", size = 782215, upload-time = "2026-02-28T02:19:22.047Z" }, + { url = "https://files.pythonhosted.org/packages/17/1e/9e4ec9b9013931faa32226ec4aa3c71fe664a6d8a2b91ac56442128b332f/regex-2026.2.28-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6db7bfae0f8a2793ff1f7021468ea55e2699d0790eb58ee6ab36ae43aa00bc5b", size = 798925, upload-time = "2026-02-28T02:19:24.173Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/57/a505927e449a9ccb41e2cc8d735e2abe3444b0213d1cf9cb364a8c1f2524/regex-2026.2.28-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:d0b02e8b7e5874b48ae0f077ecca61c1a6a9f9895e9c6dfb191b55b242862033", size = 864701, upload-time = "2026-02-28T02:19:26.376Z" }, + { url = "https://files.pythonhosted.org/packages/a6/ad/c62cb60cdd93e13eac5b3d9d6bd5d284225ed0e3329426f94d2552dd7cca/regex-2026.2.28-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:25b6eb660c5cf4b8c3407a1ed462abba26a926cc9965e164268a3267bcc06a43", size = 770899, upload-time = "2026-02-28T02:19:29.38Z" }, + { url = "https://files.pythonhosted.org/packages/3c/5a/874f861f5c3d5ab99633e8030dee1bc113db8e0be299d1f4b07f5b5ec349/regex-2026.2.28-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:5a932ea8ad5d0430351ff9c76c8db34db0d9f53c1d78f06022a21f4e290c5c18", size = 854727, upload-time = "2026-02-28T02:19:31.494Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ca/d2c03b0efde47e13db895b975b2be6a73ed90b8ba963677927283d43bf74/regex-2026.2.28-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:1c2c95e1a2b0f89d01e821ff4de1be4b5d73d1f4b0bf679fa27c1ad8d2327f1a", size = 800366, upload-time = "2026-02-28T02:19:34.248Z" }, + { url = "https://files.pythonhosted.org/packages/14/bd/ee13b20b763b8989f7c75d592bfd5de37dc1181814a2a2747fedcf97e3ba/regex-2026.2.28-cp314-cp314t-win32.whl", hash = "sha256:bbb882061f742eb5d46f2f1bd5304055be0a66b783576de3d7eef1bed4778a6e", size = 274936, upload-time = "2026-02-28T02:19:36.313Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e7/d8020e39414c93af7f0d8688eabcecece44abfd5ce314b21dfda0eebd3d8/regex-2026.2.28-cp314-cp314t-win_amd64.whl", hash = "sha256:6591f281cb44dc13de9585b552cec6fc6cf47fb2fe7a48892295ee9bc4a612f9", size = 284779, upload-time = "2026-02-28T02:19:38.625Z" }, + { url = "https://files.pythonhosted.org/packages/13/c0/ad225f4a405827486f1955283407cf758b6d2fb966712644c5f5aef33d1b/regex-2026.2.28-cp314-cp314t-win_arm64.whl", hash = "sha256:dee50f1be42222f89767b64b283283ef963189da0dda4a515aa54a5563c62dec", size = 275010, upload-time = "2026-02-28T02:19:40.65Z" }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218, upload-time = "2024-05-29T15:37:49.536Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928, upload-time = "2024-05-29T15:37:47.027Z" }, +] + +[[package]] +name = "rich" +version = "14.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, +] + +[[package]] +name = "rich-rst" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "rich" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/6d/a506aaa4a9eaa945ed8ab2b7347859f53593864289853c5d6d62b77246e0/rich_rst-1.3.2.tar.gz", hash = "sha256:a1196fdddf1e364b02ec68a05e8ff8f6914fee10fbca2e6b6735f166bb0da8d4", size = 14936, upload-time = "2025-10-14T16:49:45.332Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/2f/b4530fbf948867702d0a3f27de4a6aab1d156f406d72852ab902c4d04de9/rich_rst-1.3.2-py3-none-any.whl", hash = "sha256:a99b4907cbe118cf9d18b0b44de272efa61f15117c61e39ebdc431baf5df722a", size = 12567, upload-time = "2025-10-14T16:49:42.953Z" }, +] + +[[package]] +name = "rpds-py" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" }, + { url = "https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" }, + { url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" }, + { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" }, + { url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" }, + { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" }, + { url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" }, + { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" }, + { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" }, + { url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" }, + { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" }, + { url = "https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" }, + { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" }, + { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" }, + { url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" }, + { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" }, + { url = "https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = "2025-11-30T20:23:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = "2025-11-30T20:23:24.449Z" 
}, + { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, + { url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload-time = "2025-11-30T20:23:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload-time = "2025-11-30T20:23:29.151Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload-time = "2025-11-30T20:23:30.785Z" }, + { url = "https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload-time = "2025-11-30T20:23:32.209Z" }, + { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload-time = "2025-11-30T20:23:33.742Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload-time = "2025-11-30T20:23:35.253Z" }, + { url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload-time = "2025-11-30T20:23:36.842Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload-time = "2025-11-30T20:23:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload-time = "2025-11-30T20:23:40.263Z" }, + { url = "https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload-time = 
"2025-11-30T20:23:42.186Z" }, + { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload-time = "2025-11-30T20:23:44.086Z" }, + { url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload-time = "2025-11-30T20:23:46.004Z" }, + { url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload-time = "2025-11-30T20:23:47.696Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload-time = "2025-11-30T20:23:49.501Z" }, + { url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload-time = "2025-11-30T20:23:50.96Z" }, + { url = "https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload-time = "2025-11-30T20:23:52.494Z" }, + { url = "https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload-time = "2025-11-30T20:23:54.036Z" }, + { url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload-time = "2025-11-30T20:23:55.556Z" }, + { url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload-time = "2025-11-30T20:23:57.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload-time = "2025-11-30T20:23:58.637Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload-time = "2025-11-30T20:24:00.2Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload-time = "2025-11-30T20:24:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload-time = "2025-11-30T20:24:03.687Z" }, + { url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload-time = "2025-11-30T20:24:05.232Z" }, + { url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload-time = "2025-11-30T20:24:06.878Z" }, + { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload-time = "2025-11-30T20:24:08.445Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload-time = "2025-11-30T20:24:10.956Z" }, + { url = "https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload-time = "2025-11-30T20:24:12.735Z" }, + { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, +] + +[[package]] +name = "ruamel-yaml" +version = "0.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/3b/ebda527b56beb90cb7652cb1c7e4f91f48649fbcd8d2eb2fb6e77cd3329b/ruamel_yaml-0.19.1.tar.gz", hash = "sha256:53eb66cd27849eff968ebf8f0bf61f46cdac2da1d1f3576dd4ccee9b25c31993", size = 142709, upload-time = "2026-01-02T16:50:31.84Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/0c/51f6841f1d84f404f92463fc2b1ba0da357ca1e3db6b7fbda26956c3b82a/ruamel_yaml-0.19.1-py3-none-any.whl", hash = "sha256:27592957fedf6e0b62f281e96effd28043345e0e66001f97683aa9a40c667c93", size = 118102, upload-time = "2026-01-02T16:50:29.201Z" }, +] + +[[package]] +name = "ruff" +version = "0.15.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/97/e9f1ca355108ef7194e38c812ef40ba98c7208f47b13ad78d023caa583da/ruff-0.15.9.tar.gz", hash = 
"sha256:29cbb1255a9797903f6dde5ba0188c707907ff44a9006eb273b5a17bfa0739a2", size = 4617361, upload-time = "2026-04-02T18:17:20.829Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/1f/9cdfd0ac4b9d1e5a6cf09bedabdf0b56306ab5e333c85c87281273e7b041/ruff-0.15.9-py3-none-linux_armv6l.whl", hash = "sha256:6efbe303983441c51975c243e26dff328aca11f94b70992f35b093c2e71801e1", size = 10511206, upload-time = "2026-04-02T18:16:41.574Z" }, + { url = "https://files.pythonhosted.org/packages/3d/f6/32bfe3e9c136b35f02e489778d94384118bb80fd92c6d92e7ccd97db12ce/ruff-0.15.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4965bac6ac9ea86772f4e23587746f0b7a395eccabb823eb8bfacc3fa06069f7", size = 10923307, upload-time = "2026-04-02T18:17:08.645Z" }, + { url = "https://files.pythonhosted.org/packages/ca/25/de55f52ab5535d12e7aaba1de37a84be6179fb20bddcbe71ec091b4a3243/ruff-0.15.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:eaf05aad70ca5b5a0a4b0e080df3a6b699803916d88f006efd1f5b46302daab8", size = 10316722, upload-time = "2026-04-02T18:16:44.206Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/690d75f3fd6278fe55fff7c9eb429c92d207e14b25d1cae4064a32677029/ruff-0.15.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9439a342adb8725f32f92732e2bafb6d5246bd7a5021101166b223d312e8fc59", size = 10623674, upload-time = "2026-04-02T18:16:50.951Z" }, + { url = "https://files.pythonhosted.org/packages/bd/ec/176f6987be248fc5404199255522f57af1b4a5a1b57727e942479fec98ad/ruff-0.15.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9c5e6faf9d97c8edc43877c3f406f47446fc48c40e1442d58cfcdaba2acea745", size = 10351516, upload-time = "2026-04-02T18:16:57.206Z" }, + { url = "https://files.pythonhosted.org/packages/b2/fc/51cffbd2b3f240accc380171d51446a32aa2ea43a40d4a45ada67368fbd2/ruff-0.15.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b34a9766aeec27a222373d0b055722900fbc0582b24f39661aa96f3fe6ad901", size = 11150202, upload-time = "2026-04-02T18:17:06.452Z" }, + { url = "https://files.pythonhosted.org/packages/d6/d4/25292a6dfc125f6b6528fe6af31f5e996e19bf73ca8e3ce6eb7fa5b95885/ruff-0.15.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89dd695bc72ae76ff484ae54b7e8b0f6b50f49046e198355e44ea656e521fef9", size = 11988891, upload-time = "2026-04-02T18:17:18.575Z" }, + { url = "https://files.pythonhosted.org/packages/13/e1/1eebcb885c10e19f969dcb93d8413dfee8172578709d7ee933640f5e7147/ruff-0.15.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce187224ef1de1bd225bc9a152ac7102a6171107f026e81f317e4257052916d5", size = 11480576, upload-time = "2026-04-02T18:16:52.986Z" }, + { url = "https://files.pythonhosted.org/packages/ff/6b/a1548ac378a78332a4c3dcf4a134c2475a36d2a22ddfa272acd574140b50/ruff-0.15.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b0c7c341f68adb01c488c3b7d4b49aa8ea97409eae6462d860a79cf55f431b6", size = 11254525, upload-time = "2026-04-02T18:17:02.041Z" }, + { url = "https://files.pythonhosted.org/packages/42/aa/4bb3af8e61acd9b1281db2ab77e8b2c3c5e5599bf2a29d4a942f1c62b8d6/ruff-0.15.9-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:55cc15eee27dc0eebdfcb0d185a6153420efbedc15eb1d38fe5e685657b0f840", size = 11204072, upload-time = "2026-04-02T18:17:13.581Z" }, + { url = "https://files.pythonhosted.org/packages/69/48/d550dc2aa6e423ea0bcc1d0ff0699325ffe8a811e2dba156bd80750b86dc/ruff-0.15.9-py3-none-musllinux_1_2_aarch64.whl", hash = 
"sha256:a6537f6eed5cda688c81073d46ffdfb962a5f29ecb6f7e770b2dc920598997ed", size = 10594998, upload-time = "2026-04-02T18:16:46.369Z" }, + { url = "https://files.pythonhosted.org/packages/63/47/321167e17f5344ed5ec6b0aa2cff64efef5f9e985af8f5622cfa6536043f/ruff-0.15.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6d3fcbca7388b066139c523bda744c822258ebdcfbba7d24410c3f454cc9af71", size = 10359769, upload-time = "2026-04-02T18:17:10.994Z" }, + { url = "https://files.pythonhosted.org/packages/67/5e/074f00b9785d1d2c6f8c22a21e023d0c2c1817838cfca4c8243200a1fa87/ruff-0.15.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:058d8e99e1bfe79d8a0def0b481c56059ee6716214f7e425d8e737e412d69677", size = 10850236, upload-time = "2026-04-02T18:16:48.749Z" }, + { url = "https://files.pythonhosted.org/packages/76/37/804c4135a2a2caf042925d30d5f68181bdbd4461fd0d7739da28305df593/ruff-0.15.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:8e1ddb11dbd61d5983fa2d7d6370ef3eb210951e443cace19594c01c72abab4c", size = 11358343, upload-time = "2026-04-02T18:16:55.068Z" }, + { url = "https://files.pythonhosted.org/packages/88/3d/1364fcde8656962782aa9ea93c92d98682b1ecec2f184e625a965ad3b4a6/ruff-0.15.9-py3-none-win32.whl", hash = "sha256:bde6ff36eaf72b700f32b7196088970bf8fdb2b917b7accd8c371bfc0fd573ec", size = 10583382, upload-time = "2026-04-02T18:17:04.261Z" }, + { url = "https://files.pythonhosted.org/packages/4c/56/5c7084299bd2cacaa07ae63a91c6f4ba66edc08bf28f356b24f6b717c799/ruff-0.15.9-py3-none-win_amd64.whl", hash = "sha256:45a70921b80e1c10cf0b734ef09421f71b5aa11d27404edc89d7e8a69505e43d", size = 11744969, upload-time = "2026-04-02T18:16:59.611Z" }, + { url = "https://files.pythonhosted.org/packages/03/36/76704c4f312257d6dbaae3c959add2a622f63fcca9d864659ce6d8d97d3d/ruff-0.15.9-py3-none-win_arm64.whl", hash = "sha256:0694e601c028fd97dc5c6ee244675bc241aeefced7ef80cd9c6935a871078f53", size = 11005870, upload-time = "2026-04-02T18:17:15.773Z" }, +] + +[[package]] +name = "secretstorage" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "jeepney" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "slop-farmer" +version = "0.1.1" +source = { editable = "." 
} +dependencies = [ + { name = "duckdb" }, + { name = "fast-agent-mcp" }, + { name = "fastapi" }, + { name = "huggingface-hub" }, + { name = "pyarrow" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "rank-bm25" }, + { name = "uvicorn" }, +] + +[package.optional-dependencies] +dev = [ + { name = "httpx" }, + { name = "pytest" }, + { name = "ruff" }, + { name = "ty" }, +] +llm = [ + { name = "fast-agent-mcp" }, +] + +[package.metadata] +requires-dist = [ + { name = "duckdb", specifier = ">=1.2.2" }, + { name = "fast-agent-mcp", specifier = ">=0.6.17" }, + { name = "fast-agent-mcp", marker = "python_full_version >= '3.13.5' and extra == 'llm'", specifier = ">=0.6.16" }, + { name = "fastapi", specifier = ">=0.115.0" }, + { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" }, + { name = "huggingface-hub", specifier = ">=1.11.0" }, + { name = "pyarrow", specifier = ">=18.0.0" }, + { name = "pydantic", specifier = ">=2.11" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.0" }, + { name = "pyyaml", specifier = ">=6.0.2" }, + { name = "rank-bm25", specifier = ">=0.2.2" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.11" }, + { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.23" }, + { name = "uvicorn", specifier = ">=0.34.0" }, +] +provides-extras = ["dev", "llm"] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + +[[package]] +name = "sse-starlette" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/14/2f/9223c24f568bb7a0c03d751e609844dce0968f13b39a3f73fbb3a96cd27a/sse_starlette-3.3.3.tar.gz", hash = "sha256:72a95d7575fd5129bd0ae15275ac6432bb35ac542fdebb82889c24bb9f3f4049", size = 32420, upload-time = "2026-03-17T20:05:55.529Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/e2/b8cff57a67dddf9a464d7e943218e031617fb3ddc133aeeb0602ff5f6c85/sse_starlette-3.3.3-py3-none-any.whl", hash = "sha256:c5abb5082a1cc1c6294d89c5290c46b5f67808cfdb612b7ec27e8ba061c22e8d", size = 14329, upload-time = "2026-03-17T20:05:54.35Z" }, +] + +[[package]] +name = "starlette" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = 
"2026-03-22T18:29:45.111Z" }, +] + +[[package]] +name = "tenacity" +version = "9.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a", size = 49413, upload-time = "2026-02-07T10:45:33.841Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" }, +] + +[[package]] +name = "tiktoken" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" }, + { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" }, + { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" }, + { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = "2025-10-06T20:22:08.418Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" }, + { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" }, + { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" }, + { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" }, + { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" }, + { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" }, + { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" }, + { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" }, + { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" }, + { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" }, + { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" }, + { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" }, + { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" }, + { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" }, + { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" }, + { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" }, + { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" }, +] + +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, 
upload-time = "2026-02-03T17:35:53.048Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, +] + +[[package]] +name = "ty" +version = "0.0.29" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/d5/853561de49fae38c519e905b2d8da9c531219608f1fccc47a0fc2c896980/ty-0.0.29.tar.gz", hash = "sha256:e7936cca2f691eeda631876c92809688dbbab68687c3473f526cd83b6a9228d8", size = 5469221, upload-time = "2026-04-05T15:01:21.328Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/b7/911f9962115acfa24e3b2ec9d4992dd994c38e8769e1b1d7680bb4d28a51/ty-0.0.29-py3-none-linux_armv6l.whl", hash = "sha256:b8a40955f7660d3eaceb0d964affc81b790c0765e7052921a5f861ff8a471c30", size = 10568206, upload-time = "2026-04-05T15:01:19.165Z" }, + { url = "https://files.pythonhosted.org/packages/fe/c3/fcae2167d4c77a97269f92f11d1b43b03617f81de1283d5d05b43432110c/ty-0.0.29-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6b6849adae15b00bbe2d3c5b078967dcb62eba37d38936b8eeb4c81a82d2e3b8", size = 10442530, upload-time = "2026-04-05T15:01:28.471Z" }, + { url = "https://files.pythonhosted.org/packages/97/33/5a6bfa240cfcb9c36046ae2459fa9ea23238d20130d8656ff5ac4d6c012a/ty-0.0.29-py3-none-macosx_11_0_arm64.whl", hash = "sha256:dcdd9b17209788152f7b7ea815eda07989152325052fe690013537cc7904ce49", size = 9915735, upload-time = "2026-04-05T15:01:10.365Z" }, + { url = "https://files.pythonhosted.org/packages/b3/1e/318f45fae232118e81a6306c30f50de42c509c412128d5bd231eab699ffb/ty-0.0.29-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d8ed4789bae78ffaf94462c0d25589a734cab0366b86f2bbcb1bb90e1a7a169", size = 10419748, upload-time = "2026-04-05T15:01:32.375Z" }, + { url = "https://files.pythonhosted.org/packages/a9/a8/5687872e2ab5a0f7dd4fd8456eac31e9381ad4dc74961f6f29965ad4dd91/ty-0.0.29-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:91ec374b8565e0ad0900011c24641ebbef2da51adbd4fb69ff3280c8a7eceb02", size = 10394738, upload-time = "2026-04-05T15:01:06.473Z" }, + { url = "https://files.pythonhosted.org/packages/de/68/015d118097eeb95e6a44c4abce4c0a28b7b9dfb3085b7f0ee48e4f099633/ty-0.0.29-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:298a8d5faa2502d3810bbbb47a030b9455495b9921594206043c785dd61548cf", size = 10910613, upload-time = "2026-04-05T15:01:17.17Z" }, + { url = "https://files.pythonhosted.org/packages/1c/01/47ce3c6c53e0670eadbe80756b167bf80ed6681d1ba57cfde2e8065a13d1/ty-0.0.29-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c8fba1a3524c6109d1e020d92301c79d41bf442fa8d335b9fa366239339cb70", size = 11475750, upload-time = "2026-04-05T15:01:30.461Z" }, + { url = "https://files.pythonhosted.org/packages/c4/cf/e361845b1081c9264ad5b7c963231bab03f2666865a9f2a115c4233f2137/ty-0.0.29-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c48adf88a70d264128c39ee922ed14a947817fced1e93c08c1a89c9244edcde", size = 11190055, upload-time = "2026-04-05T15:01:12.369Z" }, + { url = "https://files.pythonhosted.org/packages/79/12/0fb0857e9a62cb11586e9a712103877bbf717f5fb570d16634408cfdefee/ty-0.0.29-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:2ce0a7a0e96bc7b42518cd3a1a6a6298ef64ff40ca4614355c1aa807059b5c6f", size = 11020539, upload-time = "2026-04-05T15:01:37.022Z" }, + { url = "https://files.pythonhosted.org/packages/20/36/5a26753802083f80cd125db6c4348ad42b3c982ec36e718e0bf4c18f75e5/ty-0.0.29-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a6ac86a05b4a3731d45365ab97780acc7b8146fa62fccb3cbe94fe6546c67a97", size = 10396399, upload-time = "2026-04-05T15:01:26.167Z" }, + { url = "https://files.pythonhosted.org/packages/00/e6/b4e75b5752239ab3ab400f19faef4dbef81d05aab5d3419fda0c062a3765/ty-0.0.29-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6bbbf53141af0f3150bf288d716263f1a3550054e4b3551ca866d38192ba9891", size = 10421461, upload-time = "2026-04-05T15:01:08.367Z" }, + { url = "https://files.pythonhosted.org/packages/c0/21/1084b5b609f9abed62070ec0b31c283a403832a6310c8bbc208bd45ee1e6/ty-0.0.29-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1c9e06b770c1d0ff5efc51e34312390db31d53fcf3088163f413030b42b74f84", size = 10599187, upload-time = "2026-04-05T15:01:23.52Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a1/ce19a2ca717bbcc1ee11378aba52ef70b6ce5b87245162a729d9fdc2360f/ty-0.0.29-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0307fe37e3f000ef1a4ae230bbaf511508a78d24a5e51b40902a21b09d5e6037", size = 11121198, upload-time = "2026-04-05T15:01:15.22Z" }, + { url = "https://files.pythonhosted.org/packages/6b/6b/f1430b279af704321566ce7ec2725d3d8258c2f815ebd93e474c64cd4543/ty-0.0.29-py3-none-win32.whl", hash = "sha256:7a2a898217960a825f8bc0087e1fdbaf379606175e98f9807187221d53a4a8ed", size = 9995331, upload-time = "2026-04-05T15:01:01.32Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ef/3ef01c17785ff9a69378465c7d0faccd48a07b163554db0995e5d65a5a23/ty-0.0.29-py3-none-win_amd64.whl", hash = "sha256:fc1294200226b91615acbf34e0a9ad81caf98c081e9c6a912a31b0a7b603bc3f", size = 11023644, upload-time = "2026-04-05T15:01:04.432Z" }, + { url = "https://files.pythonhosted.org/packages/2c/55/87280a994d6a2d2647c65e12abbc997ed49835794366153c04c4d9304d76/ty-0.0.29-py3-none-win_arm64.whl", hash = "sha256:f9794bbd1bb3ce13f78c191d0c89ae4c63f52c12b6daa0c6fe220b90d019d12c", size = 10428165, upload-time = "2026-04-05T15:01:34.665Z" }, +] + +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "uncalled-for" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/7c/b5b7d8136f872e3f13b0584e576886de0489d7213a12de6bebf29ff6ebfc/uncalled_for-0.2.0.tar.gz", hash = "sha256:b4f8fdbcec328c5a113807d653e041c5094473dd4afa7c34599ace69ccb7e69f", size = 49488, upload-time = "2026-02-27T17:40:58.137Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/7f/4320d9ce3be404e6310b915c3629fe27bf1e2f438a1a7a3cb0396e32e9a9/uncalled_for-0.2.0-py3-none-any.whl", hash = "sha256:2c0bd338faff5f930918f79e7eb9ff48290df2cb05fcc0b40a7f334e55d4d85f", size = 11351, upload-time = "2026-02-27T17:40:56.804Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.42.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl", hash = "sha256:96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359", size = 68830, upload-time = "2026-03-16T06:19:48.325Z" }, +] + +[[package]] +name = "uvloop" +version = "0.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = 
"sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, + { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, + { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, + { url = "https://files.pythonhosted.org/packages/90/cd/b62bdeaa429758aee8de8b00ac0dd26593a9de93d302bff3d21439e9791d/uvloop-0.22.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142", size = 1362067, upload-time = "2025-10-16T22:16:44.503Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f8/a132124dfda0777e489ca86732e85e69afcd1ff7686647000050ba670689/uvloop-0.22.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74", size = 752423, upload-time = "2025-10-16T22:16:45.968Z" }, + { url = "https://files.pythonhosted.org/packages/a3/94/94af78c156f88da4b3a733773ad5ba0b164393e357cc4bd0ab2e2677a7d6/uvloop-0.22.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35", size = 4272437, upload-time = "2025-10-16T22:16:47.451Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/60249e9fd07b32c665192cec7af29e06c7cd96fa1d08b84f012a56a0b38e/uvloop-0.22.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25", size = 4292101, upload-time = "2025-10-16T22:16:49.318Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/62/67d382dfcb25d0a98ce73c11ed1a6fba5037a1a1d533dcbb7cab033a2636/uvloop-0.22.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6", size = 4114158, upload-time = "2025-10-16T22:16:50.517Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/f1171b4a882a5d13c8b7576f348acfe6074d72eaf52cccef752f748d4a9f/uvloop-0.22.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079", size = 4177360, upload-time = "2025-10-16T22:16:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/79/7b/b01414f31546caf0919da80ad57cbfe24c56b151d12af68cee1b04922ca8/uvloop-0.22.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289", size = 1454790, upload-time = "2025-10-16T22:16:54.355Z" }, + { url = "https://files.pythonhosted.org/packages/d4/31/0bb232318dd838cad3fa8fb0c68c8b40e1145b32025581975e18b11fab40/uvloop-0.22.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3", size = 796783, upload-time = "2025-10-16T22:16:55.906Z" }, + { url = "https://files.pythonhosted.org/packages/42/38/c9b09f3271a7a723a5de69f8e237ab8e7803183131bc57c890db0b6bb872/uvloop-0.22.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c", size = 4647548, upload-time = "2025-10-16T22:16:57.008Z" }, + { url = "https://files.pythonhosted.org/packages/c1/37/945b4ca0ac27e3dc4952642d4c900edd030b3da6c9634875af6e13ae80e5/uvloop-0.22.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21", size = 4467065, upload-time = "2025-10-16T22:16:58.206Z" }, + { url = "https://files.pythonhosted.org/packages/97/cc/48d232f33d60e2e2e0b42f4e73455b146b76ebe216487e862700457fbf3c/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88", size = 4328384, upload-time = "2025-10-16T22:16:59.36Z" }, + { url = "https://files.pythonhosted.org/packages/e4/16/c1fd27e9549f3c4baf1dc9c20c456cd2f822dbf8de9f463824b0c0357e06/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e", size = 4296730, upload-time = "2025-10-16T22:17:00.744Z" }, +] + +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321, upload-time = "2025-10-14T15:05:02.063Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783, upload-time = "2025-10-14T15:05:03.052Z" }, + { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279, upload-time = "2025-10-14T15:05:04.004Z" }, + { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405, upload-time = "2025-10-14T15:05:04.942Z" }, + { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976, upload-time = "2025-10-14T15:05:05.905Z" }, + { url = "https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506, upload-time = "2025-10-14T15:05:06.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936, upload-time = "2025-10-14T15:05:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147, upload-time = "2025-10-14T15:05:09.138Z" }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007, upload-time = "2025-10-14T15:05:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280, upload-time = "2025-10-14T15:05:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056, upload-time = "2025-10-14T15:05:12.156Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162, upload-time = "2025-10-14T15:05:13.208Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", 
hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909, upload-time = "2025-10-14T15:05:14.49Z" }, + { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389, upload-time = "2025-10-14T15:05:15.777Z" }, + { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964, upload-time = "2025-10-14T15:05:16.85Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114, upload-time = "2025-10-14T15:05:17.876Z" }, + { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264, upload-time = "2025-10-14T15:05:18.962Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877, upload-time = "2025-10-14T15:05:20.094Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176, upload-time = "2025-10-14T15:05:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577, upload-time = "2025-10-14T15:05:22.306Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425, upload-time = "2025-10-14T15:05:23.348Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826, upload-time = "2025-10-14T15:05:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208, upload-time = "2025-10-14T15:05:25.45Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315, upload-time = "2025-10-14T15:05:26.501Z" }, + { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869, upload-time = "2025-10-14T15:05:27.649Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919, upload-time = "2025-10-14T15:05:28.701Z" }, + { url = "https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845, upload-time = "2025-10-14T15:05:30.064Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027, upload-time = "2025-10-14T15:05:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615, upload-time = "2025-10-14T15:05:32.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836, upload-time = "2025-10-14T15:05:33.209Z" }, + { url = "https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099, upload-time = "2025-10-14T15:05:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626, upload-time = "2025-10-14T15:05:35.216Z" }, + { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519, upload-time = "2025-10-14T15:05:36.259Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078, upload-time = 
"2025-10-14T15:05:37.63Z" }, + { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664, upload-time = "2025-10-14T15:05:38.95Z" }, + { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154, upload-time = "2025-10-14T15:05:39.954Z" }, + { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820, upload-time = "2025-10-14T15:05:40.932Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510, upload-time = "2025-10-14T15:05:41.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408, upload-time = "2025-10-14T15:05:43.385Z" }, + { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968, upload-time = "2025-10-14T15:05:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096, upload-time = "2025-10-14T15:05:45.398Z" }, + { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040, upload-time = "2025-10-14T15:05:46.502Z" }, + { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847, upload-time = "2025-10-14T15:05:47.484Z" }, + { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = "2025-10-14T15:05:48.928Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 
629104, upload-time = "2025-10-14T15:05:49.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, +] + +[[package]] +name = "wcwidth" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/a2/8e3becb46433538a38726c948d3399905a4c7cabd0df578ede5dc51f0ec2/wcwidth-0.6.0.tar.gz", hash = "sha256:cdc4e4262d6ef9a1a57e018384cbeb1208d8abbc64176027e2c2455c81313159", size = 159684, upload-time = "2026-02-06T19:19:40.919Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/5a/199c59e0a824a3db2b89c5d2dade7ab5f9624dbf6448dc291b46d5ec94d3/wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad", size = 94189, upload-time = "2026-02-06T19:19:39.646Z" }, +] + +[[package]] +name = "websockets" +version = "16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, upload-time = "2026-01-10T09:23:47.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/9c/baa8456050d1c1b08dd0ec7346026668cbc6f145ab4e314d707bb845bf0d/websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9", size = 177364, upload-time = "2026-01-10T09:22:59.333Z" }, + { url = "https://files.pythonhosted.org/packages/7e/0c/8811fc53e9bcff68fe7de2bcbe75116a8d959ac699a3200f4847a8925210/websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230", size = 175039, upload-time = "2026-01-10T09:23:01.171Z" }, + { url = "https://files.pythonhosted.org/packages/aa/82/39a5f910cb99ec0b59e482971238c845af9220d3ab9fa76dd9162cda9d62/websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c", size = 175323, upload-time = "2026-01-10T09:23:02.341Z" }, + { url = "https://files.pythonhosted.org/packages/bd/28/0a25ee5342eb5d5f297d992a77e56892ecb65e7854c7898fb7d35e9b33bd/websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5", size = 184975, upload-time = "2026-01-10T09:23:03.756Z" }, + { url = "https://files.pythonhosted.org/packages/f9/66/27ea52741752f5107c2e41fda05e8395a682a1e11c4e592a809a90c6a506/websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82", size = 186203, upload-time = "2026-01-10T09:23:05.01Z" }, + { url = "https://files.pythonhosted.org/packages/37/e5/8e32857371406a757816a2b471939d51c463509be73fa538216ea52b792a/websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8", size = 185653, upload-time = "2026-01-10T09:23:06.301Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/67/f926bac29882894669368dc73f4da900fcdf47955d0a0185d60103df5737/websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f", size = 184920, upload-time = "2026-01-10T09:23:07.492Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a1/3d6ccdcd125b0a42a311bcd15a7f705d688f73b2a22d8cf1c0875d35d34a/websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a", size = 178255, upload-time = "2026-01-10T09:23:09.245Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ae/90366304d7c2ce80f9b826096a9e9048b4bb760e44d3b873bb272cba696b/websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156", size = 178689, upload-time = "2026-01-10T09:23:10.483Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1d/e88022630271f5bd349ed82417136281931e558d628dd52c4d8621b4a0b2/websockets-16.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0", size = 177406, upload-time = "2026-01-10T09:23:12.178Z" }, + { url = "https://files.pythonhosted.org/packages/f2/78/e63be1bf0724eeb4616efb1ae1c9044f7c3953b7957799abb5915bffd38e/websockets-16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904", size = 175085, upload-time = "2026-01-10T09:23:13.511Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/d3c9220d818ee955ae390cf319a7c7a467beceb24f05ee7aaaa2414345ba/websockets-16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4", size = 175328, upload-time = "2026-01-10T09:23:14.727Z" }, + { url = "https://files.pythonhosted.org/packages/63/bc/d3e208028de777087e6fb2b122051a6ff7bbcca0d6df9d9c2bf1dd869ae9/websockets-16.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e", size = 185044, upload-time = "2026-01-10T09:23:15.939Z" }, + { url = "https://files.pythonhosted.org/packages/ad/6e/9a0927ac24bd33a0a9af834d89e0abc7cfd8e13bed17a86407a66773cc0e/websockets-16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4", size = 186279, upload-time = "2026-01-10T09:23:17.148Z" }, + { url = "https://files.pythonhosted.org/packages/b9/ca/bf1c68440d7a868180e11be653c85959502efd3a709323230314fda6e0b3/websockets-16.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1", size = 185711, upload-time = "2026-01-10T09:23:18.372Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f8/fdc34643a989561f217bb477cbc47a3a07212cbda91c0e4389c43c296ebf/websockets-16.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3", size = 184982, upload-time = "2026-01-10T09:23:19.652Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d1/574fa27e233764dbac9c52730d63fcf2823b16f0856b3329fc6268d6ae4f/websockets-16.0-cp314-cp314-win32.whl", hash = "sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8", size = 177915, upload-time = "2026-01-10T09:23:21.458Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/f1/ae6b937bf3126b5134ce1f482365fde31a357c784ac51852978768b5eff4/websockets-16.0-cp314-cp314-win_amd64.whl", hash = "sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d", size = 178381, upload-time = "2026-01-10T09:23:22.715Z" }, + { url = "https://files.pythonhosted.org/packages/06/9b/f791d1db48403e1f0a27577a6beb37afae94254a8c6f08be4a23e4930bc0/websockets-16.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244", size = 177737, upload-time = "2026-01-10T09:23:24.523Z" }, + { url = "https://files.pythonhosted.org/packages/bd/40/53ad02341fa33b3ce489023f635367a4ac98b73570102ad2cdd770dacc9a/websockets-16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e", size = 175268, upload-time = "2026-01-10T09:23:25.781Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/6158d4e459b984f949dcbbb0c5d270154c7618e11c01029b9bbd1bb4c4f9/websockets-16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641", size = 175486, upload-time = "2026-01-10T09:23:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/2d/7583b30208b639c8090206f95073646c2c9ffd66f44df967981a64f849ad/websockets-16.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8", size = 185331, upload-time = "2026-01-10T09:23:28.259Z" }, + { url = "https://files.pythonhosted.org/packages/45/b0/cce3784eb519b7b5ad680d14b9673a31ab8dcb7aad8b64d81709d2430aa8/websockets-16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e", size = 186501, upload-time = "2026-01-10T09:23:29.449Z" }, + { url = "https://files.pythonhosted.org/packages/19/60/b8ebe4c7e89fb5f6cdf080623c9d92789a53636950f7abacfc33fe2b3135/websockets-16.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944", size = 186062, upload-time = "2026-01-10T09:23:31.368Z" }, + { url = "https://files.pythonhosted.org/packages/88/a8/a080593f89b0138b6cba1b28f8df5673b5506f72879322288b031337c0b8/websockets-16.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206", size = 185356, upload-time = "2026-01-10T09:23:32.627Z" }, + { url = "https://files.pythonhosted.org/packages/c2/b6/b9afed2afadddaf5ebb2afa801abf4b0868f42f8539bfe4b071b5266c9fe/websockets-16.0-cp314-cp314t-win32.whl", hash = "sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6", size = 178085, upload-time = "2026-01-10T09:23:33.816Z" }, + { url = "https://files.pythonhosted.org/packages/9f/3e/28135a24e384493fa804216b79a6a6759a38cc4ff59118787b9fb693df93/websockets-16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd", size = 178531, upload-time = "2026-01-10T09:23:35.016Z" }, + { url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, +] + +[[package]] +name = "wrapt" +version = "1.17.3" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/f6/759ece88472157acb55fc195e5b116e06730f1b651b5b314c66291729193/wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", size = 54003, upload-time = "2025-08-12T05:51:48.627Z" }, + { url = "https://files.pythonhosted.org/packages/4f/a9/49940b9dc6d47027dc850c116d79b4155f15c08547d04db0f07121499347/wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", size = 39025, upload-time = "2025-08-12T05:51:37.156Z" }, + { url = "https://files.pythonhosted.org/packages/45/35/6a08de0f2c96dcdd7fe464d7420ddb9a7655a6561150e5fc4da9356aeaab/wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", size = 39108, upload-time = "2025-08-12T05:51:58.425Z" }, + { url = "https://files.pythonhosted.org/packages/0c/37/6faf15cfa41bf1f3dba80cd3f5ccc6622dfccb660ab26ed79f0178c7497f/wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", size = 88072, upload-time = "2025-08-12T05:52:37.53Z" }, + { url = "https://files.pythonhosted.org/packages/78/f2/efe19ada4a38e4e15b6dff39c3e3f3f73f5decf901f66e6f72fe79623a06/wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", size = 88214, upload-time = "2025-08-12T05:52:15.886Z" }, + { url = "https://files.pythonhosted.org/packages/40/90/ca86701e9de1622b16e09689fc24b76f69b06bb0150990f6f4e8b0eeb576/wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", size = 87105, upload-time = "2025-08-12T05:52:17.914Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e0/d10bd257c9a3e15cbf5523025252cc14d77468e8ed644aafb2d6f54cb95d/wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", size = 87766, upload-time = "2025-08-12T05:52:39.243Z" }, + { url = "https://files.pythonhosted.org/packages/e8/cf/7d848740203c7b4b27eb55dbfede11aca974a51c3d894f6cc4b865f42f58/wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", size = 36711, upload-time = "2025-08-12T05:53:10.074Z" }, + { url = "https://files.pythonhosted.org/packages/57/54/35a84d0a4d23ea675994104e667ceff49227ce473ba6a59ba2c84f250b74/wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", size = 38885, upload-time = "2025-08-12T05:53:08.695Z" }, + { url = "https://files.pythonhosted.org/packages/01/77/66e54407c59d7b02a3c4e0af3783168fff8e5d61def52cda8728439d86bc/wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", size = 36896, upload-time = "2025-08-12T05:52:55.34Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/a2/cd864b2a14f20d14f4c496fab97802001560f9f41554eef6df201cd7f76c/wrapt-1.17.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39", size = 54132, upload-time = "2025-08-12T05:51:49.864Z" }, + { url = "https://files.pythonhosted.org/packages/d5/46/d011725b0c89e853dc44cceb738a307cde5d240d023d6d40a82d1b4e1182/wrapt-1.17.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235", size = 39091, upload-time = "2025-08-12T05:51:38.935Z" }, + { url = "https://files.pythonhosted.org/packages/2e/9e/3ad852d77c35aae7ddebdbc3b6d35ec8013af7d7dddad0ad911f3d891dae/wrapt-1.17.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c", size = 39172, upload-time = "2025-08-12T05:51:59.365Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f7/c983d2762bcce2326c317c26a6a1e7016f7eb039c27cdf5c4e30f4160f31/wrapt-1.17.3-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b", size = 87163, upload-time = "2025-08-12T05:52:40.965Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0f/f673f75d489c7f22d17fe0193e84b41540d962f75fce579cf6873167c29b/wrapt-1.17.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa", size = 87963, upload-time = "2025-08-12T05:52:20.326Z" }, + { url = "https://files.pythonhosted.org/packages/df/61/515ad6caca68995da2fac7a6af97faab8f78ebe3bf4f761e1b77efbc47b5/wrapt-1.17.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7", size = 86945, upload-time = "2025-08-12T05:52:21.581Z" }, + { url = "https://files.pythonhosted.org/packages/d3/bd/4e70162ce398462a467bc09e768bee112f1412e563620adc353de9055d33/wrapt-1.17.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4", size = 86857, upload-time = "2025-08-12T05:52:43.043Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b8/da8560695e9284810b8d3df8a19396a6e40e7518059584a1a394a2b35e0a/wrapt-1.17.3-cp314-cp314-win32.whl", hash = "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10", size = 37178, upload-time = "2025-08-12T05:53:12.605Z" }, + { url = "https://files.pythonhosted.org/packages/db/c8/b71eeb192c440d67a5a0449aaee2310a1a1e8eca41676046f99ed2487e9f/wrapt-1.17.3-cp314-cp314-win_amd64.whl", hash = "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6", size = 39310, upload-time = "2025-08-12T05:53:11.106Z" }, + { url = "https://files.pythonhosted.org/packages/45/20/2cda20fd4865fa40f86f6c46ed37a2a8356a7a2fde0773269311f2af56c7/wrapt-1.17.3-cp314-cp314-win_arm64.whl", hash = "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58", size = 37266, upload-time = "2025-08-12T05:52:56.531Z" }, + { url = "https://files.pythonhosted.org/packages/77/ed/dd5cf21aec36c80443c6f900449260b80e2a65cf963668eaef3b9accce36/wrapt-1.17.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a", size = 56544, upload-time = "2025-08-12T05:51:51.109Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/96/450c651cc753877ad100c7949ab4d2e2ecc4d97157e00fa8f45df682456a/wrapt-1.17.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067", size = 40283, upload-time = "2025-08-12T05:51:39.912Z" }, + { url = "https://files.pythonhosted.org/packages/d1/86/2fcad95994d9b572db57632acb6f900695a648c3e063f2cd344b3f5c5a37/wrapt-1.17.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454", size = 40366, upload-time = "2025-08-12T05:52:00.693Z" }, + { url = "https://files.pythonhosted.org/packages/64/0e/f4472f2fdde2d4617975144311f8800ef73677a159be7fe61fa50997d6c0/wrapt-1.17.3-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e", size = 108571, upload-time = "2025-08-12T05:52:44.521Z" }, + { url = "https://files.pythonhosted.org/packages/cc/01/9b85a99996b0a97c8a17484684f206cbb6ba73c1ce6890ac668bcf3838fb/wrapt-1.17.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f", size = 113094, upload-time = "2025-08-12T05:52:22.618Z" }, + { url = "https://files.pythonhosted.org/packages/25/02/78926c1efddcc7b3aa0bc3d6b33a822f7d898059f7cd9ace8c8318e559ef/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056", size = 110659, upload-time = "2025-08-12T05:52:24.057Z" }, + { url = "https://files.pythonhosted.org/packages/dc/ee/c414501ad518ac3e6fe184753632fe5e5ecacdcf0effc23f31c1e4f7bfcf/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804", size = 106946, upload-time = "2025-08-12T05:52:45.976Z" }, + { url = "https://files.pythonhosted.org/packages/be/44/a1bd64b723d13bb151d6cc91b986146a1952385e0392a78567e12149c7b4/wrapt-1.17.3-cp314-cp314t-win32.whl", hash = "sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977", size = 38717, upload-time = "2025-08-12T05:53:15.214Z" }, + { url = "https://files.pythonhosted.org/packages/79/d9/7cfd5a312760ac4dd8bf0184a6ee9e43c33e47f3dadc303032ce012b8fa3/wrapt-1.17.3-cp314-cp314t-win_amd64.whl", hash = "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116", size = 41334, upload-time = "2025-08-12T05:53:14.178Z" }, + { url = "https://files.pythonhosted.org/packages/46/78/10ad9781128ed2f99dbc474f43283b13fea8ba58723e98844367531c18e9/wrapt-1.17.3-cp314-cp314t-win_arm64.whl", hash = "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6", size = 38471, upload-time = "2025-08-12T05:52:57.784Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, +] + +[[package]] +name = "yarl" +version = "1.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "multidict" }, + { name = "propcache" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/23/6e/beb1beec874a72f23815c1434518bfc4ed2175065173fb138c3705f658d4/yarl-1.23.0.tar.gz", hash = 
"sha256:53b1ea6ca88ebd4420379c330aea57e258408dd0df9af0992e5de2078dc9f5d5", size = 194676, upload-time = "2026-03-01T22:07:53.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/4b/a0a6e5d0ee8a2f3a373ddef8a4097d74ac901ac363eea1440464ccbe0898/yarl-1.23.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:16c6994ac35c3e74fb0ae93323bf8b9c2a9088d55946109489667c510a7d010e", size = 123796, upload-time = "2026-03-01T22:05:41.412Z" }, + { url = "https://files.pythonhosted.org/packages/67/b6/8925d68af039b835ae876db5838e82e76ec87b9782ecc97e192b809c4831/yarl-1.23.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4a42e651629dafb64fd5b0286a3580613702b5809ad3f24934ea87595804f2c5", size = 86547, upload-time = "2026-03-01T22:05:42.841Z" }, + { url = "https://files.pythonhosted.org/packages/ae/50/06d511cc4b8e0360d3c94af051a768e84b755c5eb031b12adaaab6dec6e5/yarl-1.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c6b9461a2a8b47c65eef63bb1c76a4f1c119618ffa99ea79bc5bb1e46c5821b", size = 85854, upload-time = "2026-03-01T22:05:44.85Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f4/4e30b250927ffdab4db70da08b9b8d2194d7c7b400167b8fbeca1e4701ca/yarl-1.23.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2569b67d616eab450d262ca7cb9f9e19d2f718c70a8b88712859359d0ab17035", size = 98351, upload-time = "2026-03-01T22:05:46.836Z" }, + { url = "https://files.pythonhosted.org/packages/86/fc/4118c5671ea948208bdb1492d8b76bdf1453d3e73df051f939f563e7dcc5/yarl-1.23.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e9d9a4d06d3481eab79803beb4d9bd6f6a8e781ec078ac70d7ef2dcc29d1bea5", size = 92711, upload-time = "2026-03-01T22:05:48.316Z" }, + { url = "https://files.pythonhosted.org/packages/56/11/1ed91d42bd9e73c13dc9e7eb0dd92298d75e7ac4dd7f046ad0c472e231cd/yarl-1.23.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f514f6474e04179d3d33175ed3f3e31434d3130d42ec153540d5b157deefd735", size = 106014, upload-time = "2026-03-01T22:05:50.028Z" }, + { url = "https://files.pythonhosted.org/packages/ce/c9/74e44e056a23fbc33aca71779ef450ca648a5bc472bdad7a82339918f818/yarl-1.23.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fda207c815b253e34f7e1909840fd14299567b1c0eb4908f8c2ce01a41265401", size = 105557, upload-time = "2026-03-01T22:05:51.416Z" }, + { url = "https://files.pythonhosted.org/packages/66/fe/b1e10b08d287f518994f1e2ff9b6d26f0adeecd8dd7d533b01bab29a3eda/yarl-1.23.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34b6cf500e61c90f305094911f9acc9c86da1a05a7a3f5be9f68817043f486e4", size = 101559, upload-time = "2026-03-01T22:05:52.872Z" }, + { url = "https://files.pythonhosted.org/packages/72/59/c5b8d94b14e3d3c2a9c20cb100119fd534ab5a14b93673ab4cc4a4141ea5/yarl-1.23.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d7504f2b476d21653e4d143f44a175f7f751cd41233525312696c76aa3dbb23f", size = 100502, upload-time = "2026-03-01T22:05:54.954Z" }, + { url = "https://files.pythonhosted.org/packages/77/4f/96976cb54cbfc5c9fd73ed4c51804f92f209481d1fb190981c0f8a07a1d7/yarl-1.23.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:578110dd426f0d209d1509244e6d4a3f1a3e9077655d98c5f22583d63252a08a", size = 98027, upload-time = "2026-03-01T22:05:56.409Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/6e/904c4f476471afdbad6b7e5b70362fb5810e35cd7466529a97322b6f5556/yarl-1.23.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:609d3614d78d74ebe35f54953c5bbd2ac647a7ddb9c30a5d877580f5e86b22f2", size = 95369, upload-time = "2026-03-01T22:05:58.141Z" }, + { url = "https://files.pythonhosted.org/packages/9d/40/acfcdb3b5f9d68ef499e39e04d25e141fe90661f9d54114556cf83be8353/yarl-1.23.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4966242ec68afc74c122f8459abd597afd7d8a60dc93d695c1334c5fd25f762f", size = 105565, upload-time = "2026-03-01T22:06:00.286Z" }, + { url = "https://files.pythonhosted.org/packages/5e/c6/31e28f3a6ba2869c43d124f37ea5260cac9c9281df803c354b31f4dd1f3c/yarl-1.23.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:e0fd068364a6759bc794459f0a735ab151d11304346332489c7972bacbe9e72b", size = 99813, upload-time = "2026-03-01T22:06:01.712Z" }, + { url = "https://files.pythonhosted.org/packages/08/1f/6f65f59e72d54aa467119b63fc0b0b1762eff0232db1f4720cd89e2f4a17/yarl-1.23.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:39004f0ad156da43e86aa71f44e033de68a44e5a31fc53507b36dd253970054a", size = 105632, upload-time = "2026-03-01T22:06:03.188Z" }, + { url = "https://files.pythonhosted.org/packages/a3/c4/18b178a69935f9e7a338127d5b77d868fdc0f0e49becd286d51b3a18c61d/yarl-1.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e5723c01a56c5028c807c701aa66722916d2747ad737a046853f6c46f4875543", size = 101895, upload-time = "2026-03-01T22:06:04.651Z" }, + { url = "https://files.pythonhosted.org/packages/8f/54/f5b870b5505663911dba950a8e4776a0dbd51c9c54c0ae88e823e4b874a0/yarl-1.23.0-cp313-cp313-win32.whl", hash = "sha256:1b6b572edd95b4fa8df75de10b04bc81acc87c1c7d16bcdd2035b09d30acc957", size = 82356, upload-time = "2026-03-01T22:06:06.04Z" }, + { url = "https://files.pythonhosted.org/packages/7a/84/266e8da36879c6edcd37b02b547e2d9ecdfea776be49598e75696e3316e1/yarl-1.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:baaf55442359053c7d62f6f8413a62adba3205119bcb6f49594894d8be47e5e3", size = 87515, upload-time = "2026-03-01T22:06:08.107Z" }, + { url = "https://files.pythonhosted.org/packages/00/fd/7e1c66efad35e1649114fa13f17485f62881ad58edeeb7f49f8c5e748bf9/yarl-1.23.0-cp313-cp313-win_arm64.whl", hash = "sha256:fb4948814a2a98e3912505f09c9e7493b1506226afb1f881825368d6fb776ee3", size = 81785, upload-time = "2026-03-01T22:06:10.181Z" }, + { url = "https://files.pythonhosted.org/packages/9c/fc/119dd07004f17ea43bb91e3ece6587759edd7519d6b086d16bfbd3319982/yarl-1.23.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:aecfed0b41aa72b7881712c65cf764e39ce2ec352324f5e0837c7048d9e6daaa", size = 130719, upload-time = "2026-03-01T22:06:11.708Z" }, + { url = "https://files.pythonhosted.org/packages/e6/0d/9f2348502fbb3af409e8f47730282cd6bc80dec6630c1e06374d882d6eb2/yarl-1.23.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a41bcf68efd19073376eb8cf948b8d9be0af26256403e512bb18f3966f1f9120", size = 89690, upload-time = "2026-03-01T22:06:13.429Z" }, + { url = "https://files.pythonhosted.org/packages/50/93/e88f3c80971b42cfc83f50a51b9d165a1dbf154b97005f2994a79f212a07/yarl-1.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cde9a2ecd91668bcb7f077c4966d8ceddb60af01b52e6e3e2680e4cf00ad1a59", size = 89851, upload-time = "2026-03-01T22:06:15.53Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/07/61c9dd8ba8f86473263b4036f70fb594c09e99c0d9737a799dfd8bc85651/yarl-1.23.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5023346c4ee7992febc0068e7593de5fa2bf611848c08404b35ebbb76b1b0512", size = 95874, upload-time = "2026-03-01T22:06:17.553Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e9/f9ff8ceefba599eac6abddcfb0b3bee9b9e636e96dbf54342a8577252379/yarl-1.23.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1009abedb49ae95b136a8904a3f71b342f849ffeced2d3747bf29caeda218c4", size = 88710, upload-time = "2026-03-01T22:06:19.004Z" }, + { url = "https://files.pythonhosted.org/packages/eb/78/0231bfcc5d4c8eec220bc2f9ef82cb4566192ea867a7c5b4148f44f6cbcd/yarl-1.23.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a8d00f29b42f534cc8aa3931cfe773b13b23e561e10d2b26f27a8d309b0e82a1", size = 101033, upload-time = "2026-03-01T22:06:21.203Z" }, + { url = "https://files.pythonhosted.org/packages/cd/9b/30ea5239a61786f18fd25797151a17fbb3be176977187a48d541b5447dd4/yarl-1.23.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:95451e6ce06c3e104556d73b559f5da6c34a069b6b62946d3ad66afcd51642ea", size = 100817, upload-time = "2026-03-01T22:06:22.738Z" }, + { url = "https://files.pythonhosted.org/packages/62/e2/a4980481071791bc83bce2b7a1a1f7adcabfa366007518b4b845e92eeee3/yarl-1.23.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:531ef597132086b6cf96faa7c6c1dcd0361dd5f1694e5cc30375907b9b7d3ea9", size = 97482, upload-time = "2026-03-01T22:06:24.21Z" }, + { url = "https://files.pythonhosted.org/packages/e5/1e/304a00cf5f6100414c4b5a01fc7ff9ee724b62158a08df2f8170dfc72a2d/yarl-1.23.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:88f9fb0116fbfcefcab70f85cf4b74a2b6ce5d199c41345296f49d974ddb4123", size = 95949, upload-time = "2026-03-01T22:06:25.697Z" }, + { url = "https://files.pythonhosted.org/packages/68/03/093f4055ed4cae649ac53bca3d180bd37102e9e11d048588e9ab0c0108d0/yarl-1.23.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e7b0460976dc75cb87ad9cc1f9899a4b97751e7d4e77ab840fc9b6d377b8fd24", size = 95839, upload-time = "2026-03-01T22:06:27.309Z" }, + { url = "https://files.pythonhosted.org/packages/b9/28/4c75ebb108f322aa8f917ae10a8ffa4f07cae10a8a627b64e578617df6a0/yarl-1.23.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:115136c4a426f9da976187d238e84139ff6b51a20839aa6e3720cd1026d768de", size = 90696, upload-time = "2026-03-01T22:06:29.048Z" }, + { url = "https://files.pythonhosted.org/packages/23/9c/42c2e2dd91c1a570402f51bdf066bfdb1241c2240ba001967bad778e77b7/yarl-1.23.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:ead11956716a940c1abc816b7df3fa2b84d06eaed8832ca32f5c5e058c65506b", size = 100865, upload-time = "2026-03-01T22:06:30.525Z" }, + { url = "https://files.pythonhosted.org/packages/74/05/1bcd60a8a0a914d462c305137246b6f9d167628d73568505fce3f1cb2e65/yarl-1.23.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:fe8f8f5e70e6dbdfca9882cd9deaac058729bcf323cf7a58660901e55c9c94f6", size = 96234, upload-time = "2026-03-01T22:06:32.692Z" }, + { url = "https://files.pythonhosted.org/packages/90/b2/f52381aac396d6778ce516b7bc149c79e65bfc068b5de2857ab69eeea3b7/yarl-1.23.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = 
"sha256:a0e317df055958a0c1e79e5d2aa5a5eaa4a6d05a20d4b0c9c3f48918139c9fc6", size = 100295, upload-time = "2026-03-01T22:06:34.268Z" }, + { url = "https://files.pythonhosted.org/packages/e5/e8/638bae5bbf1113a659b2435d8895474598afe38b4a837103764f603aba56/yarl-1.23.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f0fd84de0c957b2d280143522c4f91a73aada1923caee763e24a2b3fda9f8a5", size = 97784, upload-time = "2026-03-01T22:06:35.864Z" }, + { url = "https://files.pythonhosted.org/packages/80/25/a3892b46182c586c202629fc2159aa13975d3741d52ebd7347fd501d48d5/yarl-1.23.0-cp313-cp313t-win32.whl", hash = "sha256:93a784271881035ab4406a172edb0faecb6e7d00f4b53dc2f55919d6c9688595", size = 88313, upload-time = "2026-03-01T22:06:37.39Z" }, + { url = "https://files.pythonhosted.org/packages/43/68/8c5b36aa5178900b37387937bc2c2fe0e9505537f713495472dcf6f6fccc/yarl-1.23.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dd00607bffbf30250fe108065f07453ec124dbf223420f57f5e749b04295e090", size = 94932, upload-time = "2026-03-01T22:06:39.579Z" }, + { url = "https://files.pythonhosted.org/packages/c6/cc/d79ba8292f51f81f4dc533a8ccfb9fc6992cabf0998ed3245de7589dc07c/yarl-1.23.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ac09d42f48f80c9ee1635b2fcaa819496a44502737660d3c0f2ade7526d29144", size = 84786, upload-time = "2026-03-01T22:06:41.988Z" }, + { url = "https://files.pythonhosted.org/packages/90/98/b85a038d65d1b92c3903ab89444f48d3cee490a883477b716d7a24b1a78c/yarl-1.23.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:21d1b7305a71a15b4794b5ff22e8eef96ff4a6d7f9657155e5aa419444b28912", size = 124455, upload-time = "2026-03-01T22:06:43.615Z" }, + { url = "https://files.pythonhosted.org/packages/39/54/bc2b45559f86543d163b6e294417a107bb87557609007c007ad889afec18/yarl-1.23.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:85610b4f27f69984932a7abbe52703688de3724d9f72bceb1cca667deff27474", size = 86752, upload-time = "2026-03-01T22:06:45.425Z" }, + { url = "https://files.pythonhosted.org/packages/24/f9/e8242b68362bffe6fb536c8db5076861466fc780f0f1b479fc4ffbebb128/yarl-1.23.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23f371bd662cf44a7630d4d113101eafc0cfa7518a2760d20760b26021454719", size = 86291, upload-time = "2026-03-01T22:06:46.974Z" }, + { url = "https://files.pythonhosted.org/packages/ea/d8/d1cb2378c81dd729e98c716582b1ccb08357e8488e4c24714658cc6630e8/yarl-1.23.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4a80f77dc1acaaa61f0934176fccca7096d9b1ff08c8ba9cddf5ae034a24319", size = 99026, upload-time = "2026-03-01T22:06:48.459Z" }, + { url = "https://files.pythonhosted.org/packages/0a/ff/7196790538f31debe3341283b5b0707e7feb947620fc5e8236ef28d44f72/yarl-1.23.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:bd654fad46d8d9e823afbb4f87c79160b5a374ed1ff5bde24e542e6ba8f41434", size = 92355, upload-time = "2026-03-01T22:06:50.306Z" }, + { url = "https://files.pythonhosted.org/packages/c1/56/25d58c3eddde825890a5fe6aa1866228377354a3c39262235234ab5f616b/yarl-1.23.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:682bae25f0a0dd23a056739f23a134db9f52a63e2afd6bfb37ddc76292bbd723", size = 106417, upload-time = "2026-03-01T22:06:52.1Z" }, + { url = "https://files.pythonhosted.org/packages/51/8a/882c0e7bc8277eb895b31bce0138f51a1ba551fc2e1ec6753ffc1e7c1377/yarl-1.23.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:a82836cab5f197a0514235aaf7ffccdc886ccdaa2324bc0aafdd4ae898103039", size = 106422, upload-time = "2026-03-01T22:06:54.424Z" }, + { url = "https://files.pythonhosted.org/packages/42/2b/fef67d616931055bf3d6764885990a3ac647d68734a2d6a9e1d13de437a2/yarl-1.23.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c57676bdedc94cd3bc37724cf6f8cd2779f02f6aba48de45feca073e714fe52", size = 101915, upload-time = "2026-03-01T22:06:55.895Z" }, + { url = "https://files.pythonhosted.org/packages/18/6a/530e16aebce27c5937920f3431c628a29a4b6b430fab3fd1c117b26ff3f6/yarl-1.23.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c7f8dc16c498ff06497c015642333219871effba93e4a2e8604a06264aca5c5c", size = 100690, upload-time = "2026-03-01T22:06:58.21Z" }, + { url = "https://files.pythonhosted.org/packages/88/08/93749219179a45e27b036e03260fda05190b911de8e18225c294ac95bbc9/yarl-1.23.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5ee586fb17ff8f90c91cf73c6108a434b02d69925f44f5f8e0d7f2f260607eae", size = 98750, upload-time = "2026-03-01T22:06:59.794Z" }, + { url = "https://files.pythonhosted.org/packages/d9/cf/ea424a004969f5d81a362110a6ac1496d79efdc6d50c2c4b2e3ea0fc2519/yarl-1.23.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:17235362f580149742739cc3828b80e24029d08cbb9c4bda0242c7b5bc610a8e", size = 94685, upload-time = "2026-03-01T22:07:01.375Z" }, + { url = "https://files.pythonhosted.org/packages/e2/b7/14341481fe568e2b0408bcf1484c652accafe06a0ade9387b5d3fd9df446/yarl-1.23.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:0793e2bd0cf14234983bbb371591e6bea9e876ddf6896cdcc93450996b0b5c85", size = 106009, upload-time = "2026-03-01T22:07:03.151Z" }, + { url = "https://files.pythonhosted.org/packages/0a/e6/5c744a9b54f4e8007ad35bce96fbc9218338e84812d36f3390cea616881a/yarl-1.23.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:3650dc2480f94f7116c364096bc84b1d602f44224ef7d5c7208425915c0475dd", size = 100033, upload-time = "2026-03-01T22:07:04.701Z" }, + { url = "https://files.pythonhosted.org/packages/0c/23/e3bfc188d0b400f025bc49d99793d02c9abe15752138dcc27e4eaf0c4a9e/yarl-1.23.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f40e782d49630ad384db66d4d8b73ff4f1b8955dc12e26b09a3e3af064b3b9d6", size = 106483, upload-time = "2026-03-01T22:07:06.231Z" }, + { url = "https://files.pythonhosted.org/packages/72/42/f0505f949a90b3f8b7a363d6cbdf398f6e6c58946d85c6d3a3bc70595b26/yarl-1.23.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94f8575fbdf81749008d980c17796097e645574a3b8c28ee313931068dad14fe", size = 102175, upload-time = "2026-03-01T22:07:08.4Z" }, + { url = "https://files.pythonhosted.org/packages/aa/65/b39290f1d892a9dd671d1c722014ca062a9c35d60885d57e5375db0404b5/yarl-1.23.0-cp314-cp314-win32.whl", hash = "sha256:c8aa34a5c864db1087d911a0b902d60d203ea3607d91f615acd3f3108ac32169", size = 83871, upload-time = "2026-03-01T22:07:09.968Z" }, + { url = "https://files.pythonhosted.org/packages/a9/5b/9b92f54c784c26e2a422e55a8d2607ab15b7ea3349e28359282f84f01d43/yarl-1.23.0-cp314-cp314-win_amd64.whl", hash = "sha256:63e92247f383c85ab00dd0091e8c3fa331a96e865459f5ee80353c70a4a42d70", size = 89093, upload-time = "2026-03-01T22:07:11.501Z" }, + { url = "https://files.pythonhosted.org/packages/e0/7d/8a84dc9381fd4412d5e7ff04926f9865f6372b4c2fd91e10092e65d29eb8/yarl-1.23.0-cp314-cp314-win_arm64.whl", hash = "sha256:70efd20be968c76ece7baa8dafe04c5be06abc57f754d6f36f3741f7aa7a208e", size = 83384, upload-time = 
"2026-03-01T22:07:13.069Z" }, + { url = "https://files.pythonhosted.org/packages/dd/8d/d2fad34b1c08aa161b74394183daa7d800141aaaee207317e82c790b418d/yarl-1.23.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:9a18d6f9359e45722c064c97464ec883eb0e0366d33eda61cb19a244bf222679", size = 131019, upload-time = "2026-03-01T22:07:14.903Z" }, + { url = "https://files.pythonhosted.org/packages/19/ff/33009a39d3ccf4b94d7d7880dfe17fb5816c5a4fe0096d9b56abceea9ac7/yarl-1.23.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2803ed8b21ca47a43da80a6fd1ed3019d30061f7061daa35ac54f63933409412", size = 89894, upload-time = "2026-03-01T22:07:17.372Z" }, + { url = "https://files.pythonhosted.org/packages/0c/f1/dab7ac5e7306fb79c0190766a3c00b4cb8d09a1f390ded68c85a5934faf5/yarl-1.23.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:394906945aa8b19fc14a61cf69743a868bb8c465efe85eee687109cc540b98f4", size = 89979, upload-time = "2026-03-01T22:07:19.361Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b1/08e95f3caee1fad6e65017b9f26c1d79877b502622d60e517de01e72f95d/yarl-1.23.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:71d006bee8397a4a89f469b8deb22469fe7508132d3c17fa6ed871e79832691c", size = 95943, upload-time = "2026-03-01T22:07:21.266Z" }, + { url = "https://files.pythonhosted.org/packages/c0/cc/6409f9018864a6aa186c61175b977131f373f1988e198e031236916e87e4/yarl-1.23.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:62694e275c93d54f7ccedcfef57d42761b2aad5234b6be1f3e3026cae4001cd4", size = 88786, upload-time = "2026-03-01T22:07:23.129Z" }, + { url = "https://files.pythonhosted.org/packages/76/40/cc22d1d7714b717fde2006fad2ced5efe5580606cb059ae42117542122f3/yarl-1.23.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31de1613658308efdb21ada98cbc86a97c181aa050ba22a808120bb5be3ab94", size = 101307, upload-time = "2026-03-01T22:07:24.689Z" }, + { url = "https://files.pythonhosted.org/packages/8f/0d/476c38e85ddb4c6ec6b20b815bdd779aa386a013f3d8b85516feee55c8dc/yarl-1.23.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fb1e8b8d66c278b21d13b0a7ca22c41dd757a7c209c6b12c313e445c31dd3b28", size = 100904, upload-time = "2026-03-01T22:07:26.287Z" }, + { url = "https://files.pythonhosted.org/packages/72/32/0abe4a76d59adf2081dcb0397168553ece4616ada1c54d1c49d8936c74f8/yarl-1.23.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50f9d8d531dfb767c565f348f33dd5139a6c43f5cbdf3f67da40d54241df93f6", size = 97728, upload-time = "2026-03-01T22:07:27.906Z" }, + { url = "https://files.pythonhosted.org/packages/b7/35/7b30f4810fba112f60f5a43237545867504e15b1c7647a785fbaf588fac2/yarl-1.23.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:575aa4405a656e61a540f4a80eaa5260f2a38fff7bfdc4b5f611840d76e9e277", size = 95964, upload-time = "2026-03-01T22:07:30.198Z" }, + { url = "https://files.pythonhosted.org/packages/2d/86/ed7a73ab85ef00e8bb70b0cb5421d8a2a625b81a333941a469a6f4022828/yarl-1.23.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:041b1a4cefacf65840b4e295c6985f334ba83c30607441ae3cf206a0eed1a2e4", size = 95882, upload-time = "2026-03-01T22:07:32.132Z" }, + { url = "https://files.pythonhosted.org/packages/19/90/d56967f61a29d8498efb7afb651e0b2b422a1e9b47b0ab5f4e40a19b699b/yarl-1.23.0-cp314-cp314t-musllinux_1_2_armv7l.whl", 
hash = "sha256:d38c1e8231722c4ce40d7593f28d92b5fc72f3e9774fe73d7e800ec32299f63a", size = 90797, upload-time = "2026-03-01T22:07:34.404Z" }, + { url = "https://files.pythonhosted.org/packages/72/00/8b8f76909259f56647adb1011d7ed8b321bcf97e464515c65016a47ecdf0/yarl-1.23.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:d53834e23c015ee83a99377db6e5e37d8484f333edb03bd15b4bc312cc7254fb", size = 101023, upload-time = "2026-03-01T22:07:35.953Z" }, + { url = "https://files.pythonhosted.org/packages/ac/e2/cab11b126fb7d440281b7df8e9ddbe4851e70a4dde47a202b6642586b8d9/yarl-1.23.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:2e27c8841126e017dd2a054a95771569e6070b9ee1b133366d8b31beb5018a41", size = 96227, upload-time = "2026-03-01T22:07:37.594Z" }, + { url = "https://files.pythonhosted.org/packages/c2/9b/2c893e16bfc50e6b2edf76c1a9eb6cb0c744346197e74c65e99ad8d634d0/yarl-1.23.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:76855800ac56f878847a09ce6dba727c93ca2d89c9e9d63002d26b916810b0a2", size = 100302, upload-time = "2026-03-01T22:07:39.334Z" }, + { url = "https://files.pythonhosted.org/packages/28/ec/5498c4e3a6d5f1003beb23405671c2eb9cdbf3067d1c80f15eeafe301010/yarl-1.23.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e09fd068c2e169a7070d83d3bde728a4d48de0549f975290be3c108c02e499b4", size = 98202, upload-time = "2026-03-01T22:07:41.717Z" }, + { url = "https://files.pythonhosted.org/packages/fe/c3/cd737e2d45e70717907f83e146f6949f20cc23cd4bf7b2688727763aa458/yarl-1.23.0-cp314-cp314t-win32.whl", hash = "sha256:73309162a6a571d4cbd3b6a1dcc703c7311843ae0d1578df6f09be4e98df38d4", size = 90558, upload-time = "2026-03-01T22:07:43.433Z" }, + { url = "https://files.pythonhosted.org/packages/e1/19/3774d162f6732d1cfb0b47b4140a942a35ca82bb19b6db1f80e9e7bdc8f8/yarl-1.23.0-cp314-cp314t-win_amd64.whl", hash = "sha256:4503053d296bc6e4cbd1fad61cf3b6e33b939886c4f249ba7c78b602214fabe2", size = 97610, upload-time = "2026-03-01T22:07:45.773Z" }, + { url = "https://files.pythonhosted.org/packages/51/47/3fa2286c3cb162c71cdb34c4224d5745a1ceceb391b2bd9b19b668a8d724/yarl-1.23.0-cp314-cp314t-win_arm64.whl", hash = "sha256:44bb7bef4ea409384e3f8bc36c063d77ea1b8d4a5b2706956c0d6695f07dcc25", size = 86041, upload-time = "2026-03-01T22:07:49.026Z" }, + { url = "https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" }, +] + +[[package]] +name = "zipp" +version = "3.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, +]