Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

.docker +0 -17
data/hadith.json +0 -62
data/quran.json +0 -50
enrich_dataset.py +0 -210
fetch_data.py +0 -294

.docker DELETED Viewed

@@ -1,17 +0,0 @@
-# Use an official Python runtime as a parent image
-FROM python:3
-# Set the working directory in the container to /app
-WORKDIR /app
-# Copy the current directory contents into the container at /app
-COPY . /app
-# Install any needed packages specified in requirements.txt
-RUN pip install --trusted-host pypi.python.org -r requirements.txt
-# Make port 80 available to the world outside this container
-EXPOSE 80
-# Run main.py when the container launches
-CMD ["python", "main.py"]

data/hadith.json DELETED Viewed

@@ -1,62 +0,0 @@
-[
-  {
-    "id": "bukhari_1",
-    "arabic": "إِنَّمَا الْأَعْمَالُ بِالنِّيَّاتِ...",
-    "english": "Actions are judged by intentions...",
-    "reference": "Sahih al-Bukhari 1"
-  },
-  {
-    "id": "bukhari_8",
-    "arabic": "بُنِيَ الإِسْلامُ عَلَى خَمْسٍ...",
-    "english": "Islam is built upon five [pillars]...",
-    "reference": "Sahih al-Bukhari 8"
-  },
-  {
-    "id": "muslim_1",
-    "arabic": "الإِيمَانُ أَنْ تُؤْمِنَ بِاللَّهِ وَمَلائِكَتِهِ...",
-    "english": "Faith is to believe in Allah, His angels...",
-    "reference": "Sahih Muslim 1"
-  },
-  {
-    "id": "muslim_1907",
-    "arabic": "مَنْ صَامَ رَمَضَانَ إِيمَانًا وَاحْتِسَابًا...",
-    "english": "Whoever fasts Ramadan with faith and seeking reward...",
-    "reference": "Sahih Muslim 1907"
-  },
-  {
-    "id": "ahmad_3784",
-    "arabic": "بَدَأَ الإِسْلاَمُ غَرِيبًا وَسَيَعُودُ كَمَا بَدَأَ غَرِيبًا فَطُوبَى لِلْغُرَبَاءِ",
-    "english": "Islam began as something strange and will revert to being strange as it began, so give glad tidings to the strangers.",
-    "reference": "Musnad Ahmad 3784"
-  },
-  {
-    "id": "ahmad_2107",
-    "arabic": "أَحَبُّ الدِّينِ إِلَى اللَّهِ الْحَنِيفِيَّةُ السَّمْحَةُ",
-    "english": "The most beloved of religions to Allah is the easy monotheism (Hanifiyyah).",
-    "reference": "Musnad Ahmad 2107"
-  },
-  {
-    "id": "ahmad_8030",
-    "arabic": "الْمُؤْمِنُ مِرْآةُ أَخِيهِ",
-    "english": "The believer is a mirror for his brother.",
-    "reference": "Musnad Ahmad 8030"
-  },
-  {
-    "id": "bukhari_6018",
-    "arabic": "مَنْ كَانَ يُؤْمِنُ بِاللَّهِ وَالْيَوْمِ الآخِرِ فَلْيُحْسِنْ إِلَى جَارِهِ...",
-    "english": "Whoever believes in Allah and the Last Day should be kind to his neighbor...",
-    "reference": "Sahih al-Bukhari 6018"
-  },
-  {
-    "id": "tirmidhi_2003",
-    "arabic": "أَكْمَلُ الْمُؤْمِنِينَ إِيمَانًا أَحْسَنُهُمْ خُلُقًا...",
-    "english": "The most complete of believers in faith are those with the best character...",
-    "reference": "Jami' at-Tirmidhi 2003"
-  },
-  {
-    "id": "ibnmajah_224",
-    "arabic": "طَلَبُ الْعِلْمِ فَرِيضَةٌ عَلَى كُلِّ مُسْلِمٍ...",
-    "english": "Seeking knowledge is an obligation upon every Muslim...",
-    "reference": "Sunan Ibn Majah 224"
-  }
-]

data/quran.json DELETED Viewed

@@ -1,50 +0,0 @@
-[
-  {
-    "id": "2:153",
-    "arabic": "يَا أَيُّهَا الَّذِينَ آمَنُوا اسْتَعِينُوا بِالصَّبْرِ وَالصَّلَاةِ...",
-    "english": "O you who have believed, seek help through patience and prayer...",
-    "source": "Surah Al-Baqarah 2:153"
-  },
-  {
-    "id": "1:1-7",
-    "arabic": "بِسْمِ اللَّهِ الرَّحْمَنِ الرَّحِيمِ... اهْدِنَا الصِّرَاطَ الْمُسْتَقِيمَ",
-    "english": "In the name of Allah, the Entirely Merciful, the Especially Merciful... Guide us to the straight path",
-    "source": "Surah Al-Fatihah 1:1-7"
-  },
-  {
-    "id": "2:255",
-    "arabic": "اللَّهُ لَا إِلَهَ إِلَّا هُوَ الْحَيُّ الْقَيُّومُ...",
-    "english": "Allah - there is no deity except Him, the Ever-Living, the Sustainer of [all] existence...",
-    "source": "Surah Al-Baqarah 2:255 (Ayat al-Kursi)"
-  },
-  {
-    "id": "112:1-4",
-    "arabic": "قُلْ هُوَ اللَّهُ أَحَدٌ... وَلَمْ يَكُن لَّهُ كُفُوًا أَحَدٌ",
-    "english": "Say, He is Allah, [who is] One... And there is none co-equal to Him.",
-    "source": "Surah Al-Ikhlas 112:1-4"
-  },
-  {
-    "id": "2:286",
-    "arabic": "لَا يُكَلِّفُ اللَّهُ نَفْسًا إِلَّا وُسْعَهَا...",
-    "english": "Allah does not charge a soul except [with that within] its capacity...",
-    "source": "Surah Al-Baqarah 2:286"
-  },
-  {
-    "id": "3:103",
-    "arabic": "وَاعْتَصِمُوا بِحَبْلِ اللَّهِ جَمِيعًا وَلَا تَفَرَّقُوا...",
-    "english": "And hold firmly to the rope of Allah all together and do not become divided...",
-    "source": "Surah Al-Imran 3:103"
-  },
-  {
-    "id": "5:8",
-    "arabic": "يَا أَيُّهَا الَّذِينَ آمَنُوا كُونُوا قَوَّامِينَ لِلَّهِ شُهَدَاءَ بِالْقِسْطِ...",
-    "english": "O you who have believed, be persistently standing firm for Allah, witnesses in justice...",
-    "source": "Surah Al-Ma'idah 5:8"
-  },
-  {
-    "id": "94:5",
-    "arabic": "فَإِنَّ مَعَ الْعُسْرِ يُسْرًا",
-    "english": "For indeed, with hardship [will be] ease.",
-    "source": "Surah Ash-Sharh 94:5"
-  }
-]

enrich_dataset.py DELETED Viewed

@@ -1,210 +0,0 @@
-#!/usr/bin/env python3
-"""
-Script to enrich the QModel dataset with hadith collections from GitHub.
-Fetches Musnad Ahmad and other major hadith collections from:
-https://github.com/AhmedBaset/hadith-json/tree/main/db/by_book/the_9_books
-"""
-import json
-import requests
-from typing import Dict, List
-from collections import defaultdict
-# The 9 canonical hadith books
-HADITH_BOOKS = {
-    "ahmed.json": {
-        "collection": "Musnad Ahmad",
-        "id_prefix": "ahmad",
-        "grade": "Hasan/Sahih",
-        "author": "Imam Ahmad ibn Hanbal"
-    },
-    "bukhari.json": {
-        "collection": "Sahih al-Bukhari",
-        "id_prefix": "bukhari",
-        "grade": "Sahih",
-        "author": "Muhammad al-Bukhari"
-    },
-    "muslim.json": {
-        "collection": "Sahih Muslim",
-        "id_prefix": "muslim",
-        "grade": "Sahih",
-        "author": "Muslim ibn al-Hajjaj"
-    },
-    "abudawud.json": {
-        "collection": "Sunan Abu Dawood",
-        "id_prefix": "abudawud",
-        "grade": "Hasan",
-        "author": "Abu Dawood Sulaiman"
-    },
-    "tirmidhi.json": {
-        "collection": "Jami' at-Tirmidhi",
-        "id_prefix": "tirmidhi",
-        "grade": "Hasan",
-        "author": "Al-Tirmidhi"
-    },
-    "ibnmajah.json": {
-        "collection": "Sunan Ibn Majah",
-        "id_prefix": "ibnmajah",
-        "grade": "Hasan",
-        "author": "Ibn Majah al-Qazwini"
-    },
-    "nasai.json": {
-        "collection": "Sunan an-Nasai",
-        "id_prefix": "nasai",
-        "grade": "Sahih",
-        "author": "Ahmad al-Nasai"
-    },
-    "malik.json": {
-        "collection": "Muwatta Malik",
-        "id_prefix": "malik",
-        "grade": "Sahih",
-        "author": "Malik ibn Anas"
-    },
-    "darimi.json": {
-        "collection": "Sunan al-Darimi",
-        "id_prefix": "darimi",
-        "grade": "Hasan",
-        "author": "Al-Darimi"
-    }
-}
-BASE_URL = "https://raw.githubusercontent.com/AhmedBaset/hadith-json/main/db/by_book/the_9_books"
-def fetch_hadith_book(filename: str) -> Dict:
-    """Fetch a hadith book JSON from GitHub."""
-    url = f"{BASE_URL}/{filename}"
-    print(f"Fetching {filename}...")
-    response = requests.get(url, timeout=30)
-    response.raise_for_status()
-    return response.json()
-def transform_hadith(hadith: Dict, book_config: Dict, book_data: Dict) -> Dict:
-    """Transform hadith from GitHub format to our metadata format."""
-    # Find the chapter's Arabic name, if the hadith references a known chapter
-    chapter_name = ""
-    if "chapterId" in hadith:
-        for chapter in book_data.get("chapters", []):
-            if chapter.get("id") == hadith.get("chapterId"):
-                chapter_name = chapter.get("arabic", "")
-                break
-    # Build the reference string
-    hadith_num = hadith.get("idInBook", hadith.get("id", ""))
-    reference = f"{book_config['collection']} {hadith_num}"
-    # Combine narrator and text for English
-    english_parts = []
-    if isinstance(hadith.get("english"), dict):
-        if hadith["english"].get("narrator"):
-            english_parts.append(hadith["english"]["narrator"])
-        if hadith["english"].get("text"):
-            english_parts.append(hadith["english"]["text"])
-        english = " ".join(english_parts)
-    else:
-        english = str(hadith.get("english", ""))
-    return {
-        "id": f"{book_config['id_prefix']}_{hadith_num}",
-        "arabic": hadith.get("arabic", ""),
-        "english": english,
-        "reference": reference,
-        "hadith_number": hadith_num,
-        "collection": book_config["collection"],
-        "chapter": chapter_name,
-        "grade": "",  # Will be inferred by main.py's infer_hadith_grade()
-        "type": "hadith",
-        "author": book_config["author"]
-    }
-def load_existing_metadata(filepath: str) -> List[Dict]:
-    """Load existing metadata.json file."""
-    print(f"Loading existing metadata from {filepath}...")
-    with open(filepath, 'r', encoding='utf-8') as f:
-        return json.load(f)
-def save_enriched_metadata(filepath: str, data: List[Dict], stats: Dict) -> None:
-    """Save enriched metadata to file."""
-    print(f"Saving enriched metadata to {filepath}...")
-    with open(filepath, 'w', encoding='utf-8') as f:
-        json.dump(data, f, ensure_ascii=False, indent=2)
-    print("\n" + "="*60)
-    print("Dataset Enrichment Summary")
-    print("="*60)
-    print(f"Total documents: {len(data)}")
-    print(f"\nBreakdown by collection:")
-    for collection, count in sorted(stats.items()):
-        print(f"  {collection}: {count}")
-    print("="*60)
-def main():
-    """Main enrichment process."""
-    # Load existing metadata
-    metadata_path = "/Users/elgendy/Projects/QModel/metadata.json"
-    existing_data = load_existing_metadata(metadata_path)
-    # Track which existing hadiths we have
-    existing_ids = {item["id"] for item in existing_data if item.get("type") == "hadith"}
-    print(f"Existing hadith entries: {len(existing_ids)}")
-    # New hadiths to add
-    new_hadiths = []
-    stats = defaultdict(int)
-    # Count existing entries: Quran verses in one bucket, hadiths per collection
-    for item in existing_data:
-        if item.get("type") == "quran":
-            stats["Quran"] += 1
-        elif item.get("type") == "hadith":
-            collection = item.get("collection", "Unknown")
-            stats[collection] += 1
-    # Fetch and process each hadith book
-    for filename, book_config in HADITH_BOOKS.items():
-        try:
-            book_data = fetch_hadith_book(filename)
-            hadiths = book_data.get("hadiths", [])
-            skipped = 0
-            added = 0
-            for hadith in hadiths:
-                # Transform to our format
-                transformed = transform_hadith(hadith, book_config, book_data)
-                # Check if we already have this hadith
-                if transformed["id"] in existing_ids:
-                    skipped += 1
-                    continue
-                new_hadiths.append(transformed)
-                existing_ids.add(transformed["id"])
-                added += 1
-            collection_name = book_config["collection"]
-            stats[collection_name] += added
-            print(f"  ✓ {filename}: {added} new hadiths added, {skipped} already exist")
-        except Exception as e:
-            print(f"  ✗ Error fetching {filename}: {e}")
-    # Merge with existing data
-    enriched_data = existing_data + new_hadiths
-    print(f"\nTotal new hadiths added: {len(new_hadiths)}")
-    print(f"Total documents after enrichment: {len(enriched_data)}")
-    # Save enriched metadata
-    save_enriched_metadata(metadata_path, enriched_data, stats)
-if __name__ == "__main__":
-    main()

fetch_data.py DELETED Viewed

@@ -1,294 +0,0 @@
-"""
-fetch_data.py — QModel Full Data Fetcher
-=========================================
-Fetches the COMPLETE Quran (6,236 verses, all 114 surahs) from risan/quran-json
-via jsDelivr CDN, using the per-chapter endpoint which contains both Arabic text
-AND English translation (Saheeh International) in a single request per surah.
-Also fetches major Hadith collections from fawazahmed0/hadith-api.
-Output files are drop-in replacements for quran.json / hadith.json and are
-fully compatible with build_index.py and main.py.
-Schema produced:
-  quran.json  → [{ "id": "2:1", "arabic": "...", "english": "...",
-                   "source": "Surah Al-Baqarah 2:1",
-                   "surah_number": 2, "surah_name_en": "Al-Baqarah",
-                   "surah_name_ar": "البقرة", "verse_number": 1,
-                   "transliteration": "..." }, ...]
-  hadith.json → [{ "id": "bukhari_1", "arabic": "...", "english": "...",
-                   "reference": "Sahih al-Bukhari 1",
-                   "hadith_number": 1, "collection": "Sahih al-Bukhari",
-                   "grade": "Sahih" }, ...]
-Usage:
-  pip install requests
-  python fetch_data.py                          # full download
-  python fetch_data.py --out-dir ./data         # custom output dir
-  python fetch_data.py --hadith-limit 500       # quick test run
-  python fetch_data.py --quran-only             # skip hadith
-  python fetch_data.py --hadith-only            # skip quran
-"""
-from __future__ import annotations
-import argparse
-import json
-import sys
-import time
-from pathlib import Path
-from typing import Optional
-try:
-    import requests
-except ImportError:
-    sys.exit("❌  Install requests first:  pip install requests")
-# ── CDN roots ─────────────────────────────────────────────────────────────────
-# risan/quran-json: per-chapter endpoint has BOTH arabic + english translation
-# Format: { "id": 1, "name": "Al-Fatihah", "transliteration": "...",
-#           "type": "meccan", "total_verses": 7,
-#           "verses": [ { "id": 1, "text": "<arabic>",
-#                         "translation": "<saheeh-international>",
-#                         "transliteration": "..." }, ... ] }
-QURAN_CHAPTER_URL   = "https://cdn.jsdelivr.net/npm/quran-json@3.1.2/dist/chapters/en/{n}.json"
-# fawazahmed0/hadith-api: full-book JSON per edition
-# Format: { "metadata": {...},
-#           "hadiths": [ { "hadithnumber": 1, "text": "...",
-#                          "grades": [{"grade": "Sahih", ...}] }, ... ] }
-HADITH_CDN = "https://cdn.jsdelivr.net/gh/fawazahmed0/hadith-api@1/editions"
-# ── Surah metadata ─────────────────────────────────────────────────────────────
-SURAH_AR = {
-    1:"الفاتحة",2:"البقرة",3:"آل عمران",4:"النساء",5:"المائدة",
-    6:"الأنعام",7:"الأعراف",8:"الأنفال",9:"التوبة",10:"يونس",
-    11:"هود",12:"يوسف",13:"الرعد",14:"إبراهيم",15:"الحجر",
-    16:"النحل",17:"الإسراء",18:"الكهف",19:"مريم",20:"طه",
-    21:"الأنبياء",22:"الحج",23:"المؤمنون",24:"النور",25:"الفرقان",
-    26:"الشعراء",27:"النمل",28:"القصص",29:"العنكبوت",30:"الروم",
-    31:"لقمان",32:"السجدة",33:"الأحزاب",34:"سبأ",35:"فاطر",
-    36:"يس",37:"الصافات",38:"ص",39:"الزمر",40:"غافر",
-    41:"فصلت",42:"الشورى",43:"الزخرف",44:"الدخان",45:"الجاثية",
-    46:"الأحقاف",47:"محمد",48:"الفتح",49:"الحجرات",50:"ق",
-    51:"الذاريات",52:"الطور",53:"النجم",54:"القمر",55:"الرحمن",
-    56:"الواقعة",57:"الحديد",58:"المجادلة",59:"الحشر",60:"الممتحنة",
-    61:"الصف",62:"الجمعة",63:"المنافقون",64:"التغابن",65:"الطلاق",
-    66:"التحريم",67:"الملك",68:"القلم",69:"الحاقة",70:"المعارج",
-    71:"نوح",72:"الجن",73:"المزمل",74:"المدثر",75:"القيامة",
-    76:"الإنسان",77:"المرسلات",78:"النبأ",79:"النازعات",80:"عبس",
-    81:"التكوير",82:"الانفطار",83:"المطففين",84:"الانشقاق",85:"البروج",
-    86:"الطارق",87:"الأعلى",88:"الغاشية",89:"الفجر",90:"البلد",
-    91:"الشمس",92:"الليل",93:"الضحى",94:"الشرح",95:"التين",
-    96:"العلق",97:"القدر",98:"البينة",99:"الزلزلة",100:"العاديات",
-    101:"القارعة",102:"التكاثر",103:"العصر",104:"الهمزة",105:"الفيل",
-    106:"قريش",107:"الماعون",108:"الكوثر",109:"الكافرون",110:"النصر",
-    111:"المسد",112:"الإخلاص",113:"الفلق",114:"الناس",
-}
-# ── Hadith collections ─────────────────────────────────────────────────────────
-# (arabic_edition, english_edition, human_label, id_prefix)
-HADITH_EDITIONS = [
-    ("ara-bukhari",  "eng-bukhari",  "Sahih al-Bukhari",  "bukhari"),
-    ("ara-muslim",   "eng-muslim",   "Sahih Muslim",       "muslim"),
-    ("ara-abudawud", "eng-abudawud", "Sunan Abu Dawud",    "abudawud"),
-    ("ara-tirmidhi", "eng-tirmidhi", "Jami' at-Tirmidhi",  "tirmidhi"),
-    ("ara-ibnmajah", "eng-ibnmajah", "Sunan Ibn Majah",    "ibnmajah"),
-    ("ara-nasai",    "eng-nasai",    "Sunan an-Nasa'i",    "nasai"),
-    ("ara-malik",    "eng-malik",    "Muwatta Malik",      "malik"),
-]
-# ── HTTP helper ────────────────────────────────────────────────────────────────
-def get_json(url: str, retries: int = 4, backoff: float = 2.0) -> Optional[dict | list]:
-    for attempt in range(1, retries + 1):
-        try:
-            r = requests.get(url, timeout=60)
-            r.raise_for_status()
-            return r.json()
-        except Exception as exc:
-            print(f"    ⚠️  Attempt {attempt}/{retries}: {exc}")
-            if attempt < retries:
-                time.sleep(backoff * attempt)
-    return None
-def save(path: Path, data: list) -> None:
-    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
-    kb = path.stat().st_size / 1024
-    print(f"  💾  {path}  —  {len(data):,} records  ({kb:,.0f} KB)")
-# ── Quran ──────────────────────────────────────────────────────────────────────
-def fetch_quran() -> list:
-    """
-    Uses the risan/quran-json per-chapter English endpoint:
-      cdn.jsdelivr.net/npm/quran-json@3.1.2/dist/chapters/en/{N}.json
-    Each file contains:
-      {
-        "id": 1,
-        "name": "Al-Fatihah",
-        "transliteration": "Al-Fatihah",
-        "type": "meccan",
-        "total_verses": 7,
-        "verses": [
-          {
-            "id": 1,
-            "text": "<uthmani arabic>",
-            "translation": "<saheeh international english>",
-            "transliteration": "<latin>"
-          }, ...
-        ]
-      }
-    This single endpoint gives us Arabic + English + transliteration per verse —
-    no need to join two separate files.
-    """
-    print("\n📖  Fetching full Quran (114 surahs, 6,236 verses) …")
-    records = []
-    failed  = []
-    for n in range(1, 115):
-        url  = QURAN_CHAPTER_URL.format(n=n)
-        data = get_json(url)
-        if not data:
-            print(f"  ❌  Surah {n} — failed, skipping")
-            failed.append(n)
-            continue
-        surah_name_en = data.get("name") or data.get("transliteration") or f"Surah {n}"
-        surah_name_ar = SURAH_AR.get(n, "")
-        verses        = data.get("verses", [])
-        for v in verses:
-            vid     = int(v.get("id", 0))
-            arabic  = (v.get("text") or "").strip()
-            english = (v.get("translation") or "").strip()
-            translit= (v.get("transliteration") or "").strip()
-            if not vid or not arabic:
-                continue
-            records.append({
-                # ── core fields (required by main.py / build_index.py) ──
-                "id":            f"{n}:{vid}",
-                "arabic":        arabic,
-                "english":       english,
-                "source":        f"Surah {surah_name_en} {n}:{vid}",
-                # ── enriched metadata ──
-                "surah_number":  n,
-                "surah_name_en": surah_name_en,
-                "surah_name_ar": surah_name_ar,
-                "verse_number":  vid,
-                "transliteration": translit,
-            })
-        # Brief progress every 10 surahs
-        if n % 10 == 0 or n == 114:
-            print(f"  ✔  Surahs 1–{n} fetched  ({len(records):,} verses so far)")
-        time.sleep(0.15)   # be polite to the CDN
-    if failed:
-        print(f"\n  ⚠️  {len(failed)} surahs failed: {failed}")
-    print(f"\n  ✅  Quran complete — {len(records):,} verses")
-    return records
-# ── Hadith ─────────────────────────────────────────────────────────────────────
-def fetch_hadith_edition(
-    ar_edition: str, en_edition: str,
-    label: str, prefix: str,
-    limit: Optional[int],
-) -> list:
-    ar_data = get_json(f"{HADITH_CDN}/{ar_edition}.json")
-    en_data = get_json(f"{HADITH_CDN}/{en_edition}.json")
-    if not ar_data:
-        print(f"  ❌  {label} Arabic — unavailable, skipping")
-        return []
-    en_lookup = {
-        int(h["hadithnumber"]): (h.get("text") or "")
-        for h in (en_data or {}).get("hadiths", [])
-        if "hadithnumber" in h
-    }
-    records = []
-    for h in ar_data.get("hadiths", []):
-        num    = h.get("hadithnumber")
-        arabic = (h.get("text") or "").strip()
-        if not num or not arabic:
-            continue
-        num     = int(num)
-        english = en_lookup.get(num, "").strip()
-        grades  = h.get("grades") or []
-        grade   = grades[0].get("grade", "") if grades else ""
-        records.append({
-            # ── core fields ──
-            "id":           f"{prefix}_{num}",
-            "arabic":       arabic,
-            "english":      english,
-            "reference":    f"{label} {num}",
-            # ── enriched metadata ──
-            "hadith_number": num,
-            "collection":    label,
-            "grade":         grade,
-        })
-        if limit and len(records) >= limit:
-            break
-    print(f"  ✅  {label}: {len(records):,} hadiths")
-    return records
-def fetch_all_hadiths(limit_per_collection: Optional[int] = None) -> list:
-    print("\n📚  Fetching Hadith collections …")
-    all_hadiths: list = []
-    for ar_ed, en_ed, label, prefix in HADITH_EDITIONS:
-        print(f"\n  → {label}")
-        records = fetch_hadith_edition(ar_ed, en_ed, label, prefix, limit_per_collection)
-        all_hadiths.extend(records)
-        time.sleep(0.5)
-    print(f"\n  📊  Total hadiths: {len(all_hadiths):,}")
-    return all_hadiths
-# ── CLI ────────────────────────────────────────────────────────────────────────
-def main() -> None:
-    parser = argparse.ArgumentParser(
-        description="Fetch complete Quran + Hadith data for QModel"
-    )
-    parser.add_argument(
-        "--out-dir", default="./data",
-        help="Output directory (default: ./data)"
-    )
-    parser.add_argument(
-        "--hadith-limit", type=int, default=None,
-        help="Max hadiths per collection (omit = all, ~50k total)"
-    )
-    parser.add_argument("--quran-only",  action="store_true", help="Skip hadith")
-    parser.add_argument("--hadith-only", action="store_true", help="Skip quran")
-    args = parser.parse_args()
-    out = Path(args.out_dir)
-    out.mkdir(parents=True, exist_ok=True)
-    if not args.hadith_only:
-        quran = fetch_quran()
-        save(out / "quran.json", quran)
-    if not args.quran_only:
-        hadiths = fetch_all_hadiths(limit_per_collection=args.hadith_limit)
-        save(out / "hadith.json", hadiths)
-    print("\n🎉  Done!  Output:", out.resolve())
-    print("    Next:  python build_index.py")
-if __name__ == "__main__":
-    main()