Files changed:
- .gitignore +11 -0
- README.md +8 -12
- app.py +5 -0
- quick_deploy_agent.py +322 -0
- requirements.txt +6 -0
    	
.gitignore ADDED

@@ -0,0 +1,11 @@
+.venv/
+__pycache__/
+*.ipynb_checkpoints/
+.cache/
+data/
+models/
+outputs/
+node_modules/
+*.pt
+*.bin
+*.ckpt
    	
README.md CHANGED

@@ -1,12 +1,8 @@
----
-title: OpenFoodFactsAgent
-emoji:
-
-
-
-
-
-pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+---
+title: OpenFoodFactsAgent (COICOP)
+emoji: 🧀
+sdk: gradio
+app_file: app.py
+python_version: "3.10"
+pinned: false
+---
    	
app.py ADDED

@@ -0,0 +1,5 @@
+from smolagents import GradioUI
+from quick_deploy_agent import build_agent
+
+agent = build_agent()
+demo = GradioUI(agent).create_app()
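On the Space itself the Gradio SDK runtime imports app.py and serves the demo object, so no explicit launch() call is needed there. For a quick local smoke test of the same entry point, a minimal sketch along these lines should work (run_local.py is a hypothetical helper, not part of this commit; it assumes, as app.py does, that GradioUI(agent).create_app() returns a Gradio Blocks app):

    # run_local.py - hypothetical local runner, not part of this commit
    from smolagents import GradioUI
    from quick_deploy_agent import build_agent

    if __name__ == "__main__":
        # same construction as app.py, plus an explicit launch for a local server
        demo = GradioUI(build_agent()).create_app()
        demo.launch(server_name="0.0.0.0", server_port=7860)  # 7860 is the port Spaces uses by default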
    	
quick_deploy_agent.py ADDED

@@ -0,0 +1,322 @@
+# quickstart_agent.py
+from __future__ import annotations
+import json, re, unicodedata, ast
+from typing import List, Dict, Any, Optional
+import requests
+from smolagents import Tool, CodeAgent, InferenceClientModel
+from sentence_transformers import SentenceTransformer, util
+
+# ---- Mini COICOP reference list (demo) ----
+COICOP_ITEMS = [
+    {"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
+    {"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
+    {"code": "01.1.4.5.3", "label": "Fromage à pâte pressée"},
+    {"code": "01.1.4.5.4", "label": "Fromage de chèvre"},
+    {"code": "01.1.4.5.5", "label": "Fromages fondus, râpés, portions"},
+    {"code": "01.1.1.4", "label": "Pain"},
+    {"code": "01.1.1.1", "label": "Riz"},
+    {"code": "01.1.1.3", "label": "Pâtes, couscous et produits similaires"},
+]
+
+def normalize_txt(s: str) -> str:
+    if not s: return ""
+    s = s.upper()
+    s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
+    s = re.sub(r"[^A-Z0-9% ]+", " ", s)
+    s = re.sub(r"\s+", " ", s).strip()
+    return s
+
+def ean_check_digit_ok(ean: str) -> bool:
+    digits = re.sub(r"\D", "", ean)
+    if len(digits) not in (8, 12, 13, 14): return False
+    total = 0
+    for i, ch in enumerate(reversed(digits[:-1]), start=1):
+        n = int(ch); total += n * (3 if i % 2 == 1 else 1)
+    check = (10 - (total % 10)) % 10
+    return check == int(digits[-1])
+
+# ---- ValidateEANTool: everything kept local inside forward ----
+class ValidateEANTool(Tool):
+    name, description = "validate_ean", "Valide un EAN/GTIN (clé GS1)."
+    inputs = {"ean": {"type": "string", "description": "Code EAN/GTIN (8/12/13/14 chiffres)."}}
+    output_type = "string"
+
+    def forward(self, ean: str) -> str:
+        import json, re
+        digits = re.sub(r"\D", "", ean or "")
+        if len(digits) not in (8, 12, 13, 14):
+            return json.dumps({"valid": False, "normalized": digits})
+        total = 0
+        for i, ch in enumerate(reversed(digits[:-1]), start=1):
+            n = int(ch); total += n * (3 if i % 2 == 1 else 1)
+        check = (10 - (total % 10)) % 10
+        return json.dumps({"valid": check == int(digits[-1]), "normalized": digits})
+
+
+# ---- OFFByEAN: internal imports + requirements ----
+# ---- OFFByEAN: robust (retries + v2 + .net fallback) ----
+class OFFByEAN(Tool):
+    name = "openfoodfacts_product_by_ean"
+    description = "Open Food Facts /api/v0|v2/product/{ean} (name, brands, categories...)."
+    inputs = {"ean": {"type": "string", "description": "EAN à interroger sur l'API OFF."}}
+    output_type = "string"
+    requirements = ["requests"]  # urllib3 is a dependency of requests
+
+    def forward(self, ean: str) -> str:
+        import json, re, requests
+        from requests.adapters import HTTPAdapter
+        from urllib3.util.retry import Retry
+
+        code = re.sub(r"\D", "", ean or "")
+        if not code:
+            return json.dumps({"status": 0, "code": "", "error": "EAN vide"})
+
+        # HTTP session with retries (429/5xx) and an explicit User-Agent
+        sess = requests.Session()
+        sess.headers.update({
+            "User-Agent": "insee-coicop-agent/1.0",
+            "Accept": "application/json",
+        })
+        retry = Retry(
+            total=3,
+            backoff_factor=0.5,
+            status_forcelist=[429, 500, 502, 503, 504],
+            allowed_methods=frozenset(["GET"]),
+            raise_on_status=False,
+        )
+        sess.mount("https://", HTTPAdapter(max_retries=retry))
+
+        # Try v0, then v2 (limited fields), then the .net mirror
+        urls = [
+            f"https://world.openfoodfacts.org/api/v0/product/{code}.json",
+            f"https://world.openfoodfacts.org/api/v2/product/{code}"
+            "?fields=product_name_fr,product_name,brands,categories_tags,"
+            "ingredients_text_fr,ingredients_text,stores,status,status_verbose",
+            f"https://world.openfoodfacts.net/api/v0/product/{code}.json",
+        ]
+
+        last_err = None
+        for u in urls:
+            try:
+                r = sess.get(u, timeout=15)
+                if not r.ok:
+                    last_err = f"HTTP {r.status_code}"
+                    continue
+                data = r.json()
+                # v0: status==1; v2: status is sometimes absent but product is present
+                product = data.get("product")
+                status = data.get("status", 1 if product else 0)
+                if status == 1 or product:
+                    p = product or {}
+                    out = {
+                        "status": status,
+                        "code": code,
+                        "product_name": p.get("product_name_fr") or p.get("product_name"),
+                        "brands": p.get("brands"),
+                        "categories_tags": (
+                            p.get("categories_tags")
+                            or p.get("categories_tags_fr")
+                            or p.get("categories")
+                        ),
+                        "ingredients_text": p.get("ingredients_text_fr") or p.get("ingredients_text"),
+                        "stores": p.get("stores"),
+                    }
+                    return json.dumps(out)
+            except Exception as e:
+                last_err = str(e)
+
+        return json.dumps({"status": 0, "code": code, "error": last_err or "not found"})
+
+
+
+
+
+# ---- RegexCOICOP: local normalization + precompiled regexes ----
+class RegexCOICOP(Tool):
+    name, description = "coicop_regex_rules", "Règles regex → candidats COICOP."
+    inputs = {"text": {"type": "string", "description": "Libellé produit (texte libre) à analyser."}}
+    output_type = "string"
+
+    # precompiled here so the tool stays self-contained
+    import re as _re
+    SOFT = _re.compile(r"(?:\b|^)(?:CAMEMB(?:ERT)?|BRIE|COULOMMI(?:ERS?)?|BLEU|ROQUEFORT|GORGONZ(?:OLA)?|REBLOCHON|MUNSTER)(?:\b|$)")
+    PRESS = _re.compile(r"(?:\b|^)(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)(?:\b|$)")
+    GOAT  = _re.compile(r"(?:\b|^)(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)(?:\b|$)")
+    PROC  = _re.compile(r"(?:\b|^)(FONDU(?:ES?)?|FROMAGE FONDU|TOASTINETTES?|VACHE QUI RIT|KIRI|CARRE FRAIS|CARR[ÉE] FRAIS|PORTIONS?)(?:\b|$)|\bRAP[ÉE]?\b")
+
+    @staticmethod
+    def _normalize_txt(s: str) -> str:
+        import unicodedata, re
+        if not s: return ""
+        s = s.upper()
+        s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
+        s = re.sub(r"[^A-Z0-9% ]+", " ", s)
+        return re.sub(r"\s+", " ", s).strip()
+
+    def forward(self, text: str) -> str:
+        import json, re
+        s = self._normalize_txt(text); c=[]
+        if self.SOFT.search(s): c.append({"code":"01.1.4.5.2","why":"pâte molle/persillée","score":0.95})
+        if self.PRESS.search(s): c.append({"code":"01.1.4.5.3","why":"pâte pressée","score":0.90})
+        if self.GOAT.search(s):  c.append({"code":"01.1.4.5.4","why":"chèvre","score":0.90})
+        if self.PROC.search(s):  c.append({"code":"01.1.4.5.5","why":"fondu/râpé/portions","score":0.85})
+        if not c and re.search(r"\bFROMAGE\b", s): c.append({"code":"01.1.4.5","why":"générique fromage/laits caillés","score":0.6})
+        return json.dumps({"candidates": c})
+
+
+# ---- OFFtoCOICOP: local normalization + regex rules ----
+class OFFtoCOICOP(Tool):
+    name, description = "map_off_to_coicop", "Mappe catégories OFF vers COICOP."
+    inputs = {
+        "product_name":    {"type":"string", "description":"Nom produit OFF (fr/en).", "nullable": True},
+        "categories_tags": {"type":"array",  "description":"Liste OFF categories_tags.", "nullable": True},
+        "ingredients_text":{"type":"string","description":"Texte ingrédients.", "nullable": True},
+    }
+    output_type="string"
+    import re as _re
+    RULES = [
+        (_re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b"), ("01.1.4.5.2",0.95,"OFF: pâte molle/persillée")),
+        (_re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b"), ("01.1.4.5.3",0.90,"OFF: pâte pressée")),
+        (_re.compile(r"\b(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)\b"), ("01.1.4.5.4",0.90,"OFF: chèvre")),
+        (_re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS)\b"), ("01.1.4.5.5",0.85,"OFF: fondu/rapé/portions")),
+    ]
+
+    @staticmethod
+    def _normalize_txt(s: str) -> str:
+        import unicodedata, re
+        if not s: return ""
+        s = s.upper()
+        s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
+        s = re.sub(r"[^A-Z0-9% ]+", " ", s)
+        return re.sub(r"\s+", " ", s).strip()
+
+    def forward(self, product_name=None, categories_tags=None, ingredients_text=None) -> str:
+        import json
+        text = " ".join([t for t in [
+            self._normalize_txt(product_name or ""),
+            self._normalize_txt(" ".join(categories_tags or [])),
+            self._normalize_txt(ingredients_text or "")
+        ] if t])
+        c=[]
+        for rx,(code,score,why) in self.RULES:
+            if rx.search(text): c.append({"code":code,"why":why,"score":score})
+        return json.dumps({"candidates": c})
+
+
+# ---- SemSim: embedded COICOP list + lazy model import ----
+class SemSim(Tool):
+    name, description = "coicop_semantic_similarity", "Embeddings → top-k COICOP."
+    inputs = {"text":{"type":"string","description":"Texte libellé"},
+              "topk":{"type":"integer","description":"Nombre de candidats (défaut 5)","nullable":True}}
+    output_type = "string"
+    # packages needed on the Hub side
+    requirements = ["sentence_transformers", "torch"]
+
+    # mini reference list embedded for the Hub export
+    COICOP_ITEMS = [
+        {"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
+        {"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
+        {"code": "01.1.4.5.3", "label": "Fromage à pâte pressée"},
+        {"code": "01.1.4.5.4", "label": "Fromage de chèvre"},
+        {"code": "01.1.4.5.5", "label": "Fromages fondus, râpés, portions"},
+        {"code": "01.1.1.4", "label": "Pain"},
+        {"code": "01.1.1.1", "label": "Riz"},
+        {"code": "01.1.1.3", "label": "Pâtes, couscous et produits similaires"},
+    ]
+
+    @staticmethod
+    def _normalize_txt(s: str) -> str:
+        import unicodedata, re
+        if not s: return ""
+        s = s.upper()
+        s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
+        s = re.sub(r"[^A-Z0-9% ]+", " ", s)
+        return re.sub(r"\s+", " ", s).strip()
+
+    def forward(self, text: str, topk: int = 5) -> str:
+        import json
+        from sentence_transformers import SentenceTransformer, util
+        # lazy init for Hub compatibility
+        if not hasattr(self, "_model"):
+            self._model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+        q = self._normalize_txt(text)
+        q_emb = self._model.encode([q], normalize_embeddings=True)
+        labels = [f"{it['code']} {it['label']}" for it in self.COICOP_ITEMS]
+        L = self._model.encode(labels, normalize_embeddings=True)
+        sims = util.cos_sim(q_emb, L).tolist()[0]
+        ranked = sorted(
+            [{"code": self.COICOP_ITEMS[i]["code"], "label": self.COICOP_ITEMS[i]["label"], "score": float(sims[i])}
+             for i in range(len(self.COICOP_ITEMS))],
+            key=lambda x: x["score"], reverse=True
+        )
+        return json.dumps({"candidates": ranked[:max(1,int(topk))]})
+
+
+# ---- Resolve: local json import ----
+class Resolve(Tool):
+    name, description = "resolve_coicop_candidates", "Fusionne candidats → choix final + alternatives + explication."
+    inputs = {"json_lists": {"type":"array","description":"Liste de JSON (str) d'autres tools."},
+              "topn":{"type":"integer","description":"Nb d'alternatives (défaut 3)","nullable":True}}
+    output_type = "string"
+
+    def forward(self, json_lists, topn: int = 3) -> str:
+        import json
+        from typing import Dict, Any
+        bucket: Dict[str, Dict[str, Any]] = {}
+        for s in json_lists:
+            data = json.loads(s) if s else {}
+            for c in data.get("candidates", []):
+                code = c["code"]; score = float(c.get("score", 0.0))
+                why = c.get("why", "") or c.get("label", "")
+                if code not in bucket:
+                    bucket[code] = {"code":code,"score":score,"votes":1,"evidences":[why] if why else []}
+                else:
+                    bucket[code]["score"] = max(bucket[code]["score"], score)
+                    bucket[code]["votes"] += 1
+                    if why: bucket[code]["evidences"].append(why)
+        for v in bucket.values():
+            v["score_final"] = v["score"] + 0.05*(v["votes"]-1)
+        ranked = sorted(bucket.values(), key=lambda x: x["score_final"], reverse=True)
+        if not ranked: return json.dumps({"final": None, "alternatives": [], "explanation":"Aucun candidat"})
+        final = ranked[0]; alts = ranked[1:1+max(0,int(topn))]
+        exp = f"Choix {final['code']} (score {final['score_final']:.2f}) – votes={final['votes']} – raisons: {', '.join(sorted(set(final['evidences'])))}"
+        return json.dumps({"final": final, "alternatives": alts, "explanation": exp})
+
+
+
+def build_agent(model_id: str | None = None) -> CodeAgent:
+    model_id = model_id or "Qwen/Qwen2.5-Coder-7B-Instruct"  # lightweight, for testing
+    agent = CodeAgent(
+        tools=[ValidateEANTool(), OFFByEAN(), RegexCOICOP(), OFFtoCOICOP(), SemSim(), Resolve()],
+        model=InferenceClientModel(model_id=model_id),
+        add_base_tools=False,
+        max_steps=6,
+        verbosity_level=2,
+    )
+    return agent

+def parse_result(res):
+    if isinstance(res, dict): return res
+    try: return ast.literal_eval(res)
+    except Exception: return {"raw": res}
+
+if __name__ == "__main__":
+    # Replace with real data when possible - test only
+    ean = "3256221112345"  # fictional EAN (may not exist on OFF)
+    label = "Camembert au lait cru AOP 250g - ALDI"
+
+    agent = build_agent()
+    task = f"""
+    Classe ce produit en COICOP:
+    EAN: {ean}
+    Libellé: {label}
+    Pipeline:
+    1) validate_ean(ean)
+    2) openfoodfacts_product_by_ean(ean)  # si OFF ne trouve pas, on s'appuie sur regex + embeddings
+    3) map_off_to_coicop(product_name, categories_tags, ingredients_text)
+    4) coicop_regex_rules(text=libellé)
+    5) coicop_semantic_similarity(text=libellé, topk=5)
+    6) resolve_coicop_candidates([...], topn=3)
+    Attend un JSON final.
+    """
+    out = agent.run(task)
+    print(parse_result(out))
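The __main__ block of quick_deploy_agent.py only exercises the tools through the LLM-driven CodeAgent. For a deterministic check of the tool chain itself (no Open Food Facts request, no model inference call), a minimal sketch like the following should work, since it only calls the forward() methods defined in that file; offline_check.py is a made-up file name, and 4006381333931 is just an arbitrary GTIN-13 with a valid GS1 check digit (weights 3,1,3,1,... from the right), not a food product:

    # offline_check.py - hypothetical, not part of this commit
    import json
    from quick_deploy_agent import ValidateEANTool, RegexCOICOP, SemSim, Resolve

    label = "Camembert au lait cru AOP 250g - ALDI"

    print(ValidateEANTool().forward("4006381333931"))  # -> {"valid": true, "normalized": "4006381333931"}
    regex_out = RegexCOICOP().forward(label)           # rule-based candidates (01.1.4.5.2 at 0.95 here)
    sem_out = SemSim().forward(label, topk=3)          # embedding candidates; downloads MiniLM on first use
    merged = Resolve().forward([regex_out, sem_out], topn=2)
    print(json.loads(merged)["explanation"])           # expected to favour 01.1.4.5.2 for this label

This keeps the rule/embedding fusion testable independently of the agent loop and of OFF availability.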
    	
requirements.txt ADDED

@@ -0,0 +1,6 @@
+smolagents
+huggingface_hub
+gradio
+requests
+sentence-transformers
+torch