Spaces:

devusman
/

analysis_tool

Sleeping

App Files Files Community

devusman committed on Aug 27, 2025

Commit

6058f1c

verified ·

1 Parent(s): e8fa023

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -118

app.py CHANGED Viewed

@@ -1,50 +1,25 @@
 import os
 import traceback
 from flask import Flask, request, jsonify
 from flask_cors import CORS
-# Try to import spacy lazily and handle missing models gracefully
 try:
     import spacy
 except Exception:
     spacy = None
 # ------------------------------
-# Caricamento modello spaCy (con fallback non-bloccante)
 # ------------------------------
-def load_it_model():
-    """
-    Prova a caricare un modello italiano in ordine di qualità.
-    Se nessun modello è installato, restituisce (None, None) e una istruzione per l'utente.
-    """
-    if spacy is None:
-        return None, None, ("La libreria spaCy non è installata. Installa spaCy: pip install spacy")
-    candidates = ["it_core_news_lg", "it_core_news_md", "it_core_news_sm"]
-    last_err = None
-    for name in candidates:
-        try:
-            nlp = spacy.load(name)
-            return nlp, name, None
-        except Exception as e:
-            last_err = e
-    suggestion = (
-        "Impossibile caricare un modello italiano spaCy. "
-        "Installa almeno uno tra: it_core_news_lg / it_core_news_md / it_core_news_sm.\n"
-        f"Esempio: python -m spacy download it_core_news_lg\nDettagli ultimo errore: {last_err}"
-    )
-    return None, None, suggestion
-nlp, IT_MODEL, MODEL_LOAD_ERROR = load_it_model()
-# ------------------------------
-# Flask App
-# ------------------------------
-app = Flask(__name__)
-CORS(app)
 # ------------------------------
-# Tabelle di spiegazione POS / NER
 # ------------------------------
 SPIEGAZIONI_POS_IT = {
     "ADJ": "Aggettivo", "ADP": "Preposizione", "ADV": "Avverbio", "AUX": "Ausiliare",
@@ -55,12 +30,10 @@ SPIEGAZIONI_POS_IT = {
 }
 SPIEGAZIONI_ENT_IT = {
-    "PER": "Persona", "LOC": "Luogo", "ORG": "Organizzazione", "MISC": "Miscellanea"
 }
-# ------------------------------
-# Traduzioni Morfologia (UD)
-# ------------------------------
 KEY_MAP = {
     "Gender": "Genere", "Number": "Numero", "Mood": "Modo", "Tense": "Tempo",
     "Person": "Persona", "VerbForm": "Forma del Verbo", "PronType": "Tipo di Pronome",
@@ -82,7 +55,7 @@ PAIR_VALUE_MAP = {
 }
 # ------------------------------
-# Mappature Dependency → Etichette italiane
 # ------------------------------
 MAPPA_DEP = {
     "nsubj": {"label": "Soggetto", "description": "Indica chi o cosa compie l'azione o si trova in un certo stato."},
@@ -107,156 +80,206 @@ MAPPA_DEP = {
 }
 # ------------------------------
-# Utilità di Analisi
 # ------------------------------
 def spiega_in_italiano(tag, tipo='pos'):
-    if tipo == 'pos': return SPIEGAZIONI_POS_IT.get(tag, tag)
-    if tipo == 'ent': return f"{SPIEGAZIONI_ENT_IT.get(tag, tag)}: {SPIEGAZIONI_ENT_IT.get(tag, {}).get('description', '')}"
     return tag
 def traduci_morfologia(morph_str: str) -> str:
-    if not morph_str or morph_str == "___": return "Non disponibile"
     parti = morph_str.split('|')
-    parti_tradotte = set()
     for parte in parti:
-        if '=' not in parte: continue
         chiave, valore = parte.split('=', 1)
         chiave_trad = KEY_MAP.get(chiave, chiave)
         valore_trad = PAIR_VALUE_MAP.get((chiave, valore), VALUE_MAP.get(valore, valore))
-        parti_tradotte.add(f"{chiave_trad}: {valore_trad}")
-    return ", ".join(sorted(list(parti_tradotte))) or "Non disponibile"
 def ottieni_tipo_complemento_con_dettagli(token):
-    case_token = next((child for child in token.children if child.dep_ == 'case'), None)
-    if not case_token: return MAPPA_DEP.get("obl")
-    preposizione = case_token.text.lower()
     mappa = {
         "di": ("Complemento di Specificazione", "Risponde alla domanda: di chi? / di che cosa?"),
         "a": ("Complemento di Termine", "Risponde alla domanda: a chi? / a che cosa?"),
-        "da": ("Complemento di Moto da Luogo", "Risponde alla domanda: da dove?"),
         "in": ("Complemento di Stato in Luogo", "Risponde alla domanda: dove?"),
         "con": ("Complemento di Compagnia o Mezzo", "Risponde alla domanda: con chi? / con che cosa?"),
         "su": ("Complemento di Argomento o Luogo", "Risponde alla domanda: su chi? / su che cosa? / dove?"),
         "per": ("Complemento di Fine o Causa", "Risponde alla domanda: per quale fine? / per quale causa?"),
         "tra": ("Complemento Partitivo / Luogo", "Risponde alla domanda: tra chi? / tra cosa?"),
         "fra": ("Complemento Partitivo / Luogo", "Risponde alla domanda: fra chi? / fra cosa?"),
     }
     for base, (label, desc) in mappa.items():
-        if preposizione.startswith(base):
-            if base == "da" and any(c.dep_ == "aux:pass" for c in token.head.children):
                 return {"label": "Complemento d'Agente", "description": "Indica da chi è compiuta l'azione in una frase passiva."}
             return {"label": label, "description": desc}
-    return MAPPA_DEP.get("obl")
 def get_full_phrase_for_token(token):
     """
-    FIXED: Costruisce un sintagma in modo preciso, raccogliendo solo i modificatori
-    strettamente collegati e gli elementi coordinati.
     """
-    phrase_tokens = []
-    # Funzione interna per raccogliere i token di un singolo elemento e i suoi figli diretti
-    def collect_children(t):
-        # Raccoglie i modificatori diretti (articoli, aggettivi, preposizioni)
-        children = [t]
         for child in t.children:
-            if child.dep_ in ('det', 'amod', 'case', 'compound', 'advmod', 'appos'):
-                children.extend(collect_children(child)) # Raccoglie anche i figli dei figli (es. avverbi di aggettivi)
-        return children
-    # Raccoglie i token per il token principale
-    phrase_tokens.extend(collect_children(token))
-    # Gestisce la coordinazione (es. "libri e quaderni")
     for child in token.children:
         if child.dep_ == 'conj':
-            # Aggiunge la congiunzione (es. "e", "o")
             cc = next((c for c in child.children if c.dep_ == 'cc'), None)
             if cc:
-                phrase_tokens.append(cc)
-            # Aggiunge l'intero sintagma coordinato
-            phrase_tokens.extend(get_full_phrase_for_token(child))
-    # Ordina i token in base alla loro posizione originale e rimuove duplicati
-    unique_tokens = sorted(list(set(phrase_tokens)), key=lambda t: t.i)
-    text = " ".join(t.text for t in unique_tokens)
-    indices = {t.i for t in unique_tokens}
-    return text, indices
 def costruisci_sintagmi_con_dettagli(tokens_proposizione):
     """
-    FIXED: L'algoritmo ora processa ogni componente logico separatamente e con precisione.
     """
-    risultato_analisi = []
     indici_elaborati = set()
-    # Definisce le dipendenze che non sono "teste" di un sintagma ma parti di esso
-    DEPS_DA_SALTARE = {'det', 'amod', 'case', 'aux', 'aux:pass', 'cop', 'mark', 'cc', 'advmod', 'compound', 'appos'}
     for token in tokens_proposizione:
-        if token.i in indici_elaborati or token.dep_ in DEPS_DA_SALTARE:
             continue
         testo_sintagma, indici_usati = get_full_phrase_for_token(token)
         dep = token.dep_
         if dep in ('obl', 'obl:agent', 'nmod'):
             info_etichetta = ottieni_tipo_complemento_con_dettagli(token)
         else:
             info_etichetta = MAPPA_DEP.get(dep, {"label": dep.capitalize(), "description": "Relazione non mappata."})
-        # Caso speciale per predicato nominale
-        if dep == "ROOT" and any(c.dep_ == 'cop' for c in token.children):
-            info_etichetta = {"label": "Parte Nominale del Predicato", "description": "Aggettivo o nome che descrive il soggetto."}
-        risultato_analisi.append({
             "text": testo_sintagma,
             "label_info": info_etichetta,
-            "token_details": {
-                "lemma": token.lemma_,
-                "pos": f"{token.pos_}: {spiega_in_italiano(token.pos_)}",
-                "tag": f"{token.tag_}: {spiega_in_italiano(token.tag_)}",
-                "morph": traduci_morfologia(str(token.morph))
-            },
             "token_index": token.i
         })
         indici_elaborati.update(indici_usati)
-    # Aggiungi componenti saltati (es. copula, congiunzioni) che sono importanti
     for token in tokens_proposizione:
         if token.i not in indici_elaborati and token.dep_ in ('cop', 'cc'):
-            risultato_analisi.append({
                 "text": token.text,
-                "label_info": MAPPA_DEP.get(token.dep_),
-                "token_details": { "lemma": token.lemma_, "pos": f"{token.pos_}: {spiega_in_italiano(token.pos_)}", "morph": traduci_morfologia(str(token.morph)) },
                 "token_index": token.i
             })
-    # Ordina i risultati finali in base all'indice del token principale
-    risultato_analisi.sort(key=lambda x: x['token_index'])
-    return risultato_analisi
 def analizza_proposizione_con_dettagli(tokens):
     tokens_validi = [t for t in tokens if not t.is_punct and not t.is_space]
     return costruisci_sintagmi_con_dettagli(tokens_validi)
 # ------------------------------
-# Routes
 # ------------------------------
 @app.route("/")
 def home():
     status = "ok" if nlp else "model_missing"
     return jsonify({
-        "messaggio": "API analisi logica in esecuzione", "modello_spacy": IT_MODEL or "Nessuno",
-        "model_status": status, "model_error": MODEL_LOAD_ERROR, "endpoint": "/api/analyze"
     })
 @app.route('/api/analyze', methods=['POST'])
 def analizza_frase():
     if not nlp:
         return jsonify({"errore": "Modello spaCy non caricato.", "dettagli": MODEL_LOAD_ERROR}), 503
@@ -265,12 +288,15 @@ def analizza_frase():
         frase = (dati.get('sentence') or "").strip()
         if not frase:
             return jsonify({"errore": "Frase non fornita o vuota."}), 400
         doc = nlp(frase)
-        proposizioni_subordinate, indici_subordinate = [], set()
-        SUBORD_DEPS = {"acl:relcl", "advcl", "ccomp", "csubj", "xcomp", "acl", "parataxis"}
         for token in doc:
             if token.dep_ in SUBORD_DEPS and token.i not in indici_subordinate:
                 subtree = list(token.subtree)
@@ -283,22 +309,26 @@ def analizza_frase():
                     "analysis": analizza_proposizione_con_dettagli(subtree)
                 })
-        token_principale = [t for t in doc if t.i not in indici_subordinate]
         entita_nominate = []
         visti = set()
         for ent in doc.ents:
             if ent.text not in visti:
                 visti.add(ent.text)
                 entita_nominate.append({
-                    "text": ent.text, "label": ent.label_,
-                    "explanation": f"{SPIEGAZIONI_ENT_IT.get(ent.label_, ent.label_)}"
                 })
         analisi_finale = {
-            "full_sentence": frase, "model": IT_MODEL,
             "main_clause": {
-                "text": " ".join(t.text for t in token_principale if not t.is_punct).strip(),
                 "analysis": analizza_proposizione_con_dettagli(token_principale)
             },
             "subordinate_clauses": proposizioni_subordinate,
@@ -308,9 +338,11 @@ def analizza_frase():
         return jsonify(analisi_finale)
     except Exception as e:
         traceback.print_exc()
         return jsonify({"errore": "Si è verificato un errore interno.", "dettagli": str(e)}), 500
 if __name__ == '__main__':
     port = int(os.environ.get("PORT", 8080))
-    app.run(host="0.0.0.0", port=port, debug=False, threaded=True)

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 import os
 import traceback
 from flask import Flask, request, jsonify
 from flask_cors import CORS
+# Try to import spaCy lazily and handle missing models gracefully
 try:
     import spacy
 except Exception:
     spacy = None
 # ------------------------------
+# Config
 # ------------------------------
+MAX_SENTENCE_LENGTH = 2000  # characters, to avoid huge inputs
+SUBORD_DEPS = {"acl:relcl", "advcl", "ccomp", "csubj", "xcomp", "acl", "parataxis"}
 # ------------------------------
+# Utility dictionaries (Italian)
 # ------------------------------
 SPIEGAZIONI_POS_IT = {
     "ADJ": "Aggettivo", "ADP": "Preposizione", "ADV": "Avverbio", "AUX": "Ausiliare",
 }
 SPIEGAZIONI_ENT_IT = {
+    "PER": "Persona", "LOC": "Luogo", "ORG": "Organizzazione", "MISC": "Miscellanea",
+    # spaCy uses many possible entity labels depending on model/lang — fallback to label itself later
 }
 KEY_MAP = {
     "Gender": "Genere", "Number": "Numero", "Mood": "Modo", "Tense": "Tempo",
     "Person": "Persona", "VerbForm": "Forma del Verbo", "PronType": "Tipo di Pronome",
 }
 # ------------------------------
+# Map dependency labels to Italian labels and explanations
 # ------------------------------
 MAPPA_DEP = {
     "nsubj": {"label": "Soggetto", "description": "Indica chi o cosa compie l'azione o si trova in un certo stato."},
 }
 # ------------------------------
+# Model load helper (non-blocking)
 # ------------------------------
def load_it_model():
    """Load an Italian spaCy pipeline without ever raising.

    The candidate models are tried in the order lg, md, sm and the first one
    that loads wins.

    Returns:
        A ``(nlp, model_name, error_message)`` triple. On success the error
        message is ``None``; on failure ``nlp`` and ``model_name`` are ``None``
        and the message (in Italian) tells the user what to install.
    """
    # The module-level import falls back to ``spacy = None`` when it fails.
    if spacy is None:
        return None, None, "La libreria spaCy non è installata. Esegui: pip install spacy"
    most_recent_failure = None
    for model_name in ("it_core_news_lg", "it_core_news_md", "it_core_news_sm"):
        try:
            pipeline = spacy.load(model_name)
        except Exception as exc:
            # Remember only the latest failure; it is reported if every candidate fails.
            most_recent_failure = exc
        else:
            return pipeline, model_name, None
    suggestion = "".join((
        "Impossibile caricare un modello italiano spaCy. ",
        "Installa almeno uno tra: it_core_news_lg / it_core_news_md / it_core_news_sm.\n",
        "Esempio: python -m spacy download it_core_news_lg\n",
        f"Dettagli ultimo errore: {most_recent_failure}",
    ))
    return None, None, suggestion
+nlp, IT_MODEL, MODEL_LOAD_ERROR = load_it_model()
+# ------------------------------
+# Small helper converters
+# ------------------------------
 def spiega_in_italiano(tag, tipo='pos'):
     """Return the Italian explanation for a POS tag or an entity label.

     ``tipo`` picks the lookup table: ``'pos'`` reads SPIEGAZIONI_POS_IT and
     ``'ent'`` reads SPIEGAZIONI_ENT_IT. A tag missing from the chosen table,
     or any other ``tipo``, yields ``tag`` unchanged.
     """
     if tipo == 'pos':
         tabella = SPIEGAZIONI_POS_IT
     elif tipo == 'ent':
         tabella = SPIEGAZIONI_ENT_IT
     else:
         return tag
     return tabella.get(tag, tag)
 def traduci_morfologia(morph_str: str) -> str:
     """Translate a pipe-separated ``Key=Value`` feature string into Italian.

     Keys are translated through KEY_MAP. Values are looked up first as a
     ``(key, value)`` pair in PAIR_VALUE_MAP, then alone in VALUE_MAP; anything
     not found is kept as-is. Segments without ``=`` are ignored.

     Returns:
         The translated ``"key: value"`` items joined by ``", "`` in input
         order, or ``"Non disponibile"`` when nothing could be produced.
     """
     non_disponibile = "Non disponibile"
     if morph_str == "___" or not morph_str:
         return non_disponibile
     voci = []
     for segmento in morph_str.split('|'):
         chiave, separatore, valore = segmento.partition('=')
         if not separatore:
             # No '=' in this segment, so there is no key/value pair to translate.
             continue
         valore_generico = VALUE_MAP.get(valore, valore)
         valore_it = PAIR_VALUE_MAP.get((chiave, valore), valore_generico)
         chiave_it = KEY_MAP.get(chiave, chiave)
         voci.append(f"{chiave_it}: {valore_it}")
     testo = ", ".join(voci)
     return testo if testo else non_disponibile
 # Articulated / elided / euphonic forms, grouped by the simple preposition they contain.
 _FORME_PREPOSIZIONE = {
     "di": ("del", "dello", "della", "dell", "dei", "degli", "delle", "d"),
     "a": ("al", "allo", "alla", "all", "ai", "agli", "alle", "ad"),
     "da": ("dal", "dallo", "dalla", "dall", "dai", "dagli", "dalle"),
     "in": ("nel", "nello", "nella", "nell", "nei", "negli", "nelle"),
     "con": ("col", "coi"),
     "su": ("sul", "sullo", "sulla", "sull", "sui", "sugli", "sulle"),
 }
 # Reverse index: surface form -> simple preposition.
 _BASE_PREPOSIZIONE = {
     forma: base for base, forme in _FORME_PREPOSIZIONE.items() for forma in forme
 }
 # Simple preposition -> (complement label, guiding question).
 _COMPLEMENTI_PER_PREPOSIZIONE = {
     "di": ("Complemento di Specificazione", "Risponde alla domanda: di chi? / di che cosa?"),
     "a": ("Complemento di Termine", "Risponde alla domanda: a chi? / a che cosa?"),
     "da": ("Complemento di Moto da Luogo / Origine", "Risponde alla domanda: da dove?"),
     "in": ("Complemento di Stato in Luogo", "Risponde alla domanda: dove?"),
     "con": ("Complemento di Compagnia o Mezzo", "Risponde alla domanda: con chi? / con che cosa?"),
     "su": ("Complemento di Argomento o Luogo", "Risponde alla domanda: su chi? / su che cosa? / dove?"),
     "per": ("Complemento di Fine o Causa", "Risponde alla domanda: per quale fine? / per quale causa?"),
     "tra": ("Complemento Partitivo / Luogo", "Risponde alla domanda: tra chi? / tra cosa?"),
     "fra": ("Complemento Partitivo / Luogo", "Risponde alla domanda: fra chi? / fra cosa?"),
     "sopra": ("Complemento di Luogo", "Risponde alla domanda: dove?"),
     "sotto": ("Complemento di Luogo", "Risponde alla domanda: dove?"),
 }
 def _normalizza_preposizione(testo):
     """Reduce a preposition's surface text to its simple form (e.g. "Nell'" -> "in")."""
     forma = testo.lower().strip().rstrip("'’")
     return _BASE_PREPOSIZIONE.get(forma, forma)
 def ottieni_tipo_complemento_con_dettagli(token):
     """Pick a complement label for ``token`` from its governing preposition.

     The preposition is the first child of ``token`` whose ``dep_`` is
     ``'case'``. Its text is normalised to the simple preposition and looked
     up exactly, so articulated forms such as "della", "nel" or "col" are
     recognised and unrelated prepositions that merely share a prefix
     ("contro", "attraverso", "davanti") are no longer mislabelled.

     Args:
         token: a token exposing ``children`` and ``head`` (each child needs
             ``dep_``; the ``case`` child also needs ``text``).

     Returns:
         A dict with ``"label"`` and ``"description"`` keys. When there is no
         ``case`` child or the preposition is not in the table, the generic
         ``"obl"`` entry of MAPPA_DEP (or a built-in default) is returned.
     """
     generico = {"label": "Complemento", "description": "Complemento non specificato."}
     case_token = next((c for c in token.children if c.dep_ == 'case'), None)
     if not case_token:
         return MAPPA_DEP.get("obl", generico)
     base = _normalizza_preposizione(case_token.text)
     voce = _COMPLEMENTI_PER_PREPOSIZIONE.get(base)
     if voce is None:
         return MAPPA_DEP.get("obl", generico)
     # "da" next to a passive auxiliary (or an *agent relation) on the same head marks the agent.
     if base == "da" and any(
         c.dep_.endswith('agent') or c.dep_ == 'aux:pass' for c in token.head.children
     ):
         return {"label": "Complemento d'Agente", "description": "Indica da chi è compiuta l'azione in una frase passiva."}
     label, desc = voce
     return {"label": label, "description": desc}
 def get_full_phrase_for_token(token):
     """Assemble the phrase headed by ``token`` and report which tokens it uses.

     Starting from ``token``, children are pulled in (transitively) when their
     dependency is one of det, amod, case, compound, nummod, appos, fixed, flat
     or advmod. Each direct ``conj`` child is expanded the same way, together
     with its first ``cc`` child if it has one.

     Returns:
         ``(text, indices)``: the collected tokens' texts joined by single
         spaces in document order, and the set of their ``.i`` indices.
     """
     dipendenze_interne = {'det', 'amod', 'case', 'compound', 'nummod', 'appos', 'fixed', 'flat', 'advmod'}
     indici = set()
     def espandi(radice):
         # Explicit work-list walk; tokens already recorded are not expanded again.
         in_attesa = [radice]
         while in_attesa:
             corrente = in_attesa.pop()
             if corrente.i in indici:
                 continue
             indici.add(corrente.i)
             in_attesa.extend(f for f in corrente.children if f.dep_ in dipendenze_interne)
     espandi(token)
     for figlio in token.children:
         if figlio.dep_ != 'conj':
             continue
         espandi(figlio)
         # Only the first coordinating conjunction under the conjunct is kept.
         for nipote in figlio.children:
             if nipote.dep_ == 'cc':
                 indici.add(nipote.i)
                 break
     ordinati = sorted(indici)
     documento = token.doc
     testo = " ".join(documento[k].text for k in ordinati)
     return testo, set(ordinati)
 def _dettagli_token(token):
     """Return the lemma / POS / tag / morphology summary dict for one token."""
     pos = token.pos_
     return {
         "lemma": getattr(token, "lemma_", token.text),
         "pos": f"{pos}: {spiega_in_italiano(pos, 'pos')}",
         "tag": getattr(token, "tag_", ""),
         "morph": traduci_morfologia(str(getattr(token, "morph", ""))),
     }
 def costruisci_sintagmi_con_dettagli(tokens_proposizione):
     """Build the structured analysis of one clause.

     First pass: every token that is not a pure modifier and has not already
     been absorbed into an earlier phrase becomes a phrase head; its phrase
     text comes from ``get_full_phrase_for_token`` and its label from the
     preposition-based classifier (for obl / obl:agent / nmod) or MAPPA_DEP.
     Second pass: copulas and coordinating conjunctions that no phrase
     absorbed are emitted as stand-alone entries.

     Args:
         tokens_proposizione: iterable of tokens for the clause; it is
             traversed twice, so it must be re-iterable (the caller passes a
             list with punctuation and whitespace already removed).

     Returns:
         A list of dicts with keys ``text``, ``label_info``, ``token_details``
         and ``token_index``, sorted by ``token_index``.
     """
     # Dependencies that belong inside another token's phrase rather than heading one.
     DEPS_DA_SALTARE = {'det', 'amod', 'case', 'aux', 'aux:pass', 'cop', 'mark', 'cc', 'compound', 'appos', 'punct'}
     risultato = []
     indici_elaborati = set()
     for token in tokens_proposizione:
         if token.i in indici_elaborati:
             continue
         # A modifier is skipped unless it is its own head (i.e. the root of the parse).
         if token.dep_ in DEPS_DA_SALTARE and token.head.i != token.i:
             continue
         testo_sintagma, indici_usati = get_full_phrase_for_token(token)
         dep = token.dep_
         if dep in ('obl', 'obl:agent', 'nmod'):
             info_etichetta = ottieni_tipo_complemento_con_dettagli(token)
         else:
             info_etichetta = MAPPA_DEP.get(dep, {"label": dep.capitalize(), "description": "Relazione non mappata."})
         risultato.append({
             "text": testo_sintagma,
             "label_info": info_etichetta,
             "token_details": _dettagli_token(token),
             "token_index": token.i
         })
         indici_elaborati.update(indici_usati)
     # Copulas and conjunctions left over after phrase building still get their own entry.
     for token in tokens_proposizione:
         if token.i not in indici_elaborati and token.dep_ in ('cop', 'cc'):
             risultato.append({
                 "text": token.text,
                 "label_info": MAPPA_DEP.get(token.dep_, {"label": token.dep_, "description": ""}),
                 "token_details": _dettagli_token(token),
                 "token_index": token.i
             })
             indici_elaborati.add(token.i)
     risultato.sort(key=lambda x: x['token_index'])
     return risultato
 def analizza_proposizione_con_dettagli(tokens):
     """Analyse a clause after dropping its punctuation and whitespace tokens."""
     solo_parole = [tok for tok in tokens if not (tok.is_punct or tok.is_space)]
     return costruisci_sintagmi_con_dettagli(solo_parole)
 # ------------------------------
+# Flask app
 # ------------------------------
+app = Flask(__name__)
+CORS(app)
 @app.route("/")
 def home():
     """Status endpoint: report whether a spaCy model is loaded and which one."""
     stato = {
         "messaggio": "API analisi logica in esecuzione",
         "modello_spacy": IT_MODEL if IT_MODEL else "Nessuno",
         "model_status": "model_missing" if not nlp else "ok",
         "model_error": MODEL_LOAD_ERROR,
     }
     return jsonify(stato)
 @app.route('/api/analyze', methods=['POST'])
 def analizza_frase():
+    # Basic checks
     if not nlp:
         return jsonify({"errore": "Modello spaCy non caricato.", "dettagli": MODEL_LOAD_ERROR}), 503
         frase = (dati.get('sentence') or "").strip()
         if not frase:
             return jsonify({"errore": "Frase non fornita o vuota."}), 400
+        if len(frase) > MAX_SENTENCE_LENGTH:
+            return jsonify({"errore": "Frase troppo lunga.", "max_length": MAX_SENTENCE_LENGTH}), 400
         doc = nlp(frase)
+        proposizioni_subordinate = []
+        indici_subordinate = set()
+        # detect subordinate clauses via tokens that have dependency in SUBORD_DEPS
         for token in doc:
             if token.dep_ in SUBORD_DEPS and token.i not in indici_subordinate:
                 subtree = list(token.subtree)
                     "analysis": analizza_proposizione_con_dettagli(subtree)
                 })
+        # main clause tokens are tokens not part of subordinate clause subtrees
+        token_principale = [t for t in doc if t.i not in indici_subordinate and not t.is_punct and not t.is_space]
+        # named entities (unique)
         entita_nominate = []
         visti = set()
         for ent in doc.ents:
             if ent.text not in visti:
                 visti.add(ent.text)
                 entita_nominate.append({
+                    "text": ent.text,
+                    "label": ent.label_,
+                    "explanation": spiega_in_italiano(ent.label_, 'ent')
                 })
         analisi_finale = {
+            "full_sentence": frase,
+            "model": IT_MODEL,
             "main_clause": {
+                "text": " ".join(t.text for t in token_principale).strip(),
                 "analysis": analizza_proposizione_con_dettagli(token_principale)
             },
             "subordinate_clauses": proposizioni_subordinate,
         return jsonify(analisi_finale)
     except Exception as e:
+        # print to server log for debugging but return safe message
         traceback.print_exc()
         return jsonify({"errore": "Si è verificato un errore interno.", "dettagli": str(e)}), 500
 if __name__ == '__main__':
     # The listening port comes from the PORT environment variable, defaulting to 8080.
     port = int(os.getenv("PORT", 8080))
     # debug stays off here; enable it only while developing locally.
     app.run(debug=False, threaded=True, host="0.0.0.0", port=port)