devusman committed on
Commit
31edf0b
·
1 Parent(s): 4f5a1e9

update for explanation

Browse files
Files changed (1) hide show
  1. app.py +113 -88
app.py CHANGED
@@ -2,55 +2,51 @@ import os
2
  from flask import Flask, request, jsonify
3
  from flask_cors import CORS
4
  import spacy
 
5
 
6
  # --- CORRECTED MODEL LOADING SECTION ---
7
- # This approach loads the model by its package name. It is more robust because
8
- # the model is now managed as a dependency in requirements.txt,
9
- # removing the need to manually place a model folder next to the script.
10
  try:
 
11
  nlp = spacy.load("it_core_news_sm")
12
  except OSError:
13
  raise RuntimeError(
14
  "Could not find the 'it_core_news_sm' model. "
15
  "Please ensure it is listed and installed from your requirements.txt file."
16
  )
17
-
18
  # --- END SECTION ---
19
 
20
- # Initialize the Flask app
21
  app = Flask(__name__)
22
 
23
- # Enable Cross-Origin Resource Sharing (CORS) to allow your frontend to call this API
24
  CORS(app)
25
 
26
- # A mapping from spaCy dependency labels to our logical analysis labels
27
  DEP_MAP = {
28
- "nsubj": "Soggetto",
29
- "ROOT": "Predicato Verbale",
30
- "obj": "Complemento Oggetto",
31
- "iobj": "Complemento di Termine",
32
- "obl": "Complemento Indiretto",
33
- "nmod": "Complemento di Specificazione",
34
- "amod": "Attributo",
35
- "advmod": "Complemento Avverbiale",
36
- "appos": "Apposizione",
37
- "acl:relcl": "Proposizione Subordinata Relativa",
38
- "advcl": "Proposizione Subordinata Avverbiale",
39
- "ccomp": "Proposizione Subordinata Oggettiva",
40
- "csubj": "Proposizione Subordinata Soggettiva"
41
  }
42
 
43
- def get_complement_type(token):
44
- """Refine the complement type based on the preceding preposition."""
45
  preposition = ""
46
- # Look for a preposition (`case`) attached to this token
47
  for child in token.children:
48
  if child.dep_ == "case":
49
  preposition = child.text.lower()
50
  break
51
-
52
- # If no preposition is found on the children, check the head token.
53
- # This helps in cases of complex prepositional phrases.
54
  if not preposition and token.head.dep_ == 'obl':
55
  for child in token.head.children:
56
  if child.dep_ == "case":
@@ -58,51 +54,57 @@ def get_complement_type(token):
58
  break
59
 
60
  if preposition in ["di", "del", "dello", "della", "dei", "degli", "delle"]:
61
- return "Complemento di Specificazione"
62
  if preposition in ["a", "al", "allo", "alla", "ai", "agli", "alle"]:
63
- return "Complemento di Termine"
64
  if preposition in ["da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"]:
65
- # Check if it's a passive sentence for Complemento d'Agente
66
  if any(child.dep_ == 'aux:pass' for child in token.head.children):
67
- return "Complemento d'Agente"
68
- return "Complemento di Moto da Luogo"
69
  if preposition in ["in", "nel", "nello", "nella", "nei", "negli", "nelle"]:
70
- return "Complemento di Stato in Luogo"
71
  if preposition in ["con", "col", "coi"]:
72
- return "Complemento di Compagnia o Mezzo"
73
  if preposition in ["su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"]:
74
- return "Complemento di Argomento o Luogo"
75
  if preposition in ["per"]:
76
- return "Complemento di Fine o Causa"
77
  if preposition in ["tra", "fra"]:
78
- return "Complemento di Luogo o Tempo (Partitivo)"
79
-
80
- return "Complemento Indiretto"
 
81
 
82
  def get_full_text(token):
83
- """Recursively builds the full text of a phrase starting from a head token."""
84
- # Collect the text of the token and all its children that form the phrase
85
- # (like articles, adjectives, etc.)
86
- phrase_tokens = [token] + [t for t in token.children if t.dep_ in ('det', 'amod', 'case', 'advmod')]
87
- # Sort by index to maintain original order
88
  phrase_tokens.sort(key=lambda x: x.i)
89
  return " ".join(t.text for t in phrase_tokens)
90
 
91
- def build_phrases(tokens):
92
- """Merges tokens into meaningful grammatical phrases."""
93
  phrase_map = {}
94
 
95
- # First pass: map head tokens to their full text
96
  for token in tokens:
97
- # The head of a phrase is usually a noun, verb, or adjective
98
  if token.dep_ not in ['det', 'case', 'amod', 'punct', 'aux', 'cop', 'mark']:
99
  phrase_map[token.i] = {
100
  "text": get_full_text(token),
101
- "label": "", # Label will be assigned next
 
 
 
 
 
 
 
102
  "token": token
103
  }
104
 
105
- # Second pass: assign labels and structure
106
  analysis_result = []
107
  processed_indices = set()
108
 
@@ -112,56 +114,74 @@ def build_phrases(tokens):
112
 
113
  token = phrase['token']
114
  dep = token.dep_
115
- label = ""
116
 
117
  if dep == "ROOT":
118
- # Check for nominal predicate (e.g., "è bello")
119
  is_nominal = any(c.dep_ == 'cop' for c in token.children)
120
  if is_nominal:
121
  copula = [c for c in token.children if c.dep_ == 'cop'][0]
122
  predicate_name = get_full_text(token)
 
123
  analysis_result.append({
124
  "text": copula.text,
125
- "label": "Copula"
 
 
 
 
 
 
126
  })
 
127
  analysis_result.append({
128
  "text": predicate_name,
129
- "label": "Parte Nominale del Predicato"
 
130
  })
131
  else:
132
- label = "Predicato Verbale"
 
133
  elif dep == 'obl':
134
- label = get_complement_type(token)
 
135
  elif dep in DEP_MAP:
136
- label = DEP_MAP[dep]
137
-
138
- if label:
139
- analysis_result.append({"text": phrase['text'], "label": label})
140
 
 
 
 
 
 
 
 
 
 
 
 
141
  processed_indices.add(index)
142
 
143
  return analysis_result
144
 
145
-
146
- def analyze_clause(clause_tokens):
147
- """Analyzes a single clause (main or subordinate)."""
148
- # Filter out conjunctions that introduce the clause as they are part of the structure, not the clause itself
149
  tokens_in_clause = [t for t in clause_tokens if t.dep_ != 'mark']
150
- return build_phrases(tokens_in_clause)
151
-
152
 
153
  @app.route("/")
154
  def home():
155
- """Provides a simple welcome message for the API root."""
156
- return jsonify({"message": "API is running. Use the /api/analyze endpoint with a POST request."})
157
 
158
  @app.route('/api/analyze', methods=['POST'])
159
  def analyze_sentence():
160
- """Main endpoint to receive a sentence and return its full logical analysis."""
161
  try:
162
  data = request.get_json()
163
  if not data or 'sentence' not in data:
164
- return jsonify({"error": "Sentence not provided in JSON payload"}), 400
165
 
166
  sentence = data['sentence']
167
  doc = nlp(sentence)
@@ -169,49 +189,54 @@ def analyze_sentence():
169
  main_clause_tokens = []
170
  subordinate_clauses = []
171
 
172
- # Identify subordinate clauses first
173
  for token in doc:
174
- # Subordinate clauses are identified by specific dependency relations
175
  if token.dep_ in ["acl:relcl", "advcl", "ccomp", "csubj"]:
176
- # The subtree of the token constitutes the subordinate clause
177
  sub_clause_tokens = list(token.subtree)
178
- sub_clause_type = DEP_MAP.get(token.dep_, "Proposizione Subordinata")
179
 
180
- # Find the introducing element (e.g., 'che', 'quando', 'perché')
181
  marker = [child for child in token.children if child.dep_ == 'mark']
182
  intro = marker[0].text if marker else ""
183
 
184
  subordinate_clauses.append({
185
- "type": sub_clause_type,
186
  "text": " ".join(t.text for t in sub_clause_tokens),
187
  "intro": intro,
188
- "analysis": analyze_clause(sub_clause_tokens)
189
  })
190
 
191
- # Tokens not in any subordinate clause belong to the main clause
192
  subordinate_indices = {token.i for clause in subordinate_clauses for token in nlp(clause["text"])}
193
  main_clause_tokens = [token for token in doc if token.i not in subordinate_indices]
194
 
195
- # Final structured result
 
 
 
 
 
 
 
196
  final_analysis = {
 
197
  "main_clause": {
198
  "text": " ".join(t.text for t in main_clause_tokens if not t.is_punct),
199
- "analysis": analyze_clause(main_clause_tokens)
200
  },
201
- "subordinate_clauses": subordinate_clauses
 
202
  }
203
 
204
  return jsonify(final_analysis)
205
 
206
  except Exception as e:
207
- # Log the full error to the console for debugging
208
- print(f"An error occurred during analysis: {e}")
209
- import traceback
210
  traceback.print_exc()
211
- return jsonify({"error": "An internal error occurred. Check server logs for details."}), 500
212
 
213
- # The following block is for local development and testing,
214
- # it won't be used when deployed with Gunicorn.
215
  if __name__ == '__main__':
216
- # Use a port that is not default 5000 to avoid conflicts
217
- app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), debug=True)
 
 
2
  from flask import Flask, request, jsonify
3
  from flask_cors import CORS
4
  import spacy
5
+ import traceback
6
 
7
  # --- CORRECTED MODEL LOADING SECTION ---
 
 
 
8
  try:
9
+ # Laad het Italiaanse model van spaCy
10
  nlp = spacy.load("it_core_news_sm")
11
  except OSError:
12
  raise RuntimeError(
13
  "Could not find the 'it_core_news_sm' model. "
14
  "Please ensure it is listed and installed from your requirements.txt file."
15
  )
 
16
  # --- END SECTION ---
17
 
18
+ # Initialiseer de Flask-app
19
  app = Flask(__name__)
20
 
21
+ # Schakel Cross-Origin Resource Sharing (CORS) in
22
  CORS(app)
23
 
24
+ # Een mapping van spaCy dependency-labels naar onze logische analyse-labels met uitleg
25
  DEP_MAP = {
26
+ "nsubj": {"label": "Soggetto", "description": "Indica chi o cosa compie l'azione o si trova in un certo stato."},
27
+ "ROOT": {"label": "Predicato Verbale", "description": "Esprime l'azione o lo stato del soggetto."},
28
+ "obj": {"label": "Complemento Oggetto", "description": "Indica l'oggetto diretto dell'azione del verbo."},
29
+ "iobj": {"label": "Complemento di Termine", "description": "Indica a chi o a cosa è destinata l'azione."},
30
+ "obl": {"label": "Complemento Indiretto", "description": "Fornisce informazioni aggiuntive come luogo, tempo, modo, causa, etc."},
31
+ "nmod": {"label": "Complemento di Specificazione", "description": "Specifica o chiarisce il significato del nome a cui si riferisce."},
32
+ "amod": {"label": "Attributo", "description": "Aggettivo che qualifica un nome."},
33
+ "advmod": {"label": "Complemento Avverbiale", "description": "Modifica il significato di un verbo, aggettivo o altro avverbio."},
34
+ "appos": {"label": "Apposizione", "description": "Nome che ne chiarisce un altro."},
35
+ "acl:relcl": {"label": "Proposizione Subordinata Relativa", "description": "Frase che espande un nome, introdotta da un pronome relativo."},
36
+ "advcl": {"label": "Proposizione Subordinata Avverbiale", "description": "Frase che funziona come un avverbio, modificando il verbo della principale."},
37
+ "ccomp": {"label": "Proposizione Subordinata Oggettiva", "description": "Frase che funge da complemento oggetto del verbo della principale."},
38
+ "csubj": {"label": "Proposizione Subordinata Soggettiva", "description": "Frase che funge da soggetto del verbo della principale."}
39
  }
40
 
41
def get_complement_type_with_details(token):
    """Classify an indirect complement from its governing preposition.

    Inspects the `case` child of *token* (falling back to the head's
    children for some `obl` structures) and maps the Italian preposition
    to a complement label plus a short explanation.
    """
    # Locate the preposition attached directly to this token.
    preposition = next(
        (child.text.lower() for child in token.children if child.dep_ == "case"),
        "",
    )

    # Fallback: in some structures the preposition hangs off the head token.
    if not preposition and token.head.dep_ == 'obl':
        preposition = next(
            (child.text.lower() for child in token.head.children if child.dep_ == "case"),
            "",
        )

    # "da" is special-cased first: a passive auxiliary on the head turns it
    # into an agent complement instead of a motion-from-place one.
    if preposition in ("da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"):
        has_passive_aux = any(c.dep_ == 'aux:pass' for c in token.head.children)
        if has_passive_aux:
            return {"label": "Complemento d'Agente", "description": "Indica da chi è compiuta l'azione in una frase passiva."}
        return {"label": "Complemento di Moto da Luogo", "description": "Indica il luogo da cui inizia un movimento."}

    # Simple preposition groups map straight to a labelled result.
    complement_table = (
        (("di", "del", "dello", "della", "dei", "degli", "delle"),
         {"label": "Complemento di Specificazione", "description": "Risponde alla domanda 'di chi?', 'di che cosa?'."}),
        (("a", "al", "allo", "alla", "ai", "agli", "alle"),
         {"label": "Complemento di Termine", "description": "Risponde alla domanda 'a chi?', 'a che cosa?'."}),
        (("in", "nel", "nello", "nella", "nei", "negli", "nelle"),
         {"label": "Complemento di Stato in Luogo", "description": "Indica il luogo in cui si svolge un'azione o ci si trova."}),
        (("con", "col", "coi"),
         {"label": "Complemento di Compagnia o Mezzo", "description": "Indica la persona/animale con cui si compie l'azione o lo strumento utilizzato."}),
        (("su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"),
         {"label": "Complemento di Argomento o Luogo", "description": "Indica l'argomento di cui si parla o il luogo su cui si trova qualcosa."}),
        (("per",),
         {"label": "Complemento di Fine o Causa", "description": "Indica lo scopo o la causa di un'azione."}),
        (("tra", "fra"),
         {"label": "Complemento di Luogo o Tempo (Partitivo)", "description": "Indica una posizione intermedia o una scelta all'interno di un gruppo."}),
    )
    for prepositions, result in complement_table:
        if preposition in prepositions:
            return result

    # Default when no specific preposition was recognised.
    return {"label": "Complemento Indiretto", "description": "Fornisce un'informazione generica non classificata in modo più specifico."}
 
79
  def get_full_text(token):
80
+ """Bouwt recursief de volledige tekst van een zinsdeel op, beginnend bij een hoofdtoken."""
81
+ # Verzamel het hoofdtoken en de direct gerelateerde modifiers (determiners, adjectieven, voorzetsels)
82
+ phrase_tokens = [token] + sorted([t for t in token.children if t.dep_ in ('det', 'amod', 'case', 'advmod')], key=lambda x: x.i)
83
+ # Sorteer alle tokens op basis van hun positie in de zin om de juiste volgorde te krijgen
 
84
  phrase_tokens.sort(key=lambda x: x.i)
85
  return " ".join(t.text for t in phrase_tokens)
86
 
87
+ def build_phrases_with_details(tokens):
88
+ """Voegt tokens samen tot betekenisvolle grammaticale zinsdelen met gedetailleerde uitleg."""
89
  phrase_map = {}
90
 
91
+ # Maak een map van belangrijke tokens (hoofden van zinsdelen)
92
  for token in tokens:
93
+ # Filter onbelangrijke tokens uit die later worden samengevoegd
94
  if token.dep_ not in ['det', 'case', 'amod', 'punct', 'aux', 'cop', 'mark']:
95
  phrase_map[token.i] = {
96
  "text": get_full_text(token),
97
+ # Voeg gedetailleerde grammaticale informatie toe met uitleg
98
+ "token_details": {
99
+ "lemma": token.lemma_,
100
+ "pos": f"{token.pos_}: {spacy.explain(token.pos_)}",
101
+ "tag": f"{token.tag_}: {spacy.explain(token.tag_)}",
102
+ "morph": str(token.morph) if token.morph else "Non disponibile"
103
+ },
104
+ "label_info": {},
105
  "token": token
106
  }
107
 
 
108
  analysis_result = []
109
  processed_indices = set()
110
 
 
114
 
115
  token = phrase['token']
116
  dep = token.dep_
117
+ label_info = {}
118
 
119
  if dep == "ROOT":
120
+ # Controleer op een naamwoordelijk gezegde (bv. "è bello")
121
  is_nominal = any(c.dep_ == 'cop' for c in token.children)
122
  if is_nominal:
123
  copula = [c for c in token.children if c.dep_ == 'cop'][0]
124
  predicate_name = get_full_text(token)
125
+ # Voeg de copula apart toe
126
  analysis_result.append({
127
  "text": copula.text,
128
+ "label_info": {"label": "Copula", "description": "Verbo 'essere' che collega il soggetto alla parte nominale."},
129
+ "token_details": {
130
+ "lemma": copula.lemma_,
131
+ "pos": f"{copula.pos_}: {spacy.explain(copula.pos_)}",
132
+ "tag": f"{copula.tag_}: {spacy.explain(copula.tag_)}",
133
+ "morph": str(copula.morph) if copula.morph else "Non disponibile"
134
+ }
135
  })
136
+ # Voeg het naamwoordelijk deel van het gezegde toe
137
  analysis_result.append({
138
  "text": predicate_name,
139
+ "label_info": {"label": "Parte Nominale del Predicato", "description": "Aggettivo o nome che descrive il soggetto."},
140
+ "token_details": phrase["token_details"]
141
  })
142
  else:
143
+ # Het is een werkwoordelijk gezegde
144
+ label_info = DEP_MAP.get(dep, {})
145
  elif dep == 'obl':
146
+ # Gebruik de speciale functie om het type indirect complement te bepalen
147
+ label_info = get_complement_type_with_details(token)
148
  elif dep in DEP_MAP:
149
+ # Haal het label en de beschrijving op uit de map
150
+ label_info = DEP_MAP[dep]
 
 
151
 
152
+ # Voeg het geanalyseerde zinsdeel toe aan de resultatenlijst
153
+ if label_info:
154
+ phrase_to_add = {
155
+ "text": phrase['text'],
156
+ "label_info": label_info
157
+ }
158
+ # Voeg de token-details toe als ze bestaan
159
+ if phrase.get("token_details"):
160
+ phrase_to_add["token_details"] = phrase["token_details"]
161
+ analysis_result.append(phrase_to_add)
162
+
163
  processed_indices.add(index)
164
 
165
  return analysis_result
166
 
167
def analyze_clause_with_details(clause_tokens):
    """Analyze a single clause (main or subordinate) and return its phrases.

    Markers (e.g. "che", "quando") introduce the clause but are not part of
    its internal logical analysis, so they are dropped before phrase building.
    """
    content_tokens = [tok for tok in clause_tokens if tok.dep_ != 'mark']
    return build_phrases_with_details(content_tokens)
 
172
 
173
@app.route("/")
def home():
    """Health-check / welcome endpoint for the API root."""
    welcome = {"message": "API for logical analysis is running. Use the /api/analyze endpoint."}
    return jsonify(welcome)
177
 
178
@app.route('/api/analyze', methods=['POST'])
def analyze_sentence():
    """Main endpoint: receive a sentence and return its full logical analysis.

    Expects JSON ``{"sentence": "..."}``; returns the main clause,
    subordinate clauses (each with its own analysis) and named entities.
    Responds 400 on a missing sentence, 500 on any internal failure.
    """
    try:
        data = request.get_json()
        if not data or 'sentence' not in data:
            return jsonify({"error": "Sentence not provided"}), 400

        sentence = data['sentence']
        doc = nlp(sentence)

        subordinate_clauses = []
        subordinate_indices = set()

        # Identify and separate subordinate clauses.
        for token in doc:
            if token.dep_ in ["acl:relcl", "advcl", "ccomp", "csubj"]:
                sub_clause_tokens = list(token.subtree)
                # BUG FIX: record the subtree tokens' ORIGINAL document
                # indices here. The previous code re-parsed the clause text
                # with nlp(clause["text"]), whose token indices start at 0
                # and do not line up with `doc`, so main-clause extraction
                # removed the wrong tokens.
                subordinate_indices.update(t.i for t in sub_clause_tokens)

                sub_clause_type_info = DEP_MAP.get(token.dep_, {"label": "Proposizione Subordinata", "description": "Una frase che dipende da un'altra."})

                # Find the introducing word (e.g. "che", "quando", "perché").
                marker = [child for child in token.children if child.dep_ == 'mark']
                intro = marker[0].text if marker else ""

                subordinate_clauses.append({
                    "type_info": sub_clause_type_info,
                    "text": " ".join(t.text for t in sub_clause_tokens),
                    "intro": intro,
                    "analysis": analyze_clause_with_details(sub_clause_tokens)
                })

        # Main-clause tokens are those not claimed by any subordinate clause.
        main_clause_tokens = [token for token in doc if token.i not in subordinate_indices]

        # Extract named entities, each with spaCy's explanation of its label.
        named_entities = [{
            "text": ent.text,
            "label": ent.label_,
            "explanation": spacy.explain(ent.label_)
        } for ent in doc.ents]

        # Assemble the final structured result.
        final_analysis = {
            "full_sentence": sentence,
            "main_clause": {
                "text": " ".join(t.text for t in main_clause_tokens if not t.is_punct),
                "analysis": analyze_clause_with_details(main_clause_tokens)
            },
            "subordinate_clauses": subordinate_clauses,
            "named_entities": named_entities
        }

        return jsonify(final_analysis)

    except Exception as e:
        # API boundary handler: log the full traceback, return a generic 500.
        print(f"Error during analysis: {e}")
        traceback.print_exc()
        return jsonify({"error": "An internal error occurred."}), 500
238
 
 
 
239
if __name__ == '__main__':
    # Local development entry point; production runs under Gunicorn and
    # never reaches this block. Port is taken from the environment for
    # deployment convenience.
    # NOTE(review): debug=True is fine locally but must never be exposed
    # publicly — the Werkzeug debugger allows arbitrary code execution.
    port = int(os.environ.get("PORT", 8080))
    app.run(host="0.0.0.0", port=port, debug=True)