devusman committed on
Commit
31edf0b
·
1 Parent(s): 4f5a1e9

update for explanation

Browse files
Files changed (1) hide show
  1. app.py +113 -88
app.py CHANGED
@@ -2,55 +2,51 @@ import os
2
  from flask import Flask, request, jsonify
3
  from flask_cors import CORS
4
  import spacy
 
5
 
6
  # --- CORRECTED MODEL LOADING SECTION ---
7
- # This approach loads the model by its package name. It is more robust because
8
- # the model is now managed as a dependency in requirements.txt,
9
- # removing the need to manually place a model folder next to the script.
10
  try:
 
11
  nlp = spacy.load("it_core_news_sm")
12
  except OSError:
13
  raise RuntimeError(
14
  "Could not find the 'it_core_news_sm' model. "
15
  "Please ensure it is listed and installed from your requirements.txt file."
16
  )
17
-
18
  # --- END SECTION ---
19
 
20
- # Initialize the Flask app
21
  app = Flask(__name__)
22
 
23
- # Enable Cross-Origin Resource Sharing (CORS) to allow your frontend to call this API
24
  CORS(app)
25
 
26
- # A mapping from spaCy dependency labels to our logical analysis labels
27
  DEP_MAP = {
28
- "nsubj": "Soggetto",
29
- "ROOT": "Predicato Verbale",
30
- "obj": "Complemento Oggetto",
31
- "iobj": "Complemento di Termine",
32
- "obl": "Complemento Indiretto",
33
- "nmod": "Complemento di Specificazione",
34
- "amod": "Attributo",
35
- "advmod": "Complemento Avverbiale",
36
- "appos": "Apposizione",
37
- "acl:relcl": "Proposizione Subordinata Relativa",
38
- "advcl": "Proposizione Subordinata Avverbiale",
39
- "ccomp": "Proposizione Subordinata Oggettiva",
40
- "csubj": "Proposizione Subordinata Soggettiva"
41
  }
42
 
43
- def get_complement_type(token):
44
- """Refine the complement type based on the preceding preposition."""
45
  preposition = ""
46
- # Look for a preposition (`case`) attached to this token
47
  for child in token.children:
48
  if child.dep_ == "case":
49
  preposition = child.text.lower()
50
  break
51
-
52
- # If no preposition is found on the children, check the head token.
53
- # This helps in cases of complex prepositional phrases.
54
  if not preposition and token.head.dep_ == 'obl':
55
  for child in token.head.children:
56
  if child.dep_ == "case":
@@ -58,51 +54,57 @@ def get_complement_type(token):
58
  break
59
 
60
  if preposition in ["di", "del", "dello", "della", "dei", "degli", "delle"]:
61
- return "Complemento di Specificazione"
62
  if preposition in ["a", "al", "allo", "alla", "ai", "agli", "alle"]:
63
- return "Complemento di Termine"
64
  if preposition in ["da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"]:
65
- # Check if it's a passive sentence for Complemento d'Agente
66
  if any(child.dep_ == 'aux:pass' for child in token.head.children):
67
- return "Complemento d'Agente"
68
- return "Complemento di Moto da Luogo"
69
  if preposition in ["in", "nel", "nello", "nella", "nei", "negli", "nelle"]:
70
- return "Complemento di Stato in Luogo"
71
  if preposition in ["con", "col", "coi"]:
72
- return "Complemento di Compagnia o Mezzo"
73
  if preposition in ["su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"]:
74
- return "Complemento di Argomento o Luogo"
75
  if preposition in ["per"]:
76
- return "Complemento di Fine o Causa"
77
  if preposition in ["tra", "fra"]:
78
- return "Complemento di Luogo o Tempo (Partitivo)"
79
-
80
- return "Complemento Indiretto"
 
81
 
82
  def get_full_text(token):
83
- """Recursively builds the full text of a phrase starting from a head token."""
84
- # Collect the text of the token and all its children that form the phrase
85
- # (like articles, adjectives, etc.)
86
- phrase_tokens = [token] + [t for t in token.children if t.dep_ in ('det', 'amod', 'case', 'advmod')]
87
- # Sort by index to maintain original order
88
  phrase_tokens.sort(key=lambda x: x.i)
89
  return " ".join(t.text for t in phrase_tokens)
90
 
91
- def build_phrases(tokens):
92
- """Merges tokens into meaningful grammatical phrases."""
93
  phrase_map = {}
94
 
95
- # First pass: map head tokens to their full text
96
  for token in tokens:
97
- # The head of a phrase is usually a noun, verb, or adjective
98
  if token.dep_ not in ['det', 'case', 'amod', 'punct', 'aux', 'cop', 'mark']:
99
  phrase_map[token.i] = {
100
  "text": get_full_text(token),
101
- "label": "", # Label will be assigned next
 
 
 
 
 
 
 
102
  "token": token
103
  }
104
 
105
- # Second pass: assign labels and structure
106
  analysis_result = []
107
  processed_indices = set()
108
 
@@ -112,56 +114,74 @@ def build_phrases(tokens):
112
 
113
  token = phrase['token']
114
  dep = token.dep_
115
- label = ""
116
 
117
  if dep == "ROOT":
118
- # Check for nominal predicate (e.g., "è bello")
119
  is_nominal = any(c.dep_ == 'cop' for c in token.children)
120
  if is_nominal:
121
  copula = [c for c in token.children if c.dep_ == 'cop'][0]
122
  predicate_name = get_full_text(token)
 
123
  analysis_result.append({
124
  "text": copula.text,
125
- "label": "Copula"
 
 
 
 
 
 
126
  })
 
127
  analysis_result.append({
128
  "text": predicate_name,
129
- "label": "Parte Nominale del Predicato"
 
130
  })
131
  else:
132
- label = "Predicato Verbale"
 
133
  elif dep == 'obl':
134
- label = get_complement_type(token)
 
135
  elif dep in DEP_MAP:
136
- label = DEP_MAP[dep]
137
-
138
- if label:
139
- analysis_result.append({"text": phrase['text'], "label": label})
140
 
 
 
 
 
 
 
 
 
 
 
 
141
  processed_indices.add(index)
142
 
143
  return analysis_result
144
 
145
-
146
- def analyze_clause(clause_tokens):
147
- """Analyzes a single clause (main or subordinate)."""
148
- # Filter out conjunctions that introduce the clause as they are part of the structure, not the clause itself
149
  tokens_in_clause = [t for t in clause_tokens if t.dep_ != 'mark']
150
- return build_phrases(tokens_in_clause)
151
-
152
 
153
  @app.route("/")
154
  def home():
155
- """Provides a simple welcome message for the API root."""
156
- return jsonify({"message": "API is running. Use the /api/analyze endpoint with a POST request."})
157
 
158
  @app.route('/api/analyze', methods=['POST'])
159
  def analyze_sentence():
160
- """Main endpoint to receive a sentence and return its full logical analysis."""
161
  try:
162
  data = request.get_json()
163
  if not data or 'sentence' not in data:
164
- return jsonify({"error": "Sentence not provided in JSON payload"}), 400
165
 
166
  sentence = data['sentence']
167
  doc = nlp(sentence)
@@ -169,49 +189,54 @@ def analyze_sentence():
169
  main_clause_tokens = []
170
  subordinate_clauses = []
171
 
172
- # Identify subordinate clauses first
173
  for token in doc:
174
- # Subordinate clauses are identified by specific dependency relations
175
  if token.dep_ in ["acl:relcl", "advcl", "ccomp", "csubj"]:
176
- # The subtree of the token constitutes the subordinate clause
177
  sub_clause_tokens = list(token.subtree)
178
- sub_clause_type = DEP_MAP.get(token.dep_, "Proposizione Subordinata")
179
 
180
- # Find the introducing element (e.g., 'che', 'quando', 'perché')
181
  marker = [child for child in token.children if child.dep_ == 'mark']
182
  intro = marker[0].text if marker else ""
183
 
184
  subordinate_clauses.append({
185
- "type": sub_clause_type,
186
  "text": " ".join(t.text for t in sub_clause_tokens),
187
  "intro": intro,
188
- "analysis": analyze_clause(sub_clause_tokens)
189
  })
190
 
191
- # Tokens not in any subordinate clause belong to the main clause
192
  subordinate_indices = {token.i for clause in subordinate_clauses for token in nlp(clause["text"])}
193
  main_clause_tokens = [token for token in doc if token.i not in subordinate_indices]
194
 
195
- # Final structured result
 
 
 
 
 
 
 
196
  final_analysis = {
 
197
  "main_clause": {
198
  "text": " ".join(t.text for t in main_clause_tokens if not t.is_punct),
199
- "analysis": analyze_clause(main_clause_tokens)
200
  },
201
- "subordinate_clauses": subordinate_clauses
 
202
  }
203
 
204
  return jsonify(final_analysis)
205
 
206
  except Exception as e:
207
- # Log the full error to the console for debugging
208
- print(f"An error occurred during analysis: {e}")
209
- import traceback
210
  traceback.print_exc()
211
- return jsonify({"error": "An internal error occurred. Check server logs for details."}), 500
212
 
213
- # The following block is for local development and testing,
214
- # it won't be used when deployed with Gunicorn.
215
  if __name__ == '__main__':
216
- # Use a port that is not default 5000 to avoid conflicts
217
- app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 8080)), debug=True)
 
 
2
  from flask import Flask, request, jsonify
3
  from flask_cors import CORS
4
  import spacy
5
+ import traceback
6
 
7
  # --- CORRECTED MODEL LOADING SECTION ---
 
 
 
8
  try:
9
+ # Laad het Italiaanse model van spaCy
10
  nlp = spacy.load("it_core_news_sm")
11
  except OSError:
12
  raise RuntimeError(
13
  "Could not find the 'it_core_news_sm' model. "
14
  "Please ensure it is listed and installed from your requirements.txt file."
15
  )
 
16
  # --- END SECTION ---
17
 
18
+ # Initialiseer de Flask-app
19
  app = Flask(__name__)
20
 
21
+ # Schakel Cross-Origin Resource Sharing (CORS) in
22
  CORS(app)
23
 
24
+ # Een mapping van spaCy dependency-labels naar onze logische analyse-labels met uitleg
25
  DEP_MAP = {
26
+ "nsubj": {"label": "Soggetto", "description": "Indica chi o cosa compie l'azione o si trova in un certo stato."},
27
+ "ROOT": {"label": "Predicato Verbale", "description": "Esprime l'azione o lo stato del soggetto."},
28
+ "obj": {"label": "Complemento Oggetto", "description": "Indica l'oggetto diretto dell'azione del verbo."},
29
+ "iobj": {"label": "Complemento di Termine", "description": "Indica a chi o a cosa è destinata l'azione."},
30
+ "obl": {"label": "Complemento Indiretto", "description": "Fornisce informazioni aggiuntive come luogo, tempo, modo, causa, etc."},
31
+ "nmod": {"label": "Complemento di Specificazione", "description": "Specifica o chiarisce il significato del nome a cui si riferisce."},
32
+ "amod": {"label": "Attributo", "description": "Aggettivo che qualifica un nome."},
33
+ "advmod": {"label": "Complemento Avverbiale", "description": "Modifica il significato di un verbo, aggettivo o altro avverbio."},
34
+ "appos": {"label": "Apposizione", "description": "Nome che ne chiarisce un altro."},
35
+ "acl:relcl": {"label": "Proposizione Subordinata Relativa", "description": "Frase che espande un nome, introdotta da un pronome relativo."},
36
+ "advcl": {"label": "Proposizione Subordinata Avverbiale", "description": "Frase che funziona come un avverbio, modificando il verbo della principale."},
37
+ "ccomp": {"label": "Proposizione Subordinata Oggettiva", "description": "Frase che funge da complemento oggetto del verbo della principale."},
38
+ "csubj": {"label": "Proposizione Subordinata Soggettiva", "description": "Frase che funge da soggetto del verbo della principale."}
39
  }
40
 
41
def get_complement_type_with_details(token):
    """Classify an indirect complement from its governing preposition.

    Inspects the `case` child of *token* (falling back to the head's
    children for some `obl` structures) and maps the Italian preposition
    to a complement label plus a short explanation.
    """
    # Locate the preposition attached directly to this token.
    preposition = next(
        (child.text.lower() for child in token.children if child.dep_ == "case"),
        "",
    )

    # Fallback: in some structures the preposition hangs off the head token.
    if not preposition and token.head.dep_ == 'obl':
        preposition = next(
            (child.text.lower() for child in token.head.children if child.dep_ == "case"),
            "",
        )

    # "da" is special-cased first: a passive auxiliary on the head turns it
    # into an agent complement instead of a motion-from-place one.
    if preposition in ("da", "dal", "dallo", "dalla", "dai", "dagli", "dalle"):
        has_passive_aux = any(c.dep_ == 'aux:pass' for c in token.head.children)
        if has_passive_aux:
            return {"label": "Complemento d'Agente", "description": "Indica da chi è compiuta l'azione in una frase passiva."}
        return {"label": "Complemento di Moto da Luogo", "description": "Indica il luogo da cui inizia un movimento."}

    # Simple preposition groups map straight to a labelled result.
    complement_table = (
        (("di", "del", "dello", "della", "dei", "degli", "delle"),
         {"label": "Complemento di Specificazione", "description": "Risponde alla domanda 'di chi?', 'di che cosa?'."}),
        (("a", "al", "allo", "alla", "ai", "agli", "alle"),
         {"label": "Complemento di Termine", "description": "Risponde alla domanda 'a chi?', 'a che cosa?'."}),
        (("in", "nel", "nello", "nella", "nei", "negli", "nelle"),
         {"label": "Complemento di Stato in Luogo", "description": "Indica il luogo in cui si svolge un'azione o ci si trova."}),
        (("con", "col", "coi"),
         {"label": "Complemento di Compagnia o Mezzo", "description": "Indica la persona/animale con cui si compie l'azione o lo strumento utilizzato."}),
        (("su", "sul", "sullo", "sulla", "sui", "sugli", "sulle"),
         {"label": "Complemento di Argomento o Luogo", "description": "Indica l'argomento di cui si parla o il luogo su cui si trova qualcosa."}),
        (("per",),
         {"label": "Complemento di Fine o Causa", "description": "Indica lo scopo o la causa di un'azione."}),
        (("tra", "fra"),
         {"label": "Complemento di Luogo o Tempo (Partitivo)", "description": "Indica una posizione intermedia o una scelta all'interno di un gruppo."}),
    )
    for prepositions, result in complement_table:
        if preposition in prepositions:
            return result

    # Default when no specific preposition was recognised.
    return {"label": "Complemento Indiretto", "description": "Fornisce un'informazione generica non classificata in modo più specifico."}
 
79
  def get_full_text(token):
80
+ """Bouwt recursief de volledige tekst van een zinsdeel op, beginnend bij een hoofdtoken."""
81
+ # Verzamel het hoofdtoken en de direct gerelateerde modifiers (determiners, adjectieven, voorzetsels)
82
+ phrase_tokens = [token] + sorted([t for t in token.children if t.dep_ in ('det', 'amod', 'case', 'advmod')], key=lambda x: x.i)
83
+ # Sorteer alle tokens op basis van hun positie in de zin om de juiste volgorde te krijgen
 
84
  phrase_tokens.sort(key=lambda x: x.i)
85
  return " ".join(t.text for t in phrase_tokens)
86
 
87
+ def build_phrases_with_details(tokens):
88
+ """Voegt tokens samen tot betekenisvolle grammaticale zinsdelen met gedetailleerde uitleg."""
89
  phrase_map = {}
90
 
91
+ # Maak een map van belangrijke tokens (hoofden van zinsdelen)
92
  for token in tokens:
93
+ # Filter onbelangrijke tokens uit die later worden samengevoegd
94
  if token.dep_ not in ['det', 'case', 'amod', 'punct', 'aux', 'cop', 'mark']:
95
  phrase_map[token.i] = {
96
  "text": get_full_text(token),
97
+ # Voeg gedetailleerde grammaticale informatie toe met uitleg
98
+ "token_details": {
99
+ "lemma": token.lemma_,
100
+ "pos": f"{token.pos_}: {spacy.explain(token.pos_)}",
101
+ "tag": f"{token.tag_}: {spacy.explain(token.tag_)}",
102
+ "morph": str(token.morph) if token.morph else "Non disponibile"
103
+ },
104
+ "label_info": {},
105
  "token": token
106
  }
107
 
 
108
  analysis_result = []
109
  processed_indices = set()
110
 
 
114
 
115
  token = phrase['token']
116
  dep = token.dep_
117
+ label_info = {}
118
 
119
  if dep == "ROOT":
120
+ # Controleer op een naamwoordelijk gezegde (bv. "è bello")
121
  is_nominal = any(c.dep_ == 'cop' for c in token.children)
122
  if is_nominal:
123
  copula = [c for c in token.children if c.dep_ == 'cop'][0]
124
  predicate_name = get_full_text(token)
125
+ # Voeg de copula apart toe
126
  analysis_result.append({
127
  "text": copula.text,
128
+ "label_info": {"label": "Copula", "description": "Verbo 'essere' che collega il soggetto alla parte nominale."},
129
+ "token_details": {
130
+ "lemma": copula.lemma_,
131
+ "pos": f"{copula.pos_}: {spacy.explain(copula.pos_)}",
132
+ "tag": f"{copula.tag_}: {spacy.explain(copula.tag_)}",
133
+ "morph": str(copula.morph) if copula.morph else "Non disponibile"
134
+ }
135
  })
136
+ # Voeg het naamwoordelijk deel van het gezegde toe
137
  analysis_result.append({
138
  "text": predicate_name,
139
+ "label_info": {"label": "Parte Nominale del Predicato", "description": "Aggettivo o nome che descrive il soggetto."},
140
+ "token_details": phrase["token_details"]
141
  })
142
  else:
143
+ # Het is een werkwoordelijk gezegde
144
+ label_info = DEP_MAP.get(dep, {})
145
  elif dep == 'obl':
146
+ # Gebruik de speciale functie om het type indirect complement te bepalen
147
+ label_info = get_complement_type_with_details(token)
148
  elif dep in DEP_MAP:
149
+ # Haal het label en de beschrijving op uit de map
150
+ label_info = DEP_MAP[dep]
 
 
151
 
152
+ # Voeg het geanalyseerde zinsdeel toe aan de resultatenlijst
153
+ if label_info:
154
+ phrase_to_add = {
155
+ "text": phrase['text'],
156
+ "label_info": label_info
157
+ }
158
+ # Voeg de token-details toe als ze bestaan
159
+ if phrase.get("token_details"):
160
+ phrase_to_add["token_details"] = phrase["token_details"]
161
+ analysis_result.append(phrase_to_add)
162
+
163
  processed_indices.add(index)
164
 
165
  return analysis_result
166
 
167
def analyze_clause_with_details(clause_tokens):
    """Analyze a single clause (main or subordinate) and return its phrases.

    Markers (e.g. "che", "quando") introduce the clause but are not part of
    its internal logical analysis, so they are dropped before phrase building.
    """
    content_tokens = [tok for tok in clause_tokens if tok.dep_ != 'mark']
    return build_phrases_with_details(content_tokens)
 
172
 
173
@app.route("/")
def home():
    """Health-check / welcome endpoint for the API root."""
    welcome = {"message": "API for logical analysis is running. Use the /api/analyze endpoint."}
    return jsonify(welcome)
177
 
178
@app.route('/api/analyze', methods=['POST'])
def analyze_sentence():
    """Main endpoint: receive a sentence and return its full logical analysis.

    Expects JSON ``{"sentence": "..."}``; returns the main clause,
    subordinate clauses (each with its own analysis) and named entities.
    Responds 400 on a missing sentence, 500 on any internal failure.
    """
    try:
        data = request.get_json()
        if not data or 'sentence' not in data:
            return jsonify({"error": "Sentence not provided"}), 400

        sentence = data['sentence']
        doc = nlp(sentence)

        subordinate_clauses = []
        subordinate_indices = set()

        # Identify and separate subordinate clauses.
        for token in doc:
            if token.dep_ in ["acl:relcl", "advcl", "ccomp", "csubj"]:
                sub_clause_tokens = list(token.subtree)
                # BUG FIX: record the subtree tokens' ORIGINAL document
                # indices here. The previous code re-parsed the clause text
                # with nlp(clause["text"]), whose token indices start at 0
                # and do not line up with `doc`, so main-clause extraction
                # removed the wrong tokens.
                subordinate_indices.update(t.i for t in sub_clause_tokens)

                sub_clause_type_info = DEP_MAP.get(token.dep_, {"label": "Proposizione Subordinata", "description": "Una frase che dipende da un'altra."})

                # Find the introducing word (e.g. "che", "quando", "perché").
                marker = [child for child in token.children if child.dep_ == 'mark']
                intro = marker[0].text if marker else ""

                subordinate_clauses.append({
                    "type_info": sub_clause_type_info,
                    "text": " ".join(t.text for t in sub_clause_tokens),
                    "intro": intro,
                    "analysis": analyze_clause_with_details(sub_clause_tokens)
                })

        # Main-clause tokens are those not claimed by any subordinate clause.
        main_clause_tokens = [token for token in doc if token.i not in subordinate_indices]

        # Extract named entities, each with spaCy's explanation of its label.
        named_entities = [{
            "text": ent.text,
            "label": ent.label_,
            "explanation": spacy.explain(ent.label_)
        } for ent in doc.ents]

        # Assemble the final structured result.
        final_analysis = {
            "full_sentence": sentence,
            "main_clause": {
                "text": " ".join(t.text for t in main_clause_tokens if not t.is_punct),
                "analysis": analyze_clause_with_details(main_clause_tokens)
            },
            "subordinate_clauses": subordinate_clauses,
            "named_entities": named_entities
        }

        return jsonify(final_analysis)

    except Exception as e:
        # API boundary handler: log the full traceback, return a generic 500.
        print(f"Error during analysis: {e}")
        traceback.print_exc()
        return jsonify({"error": "An internal error occurred."}), 500
238
 
 
 
239
if __name__ == '__main__':
    # Local development entry point; production runs under Gunicorn and
    # never reaches this block. Port is taken from the environment for
    # deployment convenience.
    # NOTE(review): debug=True is fine locally but must never be exposed
    # publicly — the Werkzeug debugger allows arbitrary code execution.
    port = int(os.environ.get("PORT", 8080))
    app.run(host="0.0.0.0", port=port, debug=True)