JacobWP commited on
Commit
0b721e9
·
verified ·
1 Parent(s): 7f672ca

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +317 -399
  2. requirements.txt +10 -1
app.py CHANGED
@@ -1,419 +1,337 @@
 
 
1
  """
2
- Filters that accept a `Application` as argument.
3
- """
4
-
5
- from __future__ import annotations
6
-
7
- from typing import TYPE_CHECKING, cast
8
-
9
- from prompt_toolkit.application.current import get_app
10
- from prompt_toolkit.cache import memoized
11
- from prompt_toolkit.enums import EditingMode
12
-
13
- from .base import Condition
14
-
15
- if TYPE_CHECKING:
16
- from prompt_toolkit.layout.layout import FocusableElement
17
-
18
-
19
- __all__ = [
20
- "has_arg",
21
- "has_completions",
22
- "completion_is_selected",
23
- "has_focus",
24
- "buffer_has_focus",
25
- "has_selection",
26
- "has_suggestion",
27
- "has_validation_error",
28
- "is_done",
29
- "is_read_only",
30
- "is_multiline",
31
- "renderer_height_is_known",
32
- "in_editing_mode",
33
- "in_paste_mode",
34
- "vi_mode",
35
- "vi_navigation_mode",
36
- "vi_insert_mode",
37
- "vi_insert_multiple_mode",
38
- "vi_replace_mode",
39
- "vi_selection_mode",
40
- "vi_waiting_for_text_object_mode",
41
- "vi_digraph_mode",
42
- "vi_recording_macro",
43
- "emacs_mode",
44
- "emacs_insert_mode",
45
- "emacs_selection_mode",
46
- "shift_selection_mode",
47
- "is_searching",
48
- "control_is_searchable",
49
- "vi_search_direction_reversed",
50
- ]
51
-
52
-
53
- # NOTE: `has_focus` below should *not* be `memoized`. It can reference any user
54
- # control. For instance, if we would continuously create new
55
- # `PromptSession` instances, then previous instances won't be released,
56
- # because this memoize (which caches results in the global scope) will
57
- # still refer to each instance.
58
- def has_focus(value: FocusableElement) -> Condition:
59
- """
60
- Enable when this buffer has the focus.
61
- """
62
- from prompt_toolkit.buffer import Buffer
63
- from prompt_toolkit.layout import walk
64
- from prompt_toolkit.layout.containers import Container, Window, to_container
65
- from prompt_toolkit.layout.controls import UIControl
66
-
67
- if isinstance(value, str):
68
-
69
- def test() -> bool:
70
- return get_app().current_buffer.name == value
71
-
72
- elif isinstance(value, Buffer):
73
-
74
- def test() -> bool:
75
- return get_app().current_buffer == value
76
-
77
- elif isinstance(value, UIControl):
78
-
79
- def test() -> bool:
80
- return get_app().layout.current_control == value
81
-
82
- else:
83
- value = to_container(value)
84
-
85
- if isinstance(value, Window):
86
-
87
- def test() -> bool:
88
- return get_app().layout.current_window == value
89
-
90
- else:
91
-
92
- def test() -> bool:
93
- # Consider focused when any window inside this container is
94
- # focused.
95
- current_window = get_app().layout.current_window
96
-
97
- for c in walk(cast(Container, value)):
98
- if isinstance(c, Window) and c == current_window:
99
- return True
100
- return False
101
-
102
- @Condition
103
- def has_focus_filter() -> bool:
104
- return test()
105
-
106
- return has_focus_filter
107
-
108
-
109
- @Condition
110
- def buffer_has_focus() -> bool:
111
- """
112
- Enabled when the currently focused control is a `BufferControl`.
113
- """
114
- return get_app().layout.buffer_has_focus
115
-
116
-
117
- @Condition
118
- def has_selection() -> bool:
119
- """
120
- Enable when the current buffer has a selection.
121
- """
122
- return bool(get_app().current_buffer.selection_state)
123
-
124
-
125
- @Condition
126
- def has_suggestion() -> bool:
127
- """
128
- Enable when the current buffer has a suggestion.
129
- """
130
- buffer = get_app().current_buffer
131
- return buffer.suggestion is not None and buffer.suggestion.text != ""
132
-
133
-
134
- @Condition
135
- def has_completions() -> bool:
136
- """
137
- Enable when the current buffer has completions.
138
- """
139
- state = get_app().current_buffer.complete_state
140
- return state is not None and len(state.completions) > 0
141
-
142
-
143
- @Condition
144
- def completion_is_selected() -> bool:
145
- """
146
- True when the user selected a completion.
147
- """
148
- complete_state = get_app().current_buffer.complete_state
149
- return complete_state is not None and complete_state.current_completion is not None
150
 
 
 
151
 
152
- @Condition
153
- def is_read_only() -> bool:
154
- """
155
- True when the current buffer is read only.
156
- """
157
- return get_app().current_buffer.read_only()
158
-
159
-
160
- @Condition
161
- def is_multiline() -> bool:
162
- """
163
- True when the current buffer has been marked as multiline.
164
- """
165
- return get_app().current_buffer.multiline()
166
-
167
-
168
- @Condition
169
- def has_validation_error() -> bool:
170
- "Current buffer has validation error."
171
- return get_app().current_buffer.validation_error is not None
172
-
173
-
174
- @Condition
175
- def has_arg() -> bool:
176
- "Enable when the input processor has an 'arg'."
177
- return get_app().key_processor.arg is not None
178
-
179
-
180
- @Condition
181
- def is_done() -> bool:
182
- """
183
- True when the CLI is returning, aborting or exiting.
184
- """
185
- return get_app().is_done
186
-
187
-
188
- @Condition
189
- def renderer_height_is_known() -> bool:
190
- """
191
- Only True when the renderer knows it's real height.
192
-
193
- (On VT100 terminals, we have to wait for a CPR response, before we can be
194
- sure of the available height between the cursor position and the bottom of
195
- the terminal. And usually it's nicer to wait with drawing bottom toolbars
196
- until we receive the height, in order to avoid flickering -- first drawing
197
- somewhere in the middle, and then again at the bottom.)
198
- """
199
- return get_app().renderer.height_is_known
200
-
201
 
202
- @memoized()
203
- def in_editing_mode(editing_mode: EditingMode) -> Condition:
204
- """
205
- Check whether a given editing mode is active. (Vi or Emacs.)
206
- """
207
 
208
- @Condition
209
- def in_editing_mode_filter() -> bool:
210
- return get_app().editing_mode == editing_mode
211
 
212
- return in_editing_mode_filter
 
 
 
 
213
 
 
 
214
 
215
- @Condition
216
- def in_paste_mode() -> bool:
217
- return get_app().paste_mode()
 
 
218
 
 
 
 
219
 
220
- @Condition
221
- def vi_mode() -> bool:
222
- return get_app().editing_mode == EditingMode.VI
223
 
 
 
 
 
224
 
225
- @Condition
226
- def vi_navigation_mode() -> bool:
227
- """
228
- Active when the set for Vi navigation key bindings are active.
229
- """
230
- from prompt_toolkit.key_binding.vi_state import InputMode
231
 
232
- app = get_app()
233
 
234
- if (
235
- app.editing_mode != EditingMode.VI
236
- or app.vi_state.operator_func
237
- or app.vi_state.waiting_for_digraph
238
- or app.current_buffer.selection_state
239
- ):
240
- return False
241
 
242
- return (
243
- app.vi_state.input_mode == InputMode.NAVIGATION
244
- or app.vi_state.temporary_navigation_mode
245
- or app.current_buffer.read_only()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  )
247
-
248
-
249
- @Condition
250
- def vi_insert_mode() -> bool:
251
- from prompt_toolkit.key_binding.vi_state import InputMode
252
-
253
- app = get_app()
254
-
255
- if (
256
- app.editing_mode != EditingMode.VI
257
- or app.vi_state.operator_func
258
- or app.vi_state.waiting_for_digraph
259
- or app.current_buffer.selection_state
260
- or app.vi_state.temporary_navigation_mode
261
- or app.current_buffer.read_only()
262
- ):
263
- return False
264
-
265
- return app.vi_state.input_mode == InputMode.INSERT
266
-
267
-
268
- @Condition
269
- def vi_insert_multiple_mode() -> bool:
270
- from prompt_toolkit.key_binding.vi_state import InputMode
271
-
272
- app = get_app()
273
-
274
- if (
275
- app.editing_mode != EditingMode.VI
276
- or app.vi_state.operator_func
277
- or app.vi_state.waiting_for_digraph
278
- or app.current_buffer.selection_state
279
- or app.vi_state.temporary_navigation_mode
280
- or app.current_buffer.read_only()
281
- ):
282
- return False
283
-
284
- return app.vi_state.input_mode == InputMode.INSERT_MULTIPLE
285
-
286
-
287
- @Condition
288
- def vi_replace_mode() -> bool:
289
- from prompt_toolkit.key_binding.vi_state import InputMode
290
-
291
- app = get_app()
292
-
293
- if (
294
- app.editing_mode != EditingMode.VI
295
- or app.vi_state.operator_func
296
- or app.vi_state.waiting_for_digraph
297
- or app.current_buffer.selection_state
298
- or app.vi_state.temporary_navigation_mode
299
- or app.current_buffer.read_only()
300
- ):
301
- return False
302
-
303
- return app.vi_state.input_mode == InputMode.REPLACE
304
-
305
-
306
- @Condition
307
- def vi_replace_single_mode() -> bool:
308
- from prompt_toolkit.key_binding.vi_state import InputMode
309
-
310
- app = get_app()
311
-
312
- if (
313
- app.editing_mode != EditingMode.VI
314
- or app.vi_state.operator_func
315
- or app.vi_state.waiting_for_digraph
316
- or app.current_buffer.selection_state
317
- or app.vi_state.temporary_navigation_mode
318
- or app.current_buffer.read_only()
319
- ):
320
- return False
321
-
322
- return app.vi_state.input_mode == InputMode.REPLACE_SINGLE
323
-
324
-
325
- @Condition
326
- def vi_selection_mode() -> bool:
327
- app = get_app()
328
- if app.editing_mode != EditingMode.VI:
329
- return False
330
-
331
- return bool(app.current_buffer.selection_state)
332
-
333
-
334
- @Condition
335
- def vi_waiting_for_text_object_mode() -> bool:
336
- app = get_app()
337
- if app.editing_mode != EditingMode.VI:
338
- return False
339
-
340
- return app.vi_state.operator_func is not None
341
-
342
-
343
- @Condition
344
- def vi_digraph_mode() -> bool:
345
- app = get_app()
346
- if app.editing_mode != EditingMode.VI:
347
- return False
348
-
349
- return app.vi_state.waiting_for_digraph
350
-
351
-
352
- @Condition
353
- def vi_recording_macro() -> bool:
354
- "When recording a Vi macro."
355
- app = get_app()
356
- if app.editing_mode != EditingMode.VI:
357
- return False
358
-
359
- return app.vi_state.recording_register is not None
360
-
361
-
362
- @Condition
363
- def emacs_mode() -> bool:
364
- "When the Emacs bindings are active."
365
- return get_app().editing_mode == EditingMode.EMACS
366
-
367
-
368
- @Condition
369
- def emacs_insert_mode() -> bool:
370
- app = get_app()
371
- if (
372
- app.editing_mode != EditingMode.EMACS
373
- or app.current_buffer.selection_state
374
- or app.current_buffer.read_only()
375
- ):
376
- return False
377
- return True
378
-
379
-
380
- @Condition
381
- def emacs_selection_mode() -> bool:
382
- app = get_app()
383
- return bool(
384
- app.editing_mode == EditingMode.EMACS and app.current_buffer.selection_state
385
  )
386
-
387
-
388
- @Condition
389
- def shift_selection_mode() -> bool:
390
- app = get_app()
391
- return bool(
392
- app.current_buffer.selection_state
393
- and app.current_buffer.selection_state.shift_mode
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  )
395
-
396
-
397
- @Condition
398
- def is_searching() -> bool:
399
- "When we are searching."
400
- app = get_app()
401
- return app.layout.is_searching
402
-
403
-
404
- @Condition
405
- def control_is_searchable() -> bool:
406
- "When the current UIControl is searchable."
407
- from prompt_toolkit.layout.controls import BufferControl
408
-
409
- control = get_app().layout.current_control
410
-
411
- return (
412
- isinstance(control, BufferControl) and control.search_buffer_control is not None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  )
 
 
 
 
 
 
414
 
415
-
416
- @Condition
417
- def vi_search_direction_reversed() -> bool:
418
- "When the '/' and '?' key bindings for Vi-style searching have been reversed."
419
- return get_app().reverse_vi_search_direction()
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
  """
4
+ Created on Mon May 19 16:49:22 2025
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ @author: jacobwildt-persson
7
+ """
8
 
9
+ #!/usr/bin/env python3
10
+ # -*- coding: utf-8 -*-
11
+ # -----------------------------------------------
12
+ # Requirements & Setup Instructions
13
+ # -----------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # Python version:
16
+ # Requires Python 3.10 or later (tested on 3.12)
 
 
 
17
 
 
 
 
18
 
19
+ # Run your script inside a virtual environment (e.g. conda or venv) to avoid conflicts.
20
+ # Recreate the environment with these commands in the terminal:
21
+ # conda env create -f environment.yml
22
+ # conda activate sprakenv
23
+ #
24
 
25
+ # Install all required packages:
26
+ # Run these commands in the terminal:
27
 
28
+ # pip install --upgrade gradio
29
+ # pip install pdfplumber
30
+ # pip install nltk
31
+ # pip install transformers
32
+ # pip install -U spacy
33
 
34
+ # Download language models:
35
+ # python -m spacy download es_core_news_lg
36
+ # python -m spacy download en_core_web_lg # if you add NER for English
37
 
38
+ # Check Gradio version used:
39
+ # import gradio as gr
40
+ # print(gr.__version__) # Gradio version 4.18.0
41
 
42
+ # 🔗 Reference: Gradio Quickstart Guide
43
+ # https://www.gradio.app/guides/quickstart
44
+ #Hugging Face
45
+ # https://huggingface.co/models
46
 
47
+ # English API model
48
+ # LanguageTool API: https://languagetool.org/http-api/swagger
 
 
 
 
49
 
 
50
 
 
 
 
 
 
 
 
51
 
52
+ # Remember!
53
+ # Run your script inside a virtual environment (e.g. conda or venv) to avoid conflicts.
54
+ # Recreate the environment with these commands in the terminal:
55
+ # conda env create -f environment.yml
56
+ # conda activate sprakenv
57
+ # python -m spacy download es_core_news_lg
58
+ #python -m nltk.downloader punkt wordnet
59
+ # -----------------------------------------------
60
+ """
61
+ Language learning app with Gradio UI and support for multiple users:
62
+ - Import text from file (.txt/.csv/.pdf) or manual text input
63
+ - Grammar correction via transformers (Spanish) or LanguageTool API (English)
64
+ - Analyze text (known/unknown words) per user & language
65
+ - Save unknown words as known
66
+ - Generate coherent practice sentence (Spanish & English)
67
+ - Log grammar corrections and practice sentence suggestions to CSV
68
+ """
69
+ import os
70
+ import datetime
71
+ import sqlite3
72
+ import requests
73
+ import random
74
+ import pandas as pd
75
+ import pdfplumber
76
+ import spacy
77
+ import csv
78
+ # SQLite is accessed via the built-in sqlite3 module (no need to install sqlite3-binary)
79
+ import sqlite3
80
+
81
+ from nltk.tokenize import word_tokenize
82
+ from nltk.stem import WordNetLemmatizer
83
+ from transformers import AutoTokenizer, BartForConditionalGeneration, AutoModelForCausalLM
84
+ import gradio as gr
85
+ import gradio_client.utils as _gcu
86
+
87
+ # --- PATCH for Gradio utils schema bug ---
88
# Keep references to the library implementations so the wrappers below can
# delegate to them.
_orig_json = _gcu.json_schema_to_python_type
_orig_get = _gcu.get_type


def _patched_json_to_py(schema, defs=None):
    """Convert a JSON schema to a Python type string, falling back to "any".

    Non-dict schemas and any exception raised by the original converter
    both yield the string "any" instead of propagating.
    """
    result = "any"
    if isinstance(schema, dict):
        try:
            result = _orig_json(schema, defs)
        except Exception:
            result = "any"
    return result


def _patched_get_type(schema):
    """Return the schema's type via the original helper, or "any" on failure.

    Non-dict schemas and any exception raised by the original helper both
    yield the string "any".
    """
    result = "any"
    if isinstance(schema, dict):
        try:
            result = _orig_get(schema)
        except Exception:
            result = "any"
    return result


# Install the tolerant wrappers in place of the library functions.
_gcu.json_schema_to_python_type = _patched_json_to_py
_gcu.get_type = _patched_get_type
109
+
110
+ # --- SQLite Database initialization ---
111
DB_NAME = "vocabulary.db"

# Create the vocabulary table on first run. The UNIQUE constraint makes
# (user_id, language, word) the logical key, which is what lets
# save_word_to_db use INSERT OR IGNORE safely.
conn = sqlite3.connect(DB_NAME)
try:
    conn.execute("""
        CREATE TABLE IF NOT EXISTS vocabulary (
            user_id TEXT,
            language TEXT,
            word TEXT,
            timestamp TEXT,
            UNIQUE(user_id, language, word)
        )
    """)
    conn.commit()
finally:
    # Always release the handle, even if table creation fails.
    conn.close()


# --- Save word to database ---
def save_word_to_db(user_id: str, language: str, word: str):
    """Record *word* as known for the given user and language.

    The row is stamped with the current local time in ISO format. Inserting
    a word that is already stored for the same user/language is a no-op
    (INSERT OR IGNORE against the table's UNIQUE constraint).
    """
    ts = datetime.datetime.now().isoformat()
    conn = sqlite3.connect(DB_NAME)
    try:
        conn.execute(
            "INSERT OR IGNORE INTO vocabulary (user_id, language, word, timestamp) VALUES (?, ?, ?, ?)",
            (user_id, language, word, ts),
        )
        conn.commit()
    finally:
        # Close the connection even when the insert or commit raises.
        conn.close()


# --- Retrieve known words for user/language ---
def get_user_vocabulary(user_id: str, language: str) -> set[str]:
    """Return the set of words stored for *user_id* in *language*.

    An empty set is returned when nothing has been saved for that pair.
    """
    conn = sqlite3.connect(DB_NAME)
    try:
        rows = conn.execute(
            "SELECT word FROM vocabulary WHERE user_id=? AND language=?",
            (user_id, language),
        ).fetchall()
    finally:
        conn.close()
    return {r[0] for r in rows}
145
+
146
+ # --- Load NLP models ---
147
+ nlp = spacy.load("es_core_news_lg")
148
+ tokenizer = AutoTokenizer.from_pretrained("SkitCon/gec-spanish-BARTO-COWS-L2H")
149
+ model = BartForConditionalGeneration.from_pretrained("SkitCon/gec-spanish-BARTO-COWS-L2H")
150
+ gpt2_tokenizer_es = AutoTokenizer.from_pretrained("mrm8488/spanish-gpt2")
151
+ gpt2_model_es = AutoModelForCausalLM.from_pretrained("mrm8488/spanish-gpt2")
152
+ gpt2_tokenizer_en = AutoTokenizer.from_pretrained("gpt2")
153
+ gpt2_model_en = AutoModelForCausalLM.from_pretrained("gpt2")
154
+ lemmatizer = WordNetLemmatizer()
155
+
156
+ # ---Log to CSV (grammar corrections and sentence suggestions) ---
157
def log_to_csv(filename, row, fieldnames):
    """Append one row to a CSV log, writing the header when the file is new.

    Args:
        filename: Path of the CSV file to append to (created if missing).
        row: Mapping of column name to value for the row being logged.
        fieldnames: Column order used for the header and for the row.
    """
    # A file that exists but is empty (e.g. left behind by an interrupted
    # write) still needs its header, so check size as well as existence.
    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
    with open(filename, "a", newline='', encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow(row)
164
+
165
+ # --- File Import ---
166
def import_file(path: str) -> str:
    """Read the text content of a .pdf, .csv or .txt file.

    The format is chosen from the file extension (case-insensitive):
      * .pdf - text of every page, joined with newlines (pages without
        extractable text contribute an empty string);
      * .csv - the values of the "text" column, joined with newlines;
      * .txt - the whole file, decoded as UTF-8.

    Raises:
        ValueError: if a CSV has no "text" column or the extension is not
            one of the supported formats.
    """
    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        with pdfplumber.open(path) as pdf:
            return "\n".join(page.extract_text() or "" for page in pdf.pages)
    if ext == ".csv":
        df = pd.read_csv(path)
        if "text" not in df.columns:
            raise ValueError("CSV saknar kolumnen 'text'.")
        return "\n".join(df["text"].astype(str))
    if ext == ".txt":
        # Use a context manager so the handle is closed deterministically.
        with open(path, encoding="utf-8") as fh:
            return fh.read()
    raise ValueError(f"Okänt filformat: {ext}")
182
+
183
+ # --- Grammar Correction ---
184
+
185
def correct_grammar(text: str, language: str) -> str:
    """Return *text* with grammar corrections applied.

    For ``language == "es"`` each sentence found by the spaCy pipeline is
    rewritten by the seq2seq correction model and the results are joined
    with single spaces. For every other language code the text is sent to
    the public LanguageTool HTTP API and the first suggested replacement of
    each match is applied.

    Raises:
        requests.RequestException: if the LanguageTool request fails, times
            out, or returns an HTTP error status.
    """
    if language == "es":
        corrected = []
        for sent in nlp(text).sents:
            s = sent.text.strip()
            if not s:
                continue
            inp = tokenizer(s, return_tensors="pt", truncation=True, padding=True)
            out = model.generate(
                **inp,
                # Allow the output to be as long as the input sentence.
                max_new_tokens=inp.input_ids.shape[1],
                num_beams=5,
                early_stopping=True,
            )
            corrected.append(tokenizer.decode(out[0], skip_special_tokens=True))
        return " ".join(corrected)

    # Other languages: LanguageTool API.
    resp = requests.post(
        "https://api.languagetool.org/v2/check",
        data={"text": text, "language": language},
        timeout=30,  # never hang the UI callback on a stalled connection
    )
    resp.raise_for_status()
    matches = resp.json().get("matches", [])

    # Apply edits from the end of the text backwards so earlier offsets stay
    # valid; sort explicitly instead of relying on the response order.
    for m in sorted(matches, key=lambda match: match["offset"], reverse=True):
        repls = m.get("replacements") or []
        if not repls:
            # No suggestion offered: keep the original text rather than
            # deleting the flagged span.
            continue
        off, ln = m["offset"], m["length"]
        text = text[:off] + repls[0]["value"] + text[off + ln:]
    return text
211
+
212
+ # --- Analyze known and unknown words ---
213
+
214
def analyze_text(text: str, user_id: str, language: str):
    """Split the words of *text* into known and unknown for a user.

    The text is tokenized, purely alphabetic tokens are lower-cased and
    lemmatized, and each lemma is looked up in the user's stored vocabulary
    for *language*.

    Returns:
        A ``(known, unknown)`` tuple of lists, in text order, with one entry
        per token occurrence (repeated words appear repeatedly).
    """
    # NOTE(review): the same WordNet lemmatizer is applied whatever the
    # language is - confirm this is intended for Spanish input.
    vocab = get_user_vocabulary(user_id, language)
    known = []
    unknown = []
    for token in word_tokenize(text):
        if not token.isalpha():
            continue
        lemma = lemmatizer.lemmatize(token.lower())
        if lemma in vocab:
            known.append(lemma)
        else:
            unknown.append(lemma)
    return known, unknown
221
+ # --- Generate sentence using GPT2 based on unknown words ---
222
def generate_coherent_sentence(text: str, user_id: str, language: str, num_unknown=2) -> str:
    """Generate one practice sentence built around the user's unknown words.

    Up to *num_unknown* distinct unknown words from *text* are picked at
    random and placed in a prompt for the Spanish GPT-2 model when
    ``language == "es"``, otherwise for the English GPT-2 model. The first
    sentence of the sampled continuation is returned.

    Returns:
        The generated sentence, or a user-facing message when there are no
        unknown words or the model produced nothing containing a letter.
    """
    _, unknown = analyze_text(text, user_id, language)
    if not unknown:
        return "Inga okända ord att generera mening med."

    # analyze_text yields one entry per occurrence; collapse repeats (keeping
    # order) so the same word cannot be sampled twice.
    candidates = list(dict.fromkeys(unknown))
    # UI sliders may deliver the count as a float; random.sample needs an int.
    wanted = max(0, int(num_unknown))
    chosen = random.sample(candidates, min(wanted, len(candidates)))

    if language == "es":
        prompt = "Escribe una sola frase clara que incluya estas palabras: " + ", ".join(chosen) + "."
        gen_tokenizer = gpt2_tokenizer_es
        gen_model = gpt2_model_es
    else:
        prompt = "Write one clear sentence that includes the following words: " + ", ".join(chosen) + "."
        gen_tokenizer = gpt2_tokenizer_en
        gen_model = gpt2_model_en

    inp = gen_tokenizer(prompt, return_tensors="pt", truncation=True)
    outs = gen_model.generate(
        **inp,
        max_new_tokens=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    gen = gen_tokenizer.decode(outs[0], skip_special_tokens=True)

    # The decoded output normally echoes the prompt; keep only the continuation.
    body = gen[len(prompt):].strip() if gen.startswith(prompt) else gen.strip()
    # Keep just the first sentence of the continuation.
    sentence = (body.split(".")[0].strip() + ".") if "." in body else body
    if not any(c.isalpha() for c in sentence):
        return "Misslyckades att generera meningsfull övningsmening."
    return sentence
249
+
250
+
251
+ # --- Gradio process callback ---
252
def process(user, language, txt, file, do_grammar, do_save):
    """Gradio callback: optionally correct the text, then analyze it.

    Typed text takes precedence over an uploaded file. The text is passed
    through grammar correction when *do_grammar* is set, split into known
    and unknown words for *user*/*language*, and the unknown words are
    stored as known when *do_save* is set. Every run is appended to
    ``grammarlog.csv``.

    Returns:
        A 5-tuple ``(corrected_text, known_words, unknown_words, status,
        coherent_sentence)``; the last element is always empty here. On any
        error the status field carries the traceback and the rest are empty.
    """
    try:
        if txt and txt.strip():
            text = txt.strip()
        elif file:
            # Accept both an upload object exposing ``.name`` and a plain
            # path string.
            text = import_file(getattr(file, "name", file))
        else:
            return "", "", "", "Ingen text angiven.", ""

        out = correct_grammar(text, language) if do_grammar else text
        kn, un = analyze_text(out, user, language)

        status = ""
        if do_save and un:
            # Save each distinct word once so the reported count matches the
            # number of words actually added.
            new_words = list(dict.fromkeys(un))
            for w in new_words:
                save_word_to_db(user, language, w)
            status = f"Sparade {len(new_words)} ord."

        # Log the grammar correction to CSV.
        log_to_csv(
            "grammarlog.csv",
            {
                "user": user, "language": language, "input": text,
                "output": out, "timestamp": datetime.datetime.now().isoformat()
            },
            ["user", "language", "input", "output", "timestamp"]
        )
        return out, ", ".join(kn), ", ".join(un), status, ""
    except Exception:
        # UI boundary: surface the failure in the status box instead of
        # letting the callback crash.
        import traceback
        tb = traceback.format_exc()
        return "", "", "", f"FEL i process:\n{tb}", ""
281
+
282
+ # --- Sentence generation callback ---
283
def coherent_fn(user, language, txt, num):
    """Gradio callback: produce a practice sentence and log it.

    Generates a sentence from the unknown words in *txt* (an empty string
    is used when *txt* is falsy), appends the request and result to
    ``sentencelog.csv`` and returns the sentence. Any exception is turned
    into an error message string for the UI.
    """
    columns = ["user", "language", "input", "output", "timestamp"]
    try:
        source_text = txt or ""
        suggestion = generate_coherent_sentence(source_text, user, language, num)
        # Log the practice suggestion to CSV.
        entry = {
            "user": user,
            "language": language,
            "input": txt,
            "output": suggestion,
            "timestamp": datetime.datetime.now().isoformat(),
        }
        log_to_csv("sentencelog.csv", entry, columns)
    except Exception as e:
        return f"Fel vid generering: {e}"
    return suggestion
298
+
299
+ # --- Gradio UI ---
300
+ demo = gr.Blocks()
301
+ with demo:
302
+ gr.Markdown("### 🌟 Språkinlärningsapp med användare & flerspråkighet")
303
+ with gr.Row():
304
+ user_input = gr.Textbox(label="Användarnamn", placeholder="Ditt namn här")
305
+ lang_dd = gr.Dropdown(choices=["es", "en"], value="es", label="Språk")
306
+ with gr.Column():
307
+ manual_input = gr.Textbox(lines=4, label="Skriv/klistra in text")
308
+ file_input = gr.File(file_types=[".txt",".csv",".pdf"], label="Importera fil")
309
+ grammar_cb = gr.Checkbox(label="Grammatik­rättning")
310
+ autosave_cb = gr.Checkbox(label="Spara okända ord")
311
+ run_btn = gr.Button("Kör analys & korrigering")
312
+ num_slider = gr.Slider(minimum=1, maximum=5, step=1, value=2, label="Antal okända ord för övning")
313
+ coherent_btn = gr.Button("Koherent övningsmening")
314
+
315
+ corr_out = gr.Textbox(label="Korrigerad text", lines=4)
316
+ known_out = gr.Textbox(label="Kända ord")
317
+ unknown_out = gr.Textbox(label="Okända ord")
318
+ status_out = gr.Textbox(label="Status")
319
+ coherent_out = gr.Textbox(label="Koherent övningsmening")
320
+
321
+ # --- Knapparnas click‐kopplingar ---
322
+ run_btn.click(
323
+ fn=process,
324
+ inputs=[user_input, lang_dd, manual_input, file_input, grammar_cb, autosave_cb],
325
+ outputs=[corr_out, known_out, unknown_out, status_out, coherent_out]
326
  )
327
+ coherent_btn.click(
328
+ fn=coherent_fn,
329
+ inputs=[user_input, lang_dd, manual_input, num_slider],
330
+ outputs=[coherent_out]
331
+ )
332
+ #Make sure to change language for the textfile to be analyzed in its target language
333
 
334
+ # --- Start app ---
335
if __name__ == "__main__":
    # launch() returns (app, local_url, share_url), not a single URL.
    _app, local_url, share_url = demo.launch(share=True, inbrowser=True, prevent_thread_lock=True)
    print("Appen körs på:", share_url or local_url)
    # prevent_thread_lock=True makes launch() return immediately; block here
    # so the script does not exit while the server is running.
    demo.block_thread()
 
requirements.txt CHANGED
@@ -1 +1,10 @@
1
- huggingface_hub==0.25.2
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.29.0
2
+ transformers
3
+ nltk
4
+ pdfplumber
5
+ spacy
6
+ torch
7
+ requests
8
+ prompt_toolkit
9
+ es_core_news_lg @ https://github.com/explosion/spacy-models/releases/download/es_core_news_lg-3.7.0/es_core_news_lg-3.7.0-py3-none-any.whl
10
+ en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.0/en_core_web_lg-3.7.0-py3-none-any.whl