Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- app.py +317 -399
- requirements.txt +10 -1
app.py
CHANGED
@@ -1,419 +1,337 @@
|
|
|
|
|
|
1 |
"""
|
2 |
-
|
3 |
-
"""
|
4 |
-
|
5 |
-
from __future__ import annotations
|
6 |
-
|
7 |
-
from typing import TYPE_CHECKING, cast
|
8 |
-
|
9 |
-
from prompt_toolkit.application.current import get_app
|
10 |
-
from prompt_toolkit.cache import memoized
|
11 |
-
from prompt_toolkit.enums import EditingMode
|
12 |
-
|
13 |
-
from .base import Condition
|
14 |
-
|
15 |
-
if TYPE_CHECKING:
|
16 |
-
from prompt_toolkit.layout.layout import FocusableElement
|
17 |
-
|
18 |
-
|
19 |
-
__all__ = [
|
20 |
-
"has_arg",
|
21 |
-
"has_completions",
|
22 |
-
"completion_is_selected",
|
23 |
-
"has_focus",
|
24 |
-
"buffer_has_focus",
|
25 |
-
"has_selection",
|
26 |
-
"has_suggestion",
|
27 |
-
"has_validation_error",
|
28 |
-
"is_done",
|
29 |
-
"is_read_only",
|
30 |
-
"is_multiline",
|
31 |
-
"renderer_height_is_known",
|
32 |
-
"in_editing_mode",
|
33 |
-
"in_paste_mode",
|
34 |
-
"vi_mode",
|
35 |
-
"vi_navigation_mode",
|
36 |
-
"vi_insert_mode",
|
37 |
-
"vi_insert_multiple_mode",
|
38 |
-
"vi_replace_mode",
|
39 |
-
"vi_selection_mode",
|
40 |
-
"vi_waiting_for_text_object_mode",
|
41 |
-
"vi_digraph_mode",
|
42 |
-
"vi_recording_macro",
|
43 |
-
"emacs_mode",
|
44 |
-
"emacs_insert_mode",
|
45 |
-
"emacs_selection_mode",
|
46 |
-
"shift_selection_mode",
|
47 |
-
"is_searching",
|
48 |
-
"control_is_searchable",
|
49 |
-
"vi_search_direction_reversed",
|
50 |
-
]
|
51 |
-
|
52 |
-
|
53 |
-
# NOTE: `has_focus` below should *not* be `memoized`. It can reference any user
|
54 |
-
# control. For instance, if we would continuously create new
|
55 |
-
# `PromptSession` instances, then previous instances won't be released,
|
56 |
-
# because this memoize (which caches results in the global scope) will
|
57 |
-
# still refer to each instance.
|
58 |
-
def has_focus(value: FocusableElement) -> Condition:
|
59 |
-
"""
|
60 |
-
Enable when this buffer has the focus.
|
61 |
-
"""
|
62 |
-
from prompt_toolkit.buffer import Buffer
|
63 |
-
from prompt_toolkit.layout import walk
|
64 |
-
from prompt_toolkit.layout.containers import Container, Window, to_container
|
65 |
-
from prompt_toolkit.layout.controls import UIControl
|
66 |
-
|
67 |
-
if isinstance(value, str):
|
68 |
-
|
69 |
-
def test() -> bool:
|
70 |
-
return get_app().current_buffer.name == value
|
71 |
-
|
72 |
-
elif isinstance(value, Buffer):
|
73 |
-
|
74 |
-
def test() -> bool:
|
75 |
-
return get_app().current_buffer == value
|
76 |
-
|
77 |
-
elif isinstance(value, UIControl):
|
78 |
-
|
79 |
-
def test() -> bool:
|
80 |
-
return get_app().layout.current_control == value
|
81 |
-
|
82 |
-
else:
|
83 |
-
value = to_container(value)
|
84 |
-
|
85 |
-
if isinstance(value, Window):
|
86 |
-
|
87 |
-
def test() -> bool:
|
88 |
-
return get_app().layout.current_window == value
|
89 |
-
|
90 |
-
else:
|
91 |
-
|
92 |
-
def test() -> bool:
|
93 |
-
# Consider focused when any window inside this container is
|
94 |
-
# focused.
|
95 |
-
current_window = get_app().layout.current_window
|
96 |
-
|
97 |
-
for c in walk(cast(Container, value)):
|
98 |
-
if isinstance(c, Window) and c == current_window:
|
99 |
-
return True
|
100 |
-
return False
|
101 |
-
|
102 |
-
@Condition
|
103 |
-
def has_focus_filter() -> bool:
|
104 |
-
return test()
|
105 |
-
|
106 |
-
return has_focus_filter
|
107 |
-
|
108 |
-
|
109 |
-
@Condition
|
110 |
-
def buffer_has_focus() -> bool:
|
111 |
-
"""
|
112 |
-
Enabled when the currently focused control is a `BufferControl`.
|
113 |
-
"""
|
114 |
-
return get_app().layout.buffer_has_focus
|
115 |
-
|
116 |
-
|
117 |
-
@Condition
|
118 |
-
def has_selection() -> bool:
|
119 |
-
"""
|
120 |
-
Enable when the current buffer has a selection.
|
121 |
-
"""
|
122 |
-
return bool(get_app().current_buffer.selection_state)
|
123 |
-
|
124 |
-
|
125 |
-
@Condition
|
126 |
-
def has_suggestion() -> bool:
|
127 |
-
"""
|
128 |
-
Enable when the current buffer has a suggestion.
|
129 |
-
"""
|
130 |
-
buffer = get_app().current_buffer
|
131 |
-
return buffer.suggestion is not None and buffer.suggestion.text != ""
|
132 |
-
|
133 |
-
|
134 |
-
@Condition
|
135 |
-
def has_completions() -> bool:
|
136 |
-
"""
|
137 |
-
Enable when the current buffer has completions.
|
138 |
-
"""
|
139 |
-
state = get_app().current_buffer.complete_state
|
140 |
-
return state is not None and len(state.completions) > 0
|
141 |
-
|
142 |
-
|
143 |
-
@Condition
|
144 |
-
def completion_is_selected() -> bool:
|
145 |
-
"""
|
146 |
-
True when the user selected a completion.
|
147 |
-
"""
|
148 |
-
complete_state = get_app().current_buffer.complete_state
|
149 |
-
return complete_state is not None and complete_state.current_completion is not None
|
150 |
|
|
|
|
|
151 |
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
return get_app().current_buffer.read_only()
|
158 |
-
|
159 |
-
|
160 |
-
@Condition
|
161 |
-
def is_multiline() -> bool:
|
162 |
-
"""
|
163 |
-
True when the current buffer has been marked as multiline.
|
164 |
-
"""
|
165 |
-
return get_app().current_buffer.multiline()
|
166 |
-
|
167 |
-
|
168 |
-
@Condition
|
169 |
-
def has_validation_error() -> bool:
|
170 |
-
"Current buffer has validation error."
|
171 |
-
return get_app().current_buffer.validation_error is not None
|
172 |
-
|
173 |
-
|
174 |
-
@Condition
|
175 |
-
def has_arg() -> bool:
|
176 |
-
"Enable when the input processor has an 'arg'."
|
177 |
-
return get_app().key_processor.arg is not None
|
178 |
-
|
179 |
-
|
180 |
-
@Condition
|
181 |
-
def is_done() -> bool:
|
182 |
-
"""
|
183 |
-
True when the CLI is returning, aborting or exiting.
|
184 |
-
"""
|
185 |
-
return get_app().is_done
|
186 |
-
|
187 |
-
|
188 |
-
@Condition
|
189 |
-
def renderer_height_is_known() -> bool:
|
190 |
-
"""
|
191 |
-
Only True when the renderer knows it's real height.
|
192 |
-
|
193 |
-
(On VT100 terminals, we have to wait for a CPR response, before we can be
|
194 |
-
sure of the available height between the cursor position and the bottom of
|
195 |
-
the terminal. And usually it's nicer to wait with drawing bottom toolbars
|
196 |
-
until we receive the height, in order to avoid flickering -- first drawing
|
197 |
-
somewhere in the middle, and then again at the bottom.)
|
198 |
-
"""
|
199 |
-
return get_app().renderer.height_is_known
|
200 |
-
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
"""
|
205 |
-
Check whether a given editing mode is active. (Vi or Emacs.)
|
206 |
-
"""
|
207 |
|
208 |
-
@Condition
|
209 |
-
def in_editing_mode_filter() -> bool:
|
210 |
-
return get_app().editing_mode == editing_mode
|
211 |
|
212 |
-
|
|
|
|
|
|
|
|
|
213 |
|
|
|
|
|
214 |
|
215 |
-
|
216 |
-
|
217 |
-
|
|
|
|
|
218 |
|
|
|
|
|
|
|
219 |
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
|
|
|
|
|
|
|
|
|
224 |
|
225 |
-
|
226 |
-
|
227 |
-
"""
|
228 |
-
Active when the set for Vi navigation key bindings are active.
|
229 |
-
"""
|
230 |
-
from prompt_toolkit.key_binding.vi_state import InputMode
|
231 |
|
232 |
-
app = get_app()
|
233 |
|
234 |
-
if (
|
235 |
-
app.editing_mode != EditingMode.VI
|
236 |
-
or app.vi_state.operator_func
|
237 |
-
or app.vi_state.waiting_for_digraph
|
238 |
-
or app.current_buffer.selection_state
|
239 |
-
):
|
240 |
-
return False
|
241 |
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
)
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
or app.vi_state.waiting_for_digraph
|
259 |
-
or app.current_buffer.selection_state
|
260 |
-
or app.vi_state.temporary_navigation_mode
|
261 |
-
or app.current_buffer.read_only()
|
262 |
-
):
|
263 |
-
return False
|
264 |
-
|
265 |
-
return app.vi_state.input_mode == InputMode.INSERT
|
266 |
-
|
267 |
-
|
268 |
-
@Condition
|
269 |
-
def vi_insert_multiple_mode() -> bool:
|
270 |
-
from prompt_toolkit.key_binding.vi_state import InputMode
|
271 |
-
|
272 |
-
app = get_app()
|
273 |
-
|
274 |
-
if (
|
275 |
-
app.editing_mode != EditingMode.VI
|
276 |
-
or app.vi_state.operator_func
|
277 |
-
or app.vi_state.waiting_for_digraph
|
278 |
-
or app.current_buffer.selection_state
|
279 |
-
or app.vi_state.temporary_navigation_mode
|
280 |
-
or app.current_buffer.read_only()
|
281 |
-
):
|
282 |
-
return False
|
283 |
-
|
284 |
-
return app.vi_state.input_mode == InputMode.INSERT_MULTIPLE
|
285 |
-
|
286 |
-
|
287 |
-
@Condition
|
288 |
-
def vi_replace_mode() -> bool:
|
289 |
-
from prompt_toolkit.key_binding.vi_state import InputMode
|
290 |
-
|
291 |
-
app = get_app()
|
292 |
-
|
293 |
-
if (
|
294 |
-
app.editing_mode != EditingMode.VI
|
295 |
-
or app.vi_state.operator_func
|
296 |
-
or app.vi_state.waiting_for_digraph
|
297 |
-
or app.current_buffer.selection_state
|
298 |
-
or app.vi_state.temporary_navigation_mode
|
299 |
-
or app.current_buffer.read_only()
|
300 |
-
):
|
301 |
-
return False
|
302 |
-
|
303 |
-
return app.vi_state.input_mode == InputMode.REPLACE
|
304 |
-
|
305 |
-
|
306 |
-
@Condition
|
307 |
-
def vi_replace_single_mode() -> bool:
|
308 |
-
from prompt_toolkit.key_binding.vi_state import InputMode
|
309 |
-
|
310 |
-
app = get_app()
|
311 |
-
|
312 |
-
if (
|
313 |
-
app.editing_mode != EditingMode.VI
|
314 |
-
or app.vi_state.operator_func
|
315 |
-
or app.vi_state.waiting_for_digraph
|
316 |
-
or app.current_buffer.selection_state
|
317 |
-
or app.vi_state.temporary_navigation_mode
|
318 |
-
or app.current_buffer.read_only()
|
319 |
-
):
|
320 |
-
return False
|
321 |
-
|
322 |
-
return app.vi_state.input_mode == InputMode.REPLACE_SINGLE
|
323 |
-
|
324 |
-
|
325 |
-
@Condition
|
326 |
-
def vi_selection_mode() -> bool:
|
327 |
-
app = get_app()
|
328 |
-
if app.editing_mode != EditingMode.VI:
|
329 |
-
return False
|
330 |
-
|
331 |
-
return bool(app.current_buffer.selection_state)
|
332 |
-
|
333 |
-
|
334 |
-
@Condition
|
335 |
-
def vi_waiting_for_text_object_mode() -> bool:
|
336 |
-
app = get_app()
|
337 |
-
if app.editing_mode != EditingMode.VI:
|
338 |
-
return False
|
339 |
-
|
340 |
-
return app.vi_state.operator_func is not None
|
341 |
-
|
342 |
-
|
343 |
-
@Condition
|
344 |
-
def vi_digraph_mode() -> bool:
|
345 |
-
app = get_app()
|
346 |
-
if app.editing_mode != EditingMode.VI:
|
347 |
-
return False
|
348 |
-
|
349 |
-
return app.vi_state.waiting_for_digraph
|
350 |
-
|
351 |
-
|
352 |
-
@Condition
|
353 |
-
def vi_recording_macro() -> bool:
|
354 |
-
"When recording a Vi macro."
|
355 |
-
app = get_app()
|
356 |
-
if app.editing_mode != EditingMode.VI:
|
357 |
-
return False
|
358 |
-
|
359 |
-
return app.vi_state.recording_register is not None
|
360 |
-
|
361 |
-
|
362 |
-
@Condition
|
363 |
-
def emacs_mode() -> bool:
|
364 |
-
"When the Emacs bindings are active."
|
365 |
-
return get_app().editing_mode == EditingMode.EMACS
|
366 |
-
|
367 |
-
|
368 |
-
@Condition
|
369 |
-
def emacs_insert_mode() -> bool:
|
370 |
-
app = get_app()
|
371 |
-
if (
|
372 |
-
app.editing_mode != EditingMode.EMACS
|
373 |
-
or app.current_buffer.selection_state
|
374 |
-
or app.current_buffer.read_only()
|
375 |
-
):
|
376 |
-
return False
|
377 |
-
return True
|
378 |
-
|
379 |
-
|
380 |
-
@Condition
|
381 |
-
def emacs_selection_mode() -> bool:
|
382 |
-
app = get_app()
|
383 |
-
return bool(
|
384 |
-
app.editing_mode == EditingMode.EMACS and app.current_buffer.selection_state
|
385 |
)
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
)
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
413 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
414 |
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
"
|
419 |
-
return get_app().reverse_vi_search_direction()
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
"""
|
4 |
+
Created on Mon May 19 16:49:22 2025
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
@author: jacobwildt-persson
|
7 |
+
"""
|
8 |
|
9 |
+
#!/usr/bin/env python3
|
10 |
+
# -*- coding: utf-8 -*-
|
11 |
+
# -----------------------------------------------
|
12 |
+
# Requirements & Setup Instructions
|
13 |
+
# -----------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
+
# Python version:
|
16 |
+
# Requires Python 3.10 or later (tested on 3.12)
|
|
|
|
|
|
|
17 |
|
|
|
|
|
|
|
18 |
|
19 |
+
# Run your script inside a virtual environment (e.g. conda or venv) to avoid conflicts.
|
20 |
+
# Recreate the environment with these commands in the terminal:
|
21 |
+
# conda env create -f environment.yml
|
22 |
+
# conda activate sprakenv
|
23 |
+
#
|
24 |
|
25 |
+
# Install all required packages:
|
26 |
+
# Run these commands in the terminal:
|
27 |
|
28 |
+
# pip install --upgrade gradio
|
29 |
+
# pip install pdfplumber
|
30 |
+
# pip install nltk
|
31 |
+
# pip install transformers
|
32 |
+
# pip install -U spacy
|
33 |
|
34 |
+
# Download language models:
|
35 |
+
# python -m spacy download es_core_news_lg
|
36 |
+
# python -m spacy download en_core_web_lg # if you add NER for English
|
37 |
|
38 |
+
# Check Gradio version used:
|
39 |
+
# import gradio as gr
|
40 |
+
# print(gr.__version__) # Gradio version 4.18.0
|
41 |
|
42 |
+
# 🔗 Reference: Gradio Quickstart Guide
|
43 |
+
# https://www.gradio.app/guides/quickstart
|
44 |
+
#Hugging Face
|
45 |
+
# https://huggingface.co/models
|
46 |
|
47 |
+
# English API model
|
48 |
+
# LanguageTool API: https://languagetool.org/http-api/swagger
|
|
|
|
|
|
|
|
|
49 |
|
|
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
+
# Remember!
|
53 |
+
# Run your script inside a virtual environment (e.g. conda or venv) to avoid conflicts.
|
54 |
+
# Recreate the environment with these commands in the terminal:
|
55 |
+
# conda env create -f environment.yml
|
56 |
+
# conda activate sprakenv
|
57 |
+
# python -m spacy download es_core_news_lg
|
58 |
+
#python -m nltk.downloader punkt wordnet
|
59 |
+
# -----------------------------------------------
|
60 |
+
"""
|
61 |
+
Language learning app with a Gradio UI and support for multiple users:
|
62 |
+
- Import text from file (.txt/.csv/.pdf) or manual text input
|
63 |
+
- Grammar correction via transformers (Spanish) or LanguageTool API (English)
|
64 |
+
- Analyze text (known/unknown words) per user & language
|
65 |
+
- Save unknown words as known
|
66 |
+
- Generate coherent practice sentence (Spanish & English)
|
67 |
+
- Log grammar corrections and practice sentence suggestions to CSV
|
68 |
+
"""
|
69 |
+
import os
|
70 |
+
import datetime
|
71 |
+
import sqlite3
|
72 |
+
import requests
|
73 |
+
import random
|
74 |
+
import pandas as pd
|
75 |
+
import pdfplumber
|
76 |
+
import spacy
|
77 |
+
import csv
|
78 |
+
# SQLite is accessed via the built-in sqlite3 module (no need to install sqlite3-binary)
|
79 |
+
import sqlite3
|
80 |
+
|
81 |
+
from nltk.tokenize import word_tokenize
|
82 |
+
from nltk.stem import WordNetLemmatizer
|
83 |
+
from transformers import AutoTokenizer, BartForConditionalGeneration, AutoModelForCausalLM
|
84 |
+
import gradio as gr
|
85 |
+
import gradio_client.utils as _gcu
|
86 |
+
|
87 |
+
# --- PATCH for Gradio utils schema bug ---
|
88 |
+
_orig_json = _gcu.json_schema_to_python_type
|
89 |
+
_orig_get = _gcu.get_type
|
90 |
+
|
91 |
+
def _patched_json_to_py(schema, defs=None):
|
92 |
+
if not isinstance(schema, dict):
|
93 |
+
return "any"
|
94 |
+
try:
|
95 |
+
return _orig_json(schema, defs)
|
96 |
+
except Exception:
|
97 |
+
return "any"
|
98 |
+
|
99 |
+
def _patched_get_type(schema):
|
100 |
+
if not isinstance(schema, dict):
|
101 |
+
return "any"
|
102 |
+
try:
|
103 |
+
return _orig_get(schema)
|
104 |
+
except Exception:
|
105 |
+
return "any"
|
106 |
+
|
107 |
+
_gcu.json_schema_to_python_type = _patched_json_to_py
|
108 |
+
_gcu.get_type = _patched_get_type
|
109 |
+
|
110 |
+
# --- SQLite Database initialization ---
|
111 |
+
DB_NAME = "vocabulary.db"

# Create the vocabulary table on first start. The UNIQUE constraint is what
# lets save_word_to_db() use INSERT OR IGNORE to skip already-known words.
conn = sqlite3.connect(DB_NAME)
try:
    conn.execute("""
        CREATE TABLE IF NOT EXISTS vocabulary (
            user_id TEXT,
            language TEXT,
            word TEXT,
            timestamp TEXT,
            UNIQUE(user_id, language, word)
        )
    """)
    conn.commit()
finally:
    # Always release the handle, even if table creation fails.
    conn.close()
|
124 |
+
|
125 |
+
# --- Save word to database ---
|
126 |
+
def save_word_to_db(user_id: str, language: str, word: str) -> None:
    """Store *word* as known for the given user and language.

    The row is stamped with the current local time in ISO format. A word
    already stored for the same user and language is silently skipped
    (``INSERT OR IGNORE``).
    """
    ts = datetime.datetime.now().isoformat()
    conn = sqlite3.connect(DB_NAME)
    try:
        conn.execute(
            "INSERT OR IGNORE INTO vocabulary (user_id, language, word, timestamp) VALUES (?, ?, ?, ?)",
            (user_id, language, word, ts),
        )
        conn.commit()
    finally:
        # Close the connection even when the insert or commit fails.
        conn.close()
|
135 |
+
|
136 |
+
# --- Retrieve known words for user/language ---
|
137 |
+
def get_user_vocabulary(user_id: str, language: str) -> set[str]:
    """Return the set of words stored as known for *user_id* in *language*."""
    conn = sqlite3.connect(DB_NAME)
    try:
        rows = conn.execute(
            "SELECT word FROM vocabulary WHERE user_id=? AND language=?",
            (user_id, language),
        ).fetchall()
    finally:
        # Close the connection even when the query fails.
        conn.close()
    return {row[0] for row in rows}
|
145 |
+
|
146 |
+
# --- Load NLP models ---
|
147 |
+
nlp = spacy.load("es_core_news_lg")
|
148 |
+
tokenizer = AutoTokenizer.from_pretrained("SkitCon/gec-spanish-BARTO-COWS-L2H")
|
149 |
+
model = BartForConditionalGeneration.from_pretrained("SkitCon/gec-spanish-BARTO-COWS-L2H")
|
150 |
+
gpt2_tokenizer_es = AutoTokenizer.from_pretrained("mrm8488/spanish-gpt2")
|
151 |
+
gpt2_model_es = AutoModelForCausalLM.from_pretrained("mrm8488/spanish-gpt2")
|
152 |
+
gpt2_tokenizer_en = AutoTokenizer.from_pretrained("gpt2")
|
153 |
+
gpt2_model_en = AutoModelForCausalLM.from_pretrained("gpt2")
|
154 |
+
lemmatizer = WordNetLemmatizer()
|
155 |
+
|
156 |
+
# ---Log to CSV (grammar corrections and sentence suggestions) ---
|
157 |
+
def log_to_csv(filename, row, fieldnames):
    """Append *row* (a dict) to the CSV file *filename*.

    The header line built from *fieldnames* is written first when the file
    does not exist yet or exists but is still empty, so every log file
    starts with exactly one header.
    """
    # An existing zero-byte file also needs a header; checking existence
    # alone would leave such a file without one.
    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
    with open(filename, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow(row)
|
164 |
+
|
165 |
+
# --- File Import ---
|
166 |
+
def import_file(path: str) -> str:
    """Read the text content of a ``.pdf``, ``.csv`` or ``.txt`` file.

    - PDF: the extracted text of every page, joined with newlines (pages
      without extractable text contribute an empty string).
    - CSV: the values of the ``text`` column, joined with newlines.
    - TXT: the whole file, decoded as UTF-8.

    Raises:
        ValueError: if the CSV has no ``text`` column or the extension is
            not one of the supported formats.
    """
    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        with pdfplumber.open(path) as pdf:
            return "\n".join(page.extract_text() or "" for page in pdf.pages)
    if ext == ".csv":
        df = pd.read_csv(path)
        if "text" not in df.columns:
            raise ValueError("CSV saknar kolumnen 'text'.")
        return "\n".join(df["text"].astype(str))
    if ext == ".txt":
        # Use a context manager so the file handle is closed deterministically.
        with open(path, encoding="utf-8") as handle:
            return handle.read()
    raise ValueError(f"Okänt filformat: {ext}")
|
182 |
+
|
183 |
+
# --- Grammar Correction ---
|
184 |
+
|
185 |
+
def correct_grammar(text: str, language: str) -> str:
    """Return a grammar-corrected version of *text*.

    Spanish (``language == "es"``) is corrected sentence by sentence with the
    module-level BART model; the corrected sentences are joined with single
    spaces. Any other language code is sent to the public LanguageTool API
    and the first suggested replacement of each match is applied.

    Raises:
        requests.RequestException: if the LanguageTool request fails, times
            out or returns an HTTP error status.
    """
    if language == "es":
        corrected = []
        for sent in nlp(text).sents:
            s = sent.text.strip()
            if not s:
                continue
            inp = tokenizer(s, return_tensors="pt", truncation=True, padding=True)
            out = model.generate(
                **inp,
                max_new_tokens=inp.input_ids.shape[1],
                num_beams=5,
                early_stopping=True,
            )
            corrected.append(tokenizer.decode(out[0], skip_special_tokens=True))
        return " ".join(corrected)

    # English (and any other code): LanguageTool API.
    resp = requests.post(
        "https://api.languagetool.org/v2/check",
        data={"text": text, "language": language},
        timeout=30,  # never hang the UI on an unresponsive service
    )
    resp.raise_for_status()
    matches = resp.json().get("matches", [])

    # Apply edits from the end of the text backwards so earlier offsets stay
    # valid. Sort explicitly instead of relying on the response order.
    untouched_end = len(text)
    for m in sorted(matches, key=lambda match: match["offset"], reverse=True):
        off, ln = m["offset"], m["length"]
        repls = m.get("replacements", [])
        if not repls:
            # No suggestion: keep the original wording instead of deleting it.
            continue
        if off + ln > untouched_end:
            # Overlaps a span that was already rewritten; its offsets are stale.
            continue
        text = text[:off] + repls[0]["value"] + text[off + ln:]
        untouched_end = off
    return text
|
211 |
+
|
212 |
+
# --- Analyze known and unknown words ---
|
213 |
+
|
214 |
+
def analyze_text(text: str, user_id: str, language: str):
    """Split the words of *text* into known and unknown lemmas for a user.

    Alphabetic tokens are lower-cased and lemmatized; each lemma goes into
    the first list if it is in the user's stored vocabulary for *language*,
    otherwise into the second. Order and duplicates are preserved.

    Returns:
        A ``(known, unknown)`` tuple of lists of lemmas.
    """
    # NOTE(review): the lemmatizer is a WordNetLemmatizer, which is presumably
    # English-only; confirm its effect on Spanish tokens.
    vocab = get_user_vocabulary(user_id, language)
    known, unknown = [], []
    for token in word_tokenize(text):
        if not token.isalpha():
            continue
        lemma = lemmatizer.lemmatize(token.lower())
        bucket = known if lemma in vocab else unknown
        bucket.append(lemma)
    return known, unknown
|
221 |
+
# --- Generate sentence using GPT2 based on unknown words ---
|
222 |
+
def generate_coherent_sentence(text: str, user_id: str, language: str, num_unknown=2) -> str:
    """Generate one practice sentence built around unknown words of *text*.

    Up to *num_unknown* distinct unknown lemmas are picked at random and put
    into a prompt for the Spanish GPT-2 model (``language == "es"``) or the
    English GPT-2 model (any other language). The text generated after the
    prompt is cut at its first full stop.

    Returns:
        The generated sentence, or a Swedish status message when there are no
        unknown words or the model produced nothing containing letters.
    """
    _, unknown = analyze_text(text, user_id, language)
    if not unknown:
        return "Inga okända ord att generera mening med."

    # De-duplicate (keeping order) so the same word cannot be sampled twice.
    candidates = list(dict.fromkeys(unknown))
    # UI sliders may deliver a float; random.sample requires an int.
    chosen = random.sample(candidates, min(int(num_unknown), len(candidates)))

    # Local names deliberately differ from the module-level grammar
    # `tokenizer` / `model` to avoid shadowing them.
    if language == "es":
        prompt = "Escribe una sola frase clara que incluya estas palabras: " + ", ".join(chosen) + "."
        gen_tokenizer = gpt2_tokenizer_es
        gen_model = gpt2_model_es
    else:
        prompt = "Write one clear sentence that includes the following words: " + ", ".join(chosen) + "."
        gen_tokenizer = gpt2_tokenizer_en
        gen_model = gpt2_model_en

    inp = gen_tokenizer(prompt, return_tensors="pt", truncation=True)
    outs = gen_model.generate(
        **inp,
        max_new_tokens=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        # GPT-2 defines no pad token; name EOS explicitly instead of relying
        # on the library's implicit fallback.
        pad_token_id=gen_tokenizer.eos_token_id,
    )
    gen = gen_tokenizer.decode(outs[0], skip_special_tokens=True)
    # Drop the echoed prompt, then keep only the first sentence.
    body = gen[len(prompt):].strip() if gen.startswith(prompt) else gen.strip()
    sentence = (body.split(".")[0].strip() + ".") if "." in body else body
    if not any(c.isalpha() for c in sentence):
        return "Misslyckades att generera meningsfull övningsmening."
    return sentence
|
249 |
+
|
250 |
+
|
251 |
+
# --- Gradio process callback ---
|
252 |
+
def process(user, language, txt, file, do_grammar, do_save):
    """Gradio callback: optionally correct the text, then analyze it.

    Manually entered text takes precedence over an uploaded file. Every run
    is appended to ``grammarlog.csv``.

    Args:
        user: user name used as the vocabulary key.
        language: language code selected in the UI.
        txt: manually entered text (may be empty or None).
        file: uploaded file, either a path string or an object with a
            ``.name`` path attribute (may be None).
        do_grammar: run grammar correction before the analysis.
        do_save: store the unknown words as known for this user.

    Returns:
        A 5-tuple ``(corrected_text, known_words, unknown_words, status,
        coherent_sentence)`` of strings. On any error the status holds the
        traceback and the other fields are empty.
    """
    try:
        if txt and txt.strip():
            text = txt.strip()
        elif file:
            # The upload may arrive as a plain path string or as a file-like
            # wrapper exposing the path as `.name`; accept both.
            path = file if isinstance(file, str) else file.name
            text = import_file(path)
        else:
            return "", "", "", "Ingen text angiven.", ""
        out = correct_grammar(text, language) if do_grammar else text
        kn, un = analyze_text(out, user, language)
        status = ""
        if do_save and un:
            # `un` keeps duplicates; save and count each distinct word once.
            unique_unknown = list(dict.fromkeys(un))
            for w in unique_unknown:
                save_word_to_db(user, language, w)
            status = f"Sparade {len(unique_unknown)} ord."
        # Log the input and the (possibly corrected) output to CSV.
        log_to_csv(
            "grammarlog.csv",
            {
                "user": user, "language": language, "input": text,
                "output": out, "timestamp": datetime.datetime.now().isoformat()
            },
            ["user", "language", "input", "output", "timestamp"]
        )
        return out, ", ".join(kn), ", ".join(un), status, ""
    except Exception:
        # UI boundary: show the failure in the status box instead of crashing.
        import traceback
        tb = traceback.format_exc()
        return "", "", "", f"FEL i process:\n{tb}", ""
|
281 |
+
|
282 |
+
# --- Sentence generation callback ---
|
283 |
+
def coherent_fn(user, language, txt, num):
    """Gradio callback: generate a practice sentence and log it.

    Calls ``generate_coherent_sentence`` with *txt* (an empty string when
    *txt* is falsy) and appends the suggestion to ``sentencelog.csv``.

    Returns:
        The suggested sentence, or a Swedish error message if anything fails.
    """
    log_fields = ["user", "language", "input", "output", "timestamp"]
    try:
        suggestion = generate_coherent_sentence(txt or "", user, language, num)
        # Log the practice suggestion to CSV.
        entry = {
            "user": user,
            "language": language,
            "input": txt,
            "output": suggestion,
            "timestamp": datetime.datetime.now().isoformat(),
        }
        log_to_csv("sentencelog.csv", entry, log_fields)
    except Exception as e:
        return f"Fel vid generering: {e}"
    return suggestion
|
298 |
+
|
299 |
+
# --- Gradio UI ---
|
300 |
+
demo = gr.Blocks()
|
301 |
+
with demo:
|
302 |
+
gr.Markdown("### 🌟 Språkinlärningsapp med användare & flerspråkighet")
|
303 |
+
with gr.Row():
|
304 |
+
user_input = gr.Textbox(label="Användarnamn", placeholder="Ditt namn här")
|
305 |
+
lang_dd = gr.Dropdown(choices=["es", "en"], value="es", label="Språk")
|
306 |
+
with gr.Column():
|
307 |
+
manual_input = gr.Textbox(lines=4, label="Skriv/klistra in text")
|
308 |
+
file_input = gr.File(file_types=[".txt",".csv",".pdf"], label="Importera fil")
|
309 |
+
grammar_cb = gr.Checkbox(label="Grammatikrättning")
|
310 |
+
autosave_cb = gr.Checkbox(label="Spara okända ord")
|
311 |
+
run_btn = gr.Button("Kör analys & korrigering")
|
312 |
+
num_slider = gr.Slider(minimum=1, maximum=5, step=1, value=2, label="Antal okända ord för övning")
|
313 |
+
coherent_btn = gr.Button("Koherent övningsmening")
|
314 |
+
|
315 |
+
corr_out = gr.Textbox(label="Korrigerad text", lines=4)
|
316 |
+
known_out = gr.Textbox(label="Kända ord")
|
317 |
+
unknown_out = gr.Textbox(label="Okända ord")
|
318 |
+
status_out = gr.Textbox(label="Status")
|
319 |
+
coherent_out = gr.Textbox(label="Koherent övningsmening")
|
320 |
+
|
321 |
+
# --- Button click bindings ---
|
322 |
+
run_btn.click(
|
323 |
+
fn=process,
|
324 |
+
inputs=[user_input, lang_dd, manual_input, file_input, grammar_cb, autosave_cb],
|
325 |
+
outputs=[corr_out, known_out, unknown_out, status_out, coherent_out]
|
326 |
)
|
327 |
+
coherent_btn.click(
|
328 |
+
fn=coherent_fn,
|
329 |
+
inputs=[user_input, lang_dd, manual_input, num_slider],
|
330 |
+
outputs=[coherent_out]
|
331 |
+
)
|
332 |
+
# Make sure to select the language that matches the text file, so that it is analyzed in its target language.
|
333 |
|
334 |
+
# --- Start app ---
|
335 |
+
if __name__ == "__main__":
    # launch() must block here: with prevent_thread_lock=True it returns
    # immediately, the script reaches its end and the server goes down with
    # the interpreter. Gradio prints the local and share URLs itself, so no
    # extra print is needed (launch() returns a tuple, not a URL).
    demo.launch(share=True, inbrowser=True)
|
|
requirements.txt
CHANGED
@@ -1 +1,10 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.29.0
|
2 |
+
transformers
|
3 |
+
nltk
|
4 |
+
pdfplumber
|
5 |
+
spacy
|
6 |
+
torch
|
7 |
+
requests
|
8 |
+
prompt_toolkit
|
9 |
+
es_core_news_lg @ https://github.com/explosion/spacy-models/releases/download/es_core_news_lg-3.7.0/es_core_news_lg-3.7.0-py3-none-any.whl
|
10 |
+
en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.0/en_core_web_lg-3.7.0-py3-none-any.whl
|