Spaces:

JacobWP
/

language_app_Jacob_WP

Runtime error

App Files Files Community

JacobWP committed on Jun 23

Commit

0b721e9

verified ·

1 Parent(s): 7f672ca

Upload 2 files

Browse files

Files changed (2) hide show

app.py +317 -399
requirements.txt +10 -1

app.py CHANGED Viewed

@@ -1,419 +1,337 @@
 """
-Filters that accept a `Application` as argument.
-"""
-from __future__ import annotations
-from typing import TYPE_CHECKING, cast
-from prompt_toolkit.application.current import get_app
-from prompt_toolkit.cache import memoized
-from prompt_toolkit.enums import EditingMode
-from .base import Condition
-if TYPE_CHECKING:
-    from prompt_toolkit.layout.layout import FocusableElement
-__all__ = [
-    "has_arg",
-    "has_completions",
-    "completion_is_selected",
-    "has_focus",
-    "buffer_has_focus",
-    "has_selection",
-    "has_suggestion",
-    "has_validation_error",
-    "is_done",
-    "is_read_only",
-    "is_multiline",
-    "renderer_height_is_known",
-    "in_editing_mode",
-    "in_paste_mode",
-    "vi_mode",
-    "vi_navigation_mode",
-    "vi_insert_mode",
-    "vi_insert_multiple_mode",
-    "vi_replace_mode",
-    "vi_selection_mode",
-    "vi_waiting_for_text_object_mode",
-    "vi_digraph_mode",
-    "vi_recording_macro",
-    "emacs_mode",
-    "emacs_insert_mode",
-    "emacs_selection_mode",
-    "shift_selection_mode",
-    "is_searching",
-    "control_is_searchable",
-    "vi_search_direction_reversed",
-]
-# NOTE: `has_focus` below should *not* be `memoized`. It can reference any user
-#       control. For instance, if we would continuously create new
-#       `PromptSession` instances, then previous instances won't be released,
-#       because this memoize (which caches results in the global scope) will
-#       still refer to each instance.
-def has_focus(value: FocusableElement) -> Condition:
-    """
-    Enable when this buffer has the focus.
-    """
-    from prompt_toolkit.buffer import Buffer
-    from prompt_toolkit.layout import walk
-    from prompt_toolkit.layout.containers import Container, Window, to_container
-    from prompt_toolkit.layout.controls import UIControl
-    if isinstance(value, str):
-        def test() -> bool:
-            return get_app().current_buffer.name == value
-    elif isinstance(value, Buffer):
-        def test() -> bool:
-            return get_app().current_buffer == value
-    elif isinstance(value, UIControl):
-        def test() -> bool:
-            return get_app().layout.current_control == value
-    else:
-        value = to_container(value)
-        if isinstance(value, Window):
-            def test() -> bool:
-                return get_app().layout.current_window == value
-        else:
-            def test() -> bool:
-                # Consider focused when any window inside this container is
-                # focused.
-                current_window = get_app().layout.current_window
-                for c in walk(cast(Container, value)):
-                    if isinstance(c, Window) and c == current_window:
-                        return True
-                return False
-    @Condition
-    def has_focus_filter() -> bool:
-        return test()
-    return has_focus_filter
-@Condition
-def buffer_has_focus() -> bool:
-    """
-    Enabled when the currently focused control is a `BufferControl`.
-    """
-    return get_app().layout.buffer_has_focus
-@Condition
-def has_selection() -> bool:
-    """
-    Enable when the current buffer has a selection.
-    """
-    return bool(get_app().current_buffer.selection_state)
-@Condition
-def has_suggestion() -> bool:
-    """
-    Enable when the current buffer has a suggestion.
-    """
-    buffer = get_app().current_buffer
-    return buffer.suggestion is not None and buffer.suggestion.text != ""
-@Condition
-def has_completions() -> bool:
-    """
-    Enable when the current buffer has completions.
-    """
-    state = get_app().current_buffer.complete_state
-    return state is not None and len(state.completions) > 0
-@Condition
-def completion_is_selected() -> bool:
-    """
-    True when the user selected a completion.
-    """
-    complete_state = get_app().current_buffer.complete_state
-    return complete_state is not None and complete_state.current_completion is not None
-@Condition
-def is_read_only() -> bool:
-    """
-    True when the current buffer is read only.
-    """
-    return get_app().current_buffer.read_only()
-@Condition
-def is_multiline() -> bool:
-    """
-    True when the current buffer has been marked as multiline.
-    """
-    return get_app().current_buffer.multiline()
-@Condition
-def has_validation_error() -> bool:
-    "Current buffer has validation error."
-    return get_app().current_buffer.validation_error is not None
-@Condition
-def has_arg() -> bool:
-    "Enable when the input processor has an 'arg'."
-    return get_app().key_processor.arg is not None
-@Condition
-def is_done() -> bool:
-    """
-    True when the CLI is returning, aborting or exiting.
-    """
-    return get_app().is_done
-@Condition
-def renderer_height_is_known() -> bool:
-    """
-    Only True when the renderer knows it's real height.
-    (On VT100 terminals, we have to wait for a CPR response, before we can be
-    sure of the available height between the cursor position and the bottom of
-    the terminal. And usually it's nicer to wait with drawing bottom toolbars
-    until we receive the height, in order to avoid flickering -- first drawing
-    somewhere in the middle, and then again at the bottom.)
-    """
-    return get_app().renderer.height_is_known
-@memoized()
-def in_editing_mode(editing_mode: EditingMode) -> Condition:
-    """
-    Check whether a given editing mode is active. (Vi or Emacs.)
-    """
-    @Condition
-    def in_editing_mode_filter() -> bool:
-        return get_app().editing_mode == editing_mode
-    return in_editing_mode_filter
-@Condition
-def in_paste_mode() -> bool:
-    return get_app().paste_mode()
-@Condition
-def vi_mode() -> bool:
-    return get_app().editing_mode == EditingMode.VI
-@Condition
-def vi_navigation_mode() -> bool:
-    """
-    Active when the set for Vi navigation key bindings are active.
-    """
-    from prompt_toolkit.key_binding.vi_state import InputMode
-    app = get_app()
-    if (
-        app.editing_mode != EditingMode.VI
-        or app.vi_state.operator_func
-        or app.vi_state.waiting_for_digraph
-        or app.current_buffer.selection_state
-    ):
-        return False
-    return (
-        app.vi_state.input_mode == InputMode.NAVIGATION
-        or app.vi_state.temporary_navigation_mode
-        or app.current_buffer.read_only()
     )
-@Condition
-def vi_insert_mode() -> bool:
-    from prompt_toolkit.key_binding.vi_state import InputMode
-    app = get_app()
-    if (
-        app.editing_mode != EditingMode.VI
-        or app.vi_state.operator_func
-        or app.vi_state.waiting_for_digraph
-        or app.current_buffer.selection_state
-        or app.vi_state.temporary_navigation_mode
-        or app.current_buffer.read_only()
-    ):
-        return False
-    return app.vi_state.input_mode == InputMode.INSERT
-@Condition
-def vi_insert_multiple_mode() -> bool:
-    from prompt_toolkit.key_binding.vi_state import InputMode
-    app = get_app()
-    if (
-        app.editing_mode != EditingMode.VI
-        or app.vi_state.operator_func
-        or app.vi_state.waiting_for_digraph
-        or app.current_buffer.selection_state
-        or app.vi_state.temporary_navigation_mode
-        or app.current_buffer.read_only()
-    ):
-        return False
-    return app.vi_state.input_mode == InputMode.INSERT_MULTIPLE
-@Condition
-def vi_replace_mode() -> bool:
-    from prompt_toolkit.key_binding.vi_state import InputMode
-    app = get_app()
-    if (
-        app.editing_mode != EditingMode.VI
-        or app.vi_state.operator_func
-        or app.vi_state.waiting_for_digraph
-        or app.current_buffer.selection_state
-        or app.vi_state.temporary_navigation_mode
-        or app.current_buffer.read_only()
-    ):
-        return False
-    return app.vi_state.input_mode == InputMode.REPLACE
-@Condition
-def vi_replace_single_mode() -> bool:
-    from prompt_toolkit.key_binding.vi_state import InputMode
-    app = get_app()
-    if (
-        app.editing_mode != EditingMode.VI
-        or app.vi_state.operator_func
-        or app.vi_state.waiting_for_digraph
-        or app.current_buffer.selection_state
-        or app.vi_state.temporary_navigation_mode
-        or app.current_buffer.read_only()
-    ):
-        return False
-    return app.vi_state.input_mode == InputMode.REPLACE_SINGLE
-@Condition
-def vi_selection_mode() -> bool:
-    app = get_app()
-    if app.editing_mode != EditingMode.VI:
-        return False
-    return bool(app.current_buffer.selection_state)
-@Condition
-def vi_waiting_for_text_object_mode() -> bool:
-    app = get_app()
-    if app.editing_mode != EditingMode.VI:
-        return False
-    return app.vi_state.operator_func is not None
-@Condition
-def vi_digraph_mode() -> bool:
-    app = get_app()
-    if app.editing_mode != EditingMode.VI:
-        return False
-    return app.vi_state.waiting_for_digraph
-@Condition
-def vi_recording_macro() -> bool:
-    "When recording a Vi macro."
-    app = get_app()
-    if app.editing_mode != EditingMode.VI:
-        return False
-    return app.vi_state.recording_register is not None
-@Condition
-def emacs_mode() -> bool:
-    "When the Emacs bindings are active."
-    return get_app().editing_mode == EditingMode.EMACS
-@Condition
-def emacs_insert_mode() -> bool:
-    app = get_app()
-    if (
-        app.editing_mode != EditingMode.EMACS
-        or app.current_buffer.selection_state
-        or app.current_buffer.read_only()
-    ):
-        return False
-    return True
-@Condition
-def emacs_selection_mode() -> bool:
-    app = get_app()
-    return bool(
-        app.editing_mode == EditingMode.EMACS and app.current_buffer.selection_state
     )
-@Condition
-def shift_selection_mode() -> bool:
-    app = get_app()
-    return bool(
-        app.current_buffer.selection_state
-        and app.current_buffer.selection_state.shift_mode
     )
-@Condition
-def is_searching() -> bool:
-    "When we are searching."
-    app = get_app()
-    return app.layout.is_searching
-@Condition
-def control_is_searchable() -> bool:
-    "When the current UIControl is searchable."
-    from prompt_toolkit.layout.controls import BufferControl
-    control = get_app().layout.current_control
-    return (
-        isinstance(control, BufferControl) and control.search_buffer_control is not None
     )
-@Condition
-def vi_search_direction_reversed() -> bool:
-    "When the '/' and '?' key bindings for Vi-style searching have been reversed."
-    return get_app().reverse_vi_search_direction()

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 """
+Created on Mon May 19 16:49:22 2025
+@author: jacobwildt-persson
+"""
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# -----------------------------------------------
+# Requirements & Setup Instructions
+# -----------------------------------------------
+# Python version:
+# Requires Python 3.10 or later (tested on 3.12)
+# Run your script inside a virtual environment (e.g. conda or venv) to avoid conflicts.
+# Recreate the environment with these commands in the terminal:
+# conda env create -f environment.yml
+# conda activate sprakenv
+#
+# Install all required packages:
+# Run these commands in the terminal:
+# pip install --upgrade gradio
+# pip install pdfplumber
+# pip install nltk
+# pip install transformers
+# pip install -U spacy
+# Download language models:
+# python -m spacy download es_core_news_lg
+# python -m spacy download en_core_web_lg  # if you add NER for English
+# Check Gradio version used:
+# import gradio as gr
+# print(gr.__version__)  # Gradio version 4.18.0
+# 🔗 Reference: Gradio Quickstart Guide
+# https://www.gradio.app/guides/quickstart
+# Hugging Face models:
+# https://huggingface.co/models
+# English grammar-check API:
+# LanguageTool API: https://languagetool.org/http-api/swagger
+# Remember!
+# Run your script inside a virtual environment (e.g. conda or venv) to avoid conflicts.
+# Recreate the environment with these commands in the terminal:
+# conda env create -f environment.yml
+# conda activate sprakenv
+# python -m spacy download es_core_news_lg
+# python -m nltk.downloader punkt wordnet
+# -----------------------------------------------
+"""
+Language learning app with a Gradio UI and support for multiple users:
+- Import text from file (.txt/.csv/.pdf) or manual text input
+- Grammar correction via transformers (Spanish) or LanguageTool API (English)
+- Analyze text (known/unknown words) per user & language
+- Save unknown words as known
+- Generate coherent practice sentence (Spanish & English)
+- Log grammar corrections and practice sentence suggestions to CSV
+"""
+import os
+import datetime
+import sqlite3
+import requests
+import random
+import pandas as pd
+import pdfplumber
+import spacy
+import csv
+# SQLite is accessed via the built-in sqlite3 module (no need to install sqlite3-binary)
+import sqlite3
+from nltk.tokenize import word_tokenize
+from nltk.stem import WordNetLemmatizer
+from transformers import AutoTokenizer, BartForConditionalGeneration, AutoModelForCausalLM
+import gradio as gr
+import gradio_client.utils as _gcu
+# --- PATCH for Gradio utils schema bug ---
+_orig_json = _gcu.json_schema_to_python_type
+_orig_get = _gcu.get_type
+def _patched_json_to_py(schema, defs=None):
+    """Convert a JSON schema to a Python type string, or "any" on failure.
+
+    Wraps the original ``gradio_client.utils.json_schema_to_python_type``:
+    a schema that is not a dict, or any exception raised by the original
+    converter, yields the string "any" instead of propagating.
+    """
+    fallback = "any"
+    if isinstance(schema, dict):
+        try:
+            converted = _orig_json(schema, defs)
+        except Exception:
+            # Best-effort patch: a schema the converter rejects is typed "any".
+            return fallback
+        return converted
+    return fallback
+def _patched_get_type(schema):
+    """Return the type name for a JSON schema, or "any" on failure.
+
+    Wraps the original ``gradio_client.utils.get_type``: a schema that is
+    not a dict, or any exception raised by the original function, yields
+    the string "any" instead of propagating.
+    """
+    fallback = "any"
+    if isinstance(schema, dict):
+        try:
+            resolved = _orig_get(schema)
+        except Exception:
+            # Best-effort patch: a schema the original rejects is typed "any".
+            return fallback
+        return resolved
+    return fallback
+_gcu.json_schema_to_python_type = _patched_json_to_py
+_gcu.get_type = _patched_get_type
+# --- SQLite Database initialization ---
+# Runs at import time: opens (creating if needed) the SQLite file named by
+# DB_NAME and makes sure the `vocabulary` table exists. Each row is one word a
+# user has marked as known in one language; the UNIQUE constraint is what lets
+# save_word_to_db() use INSERT OR IGNORE to skip duplicates.
+# The path is relative, so the file is resolved against the process's current
+# working directory.
+DB_NAME = "vocabulary.db"
+conn = sqlite3.connect(DB_NAME)
+conn.execute("""
+    CREATE TABLE IF NOT EXISTS vocabulary (
+      user_id   TEXT,
+      language  TEXT,
+      word      TEXT,
+      timestamp TEXT,
+      UNIQUE(user_id, language, word)
     )
+""")
+conn.commit()
+# The setup connection is closed immediately; the helper functions below open
+# their own short-lived connections per call.
+conn.close()
+# --- Save word to database ---
+def save_word_to_db(user_id: str, language: str, word: str):
+    """Record ``word`` as known for ``user_id`` in ``language``.
+
+    The row is stamped with the current local time in ISO-8601 format.
+    ``INSERT OR IGNORE`` makes the call a no-op when the same
+    (user_id, language, word) triple is already stored.
+
+    Args:
+        user_id: Name of the user the word belongs to.
+        language: Language code the word is stored under (e.g. "es", "en").
+        word: The word (lemma) to store.
+    """
+    ts = datetime.datetime.now().isoformat()
+    conn = sqlite3.connect(DB_NAME)
+    try:
+        conn.execute(
+            "INSERT OR IGNORE INTO vocabulary (user_id, language, word, timestamp) VALUES (?, ?, ?, ?)",
+            (user_id, language, word, ts)
+        )
+        conn.commit()
+    finally:
+        # Always release the connection, even if the insert or commit fails.
+        conn.close()
+# --- Retrieve known words for user/language ---
+def get_user_vocabulary(user_id: str, language: str) -> set[str]:
+    """Return the set of words stored as known for ``user_id`` in ``language``.
+
+    Args:
+        user_id: Name of the user whose vocabulary is read.
+        language: Language code to filter on (e.g. "es", "en").
+
+    Returns:
+        A set of the stored words; empty when the user has none.
+    """
+    conn = sqlite3.connect(DB_NAME)
+    try:
+        rows = conn.execute(
+            "SELECT word FROM vocabulary WHERE user_id=? AND language=?",
+            (user_id, language)
+        ).fetchall()
+    finally:
+        # Always release the connection, even if the query fails.
+        conn.close()
+    return {r[0] for r in rows}
+# --- Load NLP models ---
+# All models below are loaded once, at import time.
+# Spanish spaCy pipeline; correct_grammar() uses it to split text into sentences.
+nlp = spacy.load("es_core_news_lg")
+# Tokenizer/model pair used by correct_grammar() for the Spanish ("es") branch.
+tokenizer = AutoTokenizer.from_pretrained("SkitCon/gec-spanish-BARTO-COWS-L2H")
+model     = BartForConditionalGeneration.from_pretrained("SkitCon/gec-spanish-BARTO-COWS-L2H")
+# Causal language models used by generate_coherent_sentence(): the "_es" pair
+# when the language is "es", the "_en" pair for every other language.
+gpt2_tokenizer_es = AutoTokenizer.from_pretrained("mrm8488/spanish-gpt2")
+gpt2_model_es     = AutoModelForCausalLM.from_pretrained("mrm8488/spanish-gpt2")
+gpt2_tokenizer_en = AutoTokenizer.from_pretrained("gpt2")
+gpt2_model_en     = AutoModelForCausalLM.from_pretrained("gpt2")
+# WordNet lemmatizer; analyze_text() applies it to every token regardless of
+# the selected language.
+lemmatizer        = WordNetLemmatizer()
+# ---Log to CSV (grammar corrections and sentence suggestions)  ---
+def log_to_csv(filename, row, fieldnames):
+    """Append one row to a CSV log, writing the header row first when needed.
+
+    Args:
+        filename: Path of the CSV file; created if it does not exist.
+        row: Mapping from field name to value for the row to append.
+        fieldnames: Column order, also used for the header row.
+    """
+    # A header is needed for a new file and for one that exists but is still
+    # empty (e.g. pre-created or truncated); checking existence alone would
+    # leave such a file without a header forever.
+    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
+    with open(filename, "a", newline='', encoding="utf-8") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+        if needs_header:
+            writer.writeheader()
+        writer.writerow(row)
+# --- File Import ---
+def import_file(path: str) -> str:
+    """Read the text content of a .pdf, .csv or .txt file.
+
+    Args:
+        path: Path of the file; the format is chosen from its extension
+            (case-insensitive).
+
+    Returns:
+        PDF: the extracted text of every page joined by newlines (pages with
+        no extractable text contribute an empty string).
+        CSV: the values of the ``text`` column joined by newlines.
+        TXT: the file content decoded as UTF-8.
+
+    Raises:
+        ValueError: If a CSV has no ``text`` column or the extension is not
+            one of the supported formats.
+    """
+    ext = os.path.splitext(path)[1].lower()
+    if ext == ".pdf":
+        with pdfplumber.open(path) as pdf:
+            pages = [p.extract_text() or "" for p in pdf.pages]
+        return "\n".join(pages)
+    if ext == ".csv":
+        df = pd.read_csv(path)
+        if "text" in df:
+            return "\n".join(df["text"].astype(str))
+        raise ValueError("CSV saknar kolumnen 'text'.")
+    if ext == ".txt":
+        # Context manager so the file handle is closed deterministically.
+        with open(path, encoding="utf-8") as fh:
+            return fh.read()
+    raise ValueError(f"Okänt filformat: {ext}")
+# --- Grammar Correction ---
+def correct_grammar(text: str, language: str) -> str:
+    """Return ``text`` with grammar corrections applied.
+
+    Spanish ("es") is corrected locally, sentence by sentence, with the
+    module-level seq2seq model. Every other language code is sent to the
+    public LanguageTool HTTP API and the first suggestion of each reported
+    match is applied.
+
+    Args:
+        text: The text to correct.
+        language: Language code; "es" selects the local model, anything else
+            is passed to LanguageTool as-is.
+
+    Returns:
+        The corrected text.
+
+    Raises:
+        requests.RequestException: If the LanguageTool request fails, times
+            out or returns an HTTP error status.
+    """
+    if language == "es":
+        corrected = []
+        for sent in nlp(text).sents:
+            s = sent.text.strip()
+            if not s:
+                continue
+            inp = tokenizer(s, return_tensors="pt", truncation=True, padding=True)
+            out = model.generate(
+                **inp,
+                # Cap the output at the input length in tokens.
+                max_new_tokens=inp.input_ids.shape[1],
+                num_beams=5,
+                early_stopping=True
+            )
+            corrected.append(tokenizer.decode(out[0], skip_special_tokens=True))
+        return " ".join(corrected)
+    # English: LanguageTool API
+    if not text.strip():
+        # Nothing to check; skip the network round trip.
+        return text
+    resp = requests.post(
+        "https://api.languagetool.org/v2/check",
+        data={"text": text, "language": language},
+        # Without a timeout a stalled connection would hang the UI callback.
+        timeout=30,
+    )
+    # Surface HTTP errors explicitly instead of failing later while parsing.
+    resp.raise_for_status()
+    return _apply_languagetool_matches(text, resp.json().get("matches", []))
+
+
+def _apply_languagetool_matches(text: str, matches: list) -> str:
+    """Apply the first suggested replacement of each LanguageTool match."""
+    # Splice from the last match to the first so earlier offsets stay valid
+    # (presumably the API lists matches in ascending offset order - confirm).
+    for m in reversed(matches):
+        repls = m.get("replacements") or []
+        if not repls:
+            # No suggestion offered: keep the flagged span as written.
+            # Substituting "" here would silently delete the user's text.
+            continue
+        off, ln = m["offset"], m["length"]
+        text = text[:off] + repls[0]["value"] + text[off + ln:]
+    return text
+# --- Analyze known and unknown words ---
+def analyze_text(text: str, user_id: str, language: str):
+    """Split the words of ``text`` into known and unknown for a user.
+
+    The text is tokenized, tokens that are not purely alphabetic are dropped,
+    and the rest are lower-cased and lemmatized with the module-level WordNet
+    lemmatizer. Each lemma is then looked up in the user's stored vocabulary
+    for ``language``.
+
+    Returns:
+        A ``(known, unknown)`` pair of lists, in text order, with one entry
+        per occurrence (duplicates are kept).
+    """
+    lemmas = []
+    for token in word_tokenize(text):
+        if token.isalpha():
+            lemmas.append(lemmatizer.lemmatize(token.lower()))
+    vocab = get_user_vocabulary(user_id, language)
+    known, unknown = [], []
+    for lemma in lemmas:
+        bucket = known if lemma in vocab else unknown
+        bucket.append(lemma)
+    return known, unknown
+# --- Generate sentence using GPT2 based on unknown words ---
+def generate_coherent_sentence(text: str, user_id: str, language: str, num_unknown=2) -> str:
+    """Generate one practice sentence built around the user's unknown words.
+
+    Up to ``num_unknown`` distinct unknown words from ``text`` are picked at
+    random and placed in a prompt for the Spanish GPT-2 model ("es") or the
+    English GPT-2 model (any other language). The first sentence of the
+    sampled continuation is returned.
+
+    Args:
+        text: Text whose unknown words seed the sentence.
+        user_id: User whose stored vocabulary decides what is unknown.
+        language: Language code; "es" selects the Spanish model and prompt.
+        num_unknown: Maximum number of unknown words to include.
+
+    Returns:
+        The generated sentence, or a (Swedish) message when there are no
+        unknown words or the model produced no alphabetic text.
+    """
+    _, unknown = analyze_text(text, user_id, language)
+    # analyze_text() returns one entry per occurrence; de-duplicate (keeping
+    # order) so the same word cannot be sampled twice into the prompt.
+    candidates = list(dict.fromkeys(unknown))
+    if not candidates:
+        return "Inga okända ord att generera mening med."
+    # The count may arrive as a float from a UI slider; random.sample needs an int.
+    chosen = random.sample(candidates, min(int(num_unknown), len(candidates)))
+    # Distinct local names so the module-level grammar `tokenizer`/`model`
+    # are not shadowed inside this function.
+    if language == "es":
+        prompt = "Escribe una sola frase clara que incluya estas palabras: " + ", ".join(chosen) + "."
+        gen_tokenizer = gpt2_tokenizer_es
+        gen_model     = gpt2_model_es
+    else:
+        prompt = "Write one clear sentence that includes the following words: " + ", ".join(chosen) + "."
+        gen_tokenizer = gpt2_tokenizer_en
+        gen_model     = gpt2_model_en
+    inp = gen_tokenizer(prompt, return_tensors="pt", truncation=True)
+    outs = gen_model.generate(
+        **inp,
+        max_new_tokens=50,
+        do_sample=True,
+        top_k=50,
+        top_p=0.95,
+        # GPT-2 style tokenizers normally define no pad token; naming EOS
+        # explicitly avoids generate() falling back to it with a warning.
+        pad_token_id=gen_tokenizer.eos_token_id,
+    )
+    gen = gen_tokenizer.decode(outs[0], skip_special_tokens=True)
+    # The decoded output echoes the prompt; keep only the continuation.
+    body = gen[len(prompt):].strip() if gen.startswith(prompt) else gen.strip()
+    # Keep just the first sentence when the continuation contains a period.
+    sentence = (body.split(".")[0].strip() + ".") if "." in body else body
+    if not any(c.isalpha() for c in sentence):
+        return "Misslyckades att generera meningsfull övningsmening."
+    return sentence
+# --- Gradio process callback ---
+def process(user, language, txt, file, do_grammar, do_save):
+    """Gradio callback: correct, analyze and optionally save the input text.
+
+    Typed text takes precedence over an uploaded file. The text is optionally
+    grammar-corrected, split into known/unknown words for ``user``, and the
+    unknown words are optionally stored as known. Every run is appended to
+    ``grammarlog.csv``.
+
+    Args:
+        user: User name the vocabulary is stored under.
+        language: Language code ("es" or "en" in the UI).
+        txt: Text typed into the UI, may be empty or None.
+        file: Uploaded file from the UI, or None.
+        do_grammar: Whether to run grammar correction.
+        do_save: Whether to store the unknown words as known.
+
+    Returns:
+        A 5-tuple ``(corrected_text, known_words, unknown_words, status, "")``
+        matching the five output components; the last element clears the
+        practice-sentence box. On any error the status element carries the
+        traceback and the other elements are empty.
+    """
+    try:
+        if txt and txt.strip():
+            text = txt.strip()
+        elif file:
+            # The upload may be an object exposing `.name` or a plain path
+            # string, depending on the Gradio version; accept both.
+            text = import_file(getattr(file, "name", file))
+        else:
+            return "", "", "", "Ingen text angiven.", ""
+        out = correct_grammar(text, language) if do_grammar else text
+        kn, un = analyze_text(out, user, language)
+        status = ""
+        if do_save and un:
+            # `un` has one entry per occurrence; save and count each word once
+            # so the status reports how many distinct words were submitted.
+            unique_unknown = list(dict.fromkeys(un))
+            for w in unique_unknown:
+                save_word_to_db(user, language, w)
+            status = f"Sparade {len(unique_unknown)} ord."
+        # Log the grammar correction to CSV.
+        log_to_csv(
+            "grammarlog.csv",
+            {
+                "user": user, "language": language, "input": text,
+                "output": out, "timestamp": datetime.datetime.now().isoformat()
+            },
+            ["user", "language", "input", "output", "timestamp"]
+        )
+        return out, ", ".join(kn), ", ".join(un), status, ""
+    except Exception:
+        # Top-level UI boundary: show the full traceback in the status box
+        # instead of letting the callback fail without detail.
+        import traceback
+        tb = traceback.format_exc()
+        return "", "", "", f"FEL i process:\n{tb}", ""
+# --- Sentence generation callback ---
+def coherent_fn(user, language, txt, num):
+    """Gradio callback: generate a practice sentence and log it.
+
+    Calls generate_coherent_sentence() with ``txt`` (an empty string when
+    ``txt`` is falsy) and appends the suggestion to ``sentencelog.csv``.
+
+    Returns:
+        The generated suggestion, or an error message prefixed with
+        "Fel vid generering:" if generation or logging raised.
+    """
+    try:
+        suggestion = generate_coherent_sentence(txt or "", user, language, num)
+        # Log the practice suggestion to CSV; the dict's insertion order
+        # doubles as the column order.
+        record = {
+            "user": user,
+            "language": language,
+            "input": txt,
+            "output": suggestion,
+            "timestamp": datetime.datetime.now().isoformat(),
+        }
+        log_to_csv("sentencelog.csv", record, list(record))
+    except Exception as err:
+        return f"Fel vid generering: {err}"
+    return suggestion
+# --- Gradio UI ---
+# Build the UI declaratively: components are created inside the `with demo:`
+# context in the order they should appear on the page.
+demo = gr.Blocks()
+with demo:
+    gr.Markdown("### 🌟 Språkinlärningsapp med användare & flerspråkighet")
+    # Who is using the app and which language the text is in; both values are
+    # passed to the two callbacks wired up below.
+    with gr.Row():
+        user_input  = gr.Textbox(label="Användarnamn", placeholder="Ditt namn här")
+        lang_dd     = gr.Dropdown(choices=["es", "en"], value="es", label="Språk")
+    # Inputs and action buttons.
+    with gr.Column():
+        manual_input = gr.Textbox(lines=4, label="Skriv/klistra in text")
+        file_input   = gr.File(file_types=[".txt",".csv",".pdf"], label="Importera fil")
+        grammar_cb   = gr.Checkbox(label="Grammatikrättning")
+        autosave_cb  = gr.Checkbox(label="Spara okända ord")
+        run_btn      = gr.Button("Kör analys & korrigering")
+        num_slider   = gr.Slider(minimum=1, maximum=5, step=1, value=2, label="Antal okända ord för övning")
+        coherent_btn = gr.Button("Koherent övningsmening")
+    # Output boxes filled by the callbacks.
+    corr_out    = gr.Textbox(label="Korrigerad text", lines=4)
+    known_out   = gr.Textbox(label="Kända ord")
+    unknown_out = gr.Textbox(label="Okända ord")
+    status_out  = gr.Textbox(label="Status")
+    coherent_out = gr.Textbox(label="Koherent övningsmening")
+    # --- Button click wiring ---
+    # process() returns five values, one per output component listed here; its
+    # last value is always "" and therefore clears the practice-sentence box.
+    run_btn.click(
+        fn=process,
+        inputs=[user_input, lang_dd, manual_input, file_input, grammar_cb, autosave_cb],
+        outputs=[corr_out, known_out, unknown_out, status_out, coherent_out]
     )
+    # coherent_fn() reads only the typed text, not the uploaded file.
+    coherent_btn.click(
+        fn=coherent_fn,
+        inputs=[user_input, lang_dd, manual_input, num_slider],
+        outputs=[coherent_out]
+    )
+    # Make sure the selected language matches the language of the text being analyzed.
+# --- Start app ---
+if __name__ == "__main__":
+    # With prevent_thread_lock=True, launch() returns immediately with a
+    # (app, local_url, share_url) tuple rather than a single URL.
+    _app, local_url, share_url = demo.launch(share=True, inbrowser=True, prevent_thread_lock=True)
+    print("Appen körs på:", share_url or local_url)
+    # Keep the main thread alive; otherwise the script ends right after the
+    # print and the server stops with it.
+    demo.block_thread()

requirements.txt CHANGED Viewed

	@@ -1 +1,10 @@
1	- ~~huggingface_hub~~==0.25.2

+gradio==4.29.0
+transformers
+nltk
+pdfplumber
+spacy
+torch
+requests
+prompt_toolkit
+es_core_news_lg @ https://github.com/explosion/spacy-models/releases/download/es_core_news_lg-3.7.0/es_core_news_lg-3.7.0-py3-none-any.whl
+en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.0/en_core_web_lg-3.7.0-py3-none-any.whl