Spaces:
Runtime error
Runtime error
Delete app.py 2.txt
Browse files- app.py 2.txt +0 -337
app.py 2.txt
DELETED
@@ -1,337 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
# -*- coding: utf-8 -*-
|
3 |
-
"""
|
4 |
-
Created on Mon May 19 16:49:22 2025
|
5 |
-
|
6 |
-
@author: jacobwildt-persson
|
7 |
-
"""
|
8 |
-
|
9 |
-
#!/usr/bin/env python3
|
10 |
-
# -*- coding: utf-8 -*-
|
11 |
-
# -----------------------------------------------
|
12 |
-
# Requirements & Setup Instructions
|
13 |
-
# -----------------------------------------------
|
14 |
-
|
15 |
-
# Python version:
|
16 |
-
# Requires Python 3.10 or later (tested on 3.12)
|
17 |
-
|
18 |
-
|
19 |
-
# Run your script inside a virtual environment (e.g. conda or venv) to avoid conflicts.
|
20 |
-
# Recreate the environment with these commands in the terminal
|
21 |
-
# conda env create -f environment.yml
|
22 |
-
# conda activate sprakenv
|
23 |
-
#
|
24 |
-
|
25 |
-
# Install all required packages:
|
26 |
-
# Run these commands in the terminal:
|
27 |
-
|
28 |
-
# pip install --upgrade gradio
|
29 |
-
# pip install pdfplumber
|
30 |
-
# pip install nltk
|
31 |
-
# pip install transformers
|
32 |
-
# pip install -U spacy
|
33 |
-
|
34 |
-
# Download language models:
|
35 |
-
# python -m spacy download es_core_news_lg
|
36 |
-
# python -m spacy download en_core_web_lg # if you add NER for English
|
37 |
-
|
38 |
-
# Check Gradio version used:
|
39 |
-
# import gradio as gr
|
40 |
-
# print(gr.__version__) # Gradio version 4.18.0
|
41 |
-
|
42 |
-
# 🔗 Reference: Gradio Quickstart Guide
|
43 |
-
# https://www.gradio.app/guides/quickstart
|
44 |
-
#Hugging Face
|
45 |
-
# https://huggingface.co/models
|
46 |
-
|
47 |
-
# English API model
|
48 |
-
# LanguageTool API: https://languagetool.org/http-api/swagger
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
# REMEMBER:
|
53 |
-
# Run your script inside a virtual environment (e.g. conda or venv) to avoid conflicts.
|
54 |
-
# Recreate the environment with these commands in the terminal
|
55 |
-
# conda env create -f environment.yml
|
56 |
-
# conda activate sprakenv
|
57 |
-
# python -m spacy download es_core_news_lg
|
58 |
-
#python -m nltk.downloader punkt wordnet
|
59 |
-
# -----------------------------------------------
|
60 |
-
"""
|
61 |
-
Language learning app with Gradio UI, supporting multiple users:
|
62 |
-
- Import text from file (.txt/.csv/.pdf) or manual text input
|
63 |
-
- Grammar correction via transformers (Spanish) or LanguageTool API (English)
|
64 |
-
- Analyze text (known/unknown words) per user & language
|
65 |
-
- Save unknown words as known
|
66 |
-
- Generate coherent practice sentence (Spanish & English)
|
67 |
-
- Log grammar corrections and practice sentence suggestions to CSV
|
68 |
-
"""
|
69 |
-
import os
|
70 |
-
import datetime
|
71 |
-
import sqlite3
|
72 |
-
import requests
|
73 |
-
import random
|
74 |
-
import pandas as pd
|
75 |
-
import pdfplumber
|
76 |
-
import spacy
|
77 |
-
import csv
|
78 |
-
# SQLite is accessed via the built-in sqlite3 module (no need to install sqlite3-binary)
|
79 |
-
import sqlite3
|
80 |
-
|
81 |
-
from nltk.tokenize import word_tokenize
|
82 |
-
from nltk.stem import WordNetLemmatizer
|
83 |
-
from transformers import AutoTokenizer, BartForConditionalGeneration, AutoModelForCausalLM
|
84 |
-
import gradio as gr
|
85 |
-
import gradio_client.utils as _gcu
|
86 |
-
|
87 |
-
# --- PATCH for Gradio utils schema bug ---
# Some Gradio versions crash inside json_schema_to_python_type / get_type when
# handed a non-dict schema (e.g. a bare bool). Wrap both so they degrade to
# the permissive type string "any" instead of raising.
_orig_json = _gcu.json_schema_to_python_type
_orig_get = _gcu.get_type


def _patched_json_to_py(schema, defs=None):
    """Defensive wrapper: return "any" for non-dict schemas or on any error."""
    if not isinstance(schema, dict):
        return "any"
    try:
        return _orig_json(schema, defs)
    except Exception:
        return "any"


def _patched_get_type(schema):
    """Defensive wrapper around the original get_type; mirrors _patched_json_to_py."""
    if not isinstance(schema, dict):
        return "any"
    try:
        return _orig_get(schema)
    except Exception:
        return "any"


# Install the wrappers in place of the originals.
_gcu.json_schema_to_python_type = _patched_json_to_py
_gcu.get_type = _patched_get_type
|
109 |
-
|
110 |
-
# --- SQLite database initialization ---
DB_NAME = "vocabulary.db"


def _init_db() -> None:
    """Create the vocabulary table in DB_NAME if it does not exist (idempotent)."""
    conn = sqlite3.connect(DB_NAME)
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS vocabulary (
                user_id TEXT,
                language TEXT,
                word TEXT,
                timestamp TEXT,
                UNIQUE(user_id, language, word)
            )
        """)
        conn.commit()
    finally:
        # Fix: the original leaked the connection if CREATE TABLE raised.
        conn.close()


_init_db()
|
124 |
-
|
125 |
-
# --- Save word to database ---
|
126 |
-
def save_word_to_db(user_id: str, language: str, word: str):
|
127 |
-
ts = datetime.datetime.now().isoformat()
|
128 |
-
conn = sqlite3.connect(DB_NAME)
|
129 |
-
conn.execute(
|
130 |
-
"INSERT OR IGNORE INTO vocabulary (user_id, language, word, timestamp) VALUES (?, ?, ?, ?)",
|
131 |
-
(user_id, language, word, ts)
|
132 |
-
)
|
133 |
-
conn.commit()
|
134 |
-
conn.close()
|
135 |
-
|
136 |
-
# --- Retrieve known words for user/language ---
|
137 |
-
def get_user_vocabulary(user_id: str, language: str) -> set[str]:
|
138 |
-
conn = sqlite3.connect(DB_NAME)
|
139 |
-
rows = conn.execute(
|
140 |
-
"SELECT word FROM vocabulary WHERE user_id=? AND language=?",
|
141 |
-
(user_id, language)
|
142 |
-
).fetchall()
|
143 |
-
conn.close()
|
144 |
-
return {r[0] for r in rows}
|
145 |
-
|
146 |
-
# --- Load NLP models (heavy: all loaded once at import time) ---
_GEC_MODEL = "SkitCon/gec-spanish-BARTO-COWS-L2H"

# spaCy pipeline used for Spanish sentence segmentation in correct_grammar.
nlp = spacy.load("es_core_news_lg")
# Seq2seq grammar-correction model for Spanish.
tokenizer = AutoTokenizer.from_pretrained(_GEC_MODEL)
model = BartForConditionalGeneration.from_pretrained(_GEC_MODEL)
# GPT-2 language models for practice-sentence generation (Spanish / English).
gpt2_tokenizer_es = AutoTokenizer.from_pretrained("mrm8488/spanish-gpt2")
gpt2_model_es = AutoModelForCausalLM.from_pretrained("mrm8488/spanish-gpt2")
gpt2_tokenizer_en = AutoTokenizer.from_pretrained("gpt2")
gpt2_model_en = AutoModelForCausalLM.from_pretrained("gpt2")
# NOTE(review): WordNetLemmatizer is English-only; Spanish tokens pass through
# mostly unchanged in analyze_text — confirm this is intended.
lemmatizer = WordNetLemmatizer()
|
155 |
-
|
156 |
-
# --- Log to CSV (grammar corrections and sentence suggestions) ---
def log_to_csv(filename, row, fieldnames):
    """Append *row* (a dict) to *filename* as CSV.

    Writes the header line only when the file does not yet exist.
    """
    needs_header = not os.path.isfile(filename)
    with open(filename, "a", newline='', encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow(row)
|
164 |
-
|
165 |
-
# --- File Import ---
def import_file(path: str) -> str:
    """Extract plain text from a .pdf, .csv, or .txt file.

    - .pdf: concatenates text of all pages (blank string for image-only pages)
    - .csv: requires a 'text' column; joins its values line by line
    - .txt: returns the UTF-8 file contents

    Raises ValueError for an unknown extension or a CSV without 'text'.
    """
    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        with pdfplumber.open(path) as pdf:
            return "\n".join(page.extract_text() or "" for page in pdf.pages)
    if ext == ".csv":
        df = pd.read_csv(path)
        if "text" in df:
            return "\n".join(df["text"].astype(str))
        raise ValueError("CSV saknar kolumnen 'text'.")
    if ext == ".txt":
        # Fix: the original used open(path).read() without closing the handle.
        with open(path, encoding="utf-8") as fh:
            return fh.read()
    raise ValueError(f"Okänt filformat: {ext}")
|
182 |
-
|
183 |
-
# --- Grammar Correction ---

def correct_grammar(text: str, language: str) -> str:
    """Return a grammar-corrected version of *text*.

    Spanish ("es"): each sentence is corrected by the local BARTO seq2seq
    model. Any other language code is sent to the public LanguageTool HTTP
    API and its first suggested replacement per match is applied.
    """
    if language == "es":
        corrected = []
        for sent in nlp(text).sents:
            s = sent.text.strip()
            if not s:
                continue
            inp = tokenizer(s, return_tensors="pt", truncation=True, padding=True)
            out = model.generate(
                **inp,
                max_new_tokens=inp.input_ids.shape[1],
                num_beams=5,
                early_stopping=True,
            )
            corrected.append(tokenizer.decode(out[0], skip_special_tokens=True))
        return " ".join(corrected)
    # English (and other codes): LanguageTool API.
    # Fix: added a timeout so a hung API call cannot freeze the UI forever.
    resp = requests.post(
        "https://api.languagetool.org/v2/check",
        data={"text": text, "language": language},
        timeout=30,
    ).json()
    # Apply replacements from the end of the text so earlier offsets stay valid.
    for m in reversed(resp.get("matches", [])):
        off, ln = m["offset"], m["length"]
        repls = m.get("replacements", [])
        val = repls[0]["value"] if repls else ""
        text = text[:off] + val + text[off + ln:]
    return text
|
211 |
-
|
212 |
-
# --- Analyze known and unknown words ---

def analyze_text(text: str, user_id: str, language: str):
    """Tokenize and lemmatize *text*, then partition the lemmas into
    (known, unknown) lists relative to the user's stored vocabulary.

    Order and duplicates of the input lemmas are preserved in both lists.
    """
    lemmas = [
        lemmatizer.lemmatize(token.lower())
        for token in word_tokenize(text)
        if token.isalpha()
    ]
    vocab = get_user_vocabulary(user_id, language)
    known, unknown = [], []
    for lemma in lemmas:
        (known if lemma in vocab else unknown).append(lemma)
    return known, unknown
|
221 |
-
# --- Generate sentence using GPT-2 based on unknown words ---
def generate_coherent_sentence(text: str, user_id: str, language: str, num_unknown=2) -> str:
    """Generate one practice sentence containing up to *num_unknown* of the
    user's unknown words from *text*, using the Spanish or English GPT-2.

    Returns a Swedish status message when there are no unknown words or the
    model output contains no letters.
    """
    _, unknown = analyze_text(text, user_id, language)
    if not unknown:
        return "Inga okända ord att generera mening med."
    picked = random.sample(unknown, min(num_unknown, len(unknown)))
    if language == "es":
        prompt = "Escribe una sola frase clara que incluya estas palabras: " + ", ".join(picked) + "."
        tok, lm = gpt2_tokenizer_es, gpt2_model_es
    else:
        prompt = "Write one clear sentence that includes the following words: " + ", ".join(picked) + "."
        tok, lm = gpt2_tokenizer_en, gpt2_model_en
    encoded = tok(prompt, return_tensors="pt", truncation=True)
    generated = lm.generate(
        **encoded,
        max_new_tokens=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    decoded = tok.decode(generated[0], skip_special_tokens=True)
    # Strip the prompt echo, then keep only the first sentence.
    body = decoded[len(prompt):].strip() if decoded.startswith(prompt) else decoded.strip()
    sentence = (body.split(".")[0].strip() + ".") if "." in body else body
    if not any(ch.isalpha() for ch in sentence):
        return "Misslyckades att generera meningsfull övningsmening."
    return sentence
|
249 |
-
|
250 |
-
|
251 |
-
# --- Gradio process callback ---
def process(user, language, txt, file, do_grammar, do_save):
    """Main analysis callback for the Gradio UI.

    Resolves the input text (manual text takes priority over an uploaded
    file), optionally applies grammar correction, partitions words into
    known/unknown for *user*/*language*, optionally saves the unknown words,
    and logs the run to grammarlog.csv.

    Returns a 5-tuple (corrected_text, known_csv, unknown_csv, status, "") —
    the trailing empty string clears the practice-sentence output box.
    """
    try:
        if txt and txt.strip():
            text = txt.strip()
        elif file:
            text = import_file(file.name)
        else:
            return "", "", "", "Ingen text angiven.", ""
        out = correct_grammar(text, language) if do_grammar else text
        kn, un = analyze_text(out, user, language)
        status = ""
        if do_save and un:
            for w in un:
                save_word_to_db(user, language, w)
            status = f"Sparade {len(un)} ord."
        # Log the grammar correction to CSV.
        log_to_csv(
            "grammarlog.csv",
            {
                "user": user, "language": language, "input": text,
                "output": out, "timestamp": datetime.datetime.now().isoformat()
            },
            ["user", "language", "input", "output", "timestamp"]
        )
        return out, ", ".join(kn), ", ".join(un), status, ""
    except Exception:
        # Fix: dropped the unused `as e` binding; the traceback is what is shown.
        # Surface the full traceback in the status box instead of crashing the UI.
        import traceback
        tb = traceback.format_exc()
        return "", "", "", f"FEL i process:\n{tb}", ""
|
281 |
-
|
282 |
-
# --- Sentence generation callback ---
def coherent_fn(user, language, txt, num):
    """Gradio callback: generate a practice sentence, log it, and return it.

    Any failure (generation or logging) is reported as a Swedish error string
    instead of raising into the UI.
    """
    try:
        sentence = generate_coherent_sentence(txt or "", user, language, num)
        # Log the practice-sentence suggestion to CSV.
        fields = ["user", "language", "input", "output", "timestamp"]
        log_to_csv(
            "sentencelog.csv",
            {
                "user": user,
                "language": language,
                "input": txt,
                "output": sentence,
                "timestamp": datetime.datetime.now().isoformat(),
            },
            fields,
        )
        return sentence
    except Exception as e:
        return f"Fel vid generering: {e}"
|
298 |
-
|
299 |
-
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("### 🌟 Språkinlärningsapp med användare & flerspråkighet")
    # User identity and target language.
    with gr.Row():
        user_input = gr.Textbox(label="Användarnamn", placeholder="Ditt namn här")
        lang_dd = gr.Dropdown(choices=["es", "en"], value="es", label="Språk")
    # Text input, options, and action buttons.
    with gr.Column():
        manual_input = gr.Textbox(lines=4, label="Skriv/klistra in text")
        file_input = gr.File(file_types=[".txt", ".csv", ".pdf"], label="Importera fil")
        grammar_cb = gr.Checkbox(label="Grammatikrättning")
        autosave_cb = gr.Checkbox(label="Spara okända ord")
        run_btn = gr.Button("Kör analys & korrigering")
        num_slider = gr.Slider(minimum=1, maximum=5, step=1, value=2, label="Antal okända ord för övning")
        coherent_btn = gr.Button("Koherent övningsmening")

    # Output widgets.
    corr_out = gr.Textbox(label="Korrigerad text", lines=4)
    known_out = gr.Textbox(label="Kända ord")
    unknown_out = gr.Textbox(label="Okända ord")
    status_out = gr.Textbox(label="Status")
    coherent_out = gr.Textbox(label="Koherent övningsmening")

    # Wire the buttons to their callbacks.
    run_btn.click(
        fn=process,
        inputs=[user_input, lang_dd, manual_input, file_input, grammar_cb, autosave_cb],
        outputs=[corr_out, known_out, unknown_out, status_out, coherent_out],
    )
    coherent_btn.click(
        fn=coherent_fn,
        inputs=[user_input, lang_dd, manual_input, num_slider],
        outputs=[coherent_out],
    )
# NOTE: set the language dropdown to match the language of the text being analyzed.
|
333 |
-
|
334 |
-
# --- Start app ---
if __name__ == "__main__":
    # prevent_thread_lock=True makes launch() return immediately; without a
    # subsequent block the script would exit and take the server down with it.
    url = demo.launch(share=True, inbrowser=True, prevent_thread_lock=True)
    print("Appen körs på:", url)
    # Fix: keep the process alive while the app is being served.
    demo.block_thread()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|