Connexus committed on
Commit 9a053c1 · verified · 1 Parent(s): 2978d40

Upload 8 files

.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,23 @@
+ # Use a standard Python 3.10 base image
+ FROM python:3.10-slim
+
+ # Set the working directory inside the container
+ WORKDIR /code
+
+ # Copy the requirements file into the container
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Download the NLTK 'punkt' package during the build
+ RUN python -c "import nltk; nltk.download('punkt')"
+
+ # Copy the rest of your application code into the container
+ COPY . /code/
+
+ # Tell the container to listen on port 7860 (the default for HF Spaces)
+ EXPOSE 7860
+
+ # The command to run your application using Gunicorn
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
README.md CHANGED
@@ -1,11 +1,13 @@
- ---
- title: Grammar Genie Api
- emoji: 📚
- colorFrom: red
- colorTo: yellow
- sdk: docker
- pinned: false
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: Grammar Genie API
+ emoji: 🧞
+ colorFrom: blue
+ colorTo: purple
+ sdk: docker
+ pinned: false
+ ---
+
+ # Grammar Genie API Backend
+
+ This Space hosts the Python/Flask backend for the Grammar Genie application.
+ It loads grammar-correction models from a private Hugging Face Hub repository and exposes a /correct endpoint.
app.py ADDED
@@ -0,0 +1,51 @@
+ from flask import Flask, request, jsonify
+ from services.grammar_service import GrammarService
+
+ # Initialize the Flask web server
+ app = Flask(__name__)
+
+ # --- Load the AI models ONCE when the server starts ---
+ # This is crucial for performance: it prevents reloading the models on every request.
+ GrammarService.load_models()
+
+
+ # --- Define the API endpoint for grammar correction ---
+ @app.route("/correct", methods=["POST"])
+ def correct_grammar_endpoint():
+     """
+     API endpoint to handle grammar correction requests.
+     Expects a JSON payload with 'text' and 'language' keys.
+     """
+     # 1. Get data from the request (silent=True returns None instead of raising
+     #    on a malformed body, so the JSON error response below is reachable)
+     data = request.get_json(silent=True)
+     if not data:
+         return jsonify({"error": "Invalid request: No JSON payload received."}), 400
+
+     text_to_correct = data.get('text')
+     language = data.get('language')
+
+     # 2. Validate the input
+     if not text_to_correct or not language:
+         return jsonify({"error": "Missing 'text' or 'language' in request."}), 400
+
+     print(f"\nReceived request for language: '{language}'")
+     print(f" > Input text: '{text_to_correct[:100]}...'")  # Log first 100 chars
+
+     # 3. Call the service to perform the correction
+     corrected_text = GrammarService.correct_paragraph(text_to_correct, language)
+
+     print(f" > Corrected text: '{corrected_text[:100]}...'")
+
+     # 4. Send the result back to the client (Flutter app)
+     return jsonify({
+         "original_text": text_to_correct,
+         "corrected_text": corrected_text,
+         "language": language
+     })
+
+ # --- Main execution block to run the server ---
+ if __name__ == "__main__":
+     # Use host='0.0.0.0' to make the server accessible from your local network.
+     # This is essential for testing with your mobile phone.
+     print("Starting Flask server...")
+     app.run(host='0.0.0.0', port=5000, debug=False)
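
A minimal client sketch for exercising the /correct endpoint, assuming the dev server above is running locally on port 5000 (for the deployed Space, swap in the Space URL on port 7860); the sample sentence is illustrative.

```python
# Hypothetical client for the /correct endpoint defined in app.py.
import requests

resp = requests.post(
    "http://localhost:5000/correct",
    json={"text": "She go to school yesterday.", "language": "english"},
    timeout=120,  # the first request can be slow while the model warms up
)
resp.raise_for_status()
print(resp.json()["corrected_text"])
```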
download_models.py ADDED
@@ -0,0 +1,50 @@
+ # File: backend/download_models.py
+
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import os
+
+ # --- This is your central registry of models ---
+ MODELS_TO_DOWNLOAD = {
+     "english": "pszemraj/flan-t5-large-grammar-synthesis",
+     "french": "PoloHuggingface/French_grammar_error_corrector"
+ }
+
+ # The base directory where all models will be stored
+ BASE_MODELS_DIR = "models"
+
+ def download_all_models():
+     """
+     Downloads and saves all models defined in the MODELS_TO_DOWNLOAD registry
+     into clean, language-named folders.
+     """
+     if not os.path.exists(BASE_MODELS_DIR):
+         os.makedirs(BASE_MODELS_DIR)
+
+     for lang, model_name in MODELS_TO_DOWNLOAD.items():
+         local_path = os.path.join(BASE_MODELS_DIR, lang)
+
+         print("-" * 50)
+         print(f"Processing language: '{lang}'")
+         print(f" > Hugging Face model: '{model_name}'")
+         print(f" > Saving to local path: '{local_path}'")
+
+         if os.path.exists(local_path) and os.listdir(local_path):
+             print(" > Model already exists locally. Skipping download.")
+             continue
+
+         try:
+             tokenizer = AutoTokenizer.from_pretrained(model_name)
+             model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+             model.save_pretrained(local_path)
+             tokenizer.save_pretrained(local_path)
+
+             print(" > Download and save successful!")
+
+         except Exception as e:
+             print(f" > [ERROR] Failed to download model for '{lang}'.")
+             print(" > Please check for typos or network issues.")
+             print(f" > Details: {e}")
+
+ if __name__ == "__main__":
+     download_all_models()
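
download_models.py only saves the models locally, while grammar_service.py reads them from language-named subfolders of the private repo Connexus/grammar-genie-models. A plausible bridging step, not part of this commit, is an upload via huggingface_hub, sketched here under the assumption that the repo already exists and HUGGING_FACE_TOKEN has write access.

```python
# Hypothetical upload step: push each local models/<lang> folder to the
# matching subfolder of the private Hub repo that grammar_service.py loads.
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ["HUGGING_FACE_TOKEN"])
for lang in ("english", "french"):
    api.upload_folder(
        folder_path=os.path.join("models", lang),
        path_in_repo=lang,  # matches the subfolder= used at load time
        repo_id="Connexus/grammar-genie-models",
        repo_type="model",
    )
```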
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ # File: backend/requirements.txt
+
+ flask
+ transformers
+ torch
+ nltk
+ sentencepiece
+ gunicorn
services/__pycache__/grammar_service.cpython-310.pyc ADDED
Binary file (3.4 kB).
 
services/grammar_service.py ADDED
@@ -0,0 +1,113 @@
+ import os
+ import nltk
+ import torch
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+
+ class GrammarService:
+     """
+     A service class to handle loading grammar models from a private Hugging Face Hub
+     and correcting text for the Grammar Genie application.
+     This version manually loads the model and tokenizer for maximum reliability.
+     """
+     _models = {}
+
+     # --- CONFIGURATION ---
+     _hf_repo_name = "Connexus/grammar-genie-models"  # Your specific repo name
+     _hf_token = os.environ.get("HUGGING_FACE_TOKEN")
+
+     @classmethod
+     def load_models(cls):
+         """
+         Loads all available models from the private Hugging Face repository into memory.
+         This is called once when the server starts.
+         """
+         print("="*50)
+         print(f"Initializing GrammarService: Loading models from '{cls._hf_repo_name}'...")
+
+         # Ensure NLTK's sentence tokenizer is available
+         try:
+             nltk.data.find('tokenizers/punkt')
+         except LookupError:  # nltk.data.find raises LookupError when data is missing
+             print(" > First-time setup: Downloading NLTK's 'punkt' sentence tokenizer...")
+             nltk.download('punkt', quiet=True)
+
+         supported_languages = ["english", "french"]
+
+         if not cls._hf_token:
+             print(" > [FATAL ERROR] HUGGING_FACE_TOKEN environment variable not set.")
+             print("="*50)
+             return
+
+         for lang in supported_languages:
+             model_subfolder = lang
+             print(f" > Loading model for '{lang}' from subfolder '{model_subfolder}'...")
+             try:
+                 # --- LOADING METHOD ---
+                 # 1. Manually load the tokenizer from the specific subfolder
+                 tokenizer = AutoTokenizer.from_pretrained(
+                     cls._hf_repo_name,
+                     subfolder=model_subfolder,
+                     use_auth_token=cls._hf_token
+                 )
+
+                 # 2. Manually load the model from the specific subfolder
+                 model = AutoModelForSeq2SeqLM.from_pretrained(
+                     cls._hf_repo_name,
+                     subfolder=model_subfolder,
+                     use_auth_token=cls._hf_token
+                 )
+
+                 # 3. Create the pipeline using the pre-loaded components
+                 device_num = 0 if torch.cuda.is_available() else -1
+                 cls._models[lang] = pipeline(
+                     "text2text-generation",  # the seq2seq text-to-text task name
+                     model=model,
+                     tokenizer=tokenizer,
+                     device=device_num
+                 )
+                 print(f" > Model for '{lang}' loaded successfully.")
+
+             except Exception as e:
+                 print(f" > [ERROR] Failed to load model for '{lang}'. Please check repo name, subfolder, and token.")
+                 print(f" > Details: {e}")
+
+         print("Model loading complete.")
+         print("="*50)
+
+     @classmethod
+     def correct_paragraph(cls, paragraph: str, language: str) -> str:
+         """
+         Corrects the grammar of a paragraph for a specified language.
+         Splits the paragraph into sentences and corrects each one in turn.
+         """
+         if language not in cls._models:
+             return f"Error: Language '{language}' is not supported or its model failed to load."
+
+         corrector = cls._models[language]
+         sentences = nltk.sent_tokenize(paragraph, language=language)  # punkt ships both languages
+
+         if language == 'english':
+             prefix = "fix grammatical errors in the following text: "
+         elif language == 'french':
+             prefix = ""
+         else:
+             prefix = "correct grammar: "
+
+         corrected_sentences = []
+         for sentence in sentences:
+             input_text = f"{prefix}{sentence}"
+             try:
+                 results = corrector(input_text, max_length=256, num_beams=5)
+                 raw_output = results[0]['generated_text']
+
+                 if prefix and raw_output.startswith(prefix):
+                     clean_sentence = raw_output.replace(prefix, "", 1).strip()
+                 else:
+                     clean_sentence = raw_output.strip()
+
+                 corrected_sentences.append(clean_sentence)
+             except Exception as e:
+                 print(f" > [WARNING] Failed to process a sentence. Using original. Error: {e}")
+                 corrected_sentences.append(sentence)
+
+         return " ".join(corrected_sentences)