Connexus committed on
Commit 9a053c1 · verified · 1 Parent(s): 2978d40

Upload 8 files

.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,23 @@
+ # Use a standard Python 3.10 base image
+ FROM python:3.10-slim
+
+ # Set the working directory inside the container
+ WORKDIR /code
+
+ # Copy the requirements file into the container
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Download the NLTK 'punkt' package during the build
+ RUN python -c "import nltk; nltk.download('punkt')"
+
+ # Copy the rest of your application code into the container
+ COPY . /code/
+
+ # Tell the container to listen on port 7860 (the default for HF Spaces)
+ EXPOSE 7860
+
+ # The command to run your application using Gunicorn
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
README.md CHANGED
@@ -1,11 +1,13 @@
- ---
- title: Grammar Genie Api
- emoji: 📚
- colorFrom: red
- colorTo: yellow
- sdk: docker
- pinned: false
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: Grammar Genie API
+ emoji: 🧞
+ colorFrom: blue
+ colorTo: purple
+ sdk: docker
+ pinned: false
+ ---
+
+ # Grammar Genie API Backend
+
+ This Space hosts the Python/Flask backend for the Grammar Genie application.
+ It loads grammar-correction models from a private Hugging Face Hub repository and exposes a /correct endpoint.
app.py ADDED
@@ -0,0 +1,51 @@
+ from flask import Flask, request, jsonify
+ from services.grammar_service import GrammarService
+
+ # Initialize the Flask web server
+ app = Flask(__name__)
+
+ # --- Load the AI models ONCE when the server starts ---
+ # This is crucial for performance: it prevents reloading the models on every request.
+ GrammarService.load_models()
+
+
+ # --- Define the API endpoint for grammar correction ---
+ @app.route("/correct", methods=["POST"])
+ def correct_grammar_endpoint():
+     """
+     API endpoint to handle grammar correction requests.
+     Expects a JSON payload with 'text' and 'language' keys.
+     """
+     # 1. Get data from the request (silent=True returns None instead of raising
+     #    on a malformed body, so the JSON error response below is reachable)
+     data = request.get_json(silent=True)
+     if not data:
+         return jsonify({"error": "Invalid request: No JSON payload received."}), 400
+
+     text_to_correct = data.get('text')
+     language = data.get('language')
+
+     # 2. Validate the input
+     if not text_to_correct or not language:
+         return jsonify({"error": "Missing 'text' or 'language' in request."}), 400
+
+     print(f"\nReceived request for language: '{language}'")
+     print(f" > Input text: '{text_to_correct[:100]}...'")  # Log first 100 chars
+
+     # 3. Call the service to perform the correction
+     corrected_text = GrammarService.correct_paragraph(text_to_correct, language)
+
+     print(f" > Corrected text: '{corrected_text[:100]}...'")
+
+     # 4. Send the result back to the client (Flutter app)
+     return jsonify({
+         "original_text": text_to_correct,
+         "corrected_text": corrected_text,
+         "language": language
+     })
+
+ # --- Main execution block to run the server ---
+ if __name__ == "__main__":
+     # Use host='0.0.0.0' to make the server accessible from your local network.
+     # This is essential for testing with your mobile phone.
+     print("Starting Flask server...")
+     app.run(host='0.0.0.0', port=5000, debug=False)
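
A minimal client sketch for exercising the /correct endpoint, assuming the dev server above is running locally on port 5000 (for the deployed Space, swap in the Space URL on port 7860); the sample sentence is illustrative.

```python
# Hypothetical client for the /correct endpoint defined in app.py.
import requests

resp = requests.post(
    "http://localhost:5000/correct",
    json={"text": "She go to school yesterday.", "language": "english"},
    timeout=120,  # the first request can be slow while the model warms up
)
resp.raise_for_status()
print(resp.json()["corrected_text"])
```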
download_models.py ADDED
@@ -0,0 +1,50 @@
+ # File: backend/download_models.py
+
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import os
+
+ # --- This is your central registry of models ---
+ MODELS_TO_DOWNLOAD = {
+     "english": "pszemraj/flan-t5-large-grammar-synthesis",
+     "french": "PoloHuggingface/French_grammar_error_corrector"
+ }
+
+ # The base directory where all models will be stored
+ BASE_MODELS_DIR = "models"
+
+ def download_all_models():
+     """
+     Downloads and saves all models defined in the MODELS_TO_DOWNLOAD registry
+     into clean, language-named folders.
+     """
+     if not os.path.exists(BASE_MODELS_DIR):
+         os.makedirs(BASE_MODELS_DIR)
+
+     for lang, model_name in MODELS_TO_DOWNLOAD.items():
+         local_path = os.path.join(BASE_MODELS_DIR, lang)
+
+         print("-" * 50)
+         print(f"Processing language: '{lang}'")
+         print(f" > Hugging Face model: '{model_name}'")
+         print(f" > Saving to local path: '{local_path}'")
+
+         if os.path.exists(local_path) and os.listdir(local_path):
+             print(" > Model already exists locally. Skipping download.")
+             continue
+
+         try:
+             tokenizer = AutoTokenizer.from_pretrained(model_name)
+             model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+             model.save_pretrained(local_path)
+             tokenizer.save_pretrained(local_path)
+
+             print(" > Download and save successful!")
+
+         except Exception as e:
+             print(f" > [ERROR] Failed to download model for '{lang}'.")
+             print(" > Please check for typos or network issues.")
+             print(f" > Details: {e}")
+
+ if __name__ == "__main__":
+     download_all_models()
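
download_models.py only saves the models locally, while grammar_service.py reads them from language-named subfolders of the private repo Connexus/grammar-genie-models. A plausible bridging step, not part of this commit, is an upload via huggingface_hub, sketched here under the assumption that the repo already exists and HUGGING_FACE_TOKEN has write access.

```python
# Hypothetical upload step: push each local models/<lang> folder to the
# matching subfolder of the private Hub repo that grammar_service.py loads.
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ["HUGGING_FACE_TOKEN"])
for lang in ("english", "french"):
    api.upload_folder(
        folder_path=os.path.join("models", lang),
        path_in_repo=lang,  # matches the subfolder= used at load time
        repo_id="Connexus/grammar-genie-models",
        repo_type="model",
    )
```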
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ # File: backend/requirements.txt
+
+ flask
+ transformers
+ torch
+ nltk
+ sentencepiece
+ gunicorn
services/__pycache__/grammar_service.cpython-310.pyc ADDED
Binary file (3.4 kB).
 
services/grammar_service.py ADDED
@@ -0,0 +1,113 @@
+ import os
+ import nltk
+ import torch
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+
+ class GrammarService:
+     """
+     A service class to handle loading grammar models from a private Hugging Face Hub
+     and correcting text for the Grammar Genie application.
+     This version manually loads the model and tokenizer for maximum reliability.
+     """
+     _models = {}
+
+     # --- CONFIGURATION ---
+     _hf_repo_name = "Connexus/grammar-genie-models"  # Your specific repo name
+     _hf_token = os.environ.get("HUGGING_FACE_TOKEN")
+
+     @classmethod
+     def load_models(cls):
+         """
+         Loads all available models from the private Hugging Face repository into memory.
+         This is called once when the server starts.
+         """
+         print("="*50)
+         print(f"Initializing GrammarService: Loading models from '{cls._hf_repo_name}'...")
+
+         # Ensure NLTK's sentence tokenizer is available
+         try:
+             nltk.data.find('tokenizers/punkt')
+         except LookupError:  # nltk.data.find raises LookupError when data is missing
+             print(" > First-time setup: Downloading NLTK's 'punkt' sentence tokenizer...")
+             nltk.download('punkt', quiet=True)
+
+         supported_languages = ["english", "french"]
+
+         if not cls._hf_token:
+             print(" > [FATAL ERROR] HUGGING_FACE_TOKEN environment variable not set.")
+             print("="*50)
+             return
+
+         for lang in supported_languages:
+             model_subfolder = lang
+             print(f" > Loading model for '{lang}' from subfolder '{model_subfolder}'...")
+             try:
+                 # --- LOADING METHOD ---
+                 # 1. Manually load the tokenizer from the specific subfolder
+                 tokenizer = AutoTokenizer.from_pretrained(
+                     cls._hf_repo_name,
+                     subfolder=model_subfolder,
+                     use_auth_token=cls._hf_token
+                 )
+
+                 # 2. Manually load the model from the specific subfolder
+                 model = AutoModelForSeq2SeqLM.from_pretrained(
+                     cls._hf_repo_name,
+                     subfolder=model_subfolder,
+                     use_auth_token=cls._hf_token
+                 )
+
+                 # 3. Create the pipeline using the pre-loaded components
+                 device_num = 0 if torch.cuda.is_available() else -1
+                 cls._models[lang] = pipeline(
+                     "text2text-generation",  # the seq2seq text-to-text task name
+                     model=model,
+                     tokenizer=tokenizer,
+                     device=device_num
+                 )
+                 print(f" > Model for '{lang}' loaded successfully.")
+
+             except Exception as e:
+                 print(f" > [ERROR] Failed to load model for '{lang}'. Please check repo name, subfolder, and token.")
+                 print(f" > Details: {e}")
+
+         print("Model loading complete.")
+         print("="*50)
+
+     @classmethod
+     def correct_paragraph(cls, paragraph: str, language: str) -> str:
+         """
+         Corrects the grammar of a paragraph for a specified language.
+         Splits the paragraph into sentences and corrects each one in turn.
+         """
+         if language not in cls._models:
+             return f"Error: Language '{language}' is not supported or its model failed to load."
+
+         corrector = cls._models[language]
+         sentences = nltk.sent_tokenize(paragraph, language=language)  # punkt ships both languages
+
+         if language == 'english':
+             prefix = "fix grammatical errors in the following text: "
+         elif language == 'french':
+             prefix = ""
+         else:
+             prefix = "correct grammar: "
+
+         corrected_sentences = []
+         for sentence in sentences:
+             input_text = f"{prefix}{sentence}"
+             try:
+                 results = corrector(input_text, max_length=256, num_beams=5)
+                 raw_output = results[0]['generated_text']
+
+                 if prefix and raw_output.startswith(prefix):
+                     clean_sentence = raw_output.replace(prefix, "", 1).strip()
+                 else:
+                     clean_sentence = raw_output.strip()
+
+                 corrected_sentences.append(clean_sentence)
+             except Exception as e:
+                 print(f" > [WARNING] Failed to process a sentence. Using original. Error: {e}")
+                 corrected_sentences.append(sentence)
+
+         return " ".join(corrected_sentences)