Spaces:

sxtran
/

grammar-corrector-app

Sleeping

App Files Files Community

Sontran0108 committed on Sep 17

Commit

42d64b4

1 Parent(s): 7ec0c82

Add CoEdIT handler with custom logic

Browse files

Files changed (5) hide show

README.md +53 -6
app.py +144 -0
handler.py +136 -0
packages.txt +4 -0
requirements.txt +5 -0

README.md CHANGED Viewed

@@ -1,13 +1,60 @@
 ---
-title: GRAMMAR CORRECTOR
-emoji: 🔥
 colorFrom: blue
-colorTo: green
 sdk: gradio
-sdk_version: 5.46.0
 app_file: app.py
 pinned: false
-license: apache-2.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: CoEdIT Handler
+emoji: ✏️
 colorFrom: blue
+colorTo: purple
 sdk: gradio
+sdk_version: 4.0.0
 app_file: app.py
 pinned: false
+license: cc-by-nc-4.0
+short_description: Custom handler for Grammarly CoEdIT model
 ---
+# CoEdIT Handler
+This is a custom handler for the Grammarly CoEdIT model, providing grammar correction and text enhancement capabilities.
+## Features
+- Grammar correction
+- Text coherence improvement
+- Text simplification
+- Paraphrasing
+- Formal/informal style conversion
+- Neutral tone conversion
+## API Usage
+The Space provides a REST API endpoint at `/predict`:
+```bash
+curl -X POST "https://your-space-url.hf.space/predict" \
+     -H "Content-Type: application/json" \
+     -d '{"inputs": ["Fix the grammar: When I grow up, I start to understand what he said is quite right."]}'
+```
+## Response Format
+```json
+{
+  "success": true,
+  "results": [
+    {
+      "original_sentence": "Fix the grammar: When I grow up, I start to understand what he said is quite right.",
+      "enhanced_sentence": "When I grow up, I start to understand that what he said is quite right.",
+      "changes": [
+        {
+          "original_phrase": "what he said is quite right",
+          "new_phrase": "that what he said is quite right",
+          "explanation": "replace change"
+        }
+      ]
+    }
+  ]
+}
+```
+## Model
+This handler uses the [grammarly/coedit-large](https://huggingface.co/grammarly/coedit-large) model.

app.py ADDED Viewed

	@@ -0,0 +1,144 @@

+#!/usr/bin/env python3
+"""
+Hugging Face Space app for CoEdIT Handler
+"""
+import gradio as gr
+import sys
+import os
+import json
+# Add current directory to path so we can import handler
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+from handler import EndpointHandler
+# Initialize the handler
+# The model is loaded once at import time so every request reuses the same
+# instance. A load failure must not crash the app: `handler` is left as None
+# and process_text reports the problem to the user instead.
+print("🚀 Initializing CoEdIT Handler...")
+try:
+    handler = EndpointHandler("grammarly/coedit-large")
+    print("✅ Handler initialized successfully")
+except Exception as e:
+    import traceback
+
+    print(f"❌ Failed to initialize handler: {e}")
+    # The UI tells users to "check the logs", so record the full traceback
+    # rather than only the exception message.
+    traceback.print_exc()
+    handler = None
+def process_text(text, num_return_sequences=1, temperature=1.0):
+    """Run *text* through the CoEdIT handler and format the result as Markdown.
+
+    Args:
+        text: The sentence (optionally including an instruction) to process.
+        num_return_sequences: How many candidate rewrites to request. UI
+            sliders may deliver this as a float, so it is coerced to ``int``.
+        temperature: Sampling temperature forwarded to the handler.
+
+    Returns:
+        A Markdown string. With a single result it contains an
+        "Enhanced Text" section; with several results each candidate gets
+        its own "Variation N" section. Each section lists the detected
+        changes. On failure a message starting with "❌" is returned.
+    """
+    if handler is None:
+        return "❌ Handler not initialized. Please check the logs."
+    # Do not spend a model call on empty or whitespace-only input.
+    if not text or not text.strip():
+        return "❌ Error: No sentences provided."
+    try:
+        # Prepare input for the handler
+        inputs = {
+            "inputs": [text],
+            "parameters": {
+                "num_return_sequences": int(num_return_sequences),
+                "temperature": float(temperature),
+            },
+        }
+        # Process through handler
+        result = handler(inputs)
+        if not result.get("success", False):
+            return f"❌ Error: {result.get('error', 'Unknown error')}"
+        results = result.get("results", [])
+        if not results:
+            return "No results returned."
+
+        # One input sentence is sent, so several results mean several
+        # candidate rewrites of that sentence; show all of them.
+        multiple = len(results) > 1
+        sections = []
+        for index, item in enumerate(results, 1):
+            heading = f"**Variation {index}:**" if multiple else "**Enhanced Text:**"
+            enhanced = item.get("enhanced_sentence", "")
+            section = f"{heading}\n{enhanced}\n\n"
+            changes = item.get("changes", [])
+            if changes:
+                section += "**Changes Made:**\n"
+                described = []
+                for change in changes:
+                    original = change.get("original_phrase", "")
+                    new = change.get("new_phrase", "")
+                    if original and new:
+                        described.append(f"'{original}' → '{new}'")
+                    elif new:
+                        # Pure insertion: nothing was replaced.
+                        described.append(f"inserted '{new}'")
+                    elif original:
+                        # Pure deletion: nothing was added.
+                        described.append(f"removed '{original}'")
+                # Number only the entries actually shown so there are no gaps.
+                section += "".join(
+                    f"{number}. {line}\n" for number, line in enumerate(described, 1)
+                )
+            sections.append(section)
+        return "\n".join(sections)
+    except Exception as e:
+        return f"❌ Error processing text: {str(e)}"
+# Create Gradio interface
+def create_interface():
+    """Build and return the Gradio Blocks app (not yet launched).
+
+    Layout: a left column with the input textbox, two sliders and the
+    submit button; a right column with the Markdown output; then example
+    prompts and a short API usage note.
+    """
+    example_prompts = [
+        "Fix the grammar: When I grow up, I start to understand what he said is quite right.",
+        "Make this text coherent: Their flight is weak. They run quickly through the tree canopy.",
+        "Rewrite to make this easier to understand: A storm surge is what forecasters consider a hurricane's most treacherous aspect.",
+        "Paraphrase this: Do you know where I was born?",
+        "Write this more formally: omg i love that song im listening to it right now",
+    ]
+    soft_theme = gr.themes.Soft()
+    with gr.Blocks(title="CoEdIT Handler", theme=soft_theme) as app:
+        gr.Markdown("""
+        # CoEdIT Text Editor
+        This is a custom handler for the Grammarly CoEdIT model, providing grammar correction and text enhancement.
+        """)
+        with gr.Row():
+            # Left column: inputs and controls.
+            with gr.Column():
+                prompt_box = gr.Textbox(
+                    lines=3,
+                    label="Input Text",
+                    placeholder="Fix the grammar: When I grow up, I start to understand what he said is quite right.",
+                )
+                with gr.Row():
+                    variations_slider = gr.Slider(
+                        label="Number of variations",
+                        minimum=1,
+                        maximum=5,
+                        step=1,
+                        value=1,
+                    )
+                    temperature_slider = gr.Slider(
+                        label="Temperature",
+                        minimum=0.1,
+                        maximum=2.0,
+                        step=0.1,
+                        value=1.0,
+                    )
+                run_button = gr.Button("Process Text", variant="primary")
+            # Right column: rendered result.
+            with gr.Column():
+                result_panel = gr.Markdown(label="Enhanced Text")
+        # Clickable example prompts that fill the input box.
+        gr.Examples(examples=example_prompts, inputs=prompt_box)
+        # Wire the button to the processing function.
+        run_button.click(
+            fn=process_text,
+            inputs=[prompt_box, variations_slider, temperature_slider],
+            outputs=result_panel,
+        )
+        # NOTE(review): nothing visible in this file registers a `/predict`
+        # route accepting an {"inputs": [...]} payload — confirm that the
+        # usage text below matches the API the Space actually exposes.
+        gr.Markdown("""
+        ## API Endpoint
+        This Space also provides an API endpoint at `/predict` for programmatic access:
+        ```bash
+        curl -X POST "https://your-space-url.hf.space/predict" \\
+             -H "Content-Type: application/json" \\
+             -d '{"inputs": ["Your text here"]}'
+        ```
+        """)
+    return app
+# Create the interface
+# Only build and serve the UI when this file is executed as a script.
+if __name__ == "__main__":
+    demo = create_interface()
+    # Listen on all network interfaces, port 7860.
+    demo.launch(server_name="0.0.0.0", server_port=7860)

handler.py ADDED Viewed

	@@ -0,0 +1,136 @@

+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
+from difflib import SequenceMatcher
+class EndpointHandler:
+    """Inference handler around a seq2seq text-editing model.
+
+    Calling an instance with a string, a list of strings, or a dict of the
+    form ``{"inputs": ..., "parameters": {...}}`` returns a dict holding the
+    rewritten sentences plus a word-level diff against each original.
+    """
+
+    def __init__(self, path=""):
+        """Load tokenizer and model from *path* (or "." when *path* is empty).
+
+        The model is moved to CUDA when available, otherwise CPU, and put in
+        evaluation mode.
+        """
+        model_path = path if path else "."
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model.to(self.device)
+        self.model.eval()  # Set to evaluation mode
+
+    def paraphrase_batch(self, sentences, num_return_sequences=1, temperature=1.0):
+        """Generate rewrites for *sentences*.
+
+        Args:
+            sentences: List of input strings.
+            num_return_sequences: Candidates to generate per sentence.
+            temperature: Sampling temperature. Values other than 1.0 switch
+                generation to sampling, since temperature has no effect on
+                plain beam search.
+
+        Returns:
+            A flat list of strings when ``num_return_sequences == 1``;
+            otherwise a list with one sub-list of candidates per sentence.
+        """
+        # NOTE(review): this prefix is always prepended, even when the
+        # caller's text already carries its own instruction (for example
+        # "Paraphrase this: ..."). Confirm that this is intended.
+        prefix = "correct grammar for this sentence: "
+        sentences_with_prefix = [prefix + s for s in sentences]
+        inputs = self.tokenizer(
+            sentences_with_prefix,
+            padding=True,
+            truncation=True,
+            max_length=512,
+            return_tensors="pt"
+        ).to(self.device)
+        generate_kwargs = {
+            "max_length": 512,
+            # Beam search cannot return more sequences than it has beams.
+            "num_beams": max(5, num_return_sequences),
+            "temperature": temperature,
+            "num_return_sequences": num_return_sequences,
+            "early_stopping": True,
+        }
+        if temperature != 1.0:
+            # Temperature only applies in sampling mode. The default of 1.0
+            # keeps the original deterministic beam search.
+            generate_kwargs["do_sample"] = True
+        outputs = self.model.generate(**inputs, **generate_kwargs)
+        decoded = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
+        if num_return_sequences > 1:
+            # Outputs arrive flattened; regroup them per input sentence.
+            return [
+                decoded[i * num_return_sequences:(i + 1) * num_return_sequences]
+                for i in range(len(sentences))
+            ]
+        return decoded
+
+    @staticmethod
+    def _token_char_spans(text):
+        """Return ``(start, end)`` character offsets of each whitespace-split token."""
+        spans = []
+        cursor = 0
+        for token in text.split():
+            start = text.index(token, cursor)
+            cursor = start + len(token)
+            spans.append((start, cursor))
+        return spans
+
+    def compute_changes(self, original, enhanced):
+        """Return the word-level differences between *original* and *enhanced*.
+
+        Each change dict carries the replaced and new phrase, the token
+        range ``[token_start, token_end)`` in *original*, and the matching
+        character range ``[char_start, char_end)`` in *original*. For a pure
+        insertion both ranges are empty and sit at the insertion point.
+        """
+        original_tokens = original.split()
+        enhanced_tokens = enhanced.split()
+        spans = self._token_char_spans(original)
+        changes = []
+        matcher = SequenceMatcher(None, original_tokens, enhanced_tokens)
+        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
+            if tag not in ("replace", "insert", "delete"):
+                continue
+            # Convert token indices to character offsets in the original.
+            char_start = spans[i1][0] if i1 < len(spans) else len(original)
+            char_end = spans[i2 - 1][1] if i2 > i1 else char_start
+            changes.append({
+                "original_phrase": " ".join(original_tokens[i1:i2]),
+                "new_phrase": " ".join(enhanced_tokens[j1:j2]),
+                "char_start": char_start,
+                "char_end": char_end,
+                "token_start": i1,
+                "token_end": i2,
+                "explanation": f"{tag} change",
+                "error_type": "",
+                "tip": ""
+            })
+        return changes
+
+    def __call__(self, inputs):
+        """Main entry point: validate *inputs*, run the model, build the response.
+
+        Args:
+            inputs: A string, a list of strings, or a dict with an "inputs"
+                key (string or list of strings) and an optional
+                "parameters" dict ("num_return_sequences", "temperature").
+
+        Returns:
+            A dict with "success" set to True and a "results" list, or with
+            "success" set to False and an "error" message. Errors are never
+            raised to the caller.
+        """
+        invalid_format = {
+            "success": False,
+            "error": "Invalid input format. Expected a string, list of strings, or a dictionary with 'inputs' and 'parameters' keys."
+        }
+        if isinstance(inputs, str):
+            sentences = [inputs]
+            parameters = {}
+        elif isinstance(inputs, list):
+            sentences = inputs
+            parameters = {}
+        elif isinstance(inputs, dict):
+            # Check for the common {"inputs": "...", "parameters": {}} format
+            sentences = inputs.get("inputs", [])
+            # If inputs is a single string, wrap it in a list
+            if isinstance(sentences, str):
+                sentences = [sentences]
+            # Treat an explicit null the same as an absent key.
+            parameters = inputs.get("parameters") or {}
+        else:
+            return invalid_format
+        if not sentences:
+            return {
+                "success": False,
+                "error": "No sentences provided."
+            }
+        if not isinstance(sentences, (list, tuple)) or not all(
+            isinstance(s, str) for s in sentences
+        ):
+            return invalid_format
+        if not isinstance(parameters, dict):
+            return invalid_format
+        # Handle optional parameters. JSON clients and UI sliders may send
+        # floats, so coerce before the values are used for slicing.
+        try:
+            num_return_sequences = int(parameters.get("num_return_sequences", 1))
+            temperature = float(parameters.get("temperature", 1.0))
+        except (TypeError, ValueError):
+            return {
+                "success": False,
+                "error": "Parameters 'num_return_sequences' and 'temperature' must be numeric."
+            }
+        if num_return_sequences < 1 or temperature <= 0:
+            return {
+                "success": False,
+                "error": "'num_return_sequences' must be >= 1 and 'temperature' must be > 0."
+            }
+        try:
+            paraphrased = self.paraphrase_batch(sentences, num_return_sequences, temperature)
+            # Normalize to one candidate list per sentence.
+            if num_return_sequences > 1:
+                groups = paraphrased
+            else:
+                groups = [[cand] for cand in paraphrased]
+            results = [
+                {
+                    "original_sentence": orig,
+                    "enhanced_sentence": cand,
+                    "changes": self.compute_changes(orig, cand)
+                }
+                for orig, candidates in zip(sentences, groups)
+                for cand in candidates
+            ]
+            return {
+                "success": True,
+                "results": results,
+                "sentences_count": len(sentences),
+                "processed_count": len(results),
+                "skipped_count": 0,
+                "error_count": 0
+            }
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e),
+                "sentences_count": len(sentences),
+                "processed_count": 0,
+                "skipped_count": 0,
+                "error_count": 1
+            }

packages.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+# System packages for Hugging Face Spaces
+git
+wget
+curl

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+torch>=2.0.0
+transformers>=4.30.0
+flask>=2.3.0
+requests>=2.31.0
+safetensors>=0.3.0