Sontran0108 committed on
Commit
42d64b4
·
1 Parent(s): 7ec0c82

Add CoEdIT handler with custom logic

Browse files
Files changed (5) hide show
  1. README.md +53 -6
  2. app.py +144 -0
  3. handler.py +136 -0
  4. packages.txt +4 -0
  5. requirements.txt +5 -0
README.md CHANGED
@@ -1,13 +1,60 @@
1
  ---
2
- title: GRAMMAR CORRECTOR
3
- emoji: 🔥
4
  colorFrom: blue
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.46.0
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: CoEdIT Handler
3
+ emoji: ✏️
4
  colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.0.0
8
  app_file: app.py
9
  pinned: false
10
+ license: cc-by-nc-4.0
11
+ short_description: Custom handler for Grammarly CoEdIT model
12
  ---
13
 
14
+ # CoEdIT Handler
15
+
16
+ This is a custom handler for the Grammarly CoEdIT model, providing grammar correction and text enhancement capabilities.
17
+
18
+ ## Features
19
+
20
+ - Grammar correction
21
+ - Text coherence improvement
22
+ - Text simplification
23
+ - Paraphrasing
24
+ - Formal/informal style conversion
25
+ - Neutral tone conversion
26
+
27
+ ## API Usage
28
+
29
+ The Space provides a REST API endpoint at `/predict`:
30
+
31
+ ```bash
32
+ curl -X POST "https://your-space-url.hf.space/predict" \
33
+ -H "Content-Type: application/json" \
34
+ -d '{"inputs": ["Fix the grammar: When I grow up, I start to understand what he said is quite right."]}'
35
+ ```
36
+
37
+ ## Response Format
38
+
39
+ ```json
40
+ {
41
+ "success": true,
42
+ "results": [
43
+ {
44
+ "original_sentence": "Fix the grammar: When I grow up, I start to understand what he said is quite right.",
45
+ "enhanced_sentence": "When I grow up, I start to understand that what he said is quite right.",
46
+ "changes": [
47
+ {
48
+ "original_phrase": "what he said is quite right",
49
+ "new_phrase": "that what he said is quite right",
50
+ "explanation": "replace change"
51
+ }
52
+ ]
53
+ }
54
+ ]
55
+ }
56
+ ```
57
+
58
+ ## Model
59
+
60
+ This handler uses the [grammarly/coedit-large](https://huggingface.co/grammarly/coedit-large) model.
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Hugging Face Space app for CoEdIT Handler
4
+ """
5
+ import gradio as gr
6
+ import sys
7
+ import os
8
+ import json
9
+
10
+ # Add current directory to path so we can import handler
11
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
12
+
13
+ from handler import EndpointHandler
14
+
15
# Initialize the handler once at import time so every Gradio callback reuses it.
print("🚀 Initializing CoEdIT Handler...")
try:
    handler = EndpointHandler("grammarly/coedit-large")
    print("✅ Handler initialized successfully")
except Exception as e:
    # Keep the app importable even if the model download/load fails;
    # process_text checks for None and surfaces the failure to the user.
    print(f"❌ Failed to initialize handler: {e}")
    handler = None
23
+
24
def process_text(text, num_return_sequences=1, temperature=1.0):
    """Run *text* through the CoEdIT handler and format the result as Markdown.

    Args:
        text: Raw input sentence, including any instruction prefix.
        num_return_sequences: Number of candidate rewrites to request.
        temperature: Generation temperature forwarded to the handler.

    Returns:
        A Markdown string with the enhanced text and the list of changes,
        or an error message prefixed with "❌".
    """
    if handler is None:
        return "❌ Handler not initialized. Please check the logs."

    try:
        # Shape the payload the way EndpointHandler.__call__ expects it.
        inputs = {
            "inputs": [text],
            "parameters": {
                "num_return_sequences": num_return_sequences,
                "temperature": temperature
            }
        }

        result = handler(inputs)

        if not result.get("success", False):
            return f"❌ Error: {result.get('error', 'Unknown error')}"

        results = result.get("results", [])
        if not results:
            return "No results returned."

        # BUG FIX: the original rendered only results[0], so the
        # "Number of variations" slider had no visible effect beyond the
        # first candidate. Render every returned variation instead.
        sections = []
        for idx, item in enumerate(results, 1):
            enhanced = item.get("enhanced_sentence", "")
            changes = item.get("changes", [])

            if len(results) > 1:
                header = f"**Enhanced Text (variation {idx}):**"
            else:
                # Single-variation output keeps the original format exactly.
                header = "**Enhanced Text:**"
            response = f"{header}\n{enhanced}\n\n"

            if changes:
                response += "**Changes Made:**\n"
                for i, change in enumerate(changes, 1):
                    original = change.get("original_phrase", "")
                    new = change.get("new_phrase", "")
                    if original and new:
                        response += f"{i}. '{original}' → '{new}'\n"

            sections.append(response)

        return "\n".join(sections)

    except Exception as e:
        return f"❌ Error processing text: {str(e)}"
67
+
68
+ # Create Gradio interface
69
def create_interface():
    """Assemble and return the Gradio Blocks UI for the CoEdIT handler."""
    # Example prompts shown below the input box.
    sample_prompts = [
        "Fix the grammar: When I grow up, I start to understand what he said is quite right.",
        "Make this text coherent: Their flight is weak. They run quickly through the tree canopy.",
        "Rewrite to make this easier to understand: A storm surge is what forecasters consider a hurricane's most treacherous aspect.",
        "Paraphrase this: Do you know where I was born?",
        "Write this more formally: omg i love that song im listening to it right now",
    ]

    with gr.Blocks(title="CoEdIT Handler", theme=gr.themes.Soft()) as ui:
        gr.Markdown("""
        # CoEdIT Text Editor

        This is a custom handler for the Grammarly CoEdIT model, providing grammar correction and text enhancement.
        """)

        with gr.Row():
            with gr.Column():
                text_in = gr.Textbox(
                    label="Input Text",
                    placeholder="Fix the grammar: When I grow up, I start to understand what he said is quite right.",
                    lines=3,
                )

                with gr.Row():
                    variations = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=1,
                        step=1,
                        label="Number of variations",
                    )
                    temp = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=1.0,
                        step=0.1,
                        label="Temperature",
                    )

                run_btn = gr.Button("Process Text", variant="primary")

            with gr.Column():
                result_md = gr.Markdown(label="Enhanced Text")

        # Clickable example inputs.
        gr.Examples(examples=sample_prompts, inputs=text_in)

        # Wire the button to the processing callback.
        run_btn.click(
            fn=process_text,
            inputs=[text_in, variations, temp],
            outputs=result_md,
        )

        # Footer with API usage instructions.
        gr.Markdown("""
        ## API Endpoint

        This Space also provides an API endpoint at `/predict` for programmatic access:

        ```bash
        curl -X POST "https://your-space-url.hf.space/predict" \\
        -H "Content-Type: application/json" \\
        -d '{"inputs": ["Your text here"]}'
        ```
        """)

    return ui
140
+
141
+ # Create the interface
142
+ if __name__ == "__main__":
143
+ demo = create_interface()
144
+ demo.launch(server_name="0.0.0.0", server_port=7860)
handler.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import torch
3
+ from difflib import SequenceMatcher
4
+
5
class EndpointHandler:
    """Inference-endpoint handler wrapping the Grammarly CoEdIT seq2seq model.

    Exposes grammar correction via ``__call__`` and reports a word-level diff
    between each input sentence and its corrected version.
    """

    def __init__(self, path=""):
        """Load tokenizer and model from *path* (defaults to the current directory)."""
        model_path = path if path else "."
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()  # inference only: disables dropout etc.

    def paraphrase_batch(self, sentences, num_return_sequences=1, temperature=1.0):
        """Generate grammar-corrected versions of *sentences*.

        Returns a flat list of strings when ``num_return_sequences == 1``,
        otherwise a list of candidate lists (one inner list per input).

        NOTE(review): with pure beam search (no ``do_sample=True``) the
        ``temperature`` argument has no effect on generation — confirm whether
        sampling was intended before relying on this parameter.
        """
        # CoEdIT is instruction-tuned; prepend the task instruction.
        prefix = "correct grammar for this sentence: "
        sentences_with_prefix = [prefix + s for s in sentences]

        inputs = self.tokenizer(
            sentences_with_prefix,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt",
        ).to(self.device)

        # no_grad avoids building autograd graphs during inference.
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_length=512,
                num_beams=5,
                temperature=temperature,
                num_return_sequences=num_return_sequences,
                early_stopping=True,
            )

        decoded = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
        if num_return_sequences > 1:
            # generate() flattens all candidates; regroup them per input sentence.
            return [
                decoded[i * num_return_sequences:(i + 1) * num_return_sequences]
                for i in range(len(sentences))
            ]
        return decoded

    def compute_changes(self, original, enhanced):
        """Return a list of word-level edits that turn *original* into *enhanced*.

        Each edit dict carries the affected phrases plus index metadata.

        NOTE(review): ``char_start``/``char_end`` and ``token_start``/``token_end``
        all carry *word* indices from ``str.split()``, not character offsets —
        values are kept as-is for backward compatibility with consumers.
        """
        # Split once up front instead of re-splitting inside the loop.
        original_words = original.split()
        enhanced_words = enhanced.split()
        changes = []
        matcher = SequenceMatcher(None, original_words, enhanced_words)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag in ("replace", "insert", "delete"):
                changes.append({
                    "original_phrase": " ".join(original_words[i1:i2]),
                    "new_phrase": " ".join(enhanced_words[j1:j2]),
                    "char_start": i1,
                    "char_end": i2,
                    "token_start": i1,
                    "token_end": i2,
                    "explanation": f"{tag} change",
                    "error_type": "",
                    "tip": "",
                })
        return changes

    def __call__(self, inputs):
        """Main entry point for the Hugging Face Endpoint.

        Accepts a bare string, a list of strings, or a dict in the common
        ``{"inputs": ..., "parameters": {...}}`` format. Returns a dict with
        ``success``, ``results`` (or ``error``), and bookkeeping counts.
        """
        if isinstance(inputs, str):
            # BUG FIX: the original rejected bare strings even though its
            # error message promised to accept them.
            sentences = [inputs]
            parameters = {}
        elif isinstance(inputs, list):
            sentences = inputs
            parameters = {}
        elif isinstance(inputs, dict):
            sentences = inputs.get("inputs", [])
            # A single string payload is normalized to a one-element list.
            if isinstance(sentences, str):
                sentences = [sentences]
            parameters = inputs.get("parameters", {})
        else:
            return {
                "success": False,
                "error": "Invalid input format. Expected a string, list of strings, or a dictionary with 'inputs' and 'parameters' keys."
            }

        # Optional generation parameters with safe defaults.
        num_return_sequences = parameters.get("num_return_sequences", 1)
        temperature = parameters.get("temperature", 1.0)

        if not sentences:
            return {
                "success": False,
                "error": "No sentences provided."
            }

        try:
            paraphrased = self.paraphrase_batch(sentences, num_return_sequences, temperature)
            results = []

            if num_return_sequences > 1:
                # paraphrased is a list of candidate lists, one per input.
                for i, orig in enumerate(sentences):
                    for cand in paraphrased[i]:
                        results.append({
                            "original_sentence": orig,
                            "enhanced_sentence": cand,
                            "changes": self.compute_changes(orig, cand)
                        })
            else:
                # paraphrased is a flat list aligned with sentences.
                for orig, cand in zip(sentences, paraphrased):
                    results.append({
                        "original_sentence": orig,
                        "enhanced_sentence": cand,
                        "changes": self.compute_changes(orig, cand)
                    })

            return {
                "success": True,
                "results": results,
                "sentences_count": len(sentences),
                "processed_count": len(results),
                "skipped_count": 0,
                "error_count": 0
            }
        except Exception as e:
            return {
                "success": False,
                "error": str(e),
                "sentences_count": len(sentences),
                "processed_count": 0,
                "skipped_count": 0,
                "error_count": 1
            }
+ }
packages.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # System packages for Hugging Face Spaces
2
+ git
3
+ wget
4
+ curl
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch>=2.0.0
2
+ transformers>=4.30.0
3
+ flask>=2.3.0
4
+ requests>=2.31.0
5
+ safetensors>=0.3.0