microsoft_Phi_Instruct_Explorer

Sleeping

App Files Files Community

AItool committed on Sep 20

Commit

7291080

verified ·

1 Parent(s): 799982d

using translator

Browse files

Files changed (1) hide show

app.py +60 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+# app.py
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Translation models offered for English→English correction.
# Each pair is (label shown in the dropdown, model id handed to from_pretrained).
_MODEL_CHOICES = (
    (
        "Helsinki-NLP/opus-mt-en-en (light, CPU-friendly)",
        "Helsinki-NLP/opus-mt-en-en",
    ),
    (
        "facebook/mbart-large-50-many-to-many-mmt (heavier)",
        "facebook/mbart-large-50-many-to-many-mmt",
    ),
)
MODEL_OPTIONS = dict(_MODEL_CHOICES)

# Pipelines that have already been built, keyed by model id, so each model
# is only loaded once per process.
loaded_pipelines = dict()
def get_pipeline(model_id: str):
    """Return a CPU translation pipeline for ``model_id``, building it on first use.

    Built pipelines are stored in the module-level ``loaded_pipelines`` dict so
    that each model is downloaded and loaded at most once per process.

    Args:
        model_id: Model identifier passed to ``from_pretrained``.

    Returns:
        The ``transformers`` translation pipeline for the model.
    """
    cached = loaded_pipelines.get(model_id)
    if cached is not None:
        return cached

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_id,
        low_cpu_mem_usage=True,
        torch_dtype="auto",
    )
    # device=-1 keeps inference on the CPU.
    pipe = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)

    # Warm-up: run one short sentence so the first real request is not the
    # one paying for lazy initialisation.
    warmup_kwargs = {"max_length": 32}
    if "mbart" in model_id:
        # The translation pipeline requires explicit source/target language
        # codes for mBART tokenizers; without them the warm-up call raises
        # and the model can never be cached.
        warmup_kwargs["src_lang"] = "en_XX"
        warmup_kwargs["tgt_lang"] = "en_XX"
    pipe("This is a test.", **warmup_kwargs)

    loaded_pipelines[model_id] = pipe
    return pipe
def polish(sentence: str, model_choice: str) -> str:
    """Rewrite ``sentence`` with the selected English→English model.

    Args:
        sentence: Text entered by the user.
        model_choice: A key of ``MODEL_OPTIONS`` (the dropdown label).

    Returns:
        The model output with surrounding whitespace stripped, or an empty
        string when ``sentence`` is empty or contains only whitespace.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``MODEL_OPTIONS`` and
            the sentence is non-empty.
    """
    # Nothing to correct: avoid loading a model and generating text from
    # an empty prompt.
    if not sentence or not sentence.strip():
        return ""

    model_id = MODEL_OPTIONS[model_choice]
    translator = get_pipeline(model_id)

    if "mbart" in model_id:
        # mBART needs the target language forced as the first generated
        # token, so call the tokenizer/model directly instead of the pipeline.
        tokenizer = translator.tokenizer
        inputs = tokenizer(sentence, return_tensors="pt")
        out = translator.model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"],
            max_length=128,
            num_beams=4,
        )
        text = tokenizer.decode(out[0], skip_special_tokens=True)
    else:
        out = translator(sentence, max_length=128)
        text = out[0]["translation_text"]
    return text.strip()
# Gradio interface: a sentence box plus a model selector feeding polish(),
# with the rewritten sentence shown in a single output box.
sentence_box = gr.Textbox(lines=2, placeholder="Enter a sentence to correct...")
model_dropdown = gr.Dropdown(
    choices=list(MODEL_OPTIONS),
    value="Helsinki-NLP/opus-mt-en-en (light, CPU-friendly)",
    label="Choose Model",
)
corrected_box = gr.Textbox(label="Corrected English")

demo = gr.Interface(
    fn=polish,
    inputs=[sentence_box, model_dropdown],
    outputs=corrected_box,
    title="English→English Grammar Polisher",
    description="Uses translation models (Helsinki-NLP opus-mt-en-en and facebook mbart-large-50) to rewrite English sentences into fluent, corrected English.",
)

# Only start the web server when this file is run as a script.
if __name__ == "__main__":
    demo.launch()