import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
import json

# ---- Load language map ----
with open("languages.json", "r", encoding="utf-8") as f:
    LANG_MAP = json.load(f)

LANG_NAMES = list(LANG_MAP.keys())
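# languages.json is not shown in this snapshot; it is assumed to map display
# names to M2M100 language codes (the codes get_lang_id expects), e.g.:
#   {"English": "en", "Hindi": "hi", "French": "fr", "Arabic": "ar"}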
# ---- Translation setup (M2M100 supports ~100 languages) ----
TRANS_MODEL_NAME = "facebook/m2m100_418M"
trans_tokenizer = AutoTokenizer.from_pretrained(TRANS_MODEL_NAME)
trans_model = AutoModelForSeq2SeqLM.from_pretrained(TRANS_MODEL_NAME)
def translate_text(text, src_name, tgt_name, max_new_tokens=200):
    if not text.strip():
        return ""
    src = LANG_MAP[src_name]
    tgt = LANG_MAP[tgt_name]
    # Set source language for tokenizer
    trans_tokenizer.src_lang = src
    # Tokenize
    encoded = trans_tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    # Set target language via forced BOS token
    forced_bos_token_id = trans_tokenizer.get_lang_id(tgt)
    generated_tokens = trans_model.generate(
        **encoded,
        forced_bos_token_id=forced_bos_token_id,
        max_new_tokens=int(max_new_tokens),  # Gradio sliders may pass floats
        num_beams=4,
    )
    return trans_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
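# Example call outside the UI (assumes "English" and "French" are keys in
# languages.json; the exact wording of the output depends on the model):
#   translate_text("Hello, how are you?", "English", "French")
#   # -> roughly "Bonjour, comment allez-vous ?"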
# ---- Summarization setup (English-focused, small & fast) ----
SUM_MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
summarizer = pipeline("summarization", model=SUM_MODEL_NAME)

def summarize_text(long_text, min_len=60, max_len=160):
    if not long_text.strip():
        return ""
    # DistilBART expects English and up to 1024 tokens; truncation handled by pipeline tokenizer
    summary = summarizer(long_text, min_length=int(min_len), max_length=int(max_len), truncation=True)
    return summary[0]["summary_text"]
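# Inputs beyond the 1024-token window are silently truncated above. A minimal
# sketch of one workaround (not part of the original app): summarize fixed-size
# character chunks and join the partial summaries. summarize_long and
# chunk_chars are hypothetical names; ~3000 characters per chunk is a rough
# heuristic to stay under the token limit.
def summarize_long(text, chunk_chars=3000, **kw):
    # Split on character boundaries (crude but dependency-free)
    chunks = [text[i:i + chunk_chars] for i in range(0, len(text), chunk_chars)]
    # Summarize each chunk independently, then concatenate
    parts = [summarizer(c, truncation=True, **kw)[0]["summary_text"] for c in chunks]
    return " ".join(parts)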
# ---- Gradio UI ----
with gr.Blocks(title="Multipurpose NLP (Translate + Summarize)") as demo:
    gr.Markdown(
        """
        # 🌐 Multipurpose NLP: Translation & Summarization
        - **Translate** between many languages using **M2M100 (418M)**
        - **Summarize** long English text using **DistilBART-CNN**
        - Built with **Gradio** on Hugging Face Spaces
        """
    )
    with gr.Tab("🗣️ Translate"):
        with gr.Row():
            src_lang = gr.Dropdown(choices=LANG_NAMES, value="English", label="Source language")
            tgt_lang = gr.Dropdown(choices=LANG_NAMES, value="Hindi", label="Target language")
        input_text = gr.Textbox(lines=6, label="Input text")
        max_toks = gr.Slider(16, 400, value=200, step=1, label="Max new tokens")
        translate_btn = gr.Button("Translate")
        output_text = gr.Textbox(lines=6, label="Translation", interactive=False)

        translate_btn.click(
            fn=translate_text,
            inputs=[input_text, src_lang, tgt_lang, max_toks],
            outputs=[output_text],
        )

        gr.Examples(
            examples=[
                ["Hello, how are you?", "English", "French", 60],
                ["India is developing rapidly in science and technology.", "English", "Hindi", 80],
                ["مرحبا كيف حالك؟", "Arabic", "English", 60],
            ],
            inputs=[input_text, src_lang, tgt_lang, max_toks],
        )
    with gr.Tab("📝 Summarize"):
        long_input = gr.Textbox(lines=12, label="Long input text (English)")
        with gr.Row():
            min_len = gr.Slider(20, 200, value=60, step=1, label="Min summary length")
            max_len = gr.Slider(60, 400, value=160, step=1, label="Max summary length")
        sum_btn = gr.Button("Summarize")
        sum_output = gr.Textbox(lines=8, label="Summary", interactive=False)

        sum_btn.click(
            fn=summarize_text,
            inputs=[long_input, min_len, max_len],
            outputs=[sum_output],
        )

        gr.Examples(
            examples=[
                ["""Artificial intelligence (AI) has seen rapid growth over the past decade.
Advances in deep learning, transformers, and large-scale datasets have enabled breakthroughs across natural language processing, computer vision, and speech.
However, concerns about bias, interpretability, and energy consumption persist.
As AI systems are deployed in high-stakes settings, researchers and policymakers emphasize responsible AI principles such as fairness, transparency, and accountability."""],
            ],
            inputs=[long_input],
        )
if __name__ == "__main__":
    demo.launch()
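# Assumed dependencies (a sketch of requirements.txt, which is not part of the
# snapshot above):
#   gradio
#   transformers
#   torch
#   sentencepiece  # needed by the M2M100 tokenizer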