Spaces:
Running
on
Zero
Running
on
Zero
| # app.py (为 Hugging Face ZeroGPU 修改) | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig | |
| import spaces | |
# --------------------------------------------------------------------------
# 1. Configuration (executed once when the application starts)
# --------------------------------------------------------------------------
# Model repository on the Hugging Face Hub.
model_id = "AIDC-AI/Marco-MT-Algharb"

# The large model is NOT loaded at start-up: these globals start as None and
# `model` is filled in when the first translation request arrives.
model = None
tokenizer = None
generation_config = None

print("ZeroGPU 启动脚本开始...")
print(f"准备从 {model_id} 加载 Tokenizer...")

# The tokenizer is small, so it is loaded eagerly.
# Reminder (from the original author): this requires the HF_TOKEN secret to be
# configured in the Space settings.
try:
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        trust_remote_code=True,
    )
    print("Tokenizer 加载成功!")
except Exception as e:
    # Best effort: keep the app importable so the UI can still report the
    # problem; the translate handler checks `tokenizer is None`.
    print(f"Tokenizer 加载失败: {e}")
    print("!! 严重错误: 如果这是 Gated Repo 问题, 请确保 HF_TOKEN 密钥已设置并重启 Space。")

# Build the generation settings in a separate step so that a failure here is
# not misreported as a tokenizer loading failure.
if tokenizer is not None:
    try:
        im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
        eot_id = tokenizer.eos_token_id
        print(f"设置停止 IDs: <|im_end|_id={im_end_id}, <|endoftext|_id={eot_id}")

        # Either lookup may yield None (token missing / no EOS configured) and
        # both may resolve to the same id; keep each valid id exactly once.
        stop_ids = list(
            dict.fromkeys(
                token_id for token_id in (im_end_id, eot_id) if token_id is not None
            )
        )

        generation_config = GenerationConfig(
            do_sample=False,
            max_new_tokens=512,
            eos_token_id=stop_ids or None,
            # Pad with EOS as before; fall back to <|im_end|> if EOS is unset.
            pad_token_id=eot_id if eot_id is not None else im_end_id,
        )
        print("GenerationConfig 配置成功。")
    except Exception as e:
        # generation_config stays None; generate() is then called with
        # generation_config=None by the translate handler.
        print(f"GenerationConfig 配置失败: {e}")
# Language code -> language name. The names are lower-case English words; the
# target name is interpolated verbatim into the translation prompt and both
# maps feed the dropdown labels.
source_lang_name_map = dict(
    en="english",
    ja="japanese",
    cs="czech",
    de="german",
)

# NOTE(review): "ukraine" is a country name rather than a language name. It is
# kept as-is because it ends up in the model prompt - confirm against the
# model card before changing it.
target_lang_name_map = dict(
    zh="chinese",
    ko="korean",
    ja="japanese",
    ar="arabic",
    cs="czech",
    ru="russian",
    uk="ukraine",
    et="estonian",
    bho="bhojpuri",
    sr_latin="serbian",
    de="german",
)
# --------------------------------------------------------------------------
# 2. Core translation function
# --------------------------------------------------------------------------
# NOTE(review): duration=120 is an estimate meant to leave room for the model
# load that happens inside the first call - tune it to the observed load time.
@spaces.GPU(duration=120)
def _generate_translation(prompt):
    """Run the model on ``prompt`` and return the decoded completion.

    This is the only GPU-bound step, so it is the function wrapped with
    ``spaces.GPU``. The model is loaded lazily here the first time it is
    needed and cached in the module-level ``model`` global.

    Args:
        prompt: Fully formatted prompt string to feed to the model.

    Returns:
        The generated text with special tokens removed and surrounding
        whitespace stripped, or a user-facing error message if loading the
        model or generating fails.
    """
    global model

    if model is None:
        print("--- 首次请求 ---")
        print("检测到模型未加载。正在加载模型到 ZeroGPU (Nvidia H200)...")
        try:
            loaded_model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype="auto",
                device_map="auto",
                trust_remote_code=True,
            )
            loaded_model.eval()
        except Exception as e:
            print(f"在首次加载时模型失败: {e}")
            return f"错误:模型在加载到 GPU 时失败: {e}"
        # Publish the model only once it is fully initialised, so a failed
        # load is retried on the next request.
        model = loaded_model
        print("模型已成功加载到 GPU!")

    try:
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                generation_config=generation_config,
            )
        # generate() returns prompt + completion; keep only the new tokens.
        prompt_length = inputs.input_ids.shape[1]
        new_token_ids = outputs[0][prompt_length:]
        return tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
    except Exception as e:
        print(f"翻译过程中出错: {e}")
        return f"翻译时发生错误: {e}"


def translate(source_text, source_lang_code, target_lang_code):
    """Translate ``source_text`` into the language named by ``target_lang_code``.

    Cheap checks (empty input, missing tokenizer) run first so that they never
    trigger the model load or the GPU-wrapped helper.

    Args:
        source_text: Text entered by the user.
        source_lang_code: Source language code from the UI. It is accepted for
            interface compatibility; the prompt does not mention the source
            language.
        target_lang_code: Key of ``target_lang_name_map``. Unknown codes fall
            back to the generic phrase "the target language".

    Returns:
        The translated text, an empty string for blank input, or a
        user-facing error message.
    """
    if not source_text or not source_text.strip():
        return ""

    if tokenizer is None:
        return "错误:Tokenizer 未能成功加载,无法继续。请检查启动日志。"

    target_language_name = target_lang_name_map.get(
        target_lang_code, "the target language"
    )
    prompt = (
        f"Human: Please translate the following text into {target_language_name}: \n"
        f"{source_text}<|im_end|>\n"
        f"Assistant:"
    )
    return _generate_translation(prompt)
# --------------------------------------------------------------------------
# 3. Build and configure the Gradio interface
# --------------------------------------------------------------------------
# Custom CSS passed to gr.Blocks; enforces a minimum height on the text boxes
# tagged with the "gradio-textbox" class.
css = """
/* ... 你的所有 CSS 样式 ... */
.gradio-textbox {
min-height: 300px !important;
}
"""


def _dropdown_choices(lang_name_map):
    """Turn a code -> name map into a list of (Capitalized name, code) pairs."""
    pairs = []
    for lang_code, lang_name in lang_name_map.items():
        pairs.append((lang_name.capitalize(), lang_code))
    return pairs


# Dropdown entries: the label shown is the capitalized language name, the
# value sent to the handler is the language code.
source_lang_choices = _dropdown_choices(source_lang_name_map)
target_lang_choices = _dropdown_choices(target_lang_name_map)
# Language pair selected at start-up and restored by the Clear button.
_DEFAULT_SOURCE_LANG = "en"
_DEFAULT_TARGET_LANG = "zh"

# Page header: project title, organisation link and badge links.
_HEADER_HTML = """
<div align="center" style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif, 'Microsoft YaHei', sans-serif; padding: 20px 0;">
<h1 style="font-weight: 700; color: #2C3E50; margin-bottom: 0.25rem; font-size: 2.5rem;">
Marco-MT-Algharb
</h1>
<p style="margin-top: 0; margin-bottom: 1.5rem;">
<a href="https://www.aidc-ai.com/marcomt" style="font-size: 1.25rem; color: #E67E22; text-decoration: none; font-weight: 500;">
Alibaba International Digital Commerce
</a>
</p>
<div style="display: flex; justify-content: center; gap: 8px;">
<a href="https://github.com/AIDC-AI/Marco-MT">
<img src="https://img.shields.io/badge/GitHub-Repository-181717?logo=github&style=for-the-badge" alt="GitHub">
</a>
<a href="https://huggingface.co/AIDC-AI/Marco-MT-Algharb">
<img src="https://img.shields.io/badge/Hugging%20Face-Model-FFC107?logo=huggingface&style=for-the-badge" alt="Hugging Face Model">
</a>
<a href="https://www2.statmt.org/wmt25/pdf/2025.wmt-1.33.pdf">
<img src="https://img.shields.io/badge/Paper-WMT%202025-C0392B?logo=arxiv&style=for-the-badge" alt="Paper WMT 2025">
</a>
<a href="https://huggingface.co/spaces/AIDC-AI/Marco-MT-Algharb">
<img src="https://img.shields.io/badge/Demo-HF%20Space-E67E22?logo=huggingface&style=for-the-badge" alt="Demo HF Space">
</a>
</div>
</div>
"""

# Static card listing the supported translation directions.
_SUPPORTED_PAIRS_HTML = """
<div style="color: #444; font-size: 16px; margin-top: 30px; padding: 20px 25px; background-color: #FFFFFF; border-radius: 15px; max-width: 900px; margin-left: auto; margin-right: auto; box-shadow: 0 4px 20px rgba(0,0,0,0.05);">
<h3 style="text-align: center; margin-top: 5px; margin-bottom: 20px; color: #444444; font-weight: 600;">Supported Language Pairs</h3>
<div style="display: flex; justify-content: space-around; text-align: left; line-height: 1.8;">
<div>
<strong>From English (en):</strong>
<ul style="list-style-type: '» '; margin: 5px 0 0 20px; padding: 0;">
<li>en2zh</li>
<li>en2ja</li>
<li>en2ko</li>
<li>en2ar</li>
<li>en2et</li>
<li>en2sr_latin</li>
<li>en2ru</li>
<li>en2uk</li>
<li>en2cs</li>
<li>en2bho</li>
</ul>
</div>
<div style="margin-left: 20px;">
<strong>From Czech (cs):</strong>
<ul style="list-style-type: '» '; margin: 5px 0 15px 20px; padding: 0;">
<li>cs2uk</li>
<li>cs2de</li>
</ul>
<strong>From Japanese (ja):</strong>
<ul style="list-style-type: '» '; margin: 5px 0 0 20px; padding: 0;">
<li>ja2zh</li>
</ul>
</div>
</div>
</div>
"""


def _reset_languages():
    """Return the default (source, target) language codes for the dropdowns."""
    return _DEFAULT_SOURCE_LANG, _DEFAULT_TARGET_LANG


# Build the page with gr.Blocks, keeping the amber "Soft" theme.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="amber", secondary_hue="amber"),
    css=css,
) as demo:
    gr.HTML(_HEADER_HTML)

    # --- Translator area: input card and output card side by side ---
    with gr.Row(variant="panel", equal_height=True):
        # --- Input card ---
        with gr.Group():
            source_lang_dd = gr.Dropdown(
                choices=source_lang_choices,
                value=_DEFAULT_SOURCE_LANG,
                label="源语言 (Source Language)",
            )
            source_text_tb = gr.Textbox(
                lines=10,
                label="源文本 (Source Text)",
                placeholder="Enter text to translate here...",
                elem_classes=["gradio-textbox"],
            )
        # --- Output card ---
        with gr.Group():
            target_lang_dd = gr.Dropdown(
                choices=target_lang_choices,
                value=_DEFAULT_TARGET_LANG,
                label="目标语言 (Target Language)",
            )
            output_text_tb = gr.Textbox(
                lines=10,
                label="翻译结果 (Translation)",
                interactive=False,
                elem_classes=["gradio-textbox"],
            )

    # --- Button row ---
    with gr.Row():
        # Only the text boxes are emptied by the ClearButton itself. Listing
        # the dropdowns here would leave them without a selection, and the
        # next request would reach the handler with None language codes.
        clear_btn = gr.ClearButton(
            value="清除 (Clear)",
            components=[source_text_tb, output_text_tb],
        )
        submit_btn = gr.Button("翻译 (Submit)", variant="primary", scale=1)

    # --- Examples ---
    example_list = [
        ["The quick brown fox jumps over the lazy dog.", "en", "zh"],
        ["The sunset painted the sky with brilliant shades of orange and purple.", "en", "ko"],
        ["The ancient ruins stand as a silent testament to the rise and fall of a great civilization.", "en", "ja"],
    ]
    gr.Examples(
        examples=example_list,
        inputs=[source_text_tb, source_lang_dd, target_lang_dd],
    )

    # --- Supported language pairs card ---
    gr.HTML(_SUPPORTED_PAIRS_HTML)

    # --- Event wiring ---
    # Clear also puts the language dropdowns back to their defaults.
    clear_btn.click(
        fn=_reset_languages,
        inputs=None,
        outputs=[source_lang_dd, target_lang_dd],
    )
    submit_btn.click(
        fn=translate,
        inputs=[source_text_tb, source_lang_dd, target_lang_dd],
        outputs=[output_text_tb],
        api_name="translate",
    )

# Launch the app when run as a script.
if __name__ == "__main__":
    demo.launch()