| | from fastapi import FastAPI |
| | from pydantic import BaseModel, Field |
| | from typing import Optional |
| | from transformers import MarianMTModel, MarianTokenizer |
| | import datetime |
| | import logging |
| | import hashlib |
| | import time |
| |
|
# Named logger for this service; its own threshold is set to INFO.
logger = logging.getLogger("translate")
logger.setLevel(logging.INFO)

# FastAPI application object on which the translation route is registered.
app = FastAPI(title="翻译服务")

# In-memory translation cache shared by all requests in this process.
# Key: SHA-256 hex digest of the source text.
# Value: (translated_text, expiry) where expiry is a time.time() timestamp.
cache = {}
| |
|
def _hash_text(text: str) -> str:
    """Return the SHA-256 hex digest of *text* encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()
| |
|
def _clean_cache():
    """Remove every cache entry whose expiry timestamp lies before now."""
    cutoff = time.time()
    # Iterate over a snapshot: the dict must not change size while being walked.
    for key, (_, expires_at) in list(cache.items()):
        if expires_at < cutoff:
            del cache[key]
| |
|
| |
|
| | |
# Load the tokenizer and model once at import time; the request handler
# reuses these module-level objects instead of reloading them per call.
# NOTE(review): the "zhx-en" suffix suggests Chinese-family -> English;
# confirm against the model card.
MODEL_NAME = "Helsinki-NLP/opus-mt-tc-bible-big-zhx-en"
logger.info(f"{datetime.datetime.now()} Loading model {MODEL_NAME}...")
tokenizer = MarianTokenizer.from_pretrained(MODEL_NAME)
model = MarianMTModel.from_pretrained(MODEL_NAME)
logger.info(f"{datetime.datetime.now()} Model loaded.")
| |
|
# Request body accepted by POST /api/translate.
class TranslateRequest(BaseModel):
    # Source text to translate; required, since Field(...) declares no default.
    text: str = Field(..., description="待翻译的中文文本")
| |
|
# Response body returned by POST /api/translate.
class TranslateResponse(BaseModel):
    # Translation produced for the submitted text.
    translated_text: str
    # Optional language tag, None by default; the endpoint in this file
    # never sets it to anything else.
    detected_lang: Optional[str] = None
| |
|
# How long a cached translation stays valid, in seconds (30 minutes).
_CACHE_TTL_SECONDS = 30 * 60


def _run_translation(text: str) -> str:
    """Translate *text* with the module-level tokenizer and model (blocking)."""
    batch = tokenizer([text], return_tensors="pt", padding=True)
    generated = model.generate(**batch)
    return tokenizer.decode(generated[0], skip_special_tokens=True)


@app.post("/api/translate", response_model=TranslateResponse)
async def translate(req: TranslateRequest):
    """Translate ``req.text``, serving repeated inputs from the in-memory cache.

    Expired cache entries are purged first. A still-valid entry keyed by the
    SHA-256 digest of the text is returned directly; otherwise the model is
    run and its output is cached for ``_CACHE_TTL_SECONDS``.

    Args:
        req: Request body carrying the text to translate.

    Returns:
        A ``TranslateResponse`` with ``translated_text`` filled in and
        ``detected_lang`` left as ``None``.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    _clean_cache()

    h = _hash_text(req.text)

    # Single lookup instead of a membership test followed by indexing.
    entry = cache.get(h)
    if entry is not None:
        translated_text, expire_ts = entry
        if expire_ts > time.time():
            logger.info("Cache hit: %s", h)
            return TranslateResponse(translated_text=translated_text)

    # Tokenisation and generation are synchronous and CPU-bound. Running them
    # directly in this coroutine would stall the event loop (and every other
    # request) for the whole inference, so they are moved to a worker thread.
    # NOTE(review): input length is not limited here; confirm how the model
    # behaves on very long texts.
    output = await run_in_threadpool(_run_translation, req.text)

    cache[h] = (output, time.time() + _CACHE_TTL_SECONDS)

    return TranslateResponse(
        translated_text=output,
        detected_lang=None,
    )
| |
|