Avanish3412 committed on
Commit
21b1aa0
·
verified ·
1 Parent(s): 59de99b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -108
app.py CHANGED
@@ -3,8 +3,7 @@ from fastapi.responses import HTMLResponse
3
  from pydantic import BaseModel, Field
4
  import torch
5
  import re
6
- from nltk.tokenize import sent_tokenize
7
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
8
  from fastapi.middleware.cors import CORSMiddleware
9
  import asyncio
10
  from concurrent.futures import ThreadPoolExecutor
@@ -14,26 +13,17 @@ import time
14
  from typing import List, Optional
15
  import os
16
  import uvicorn
17
- import nltk
18
- try:
19
- nltk.data.find('tokenizers/punkt_tab')
20
- except LookupError:
21
- nltk.download('punkt_tab')
22
 
23
  # Configure logging
24
  logging.basicConfig(level=logging.INFO)
25
  logger = logging.getLogger(__name__)
26
 
27
- # Download required NLTK data
28
- try:
29
- nltk.data.find('tokenizers/punkt')
30
- except LookupError:
31
- nltk.download('punkt', quiet=True)
32
-
33
  app = FastAPI(
34
  title="English to Telugu Translation API",
35
  description="High-performance translation service powered by MBart",
36
- version="1.0.0"
37
  )
38
 
39
  # Add CORS middleware
@@ -49,12 +39,14 @@ app.add_middleware(
49
  translator = None
50
  device = None
51
  executor = None
 
 
52
 
53
  # Pydantic models
54
  class TranslationRequest(BaseModel):
55
  text: str = Field(..., max_length=5000, min_length=1)
56
- batch_size: Optional[int] = Field(default=8, ge=1, le=32)
57
- max_length: Optional[int] = Field(default=512, ge=1, le=1024)
58
 
59
  class TranslationResponse(BaseModel):
60
  original_text: str
@@ -62,110 +54,157 @@ class TranslationResponse(BaseModel):
62
  processing_time: float
63
  model_used: str
64
 
65
- @lru_cache(maxsize=2000)
66
- def cached_sentence_tokenize(text: str) -> tuple:
67
- """Cached sentence tokenization"""
68
- return tuple(sent_tokenize(text))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- class OptimizedTranslator:
71
  def __init__(self, model, tokenizer, device):
72
  self.model = model
73
  self.tokenizer = tokenizer
74
  self.device = device
75
 
76
- if hasattr(self.model, 'to'):
77
- self.model = self.model.to(device)
78
- logger.info(f"Model moved to device: {device}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- def translate_batch(self, sentences: List[str], max_length: int = 512) -> List[str]:
81
- """Batch translation for better performance"""
82
  if not sentences:
83
  return []
84
 
85
- processed_sentences = []
86
- for sentence in sentences:
 
 
 
87
  sentence = sentence.strip()
88
- if sentence and not sentence.endswith(('.', '?', '!', ':', ';')):
89
- sentence += '.'
90
- processed_sentences.append(sentence)
 
 
 
 
 
91
 
92
  try:
 
93
  inputs = self.tokenizer(
94
- processed_sentences,
95
  return_tensors="pt",
96
  padding=True,
97
  truncation=True,
98
- max_length=max_length
99
  )
100
 
101
  inputs = {k: v.to(self.device) for k, v in inputs.items()}
102
 
103
  with torch.no_grad():
 
104
  outputs = self.model.generate(
105
  **inputs,
106
- max_length=max_length,
107
- num_beams=2,
108
  early_stopping=True,
109
  do_sample=False,
110
  pad_token_id=self.tokenizer.pad_token_id,
111
- eos_token_id=self.tokenizer.eos_token_id
 
112
  )
113
 
 
114
  translations = []
115
  for output in outputs:
116
- translated_text = self.tokenizer.decode(output, skip_special_tokens=True)
 
 
 
 
117
  translations.append(translated_text)
118
 
119
- return translations
 
 
 
120
 
121
- except Exception as e:
122
- logger.error(f"Batch translation error: {e}")
123
- return [self.translate_single(sentence, max_length) for sentence in processed_sentences]
124
-
125
- def translate_single(self, sentence: str, max_length: int = 512) -> str:
126
- """Single sentence translation fallback"""
127
- try:
128
- inputs = self.tokenizer(sentence, return_tensors="pt", max_length=max_length, truncation=True)
129
- inputs = {k: v.to(self.device) for k, v in inputs.items()}
130
 
131
- with torch.no_grad():
132
- outputs = self.model.generate(
133
- **inputs,
134
- max_length=max_length,
135
- num_beams=2,
136
- early_stopping=True,
137
- do_sample=False
138
- )
139
-
140
- return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
141
  except Exception as e:
142
- logger.error(f"Single translation error: {e}")
143
- return sentence
144
 
145
  @app.on_event("startup")
146
  async def load_models():
147
- global translator, device, executor
148
 
149
  start_time = time.time()
150
  logger.info("🚀 Loading translation models...")
151
 
 
152
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
153
  logger.info(f"📱 Using device: {device}")
154
 
155
- executor = ThreadPoolExecutor(max_workers=2)
 
 
 
 
156
 
157
  try:
158
  model_name = "aryaumesh/english-to-telugu"
159
  logger.info(f"📦 Loading model: {model_name}")
160
 
161
- tokenizer = AutoTokenizer.from_pretrained(model_name)
162
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
163
 
164
- translator = OptimizedTranslator(model, tokenizer, device)
165
 
166
- # Warm up the model
167
  logger.info("🔥 Warming up model...")
168
- _ = translator.translate_single("Hello, this is a test.")
169
 
170
  load_time = time.time() - start_time
171
  logger.info(f"✅ Models loaded successfully in {load_time:.2f} seconds")
@@ -182,10 +221,11 @@ async def shutdown_event():
182
  executor.shutdown(wait=True)
183
  logger.info("✅ Shutdown complete")
184
 
185
- def process_translation_sync(text: str, batch_size: int = 8, max_length: int = 512) -> tuple:
186
- """Synchronous translation processing"""
187
  start_time = time.time()
188
 
 
189
  lines = text.split('\n')
190
  translated_lines = []
191
 
@@ -195,16 +235,18 @@ def process_translation_sync(text: str, batch_size: int = 8, max_length: int = 5
195
  translated_lines.append("")
196
  continue
197
 
198
- sentences = list(cached_sentence_tokenize(line))
 
199
 
200
  if not sentences:
201
  translated_lines.append("")
202
  continue
203
 
 
204
  translated_sentences = []
205
  for i in range(0, len(sentences), batch_size):
206
  batch = sentences[i:i + batch_size]
207
- batch_translations = translator.translate_batch(batch, max_length)
208
  translated_sentences.extend(batch_translations)
209
 
210
  translated_line = " ".join(translated_sentences)
@@ -217,7 +259,7 @@ def process_translation_sync(text: str, batch_size: int = 8, max_length: int = 5
217
 
218
  @app.post("/translate/", response_model=TranslationResponse)
219
  async def translate_text(request: TranslationRequest):
220
- """Main translation endpoint"""
221
  if not request.text.strip():
222
  return TranslationResponse(
223
  original_text=request.text,
@@ -230,17 +272,17 @@ async def translate_text(request: TranslationRequest):
230
  loop = asyncio.get_event_loop()
231
  translation, processing_time = await loop.run_in_executor(
232
  executor,
233
- process_translation_sync,
234
  request.text,
235
- request.batch_size or 8,
236
- request.max_length or 512
237
  )
238
 
239
  return TranslationResponse(
240
  original_text=request.text,
241
  translated_text=translation,
242
  processing_time=processing_time,
243
- model_used="aryaumesh/english-to-telugu"
244
  )
245
 
246
  except Exception as e:
@@ -254,14 +296,14 @@ async def translate_text(request: TranslationRequest):
254
 
255
  @app.get("/", response_class=HTMLResponse)
256
  async def read_root():
257
- """Root endpoint with simple HTML interface"""
258
  html_content = """
259
  <!DOCTYPE html>
260
  <html lang="en">
261
  <head>
262
  <meta charset="UTF-8">
263
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
264
- <title>English to Telugu Translation</title>
265
  <style>
266
  * { margin: 0; padding: 0; box-sizing: border-box; }
267
  body {
@@ -279,13 +321,21 @@ async def read_root():
279
  overflow: hidden;
280
  }
281
  .header {
282
- background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
283
  color: white;
284
  padding: 30px;
285
  text-align: center;
286
  }
287
  .header h1 { font-size: 2.5em; margin-bottom: 10px; }
288
  .header p { font-size: 1.1em; opacity: 0.9; }
 
 
 
 
 
 
 
 
289
  .content { padding: 40px; }
290
  .form-group { margin-bottom: 25px; }
291
  label {
@@ -308,8 +358,8 @@ async def read_root():
308
  }
309
  textarea:focus {
310
  outline: none;
311
- border-color: #4facfe;
312
- box-shadow: 0 0 0 3px rgba(79, 172, 254, 0.1);
313
  }
314
  .controls {
315
  display: flex;
@@ -334,7 +384,7 @@ async def read_root():
334
  width: 100px;
335
  }
336
  button {
337
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
338
  color: white;
339
  padding: 15px 30px;
340
  border: none;
@@ -347,7 +397,7 @@ async def read_root():
347
  }
348
  button:hover {
349
  transform: translateY(-2px);
350
- box-shadow: 0 10px 20px rgba(102, 126, 234, 0.3);
351
  }
352
  button:disabled {
353
  opacity: 0.7;
@@ -359,7 +409,7 @@ async def read_root():
359
  padding: 25px;
360
  background: #f8f9ff;
361
  border-radius: 10px;
362
- border-left: 4px solid #4facfe;
363
  }
364
  .result h3 {
365
  color: #333;
@@ -389,7 +439,7 @@ async def read_root():
389
  border: 1px solid #e0e0e0;
390
  font-size: 0.9em;
391
  }
392
- .stat strong { color: #4facfe; }
393
  .loading {
394
  display: none;
395
  text-align: center;
@@ -400,7 +450,7 @@ async def read_root():
400
  width: 40px;
401
  height: 40px;
402
  border: 4px solid #f3f3f3;
403
- border-top: 4px solid #4facfe;
404
  border-radius: 50%;
405
  animation: spin 1s linear infinite;
406
  }
@@ -408,6 +458,10 @@ async def read_root():
408
  0% { transform: rotate(0deg); }
409
  100% { transform: rotate(360deg); }
410
  }
 
 
 
 
411
  .error {
412
  background: #ffe6e6;
413
  border-left-color: #ff4757;
@@ -426,31 +480,32 @@ async def read_root():
426
  <body>
427
  <div class="container">
428
  <div class="header">
429
- <h1>🌐 English to Telugu Translation</h1>
430
- <p>Powered by Advanced AI • Fast Accurate</p>
 
431
  </div>
432
 
433
  <div class="content">
434
  <div class="form-group">
435
  <label for="inputText">📝 Enter English Text:</label>
436
- <textarea id="inputText" placeholder="Type or paste your English text here...&#10;&#10;You can enter multiple sentences or even paragraphs.&#10;The system will translate everything while preserving the structure."></textarea>
437
  </div>
438
 
439
  <div class="controls">
440
  <div class="control-group">
441
  <label>Batch Size:</label>
442
- <input type="number" id="batchSize" value="8" min="1" max="32">
443
  </div>
444
  <div class="control-group">
445
  <label>Max Length:</label>
446
- <input type="number" id="maxLength" value="512" min="1" max="1024">
447
  </div>
448
- <button onclick="translateText()">🚀 Translate</button>
449
  </div>
450
 
451
  <div class="loading" id="loading">
452
  <div class="spinner"></div>
453
- <p>Translating your text...</p>
454
  </div>
455
 
456
  <div id="result" class="result" style="display: none;">
@@ -458,7 +513,7 @@ async def read_root():
458
  <div id="translatedText" class="translated-text"></div>
459
  <div class="stats">
460
  <div class="stat">
461
- <strong>Processing Time:</strong> <span id="processingTime">-</span> seconds
462
  </div>
463
  <div class="stat">
464
  <strong>Model:</strong> <span id="modelUsed">-</span>
@@ -474,8 +529,8 @@ async def read_root():
474
  <script>
475
  async function translateText() {
476
  const inputText = document.getElementById('inputText').value;
477
- const batchSize = parseInt(document.getElementById('batchSize').value) || 8;
478
- const maxLength = parseInt(document.getElementById('maxLength').value) || 512;
479
 
480
  if (!inputText.trim()) {
481
  alert('⚠️ Please enter some text to translate');
@@ -487,11 +542,13 @@ async def read_root():
487
  const result = document.getElementById('result');
488
 
489
  // Show loading state
490
- button.textContent = ' Translating...';
491
  button.disabled = true;
492
  loading.style.display = 'block';
493
  result.style.display = 'none';
494
 
 
 
495
  try {
496
  const response = await fetch('/translate/', {
497
  method: 'POST',
@@ -510,10 +567,24 @@ async def read_root():
510
  }
511
 
512
  const data = await response.json();
 
513
 
514
  // Display results
515
  document.getElementById('translatedText').textContent = data.translated_text;
516
- document.getElementById('processingTime').textContent = data.processing_time.toFixed(2);
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  document.getElementById('modelUsed').textContent = data.model_used;
518
  document.getElementById('charCount').textContent = data.original_text.length;
519
 
@@ -537,7 +608,7 @@ async def read_root():
537
  result.classList.add('error');
538
 
539
  } finally {
540
- button.textContent = '🚀 Translate';
541
  button.disabled = false;
542
  }
543
  }
@@ -568,6 +639,7 @@ async def health_check():
568
  "status": "healthy",
569
  "device": str(device) if device else "not_initialized",
570
  "model_loaded": translator is not None,
 
571
  "timestamp": time.time()
572
  }
573
 
@@ -575,21 +647,24 @@ async def health_check():
575
  async def api_info():
576
  """API information endpoint"""
577
  return {
578
- "title": "English to Telugu Translation API",
579
- "version": "1.0.0",
580
  "model": "aryaumesh/english-to-telugu",
 
 
 
 
 
 
 
 
 
581
  "endpoints": {
582
  "translate": "/translate/",
583
  "health": "/health",
584
  "docs": "/docs",
585
  "redoc": "/redoc"
586
- },
587
- "supported_features": [
588
- "batch_processing",
589
- "caching",
590
- "async_processing",
591
- "input_validation"
592
- ]
593
  }
594
 
595
  if __name__ == "__main__":
@@ -600,4 +675,4 @@ if __name__ == "__main__":
600
  port=port,
601
  workers=1,
602
  log_level="info"
603
- )
 
3
  from pydantic import BaseModel, Field
4
  import torch
5
  import re
6
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
7
  from fastapi.middleware.cors import CORSMiddleware
8
  import asyncio
9
  from concurrent.futures import ThreadPoolExecutor
 
13
  from typing import List, Optional
14
  import os
15
  import uvicorn
16
+ import spacy
17
+ from spacy.lang.en import English
 
 
 
18
 
19
  # Configure logging
20
  logging.basicConfig(level=logging.INFO)
21
  logger = logging.getLogger(__name__)
22
 
 
 
 
 
 
 
23
  app = FastAPI(
24
  title="English to Telugu Translation API",
25
  description="High-performance translation service powered by MBart",
26
+ version="2.0.0"
27
  )
28
 
29
  # Add CORS middleware
 
39
  translator = None
40
  device = None
41
  executor = None
42
+ nlp = None
43
+ translation_pipeline = None
44
 
45
  # Pydantic models
46
class TranslationRequest(BaseModel):
    # Request payload accepted by the translation endpoint.

    # Text to translate; validated to contain between 1 and 5000 characters.
    text: str = Field(
        ...,
        min_length=1,
        max_length=5000,
    )

    # Number of sentences handed to the translator per batch
    # (defaults to 16, validated to the range 1-64).
    batch_size: Optional[int] = Field(
        default=16,
        ge=1,
        le=64,
    )

    # Length limit forwarded to tokenization/generation
    # (defaults to 256, validated to the range 1-512).
    max_length: Optional[int] = Field(
        default=256,
        ge=1,
        le=512,
    )
50
 
51
  class TranslationResponse(BaseModel):
52
  original_text: str
 
54
  processing_time: float
55
  model_used: str
56
 
57
+ # Fast sentence splitter using spaCy
58
def get_fast_sentence_splitter():
    """Build the spaCy pipeline used for sentence splitting.

    Instantiates ``English()`` and adds the ``sentencizer`` component to it.

    Returns:
        The configured pipeline, or ``None`` when it cannot be constructed
        (the failure is logged rather than raised so startup can continue
        with the regex-based splitter).
    """
    try:
        splitter = English()
        splitter.add_pipe('sentencizer')
    except Exception:
        # Deliberately best-effort: any construction failure degrades to the
        # regex fallback. ``Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt/SystemExit still propagate.
        logger.warning(
            "spaCy sentencizer unavailable; falling back to regex sentence splitting",
            exc_info=True,
        )
        return None
    return splitter
67
+
68
# Split point for the regex fallback: whitespace that directly follows
# '.', '!' or '?'. Compiled once instead of on every uncached call.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')


@lru_cache(maxsize=5000)
def cached_sentence_split(text: str) -> tuple:
    """Split *text* into sentences, memoizing the result per input string.

    Uses the module-level ``nlp`` pipeline when one has been set; otherwise
    splits on whitespace that follows ``.``, ``!`` or ``?``.

    Args:
        text: The text to split.

    Returns:
        A tuple of sentences with surrounding whitespace removed; pieces that
        are empty after stripping are dropped. A tuple (not a list) so the
        cached value cannot be mutated by callers.
    """
    if nlp is not None:
        pieces = (sent.text.strip() for sent in nlp(text).sents)
    else:
        pieces = (part.strip() for part in _SENTENCE_BOUNDARY.split(text))
    return tuple(piece for piece in pieces if piece)
78
 
79
class UltraFastTranslator:
    """Batch translator around a seq2seq model and its tokenizer.

    The model is moved to ``device`` and put in eval mode on construction.
    On CUDA devices two best-effort optimizations are attempted (TorchScript
    and half precision); failure of either is logged and otherwise ignored.
    """

    # Upper bound applied to both the tokenized input length and the
    # generated length, whatever ``max_length`` the caller requests.
    MAX_TOKENS = 128

    # A sentence that does not already end with one of these gets a '.'
    # appended before it is tokenized.
    _TERMINATORS = ('.', '?', '!', ':', ';')

    def __init__(self, model, tokenizer, device):
        """Store the tokenizer/device and prepare *model* for inference.

        Args:
            model: Object exposing ``to()``, ``eval()`` and ``generate()``.
            tokenizer: Callable tokenizer exposing ``decode()``,
                ``pad_token_id`` and ``eos_token_id``.
            device: ``torch.device`` the model and inputs are placed on.
        """
        self.tokenizer = tokenizer
        self.device = device
        self.model = model.to(device).eval()

        if device.type == 'cuda':
            self._try_torchscript()
            self._try_half_precision()

    def _try_torchscript(self):
        """Replace the model with a scripted version only if that is usable."""
        try:
            scripted = torch.jit.script(self.model)
        except Exception:
            logger.info("TorchScript optimization failed, using regular model")
            return
        # Translation relies on ``generate()``; never swap in a scripted
        # module that lost it, or every later request would fail.
        if not hasattr(scripted, "generate"):
            logger.info("TorchScript module lacks generate(), using regular model")
            return
        self.model = scripted
        logger.info("Model optimized with TorchScript")

    def _try_half_precision(self):
        """Convert the model to float16, keeping the current model on failure."""
        try:
            self.model = self.model.half()
        except Exception:
            logger.info("Half precision conversion failed")
        else:
            logger.info("Model converted to half precision")

    def translate_batch_ultra_fast(self, sentences: List[str], max_length: int = 256) -> List[str]:
        """Translate *sentences* in one tokenizer/generate call.

        Args:
            sentences: Input sentences. Entries that are empty after
                stripping are not sent to the model.
            max_length: Requested length limit; the effective limit is
                ``min(max_length, MAX_TOKENS)`` for both input truncation
                and generation.

        Returns:
            A list the same length as *sentences*: the translation at each
            non-blank position and ``""`` at each blank one. If tokenization,
            generation or decoding raises, the error is logged and a copy of
            the original sentences is returned instead.
        """
        if not sentences:
            return []

        # Keep each usable sentence together with its slot in the input so
        # the output lines up with the caller's list.
        prepared = []
        positions = []
        for position, raw in enumerate(sentences):
            sentence = raw.strip()
            if not sentence:
                continue
            if not sentence.endswith(self._TERMINATORS):
                sentence += '.'
            prepared.append(sentence)
            positions.append(position)

        result = [""] * len(sentences)
        if not prepared:
            return result

        token_limit = min(max_length, self.MAX_TOKENS)

        try:
            inputs = self.tokenizer(
                prepared,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=token_limit,
            )
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                # Greedy decoding (num_beams=1). ``early_stopping`` is a
                # beam-search-only option, so it is intentionally not passed.
                outputs = self.model.generate(
                    **inputs,
                    max_length=token_limit,
                    num_beams=1,
                    do_sample=False,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    use_cache=True,
                )

            translations = [
                self.tokenizer.decode(
                    output,
                    skip_special_tokens=True,
                    clean_up_tokenization_spaces=False,
                )
                for output in outputs
            ]
        except Exception as e:
            # Best-effort contract: hand the untranslated text back rather
            # than failing the whole request; keep the traceback in the log.
            logger.exception("Batch translation error: %s", e)
            return list(sentences)

        for position, translation in zip(positions, translations):
            result[position] = translation
        return result
169
 
170
  @app.on_event("startup")
171
  async def load_models():
172
+ global translator, device, executor, nlp, translation_pipeline
173
 
174
  start_time = time.time()
175
  logger.info("🚀 Loading translation models...")
176
 
177
+ # Use GPU if available
178
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
179
  logger.info(f"📱 Using device: {device}")
180
 
181
+ # Increase thread pool for better concurrency
182
+ executor = ThreadPoolExecutor(max_workers=4)
183
+
184
+ # Initialize fast sentence splitter
185
+ nlp = get_fast_sentence_splitter()
186
 
187
  try:
188
  model_name = "aryaumesh/english-to-telugu"
189
  logger.info(f"📦 Loading model: {model_name}")
190
 
191
+ # Load with optimizations
192
+ tokenizer = AutoTokenizer.from_pretrained(
193
+ model_name,
194
+ use_fast=True # Use fast tokenizer
195
+ )
196
+
197
+ model = AutoModelForSeq2SeqLM.from_pretrained(
198
+ model_name,
199
+ torch_dtype=torch.float16 if device.type == 'cuda' else torch.float32,
200
+ low_cpu_mem_usage=True
201
+ )
202
 
203
+ translator = UltraFastTranslator(model, tokenizer, device)
204
 
205
+ # Quick warm-up with small text
206
  logger.info("🔥 Warming up model...")
207
+ _ = translator.translate_batch_ultra_fast(["Hello."], max_length=64)
208
 
209
  load_time = time.time() - start_time
210
  logger.info(f"✅ Models loaded successfully in {load_time:.2f} seconds")
 
221
  executor.shutdown(wait=True)
222
  logger.info("✅ Shutdown complete")
223
 
224
+ def process_translation_ultra_fast(text: str, batch_size: int = 16, max_length: int = 256) -> tuple:
225
+ """Ultra-fast translation processing with aggressive optimizations"""
226
  start_time = time.time()
227
 
228
+ # Fast line processing
229
  lines = text.split('\n')
230
  translated_lines = []
231
 
 
235
  translated_lines.append("")
236
  continue
237
 
238
+ # Use cached sentence splitting
239
+ sentences = list(cached_sentence_split(line))
240
 
241
  if not sentences:
242
  translated_lines.append("")
243
  continue
244
 
245
+ # Process in larger batches for better GPU utilization
246
  translated_sentences = []
247
  for i in range(0, len(sentences), batch_size):
248
  batch = sentences[i:i + batch_size]
249
+ batch_translations = translator.translate_batch_ultra_fast(batch, max_length)
250
  translated_sentences.extend(batch_translations)
251
 
252
  translated_line = " ".join(translated_sentences)
 
259
 
260
  @app.post("/translate/", response_model=TranslationResponse)
261
  async def translate_text(request: TranslationRequest):
262
+ """Ultra-fast translation endpoint"""
263
  if not request.text.strip():
264
  return TranslationResponse(
265
  original_text=request.text,
 
272
  loop = asyncio.get_event_loop()
273
  translation, processing_time = await loop.run_in_executor(
274
  executor,
275
+ process_translation_ultra_fast,
276
  request.text,
277
+ request.batch_size or 16,
278
+ min(request.max_length or 256, 256) # Cap max length for speed
279
  )
280
 
281
  return TranslationResponse(
282
  original_text=request.text,
283
  translated_text=translation,
284
  processing_time=processing_time,
285
+ model_used="aryaumesh/english-to-telugu-optimized"
286
  )
287
 
288
  except Exception as e:
 
296
 
297
  @app.get("/", response_class=HTMLResponse)
298
  async def read_root():
299
+ """Root endpoint with optimized HTML interface"""
300
  html_content = """
301
  <!DOCTYPE html>
302
  <html lang="en">
303
  <head>
304
  <meta charset="UTF-8">
305
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
306
+ <title>Ultra-Fast English to Telugu Translation</title>
307
  <style>
308
  * { margin: 0; padding: 0; box-sizing: border-box; }
309
  body {
 
321
  overflow: hidden;
322
  }
323
  .header {
324
+ background: linear-gradient(135deg, #ff6b6b 0%, #feca57 100%);
325
  color: white;
326
  padding: 30px;
327
  text-align: center;
328
  }
329
  .header h1 { font-size: 2.5em; margin-bottom: 10px; }
330
  .header p { font-size: 1.1em; opacity: 0.9; }
331
+ .speed-badge {
332
+ display: inline-block;
333
+ background: rgba(255,255,255,0.2);
334
+ padding: 5px 15px;
335
+ border-radius: 20px;
336
+ margin-top: 10px;
337
+ font-weight: bold;
338
+ }
339
  .content { padding: 40px; }
340
  .form-group { margin-bottom: 25px; }
341
  label {
 
358
  }
359
  textarea:focus {
360
  outline: none;
361
+ border-color: #ff6b6b;
362
+ box-shadow: 0 0 0 3px rgba(255, 107, 107, 0.1);
363
  }
364
  .controls {
365
  display: flex;
 
384
  width: 100px;
385
  }
386
  button {
387
+ background: linear-gradient(135deg, #ff6b6b 0%, #feca57 100%);
388
  color: white;
389
  padding: 15px 30px;
390
  border: none;
 
397
  }
398
  button:hover {
399
  transform: translateY(-2px);
400
+ box-shadow: 0 10px 20px rgba(255, 107, 107, 0.3);
401
  }
402
  button:disabled {
403
  opacity: 0.7;
 
409
  padding: 25px;
410
  background: #f8f9ff;
411
  border-radius: 10px;
412
+ border-left: 4px solid #ff6b6b;
413
  }
414
  .result h3 {
415
  color: #333;
 
439
  border: 1px solid #e0e0e0;
440
  font-size: 0.9em;
441
  }
442
+ .stat strong { color: #ff6b6b; }
443
  .loading {
444
  display: none;
445
  text-align: center;
 
450
  width: 40px;
451
  height: 40px;
452
  border: 4px solid #f3f3f3;
453
+ border-top: 4px solid #ff6b6b;
454
  border-radius: 50%;
455
  animation: spin 1s linear infinite;
456
  }
 
458
  0% { transform: rotate(0deg); }
459
  100% { transform: rotate(360deg); }
460
  }
461
+ .speed-indicator {
462
+ color: #27ae60;
463
+ font-weight: bold;
464
+ }
465
  .error {
466
  background: #ffe6e6;
467
  border-left-color: #ff4757;
 
480
  <body>
481
  <div class="container">
482
  <div class="header">
483
+ <h1>⚡ Ultra-Fast English to Telugu Translation</h1>
484
+ <p>Lightning Speed AI Translation Optimized for Performance</p>
485
+ <div class="speed-badge">🚀 Target: &lt;0.5s processing time</div>
486
  </div>
487
 
488
  <div class="content">
489
  <div class="form-group">
490
  <label for="inputText">📝 Enter English Text:</label>
491
+ <textarea id="inputText" placeholder="Type or paste your English text here...&#10;&#10;Optimized for ultra-fast processing!&#10;Try shorter sentences for best performance."></textarea>
492
  </div>
493
 
494
  <div class="controls">
495
  <div class="control-group">
496
  <label>Batch Size:</label>
497
+ <input type="number" id="batchSize" value="16" min="1" max="64">
498
  </div>
499
  <div class="control-group">
500
  <label>Max Length:</label>
501
+ <input type="number" id="maxLength" value="256" min="1" max="512">
502
  </div>
503
+ <button onclick="translateText()">⚡ Ultra-Fast Translate</button>
504
  </div>
505
 
506
  <div class="loading" id="loading">
507
  <div class="spinner"></div>
508
+ <p>Processing at lightning speed...</p>
509
  </div>
510
 
511
  <div id="result" class="result" style="display: none;">
 
513
  <div id="translatedText" class="translated-text"></div>
514
  <div class="stats">
515
  <div class="stat">
516
+ <strong>Processing Time:</strong> <span id="processingTime" class="speed-indicator">-</span> seconds
517
  </div>
518
  <div class="stat">
519
  <strong>Model:</strong> <span id="modelUsed">-</span>
 
529
  <script>
530
  async function translateText() {
531
  const inputText = document.getElementById('inputText').value;
532
+ const batchSize = parseInt(document.getElementById('batchSize').value) || 16;
533
+ const maxLength = parseInt(document.getElementById('maxLength').value) || 256;
534
 
535
  if (!inputText.trim()) {
536
  alert('⚠️ Please enter some text to translate');
 
542
  const result = document.getElementById('result');
543
 
544
  // Show loading state
545
+ button.textContent = ' Processing...';
546
  button.disabled = true;
547
  loading.style.display = 'block';
548
  result.style.display = 'none';
549
 
550
+ const startTime = performance.now();
551
+
552
  try {
553
  const response = await fetch('/translate/', {
554
  method: 'POST',
 
567
  }
568
 
569
  const data = await response.json();
570
+ const totalTime = (performance.now() - startTime) / 1000;
571
 
572
  // Display results
573
  document.getElementById('translatedText').textContent = data.translated_text;
574
+
575
+ const processingTimeElement = document.getElementById('processingTime');
576
+ processingTimeElement.textContent = data.processing_time.toFixed(3);
577
+
578
+ // Color code based on speed
579
+ if (data.processing_time < 0.5) {
580
+ processingTimeElement.style.color = '#27ae60'; // Green
581
+ processingTimeElement.textContent += ' ⚡';
582
+ } else if (data.processing_time < 2) {
583
+ processingTimeElement.style.color = '#f39c12'; // Orange
584
+ } else {
585
+ processingTimeElement.style.color = '#e74c3c'; // Red
586
+ }
587
+
588
  document.getElementById('modelUsed').textContent = data.model_used;
589
  document.getElementById('charCount').textContent = data.original_text.length;
590
 
 
608
  result.classList.add('error');
609
 
610
  } finally {
611
+ button.textContent = ' Ultra-Fast Translate';
612
  button.disabled = false;
613
  }
614
  }
 
639
  "status": "healthy",
640
  "device": str(device) if device else "not_initialized",
641
  "model_loaded": translator is not None,
642
+ "optimization_level": "ultra_fast",
643
  "timestamp": time.time()
644
  }
645
 
 
647
  async def api_info():
648
  """API information endpoint"""
649
  return {
650
+ "title": "Ultra-Fast English to Telugu Translation API",
651
+ "version": "2.0.0",
652
  "model": "aryaumesh/english-to-telugu",
653
+ "optimizations": [
654
+ "spacy_sentence_splitting",
655
+ "aggressive_caching",
656
+ "gpu_half_precision",
657
+ "greedy_decoding",
658
+ "larger_batch_sizes",
659
+ "reduced_beam_search"
660
+ ],
661
+ "target_processing_time": "< 0.5 seconds",
662
  "endpoints": {
663
  "translate": "/translate/",
664
  "health": "/health",
665
  "docs": "/docs",
666
  "redoc": "/redoc"
667
+ }
 
 
 
 
 
 
668
  }
669
 
670
  if __name__ == "__main__":
 
675
  port=port,
676
  workers=1,
677
  log_level="info"
678
+ )