| |
| """ |
| MTP 4 API - ASISTENTE AVANZADO |
| - Modelo: d_model=384, n_layers=6 (25M parámetros) |
| - Temperatura 0.4 |
| - Sistema anti-alucinaciones |
| """ |
|
|
| import os |
| import sys |
| import torch |
| import json |
| import time |
| import gc |
| import re |
| from fastapi import FastAPI |
| from fastapi.responses import HTMLResponse |
| from fastapi.middleware.cors import CORSMiddleware |
| from pydantic import BaseModel, Field |
| from huggingface_hub import snapshot_download |
| import uvicorn |
| import math |
| import torch.nn as nn |
| import torch.nn.functional as F |
| import sentencepiece as spm |
| from enum import Enum |
| from typing import Tuple, Optional |
|
|
| |
| |
| |
# Select the compute device once at import time and tune torch accordingly.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

if DEVICE == "cpu":
    # Use at most 4 intra-op threads (2 when the CPU count is unknown)
    # and 2 inter-op threads.
    torch.set_num_threads(min(4, os.cpu_count() or 2))
    torch.set_num_interop_threads(2)
    print("⚠️ Usando CPU optimizado.")
else:
    torch.backends.cudnn.benchmark = True
    print("✅ GPU detectada. Modo rápido activado.")

# This module only runs inference, so autograd is switched off.
torch.set_grad_enabled(False)

# Hugging Face repository id the model files are downloaded from.
MODEL_REPO = "TeszenAI/MTP-4"
|
|
| |
| |
| |
class AntiHallucination:
    """Heuristic filters that flag low-quality generated text.

    The checks are purely textual (length, regex patterns, phrase lookup);
    no model is consulted.
    """

    def __init__(self):
        # Phrases that, near the start of a reply, mean the model is declining
        # to answer.
        self.uncertainty_words = [
            'no se', 'no lo se', 'no tengo idea', 'no estoy seguro',
            'no puedo responder', 'no sé', 'desconozco'
        ]
        # Whole-string patterns for replies with no real content:
        # punctuation only, whitespace only, digits only, 1-3 ASCII letters.
        self.empty_patterns = [
            r'^[.,!?;:]+$', r'^[\s]+$', r'^[0-9]+$', r'^[a-zA-Z]{1,3}$',
        ]
        # A word repeated 6+ times in a row, or a character repeated 11+ times.
        self.repetition_patterns = [
            r'(\b\w+\b)(?:\s+\1){5,}', r'(.)\1{10,}',
        ]
        # Replies longer than this many characters are rejected.
        self.max_safe_chars = 500

    def is_hallucinating(self, text: str) -> Tuple[bool, str]:
        """Decide whether ``text`` looks like a degenerate reply.

        Checks run in this order: empty, shorter than 5 characters, empty
        pattern, repetition, uncertainty phrase within the first five words,
        longer than ``max_safe_chars``.

        Args:
            text: The decoded model reply.

        Returns:
            ``(flagged, reason)``; ``reason`` is ``"OK"`` when not flagged.
        """
        if not text:
            return True, "Respuesta vacía"
        if len(text) < 5:
            return True, "Respuesta demasiado corta"
        if any(re.match(pattern, text) for pattern in self.empty_patterns):
            return True, "Patrón vacío detectado"
        if any(re.search(pattern, text) for pattern in self.repetition_patterns):
            return True, "Repetición excesiva"

        # Only the opening five words are inspected; build the string once.
        opening = ' '.join(text.lower().split()[:5])
        for uw in self.uncertainty_words:
            # Match whole words only: a plain substring test would flag
            # "casino seguro" because it contains the letters "no se".
            if re.search(r'(?<!\w)' + re.escape(uw) + r'(?!\w)', opening):
                return True, f"Expresa incertidumbre: '{uw}'"

        if len(text) > self.max_safe_chars:
            return True, "Respuesta demasiado larga"
        return False, "OK"

    def is_coherent(self, text: str, question: str) -> Tuple[bool, str]:
        """Check that the reply shares vocabulary with the question.

        Words of three or more letters are extracted from the lower-cased
        question; a word counts as present when it occurs anywhere in the
        lower-cased reply (substring test, so "gato" also matches "gatos").
        The reply is rejected when the question has at least two such words
        and fewer than 20% of them appear.

        Args:
            text: The decoded model reply.
            question: The user's original input.

        Returns:
            ``(coherent, reason)``. Empty ``text`` or ``question`` is treated
            as coherent because there is nothing to compare.
        """
        if not text or not question:
            return True, "Sin datos suficientes"
        text_lower = text.lower()
        question_words = set(re.findall(r'\b[a-záéíóúüñ]{3,}\b', question.lower()))
        if question_words:
            matches = sum(1 for w in question_words if w in text_lower)
            ratio = matches / len(question_words)
            if len(question_words) >= 2 and ratio < 0.2:
                return False, "No responde a la pregunta"
        return True, "OK"
|
|
| |
| |
| |
class CompletionState(Enum):
    """Verdict returned by ``IntelligentStopper.analyze``."""

    INCOMPLETE = "incomplete"
    COMPLETE = "complete"
    SHOULD_STOP = "should_stop"  # declared, but the stopper below never returns it


class IntelligentStopper:
    """Text heuristics that decide whether a partial reply can end here."""

    def __init__(self):
        # Terminal punctuation. The "!" pattern must not be optional: an
        # optional "!" followed by "\s*$" matches every string.
        self.completion_patterns = [r'\.\s*$', r'!\s*$', r'\?\s*$', r'\.\.\.\s*$']
        # Endings that promise more text. They are applied to stripped text,
        # so the conjunction patterns anchor on the word itself rather than
        # on a trailing space that strip() has already removed.
        self.continuation_patterns = [r'[,;:]\s*$', r'\sy$', r'\so$', r'\spero$', r'\sporque$']
        # Closing phrases, listed with and without accents so correctly
        # spelled Spanish ("adiós", "día") is recognised too.
        self.completion_phrases = [
            'gracias', 'saludos', 'adios', 'adiós', 'hasta luego',
            'espero haberte ayudado', 'cualquier otra pregunta',
            'que tengas un buen dia', 'que tengas un buen día', 'nos vemos'
        ]

    def analyze(self, text: str, min_length: int = 40) -> Tuple[CompletionState, str]:
        """Classify a partial reply as complete or incomplete.

        Args:
            text: Text generated so far.
            min_length: Minimum stripped length before the reply can be
                considered complete.

        Returns:
            ``(state, reason)``. Checks run in order: too short, ends with a
            continuation marker, contains a closing phrase in its last 80
            characters, ends with terminal punctuation, exceeds 350
            characters.
        """
        text = (text or "").strip()
        if len(text) < min_length:
            return CompletionState.INCOMPLETE, "Demasiado corto"
        for pattern in self.continuation_patterns:
            if re.search(pattern, text, re.IGNORECASE):
                return CompletionState.INCOMPLETE, "Indica continuación"
        tail = text.lower()[-80:]
        if any(phrase in tail for phrase in self.completion_phrases):
            return CompletionState.COMPLETE, "Frase de finalización"
        if any(re.search(pattern, text) for pattern in self.completion_patterns):
            return CompletionState.COMPLETE, "Termina naturalmente"
        if len(text) > 350:
            return CompletionState.COMPLETE, "Longitud suficiente"
        return CompletionState.INCOMPLETE, "Puede continuar"


class LayerNorm(nn.Module):
    """Layer normalisation over the last dimension with learned scale and shift.

    NOTE(review): this divides by ``std + eps`` using ``Tensor.std``, which is
    not the same formula as ``nn.LayerNorm``. It is kept as-is because the
    checkpoint was presumably trained with this exact computation - confirm
    before swapping implementations.
    """

    def __init__(self, d_model, eps=1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(d_model))
        self.bias = nn.Parameter(torch.zeros(d_model))
        self.eps = eps

    def forward(self, x):
        """Normalise ``x`` along its last axis, then apply weight and bias."""
        centered = x - x.mean(-1, keepdim=True)
        spread = x.std(-1, keepdim=True) + self.eps
        return self.weight * centered / spread + self.bias


class MultiHeadAttention(nn.Module):
    """Scaled dot-product self-attention split across ``n_heads`` heads."""

    def __init__(self, d_model, n_heads, dropout=0.2):
        super().__init__()
        assert d_model % n_heads == 0
        self.d_model = d_model
        self.n_heads = n_heads
        self.d_k = d_model // n_heads
        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)
        self.w_o = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
        self.scale = math.sqrt(self.d_k)

    def forward(self, x, mask=None):
        """Attend over ``x`` of shape ``(batch, seq, d_model)``.

        Positions where ``mask == 0`` are excluded from attention.
        """
        b, s, _ = x.shape
        Q = self.w_q(x).view(b, s, self.n_heads, self.d_k).transpose(1, 2)
        K = self.w_k(x).view(b, s, self.n_heads, self.d_k).transpose(1, 2)
        V = self.w_v(x).view(b, s, self.n_heads, self.d_k).transpose(1, 2)
        scores = torch.matmul(Q, K.transpose(-2, -1)) / self.scale
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float('-inf'))
        attn = self.dropout(F.softmax(scores, dim=-1))
        out = torch.matmul(attn, V).transpose(1, 2).contiguous().view(b, s, self.d_model)
        return self.w_o(out)


class FeedForward(nn.Module):
    """Two-layer position-wise MLP with GELU activation."""

    def __init__(self, d_model, d_ff, dropout=0.2):
        super().__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply linear1 -> GELU -> dropout -> linear2."""
        return self.linear2(self.dropout(F.gelu(self.linear1(x))))


class TransformerBlock(nn.Module):
    """Pre-norm transformer block: attention and feed-forward, each residual."""

    def __init__(self, d_model, n_heads, d_ff, dropout=0.2):
        super().__init__()
        self.attn = MultiHeadAttention(d_model, n_heads, dropout)
        self.ff = FeedForward(d_model, d_ff, dropout)
        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        """Run the attention and feed-forward sub-layers with residual adds."""
        x = x + self.dropout1(self.attn(self.norm1(x), mask))
        x = x + self.dropout2(self.ff(self.norm2(x)))
        return x


class PositionalEncoding(nn.Module):
    """Fixed sinusoidal position table added to the token embeddings."""

    def __init__(self, d_model, max_len=512):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        pos = torch.arange(0, max_len).float().unsqueeze(1)
        div = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(pos * div)
        pe[:, 1::2] = torch.cos(pos * div)
        # Registered as a buffer: saved with the module but not trained.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Add the first ``x.size(1)`` rows of the table to ``x``."""
        return x + self.pe[:, :x.size(1)]


class MTP4Model(nn.Module):
    """Decoder-only transformer language model with a sampling generator."""

    def __init__(self, vocab_size, d_model=384, n_heads=8, n_layers=6, d_ff=1536, dropout=0.2, max_len=512):
        super().__init__()
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.max_len = max_len
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoding = PositionalEncoding(d_model, max_len)
        self.blocks = nn.ModuleList([TransformerBlock(d_model, n_heads, d_ff, dropout) for _ in range(n_layers)])
        self.norm = LayerNorm(d_model)
        self.lm_head = nn.Linear(d_model, vocab_size)
        self.dropout = nn.Dropout(dropout)
        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise every parameter with more than one dimension."""
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, x):
        """Return next-token logits of shape ``(batch, seq, vocab_size)``.

        Args:
            x: Integer token ids of shape ``(batch, seq)``.
        """
        seq_len = x.size(1)
        # Lower-triangular causal mask, created directly on the input's device.
        mask = torch.tril(torch.ones(seq_len, seq_len, device=x.device)).unsqueeze(0).unsqueeze(0)
        x = self.embedding(x) * math.sqrt(self.d_model)
        x = self.pos_encoding(x)
        x = self.dropout(x)
        for block in self.blocks:
            x = block(x, mask)
        return self.lm_head(self.norm(x))

    @torch.no_grad()
    def generate(self, input_ids, max_new=120, temperature=0.4, top_k=30, top_p=0.85,
                 repetition_penalty=1.3, stopper=None, tokenizer=None):
        """Sample up to ``max_new`` tokens after ``input_ids``.

        Only the first row of the batch is sampled.

        Args:
            input_ids: Prompt token ids, shape ``(1, prompt_len)``.
            max_new: Maximum number of tokens to append.
            temperature: Softmax temperature; clamped to a small positive
                value so 0 does not divide by zero.
            top_k: Keep only the ``top_k`` highest logits (0 disables).
            top_p: Nucleus sampling threshold (1.0 disables).
            repetition_penalty: Penalty applied to the logits of tokens that
                already appear in the sequence (1.0 disables).
            stopper: Optional object with ``analyze(text, min_length=...)``
                returning ``(CompletionState, reason)``; consulted every 5
                steps after step 20.
            tokenizer: Object with ``decode(list_of_ids)`` used to build the
                text shown to ``stopper``. Defaults to the module-level ``sp``
                tokenizer when one exists; without a tokenizer the stopper is
                skipped.

        Returns:
            Tensor of shape ``(1, prompt_len + n_generated)``.
        """
        generated = input_ids
        prompt_len = input_ids.size(1)
        eos_id = 3
        recent_tokens = []
        if tokenizer is None:
            # dir() inside a method only lists locals, so the module-level
            # tokenizer has to be looked up through globals().
            tokenizer = globals().get('sp')
        temperature = max(float(temperature), 1e-5)

        for step in range(max_new):
            # Never feed more than max_len tokens to the model.
            context = generated[:, -self.max_len:]
            next_logits = self(context)[0, -1, :].clone() / temperature

            if repetition_penalty != 1.0:
                seen = torch.unique(generated[0])
                seen_logits = next_logits[seen]
                # Dividing a negative logit would make the token MORE likely,
                # so negative logits are multiplied instead.
                next_logits[seen] = torch.where(
                    seen_logits < 0,
                    seen_logits * repetition_penalty,
                    seen_logits / repetition_penalty,
                )

            if top_k > 0:
                # Clamp k so a top_k above the vocabulary size cannot crash topk.
                k = min(top_k, next_logits.size(-1))
                kth_value = torch.topk(next_logits, k)[0][..., -1, None]
                next_logits[next_logits < kth_value] = float('-inf')
            if top_p < 1.0:
                sorted_logits, sorted_indices = torch.sort(next_logits, descending=True)
                cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                remove = cum_probs > top_p
                # Shift right so the token that crosses the threshold is kept.
                remove[..., 1:] = remove[..., :-1].clone()
                remove[..., 0] = 0
                next_logits[sorted_indices[remove]] = float('-inf')

            probs = F.softmax(next_logits, dim=-1)
            next_token = torch.multinomial(probs, 1).item()

            # Loop guard over a sliding window of the last 7 sampled tokens;
            # the window must be trimmed or the check stops working after the
            # first few steps.
            recent_tokens.append(next_token)
            recent_tokens = recent_tokens[-7:]
            if len(recent_tokens) > 6 and len(set(recent_tokens)) <= 2:
                break

            if next_token == eos_id or next_token == 0:
                break

            generated = torch.cat([generated, torch.tensor([[next_token]], device=generated.device)], dim=1)

            if stopper and tokenizer is not None and step > 20 and step % 5 == 0:
                # Slice by prompt length (columns); len(input_ids) would be
                # the batch size.
                gen_tokens = [t for t in generated[0, prompt_len:].tolist() if t not in (0, 1, 2, 3)]
                if gen_tokens:
                    current_text = tokenizer.decode(gen_tokens)
                    if current_text and len(current_text) > 50:
                        state, _ = stopper.analyze(current_text, min_length=40)
                        if state == CompletionState.COMPLETE:
                            break

        return generated
|
|
| |
| |
| |
def clean_response(text: str, question: str = "") -> str:
    """Tidy a raw model reply before it is returned to the user.

    Steps, in order: drop immediately repeated words (case-insensitive),
    collapse whitespace, shorten replies to greetings, cap the length at 400
    characters, substitute a fallback message for near-empty text, and
    capitalise the first letter.

    Args:
        text: Decoded model output.
        question: The user's input; only used to detect plain greetings.

    Returns:
        The cleaned reply, ``""`` when ``text`` is empty, or a fixed apology
        when fewer than 3 characters survive cleaning.
    """
    if not text:
        return ""

    # split()/join() both removes consecutive duplicate words and normalises
    # all whitespace runs to single spaces.
    deduped = []
    for word in text.split():
        if not deduped or word.lower() != deduped[-1].lower():
            deduped.append(word)
    text = " ".join(deduped)

    # Greetings are compared without surrounding punctuation and include the
    # accented spelling, so "¡Hola!" and "Buenos días" are recognised.
    greetings = {"hola", "buenos dias", "buenos días", "buenas tardes", "buenas noches", "hey"}
    normalized_question = " ".join(question.lower().strip(" \t\n¡!¿?.,;:").split())
    if normalized_question in greetings:
        # A greeting only needs a one-sentence answer.
        if '.' in text:
            text = text.split('.')[0] + '.'
        elif len(text) > 100:
            text = text[:100] + '...'

    if len(text) > 400:
        # Prefer cutting at the last sentence end inside the first 400
        # characters, unless that would leave 50 characters or fewer.
        period = text[:400].rfind('.')
        if period > 50:
            text = text[:period + 1]
        else:
            text = text[:400] + "..."

    if len(text) < 3:
        return "Lo siento, no pude generar una respuesta clara."

    if text[0].islower():
        text = text[0].upper() + text[1:]

    return text
|
|
| |
| |
| |
# ---------------------------------------------------------------------------
# Import-time bootstrap: download the repository, read the config, load the
# tokenizer, build the model and load its weights.
# ---------------------------------------------------------------------------
print(f"📦 Descargando MTP 4 desde {MODEL_REPO}...")
repo_path = snapshot_download(repo_id=MODEL_REPO, repo_type="model", local_dir="mtp_repo")


config_path = os.path.join(repo_path, "config.json")
# Explicit encoding so parsing does not depend on the platform default.
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)


print("📋 Configuración encontrada:")
for _key in ("d_model", "n_layers", "n_heads", "d_ff"):
    print(f" → {_key}: {config.get(_key, 'No especificado')}")


tokenizer_path = os.path.join(repo_path, "mtp_tokenizer.model")
sp = spm.SentencePieceProcessor()
sp.load(tokenizer_path)
VOCAB_SIZE = sp.get_piece_size()
# The tokenizer is the source of truth for the vocabulary size.
config["vocab_size"] = VOCAB_SIZE


print("🧠 Inicializando MTP 4...")
print(f" → Vocabulario: {VOCAB_SIZE}")
print(f" → Dispositivo: {DEVICE.upper()}")


# Every config key is forwarded as a keyword argument to MTP4Model.
model = MTP4Model(**config)
model.to(DEVICE)


model_path = os.path.join(repo_path, "mtp_model.pt")
if os.path.exists(model_path):
    # NOTE(security): torch.load unpickles the file. The checkpoint comes from
    # MODEL_REPO; consider weights_only=True if the checkpoint format allows it.
    state_dict = torch.load(model_path, map_location=DEVICE)

    # strict=False tolerates key mismatches, so report them instead of
    # claiming success silently.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(
            f"⚠️ Pesos cargados parcialmente: "
            f"{len(load_result.missing_keys)} claves faltantes, "
            f"{len(load_result.unexpected_keys)} claves inesperadas"
        )
    print("✅ Pesos del modelo cargados")
else:
    # Without a checkpoint the model keeps its random initialisation.
    print(f"⚠️ No se encontró {model_path}; el modelo usará pesos aleatorios.")


model.eval()
param_count = sum(p.numel() for p in model.parameters())
print(f"✅ MTP 4 listo: {param_count:,} parámetros ({param_count/1e6:.2f}M)")


stopper = IntelligentStopper()
anti_hallucination = AntiHallucination()
|
|
| |
| |
| |
# FastAPI application object; the routes below attach to it.
app = FastAPI(
    title="MTP 4 API",
    description="Asistente IA Avanzado",
    version="4.0",
)

# CORS is fully open: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
class PromptRequest(BaseModel):
    """Request body for ``POST /generate``."""

    # Required user message, limited to 2000 characters.
    text: str = Field(default=..., max_length=2000)
|
|
def build_prompt(user_input: str) -> str:
    """Wrap the user's text in the instruction/response prompt template."""
    template = "### Instrucción:\n{}\n\n### Respuesta:\n"
    return template.format(user_input)
|
|
# Number of /generate calls currently in flight.
ACTIVE_REQUESTS = 0

# Upper bound on prompt tokens fed to the model.
MAX_PROMPT_TOKENS = 350


def _encode_prompt(user_input: str) -> list:
    """Tokenise the full prompt, shortening the user text if it is too long.

    Cutting the encoded prompt at the end would drop the trailing
    "### Respuesta:" marker that ``build_prompt`` appends, so the user text is
    shortened and the template rebuilt around it instead.
    """
    tokens = sp.encode(build_prompt(user_input))
    if len(tokens) <= MAX_PROMPT_TOKENS:
        return tokens

    user_tokens = sp.encode(user_input)
    budget = MAX_PROMPT_TOKENS - len(sp.encode(build_prompt("")))
    # Re-encoding the shortened text may not give exactly `budget` tokens, so
    # shrink until the whole prompt fits.
    while budget > 0:
        tokens = sp.encode(build_prompt(sp.decode(user_tokens[:budget])))
        if len(tokens) <= MAX_PROMPT_TOKENS:
            return tokens
        budget -= 8
    # Last resort: hard cut, which still honours the size limit.
    return tokens[:MAX_PROMPT_TOKENS]


@app.post("/generate")
async def generate(req: PromptRequest):
    """Generate a reply for the user's text.

    Returns a dict with ``reply`` and, on success, ``tokens_generated``,
    ``time`` (seconds, 2 decimals) and ``model``. Blank input yields
    ``{"reply": ""}``; any exception yields a generic error reply.

    NOTE(review): ``model.generate`` is a synchronous call inside an
    ``async def`` handler, so nothing else in this handler's event loop can
    run while a reply is generated - confirm this is acceptable.
    """
    global ACTIVE_REQUESTS
    ACTIVE_REQUESTS += 1

    # Everything after the increment sits inside try/finally so the counter
    # is always decremented, including on early return or encoding errors.
    try:
        user_input = req.text.strip()
        if not user_input:
            return {"reply": ""}

        tokens = _encode_prompt(user_input)
        input_ids = torch.tensor([tokens], device=DEVICE)

        start = time.time()

        output_ids = model.generate(
            input_ids,
            max_new=100,
            temperature=0.4,
            top_k=30,
            top_p=0.85,
            repetition_penalty=1.3,
            stopper=stopper
        )

        elapsed = time.time() - start

        # Keep only the newly generated ids that are valid, non-zero vocab ids.
        gen_tokens = output_ids[0, len(tokens):].tolist()
        safe_tokens = [t for t in gen_tokens if 0 < t < VOCAB_SIZE]

        response = sp.decode(safe_tokens).strip() if safe_tokens else ""

        is_hallucinating, reason = anti_hallucination.is_hallucinating(response)
        if is_hallucinating:
            print(f"⚠️ Alucinación detectada: {reason}")
            # Retry with only the first 20 tokens; give up if still flagged.
            if safe_tokens and len(safe_tokens) > 20:
                safe_tokens = safe_tokens[:20]
                response = sp.decode(safe_tokens).strip()
                is_hallucinating, _ = anti_hallucination.is_hallucinating(response)
            if is_hallucinating:
                response = ""

        # An off-topic reply is reduced to its first sentence (or first 100
        # characters when it has no period).
        is_coherent, _ = anti_hallucination.is_coherent(response, user_input)
        if not is_coherent and len(response) > 20:
            first_sentence = response.split('.')[0] if '.' in response else response[:100]
            if len(first_sentence) > 10:
                response = first_sentence + '.'

        response = clean_response(response, user_input)

        if len(response) < 3:
            response = "Lo siento, no pude generar una respuesta clara."

        return {
            "reply": response,
            "tokens_generated": len(safe_tokens),
            "time": round(elapsed, 2),
            "model": "MTP-4"
        }

    except Exception as e:
        # Top-level boundary: log the failure and return a generic reply.
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        return {"reply": "Lo siento, ocurrió un error."}

    finally:
        ACTIVE_REQUESTS -= 1
        if DEVICE == "cuda":
            torch.cuda.empty_cache()
        gc.collect()
|
|
@app.get("/health")
def health():
    """Liveness probe: report a fixed status plus the active device."""
    return dict(status="ok", model="MTP-4", device=DEVICE)
|
|
@app.get("/info")
def info():
    """Describe the loaded model: name, version, size, device, vocabulary."""
    details = {"model": "MTP-4", "version": "4.0"}
    details["parameters"] = param_count
    details["parameters_millions"] = round(param_count / 1e6, 2)
    details["device"] = DEVICE
    details["vocab_size"] = VOCAB_SIZE
    return details
|
|
| |
| |
| |
@app.get("/", response_class=HTMLResponse)
def chat_ui():
    """Serve the single-page chat interface.

    The page posts ``{"text": ...}`` to ``/generate`` and renders the
    ``reply`` field. Compared with a bare fetch-and-render, the script also:
    wires the suggestion chips to send their text, shows an error bubble when
    the server answers with a non-2xx status or without a ``reply`` string,
    and limits the input to the 2000 characters accepted by the API.
    """
    return """
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>MTP 4 - Asistente IA</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
    background: linear-gradient(135deg, #0a0a0a 0%, #1a1a2e 100%);
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    height: 100vh;
    display: flex;
    flex-direction: column;
}
.header {
    padding: 16px 20px;
    background: rgba(0,0,0,0.5);
    backdrop-filter: blur(10px);
    border-bottom: 1px solid rgba(255,255,255,0.1);
}
.header h1 { color: white; font-size: 1.2rem; }
.header p { color: #888; font-size: 0.7rem; margin-top: 4px; }
.messages {
    flex: 1;
    overflow-y: auto;
    padding: 20px;
    display: flex;
    flex-direction: column;
    gap: 12px;
}
.message {
    max-width: 80%;
    padding: 10px 16px;
    border-radius: 18px;
    font-size: 0.9rem;
    line-height: 1.4;
    animation: fadeIn 0.2s ease;
}
@keyframes fadeIn {
    from { opacity: 0; transform: translateY(8px); }
    to { opacity: 1; transform: translateY(0); }
}
.user {
    background: linear-gradient(135deg, #4a9eff, #3a7ecc);
    color: white;
    align-self: flex-end;
    border-radius: 18px 4px 18px 18px;
}
.bot {
    background: rgba(30, 31, 40, 0.95);
    color: #e0e0e0;
    align-self: flex-start;
    border-radius: 4px 18px 18px 18px;
    border: 1px solid rgba(255,255,255,0.05);
}
.input-area {
    padding: 16px 20px;
    background: rgba(0,0,0,0.5);
    backdrop-filter: blur(10px);
    border-top: 1px solid rgba(255,255,255,0.1);
    display: flex;
    gap: 12px;
}
input {
    flex: 1;
    padding: 12px 16px;
    background: rgba(255,255,255,0.1);
    border: 1px solid rgba(255,255,255,0.2);
    border-radius: 24px;
    color: white;
    font-size: 0.9rem;
    outline: none;
}
input:focus { border-color: #4a9eff; }
input::placeholder { color: #666; }
button {
    padding: 12px 24px;
    background: linear-gradient(135deg, #4a9eff, #3a7ecc);
    border: none;
    border-radius: 24px;
    color: white;
    font-weight: 500;
    cursor: pointer;
}
button:hover { opacity: 0.9; }
button:disabled { opacity: 0.5; cursor: not-allowed; }
.typing {
    background: rgba(30, 31, 40, 0.95);
    padding: 10px 16px;
    border-radius: 18px;
    align-self: flex-start;
    display: flex;
    gap: 4px;
}
.typing span {
    width: 8px;
    height: 8px;
    background: #888;
    border-radius: 50%;
    animation: bounce 1.4s infinite;
}
.typing span:nth-child(1) { animation-delay: -0.32s; }
.typing span:nth-child(2) { animation-delay: -0.16s; }
@keyframes bounce {
    0%, 80%, 100% { transform: scale(0); }
    40% { transform: scale(1); }
}
.suggestions {
    display: flex;
    gap: 8px;
    padding: 10px 20px;
    overflow-x: auto;
    background: rgba(0,0,0,0.3);
}
.suggestion {
    padding: 5px 12px;
    background: rgba(255,255,255,0.1);
    border-radius: 20px;
    color: #aaa;
    font-size: 0.75rem;
    cursor: pointer;
    white-space: nowrap;
}
.suggestion:hover {
    background: linear-gradient(135deg, #4a9eff, #3a7ecc);
    color: white;
}
.badge {
    position: fixed;
    bottom: 8px;
    right: 8px;
    font-size: 0.6rem;
    color: #555;
    background: rgba(0,0,0,0.5);
    padding: 2px 8px;
    border-radius: 12px;
}
@media (max-width: 600px) {
    .message { max-width: 95%; }
    .suggestions { display: none; }
}
</style>
</head>
<body>
<div class="header">
    <h1>🤖 MTP 4 - Asistente IA</h1>
    <p>✨ Temperatura 0.4 | Anti-alucinaciones | Respuestas precisas</p>
</div>
<div class="suggestions">
    <div class="suggestion">Hola</div>
    <div class="suggestion">¿Quién eres?</div>
    <div class="suggestion">¿Qué puedes hacer?</div>
    <div class="suggestion">Explícame la IA</div>
    <div class="suggestion">Háblame de BTS</div>
    <div class="suggestion">¿Qué es un agujero negro?</div>
    <div class="suggestion">Dime un chiste</div>
    <div class="suggestion">Adiós</div>
</div>
<div class="messages" id="messages">
    <div class="message bot">✨ Hola, soy MTP 4. Estoy optimizado para dar respuestas coherentes y evitar alucinaciones. ¿En qué puedo ayudarte?</div>
</div>
<div class="input-area">
    <input type="text" id="input" placeholder="Escribe tu pregunta..." autocomplete="off" maxlength="2000">
    <button id="send">Enviar</button>
</div>
<div class="badge">⚡ MTP 4 | 🌡️ 0.4 | 🛡️ Anti-alucinaciones</div>
<script>
const messages = document.getElementById('messages');
const input = document.getElementById('input');
const sendBtn = document.getElementById('send');
let loading = false;

function addMessage(text, isUser, time = null) {
    const div = document.createElement('div');
    div.className = `message ${isUser ? 'user' : 'bot'}`;
    div.innerHTML = `<div>${escapeHtml(text)}</div>${time ? `<div style="font-size:0.6rem;color:#666;margin-top:6px;">⚡ ${time}s</div>` : ''}`;
    messages.appendChild(div);
    messages.scrollTop = messages.scrollHeight;
}

function escapeHtml(text) {
    const div = document.createElement('div');
    div.textContent = text;
    return div.innerHTML;
}

function showTyping() {
    const div = document.createElement('div');
    div.className = 'typing';
    div.id = 'typing';
    div.innerHTML = '<span></span><span></span><span></span>';
    messages.appendChild(div);
    messages.scrollTop = messages.scrollHeight;
}

function hideTyping() {
    const el = document.getElementById('typing');
    if (el) el.remove();
}

async function sendMessage() {
    const text = input.value.trim();
    if (!text || loading) return;

    input.value = '';
    addMessage(text, true);
    loading = true;
    sendBtn.disabled = true;
    showTyping();

    try {
        const response = await fetch('/generate', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text: text })
        });
        // A non-JSON body must not throw past the status check below.
        const data = await response.json().catch(() => ({}));
        hideTyping();
        if (!response.ok || typeof data.reply !== 'string') {
            // e.g. HTTP 422 when validation fails: there is no "reply" field.
            addMessage('⚠️ No se pudo procesar tu mensaje. Intenta de nuevo.', false);
        } else {
            addMessage(data.reply, false, data.time);
        }
    } catch (error) {
        hideTyping();
        addMessage('⚠️ Error de conexión. Intenta de nuevo.', false);
    } finally {
        loading = false;
        sendBtn.disabled = false;
        input.focus();
    }
}

// Suggestion chips send their own text as a message.
document.querySelectorAll('.suggestion').forEach((chip) => {
    chip.addEventListener('click', () => {
        input.value = chip.textContent;
        sendMessage();
    });
});

input.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); });
sendBtn.addEventListener('click', sendMessage);
input.focus();
</script>
</body>
</html>
"""
|
|
if __name__ == "__main__":
    # The listening port comes from the PORT environment variable, or 7860.
    port = int(os.environ.get("PORT", 7860))

    # Startup banner framed by two 60-character rules.
    rule = "=" * 60
    print("\n" + rule)
    print(f"🚀 MTP 4 en http://0.0.0.0:{port}")
    print("🌡️ Temperatura: 0.4 | 🔁 Repetition penalty: 1.3")
    print(rule)

    uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")