# NOTE(review): the three lines above this file's docstring were Hugging Face
# Spaces status-page residue ("Spaces: / Sleeping / Sleeping") captured by the
# scrape; replaced with this comment so the module parses.
"""
Hugging Face Space: Text Summarizer (BERT2BERT)
===============================================
Model:  mrm8488/bert2bert_shared-spanish-finetuned-summarization
Input:  long text in Spanish
Output: summarized text
"""
| import gradio as gr | |
| import torch | |
| from transformers import BertTokenizerFast, EncoderDecoderModel | |
class SummarizationService:
    """Spanish abstractive summarization backed by a BERT2BERT encoder-decoder.

    Loads ``mrm8488/bert2bert_shared-spanish-finetuned-summarization`` on CPU
    and handles arbitrarily long inputs by splitting them into sentence-aligned
    "micro-chunks" that fit the 512-token BERT encoder window, summarizing each
    chunk independently, and concatenating the partial summaries.
    """

    def __init__(self):
        ckpt = "mrm8488/bert2bert_shared-spanish-finetuned-summarization"
        self.device = torch.device("cpu")
        print(f"Cargando modelo BERT2BERT: {ckpt}...")
        self.tokenizer = BertTokenizerFast.from_pretrained(ckpt)
        # Plain float32 weights from the legacy .bin checkpoint; keep
        # low_cpu_mem_usage / safetensors disabled for maximum compatibility
        # with this older checkpoint format.
        self.model = EncoderDecoderModel.from_pretrained(
            ckpt,
            low_cpu_mem_usage=False,
            use_safetensors=False,
            torch_dtype=torch.float32,
        )
        self.model.eval()
        print("Modelo cargado correctamente.")

    def summarize(self, text: str) -> str:
        """Summarize *text*, micro-chunking it to stay within the model window.

        Args:
            text: Long input text in Spanish; newlines are flattened to spaces.

        Returns:
            The chunk summaries joined with single spaces ("" for empty input).
        """
        text = text.replace("\n", " ").strip()
        gen_params = {
            "min_length": 25,
            "max_length": 100,
            "num_beams": 4,
            "length_penalty": 2.0,
            "no_repeat_ngram_size": 3,
            "early_stopping": True,
        }
        chunks = self._chunk_text(text, max_tokens=200)
        summaries = []
        for chunk in chunks:
            inputs = self.tokenizer(
                [chunk],
                padding="max_length",
                truncation=True,
                max_length=512,
                return_tensors="pt",
            )
            input_ids = inputs["input_ids"].to(self.device)
            attention_mask = inputs["attention_mask"].to(self.device)
            with torch.no_grad():
                output_ids = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    **gen_params,
                )
            summary_piece = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
            if summary_piece.strip():
                summaries.append(summary_piece.strip())
        return " ".join(summaries)

    def _chunk_text(self, text: str, max_tokens: int) -> list:
        """Split *text* into sentence-aligned chunks of at most *max_tokens* tokens.

        Token counts come from the model tokenizer so the 512-token encoder
        budget (chunk + generation overhead) is respected. A single sentence
        longer than *max_tokens* becomes its own chunk and is later truncated
        by the tokenizer.
        """
        sentences = text.split('. ')
        chunks = []
        current_chunk = []
        current_length = 0
        for sentence in sentences:
            sentence = sentence.strip()
            # BUGFIX: the last sentence of the text keeps its trailing period
            # after split('. '); strip it so the ". "-rejoin below does not
            # emit a double period ("..") in the final chunk.
            if sentence.endswith('.'):
                sentence = sentence[:-1]
            if not sentence:
                continue
            tokens = self.tokenizer.tokenize(sentence)
            sent_len = len(tokens)
            if sent_len > max_tokens:
                # Oversized sentence: flush the accumulator, emit it alone.
                if current_chunk:
                    chunks.append(". ".join(current_chunk) + ".")
                    current_chunk = []
                    current_length = 0
                chunks.append(sentence + ".")
                continue
            if current_length + sent_len > max_tokens:
                chunks.append(". ".join(current_chunk) + ".")
                current_chunk = [sentence]
                current_length = sent_len
            else:
                current_chunk.append(sentence)
                current_length += sent_len
        if current_chunk:
            chunks.append(". ".join(current_chunk) + ".")
        return chunks
# Eagerly build the shared model service at import time so the Hugging Face
# Space downloads and loads the weights once, before the first request.
print("Inicializando servicio de resumen...")
service = SummarizationService()  # module-level singleton used by resumir_texto
print("Servicio listo.")
def resumir_texto(texto: str) -> str:
    """Gradio entry point: validate the input and delegate to the service.

    Returns either the generated summary or a Spanish user-facing message
    (prompt for empty input, error description on failure).
    """
    if texto is None or texto.strip() == "":
        return "Por favor, introduce un texto para resumir."
    try:
        return service.summarize(texto)
    except Exception as e:
        # Surface any backend failure as a readable message in the UI.
        return f"Error al resumir: {str(e)}"
# --- Gradio UI ---------------------------------------------------------------
_EXAMPLE_TEXT = "La inteligencia artificial es un campo de la informática que se centra en crear sistemas inteligentes. Estos sistemas pueden aprender de la experiencia y realizar tareas como reconocimiento de voz y toma de decisiones. El aprendizaje automático permite a las computadoras mejorar su rendimiento a través de la experiencia."

iface = gr.Interface(
    fn=resumir_texto,
    title="📝 Resumidor de Texto (BERT2BERT)",
    description="Resume textos largos en español usando el modelo BERT2BERT con técnica de micro-chunking.",
    inputs=gr.Textbox(
        lines=10,
        placeholder="Pega aquí tu texto largo en español...",
        label="Texto a Resumir",
    ),
    outputs=gr.Textbox(label="Resumen"),
    examples=[[_EXAMPLE_TEXT]],
    flagging_mode="never",  # hide the flag button; flagged data is not collected
)

if __name__ == "__main__":
    iface.launch()