Spaces:
Runtime error
Runtime error
| import warnings | |
| warnings.simplefilter(action='ignore', category=FutureWarning) | |
| import PyPDF2 | |
| import gradio as gr | |
| from langchain.prompts import PromptTemplate | |
| from langchain.chains.summarize import load_summarize_chain | |
| from huggingface_hub import login | |
| from pathlib import Path | |
| from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import os | |
# Authenticate against the Hugging Face Hub, but only when a token has been
# supplied through the HUGGINGFACE_TOKEN environment variable; otherwise the
# login call is skipped entirely.
huggingface_token = os.environ.get('HUGGINGFACE_TOKEN')
if huggingface_token:
    login(token=huggingface_token)
# LLM configuration: a Hugging Face text-generation endpoint for the Mistral
# instruct checkpoint, wrapped in the LangChain chat-model adapter that
# summarize() and translate() invoke.
# NOTE(review): temperature is set while do_sample is False — confirm which
# of the two settings is actually intended.
_llm_settings = {
    "repo_id": "mistralai/Mistral-7B-Instruct-v0.3",
    "task": "text-generation",
    "max_new_tokens": 4096,
    "temperature": 0.5,
    "do_sample": False,
    "return_full_text": True,
}
llm = HuggingFaceEndpoint(**_llm_settings)
llm_engine_hf = ChatHuggingFace(llm=llm)
# Classification model configuration.
# Maps the class index predicted by the model to a document category.
id2label = {0: "multas", 1: "politicas_de_privacidad", 2: "contratos", 3: "denuncias", 4: "otros"}
_CLASSIFIER_CHECKPOINT = "mrm8488/legal-longformer-base-8192-spanish"
tokenizer = AutoTokenizer.from_pretrained(_CLASSIFIER_CHECKPOINT)
# Size the classification head from the label map so that every category in
# id2label can actually be predicted; without num_labels the head falls back
# to the library's default label count.
# NOTE(review): the checkpoint name suggests a base (not fine-tuned) model; if
# so, the classification head is randomly initialised and predictions are not
# meaningful until it is fine-tuned — confirm.
model = AutoModelForSequenceClassification.from_pretrained(
    _CLASSIFIER_CHECKPOINT,
    num_labels=len(id2label),
    id2label=id2label,
    label2id={label: class_id for class_id, label in id2label.items()},
)
def read_file(file):
    """Return the text content of an uploaded file.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object that exposes the path through a ``name`` attribute, such
            as a temporary-file wrapper.

    Returns:
        The text extracted by ``read_pdf`` when the path ends in ``.pdf``
        (case-insensitive); otherwise the file content decoded as UTF-8.

    Raises:
        ValueError: If ``file`` is ``None``.
    """
    if file is None:
        raise ValueError("No file was provided")
    # Path-like values must be checked first: pathlib.Path also has a ``name``
    # attribute, but it holds only the basename rather than the full path.
    if isinstance(file, (str, os.PathLike)):
        file_path = os.fspath(file)
    else:
        file_path = file.name
    if file_path.lower().endswith('.pdf'):
        return read_pdf(file_path)
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()
def read_pdf(file_path):
    """Extract the text of every page of a PDF and concatenate it.

    Args:
        file_path: Path of the PDF file, passed to ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages joined in page order with no separator; an
        empty string when the document has no pages.
    """
    pdf_reader = PyPDF2.PdfReader(file_path)
    # ``or ""`` keeps the join safe should extraction yield no text for a
    # page (for example a scanned page without a text layer).
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def summarize(text, summary_length):
    """Ask the chat model to summarise a document.

    Args:
        text: Full text of the document to summarise.
        summary_length: ``'Corto'`` (max. 100 words) or ``'Medio'`` (max. 500
            words); any other value, including ``None``, requests a summary
            of at most 1000 words.

    Returns:
        The ``content`` of the message returned by ``llm_engine_hf.invoke``.
    """
    max_words = {'Corto': 100, 'Medio': 500}.get(summary_length, 1000)
    length_instruction = f"El resumen debe tener un máximo de {max_words} palabras."
    # The length instruction is a regular template variable, so the template
    # itself stays a plain string and needs no brace escaping.
    template = (
        "\n"
        "Por favor, lea detenidamente el siguiente documento:\n"
        "<document>\n"
        "{TEXT}\n"
        "</document>\n"
        "Después de leer el documento, identifique los puntos clave y las ideas "
        "principales cubiertas en el texto. {LENGTH}\n"
        "Su objetivo es ser exhaustivo en la captura del contenido central del "
        "documento, mientras que también es conciso en la expresión de cada punto "
        "del resumen. Omita los detalles menores y concéntrese en los temas "
        "centrales y hechos importantes.\n"
    )
    prompt = PromptTemplate(
        template=template,
        input_variables=['TEXT', 'LENGTH'],
    )
    formatted_prompt = prompt.format(TEXT=text, LENGTH=length_instruction)
    output_summary = llm_engine_hf.invoke(formatted_prompt)
    return output_summary.content
def classify_text(text):
    """Classify a document into one of the categories in ``id2label``.

    Args:
        text: Document text; it is truncated to at most 4096 tokens.

    Returns:
        A string of the form ``"Clasificación: <label>"`` where the label is
        the ``id2label`` entry for the highest-scoring class.
    """
    # A single sequence needs no padding to a fixed length; padding every
    # input up to 4096 tokens only adds compute for short documents.
    inputs = tokenizer(text, return_tensors="pt", max_length=4096, truncation=True)
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_id = logits.argmax(dim=-1).item()
    predicted_label = id2label[predicted_class_id]
    return f"Clasificación: {predicted_label}"
def translate(text, target_language):
    """Ask the chat model to translate a document.

    Args:
        text: Full text of the document to translate.
        target_language: Target language. The codes ``"en"``, ``"fr"`` and
            ``"de"`` are expanded to their Spanish names so the Spanish
            prompt reads naturally; any other value is inserted unchanged.

    Returns:
        The ``content`` of the message returned by ``llm_engine_hf.invoke``.
    """
    language_names = {"en": "inglés", "fr": "francés", "de": "alemán"}
    language = language_names.get(target_language, target_language)
    template = (
        "\n"
        "Por favor, traduzca el siguiente documento al {LANGUAGE}:\n"
        "<document>\n"
        "{TEXT}\n"
        "</document>\n"
        "Asegúrese de que la traducción sea precisa y conserve el significado "
        "original del documento.\n"
    )
    prompt = PromptTemplate(
        template=template,
        input_variables=['TEXT', 'LANGUAGE'],
    )
    formatted_prompt = prompt.format(TEXT=text, LANGUAGE=language)
    translated_text = llm_engine_hf.invoke(formatted_prompt)
    return translated_text.content
def process_file(file, action, target_language=None, summary_length=None):
    """Run the selected action on an uploaded file and return the result text.

    Inputs are validated before the file is read, so an invalid request
    produces a message for the user instead of an exception.

    Args:
        file: Uploaded file, forwarded to ``read_file``.
        action: ``"Resumen"``, ``"Clasificar"`` or ``"Traducir"``.
        target_language: Target language; only used for ``"Traducir"``.
        summary_length: Requested summary length; only used for ``"Resumen"``.

    Returns:
        The output of ``summarize``, ``classify_text`` or ``translate``, or a
        Spanish message describing what is missing or invalid.
    """
    if action not in ("Resumen", "Clasificar", "Traducir"):
        return "Acción no válida"
    if file is None:
        return "Por favor, suba un archivo."
    if action == "Traducir" and not target_language:
        return "Por favor, seleccione un idioma de traducción."

    text = read_file(file)
    if action == "Resumen":
        return summarize(text, summary_length)
    if action == "Clasificar":
        return classify_text(text)
    return translate(text, target_language)
# Build the Gradio interface.
# NOTE(review): the original indentation was lost, so the nesting of the
# layout below is reconstructed — confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## LexAIcon: Traducción Resumen y Clasificación de textos legales.")
    with gr.Row():
        with gr.Column():
            file = gr.File(label="Subir un archivo")
            action = gr.Radio(label="Seleccione una acción", choices=["Resumen", "Clasificar", "Traducir"])
            # Both option widgets start hidden; update_ui reveals the one
            # that matches the selected action.
            summary_length = gr.Radio(label="Seleccione la longitud del resumen", choices=["Corto", "Medio", "Largo"], visible=False)
            target_language = gr.Dropdown(label="Seleccionar idioma de traducción", choices=["en", "fr", "de"], visible=False)
        with gr.Column():
            output_text = gr.Textbox(label="Resultado", lines=20)

    def update_ui(action):
        """Return visibility updates for (summary_length, target_language).

        The summary-length selector is shown only for "Resumen" and the
        language selector only for "Traducir"; any other action hides both.
        """
        return (
            gr.update(visible=action == "Resumen"),
            gr.update(visible=action == "Traducir"),
        )

    action.change(update_ui, inputs=action, outputs=[summary_length, target_language])
    submit_button = gr.Button("Procesar")
    submit_button.click(process_file, inputs=[file, action, target_language, summary_length], outputs=output_text)

# Launch the Gradio application.
demo.launch(share=True)