import gradio as gr
from setfit import SetFitModel


def cortar_en_bloques(texto: str, longitud_bloque: int) -> list:
    """Split *texto* into consecutive blocks of at most *longitud_bloque* words.

    Words are whatever ``str.split()`` yields, so any run of whitespace
    (spaces, tabs, newlines) is a separator and is normalised to a single
    space inside each returned block.

    Args:
        texto: Text to split.
        longitud_bloque: Maximum number of words per block; must be >= 1.

    Returns:
        The blocks in their original order. Every block holds exactly
        ``longitud_bloque`` words except possibly the last one, which holds
        the remainder. Text without any words yields an empty list.

    Raises:
        ValueError: If ``longitud_bloque`` is smaller than 1.
    """
    if longitud_bloque < 1:
        raise ValueError("longitud_bloque must be a positive integer")

    palabras = texto.split()
    return [
        " ".join(palabras[inicio:inicio + longitud_bloque])
        for inicio in range(0, len(palabras), longitud_bloque)
    ]


# Load the pretrained SetFit classifier once, at module import time.
model = SetFitModel.from_pretrained("desarrolloasesoreslocales/SetFitPruebaRecorte")


# Chunk size (in words) and edge fraction used by each trimming pass.
_COARSE_CHUNK_WORDS = 150
_COARSE_EDGE_FRACTION = 0.25
_FINE_CHUNK_WORDS = 80
_FINE_EDGE_FRACTION = 0.1


def _indices_conservados(chunks, margen_inicio, margen_fin):
    """Return the positions of the chunks that survive one trimming pass.

    Interior chunks are always kept. A chunk lying within the first
    ``margen_inicio`` or the last ``margen_fin`` positions is kept only when
    the classifier returns label 1 for it. Progress is printed to stdout.
    """
    total = len(chunks)
    conservados = []
    for i, chunk in enumerate(chunks):
        print('Recortando -', round((i / total) * 100), '%')
        # Decide by position, not by text: a slice such as chunks[-0:] is the
        # whole list, and a text comparison would also flag interior chunks
        # that merely repeat the wording of an edge chunk.
        en_borde = i < margen_inicio or i >= total - margen_fin
        if not en_borde or model.predict([chunk]).item() == 1:
            conservados.append(i)
    print(100, '%')
    return conservados


def predict(payload: str) -> str:
    """Trim the leading and trailing parts of a text using the classifier.

    The text is processed in two passes:

    1. Coarse pass: split into 150-word chunks; the first and last 25% of
       the chunks are classified and kept only if the model returns 1.
       The chunk immediately preceding the first kept chunk is added back.
    2. Fine pass: the result is re-split into 80-word chunks; the first and
       last 10% are classified and filtered the same way.

    Chunks outside the edge margins are never sent to the model. When a
    margin rounds down to zero, no chunk is treated as an edge chunk.

    Args:
        payload: Raw text to trim.

    Returns:
        The kept chunks joined by single spaces; an empty string when the
        input is empty or nothing is kept.
    """
    if not payload:
        return ""

    # Coarse pass.
    chunks = cortar_en_bloques(payload, _COARSE_CHUNK_WORDS)
    margin = int(len(chunks) * _COARSE_EDGE_FRACTION)
    conservados = _indices_conservados(chunks, margin, margin)

    partes = [chunks[i] for i in conservados]
    if conservados and conservados[0] > 0:
        # Add back the chunk right before the first kept one; joining with a
        # space keeps its last word from fusing with the following word.
        partes.insert(0, chunks[conservados[0] - 1])
    recorte_general = " ".join(partes)

    # Fine pass over the already-trimmed text.
    chunks2 = cortar_en_bloques(recorte_general, _FINE_CHUNK_WORDS)
    margin_s = int(len(chunks2) * _FINE_EDGE_FRACTION)
    margin_e = int(len(chunks2) * _FINE_EDGE_FRACTION)
    conservados2 = _indices_conservados(chunks2, margin_s, margin_e)

    return " ".join(chunks2[i] for i in conservados2)


# Gradio UI: one text box in, one text box out; `live=False` is passed so the
# interface is not created in live mode.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
    live=False,
    title="Recortador de Texto",
)

iface.launch()