Spaces:
Sleeping
Sleeping
Rdurango92
committed on
Commit
•
1858a28
1
Parent(s):
5f3c4a8
Upload 2 files
Browse files- app.py +56 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## App: Escrito-Claro
|
2 |
+
## Por: Ruben Durango
|
3 |
+
|
4 |
+
# Importaciones
|
5 |
+
import gradio as gr
|
6 |
+
from PIL import Image
|
7 |
+
from transformers import AutoProcessor, AutoModelForCausalLM
|
8 |
+
import torch
|
9 |
+
import numpy as np
|
10 |
+
|
11 |
+
# Cargar modelo y procesador
|
12 |
+
# Load the Florence-2 large checkpoint once at import time so every request reuses it.
# NOTE(review): trust_remote_code=True lets transformers run model code shipped in the
# Hub repository — confirm that this is acceptable for the deployment.
ocr_model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
|
13 |
+
# Matching processor for the same checkpoint; it is used below to build the model inputs,
# decode the generated ids and post-process the generated text.
ocr_processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
|
14 |
+
|
15 |
+
# Función para ejecutar el OCR
|
16 |
+
def run_ocr(task_prompt, image):
    """Run a Florence-2 task on an image and return the parsed answer.

    Args:
        task_prompt: Florence-2 task token used both as the text prompt and
            as the post-processing task, e.g. ``"<OCR>"``.
        image: Input image, either a PIL image or a NumPy array.

    Returns:
        The entry of the post-processed answer stored under ``task_prompt``.
    """
    # Make sure the image is a PIL object
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # Normalize grayscale / alpha images to three channels before handing
    # them to the processor (assumes the processor expects RGB input, as in
    # the Florence-2 usage examples — TODO confirm).
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = ocr_processor(text=task_prompt, images=image, return_tensors="pt")
    generated_ids = ocr_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
    )
    # Special tokens are kept because the post-processing step parses them.
    generated_text = ocr_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = ocr_processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )
    # Index by the requested task instead of a hard-coded "<OCR>" key so the
    # function also works for other task prompts.
    return parsed_answer[task_prompt]
|
32 |
+
|
33 |
+
# Función para ser llamada en Gradio
|
34 |
+
def process_image(image):
    """Gradio callback: extract the text contained in the submitted image.

    Args:
        image: Value of the Gradio image input, or ``None`` when the button
            is clicked before any image has been uploaded.

    Returns:
        The result of ``run_ocr`` for the ``"<OCR>"`` task, or an empty
        string when no image was provided.
    """
    # No image uploaded: return an empty result instead of failing inside
    # run_ocr when it accesses the image size.
    if image is None:
        return ""
    return run_ocr("<OCR>", image)
|
37 |
+
|
38 |
+
# Interfaz principal
|
39 |
+
# Build the Gradio UI: header, description, a process button, and an
# input-image / output-text row wired to process_image.
with gr.Blocks(theme='bethecloud/storj_theme') as demo:
    gr.Markdown("<center><h1>🤓 Escrito-Claro</h1><br><h3>OCR con Florence-2</h3></center>")
    # Description text; a missing space after "Microsoft." was fixed here.
    gr.Markdown("Con **EscritoClaro**, convierte tus notas manuscritas en texto digital de forma rápida y precisa, utilizando el modelo Florence-2 de Microsoft. Esta aplicación extrae el texto de tus documentos con buena precisión. Simplemente sube una imagen y deja que EscritoClaro haga el resto.")

    # Process button
    submit_btn = gr.Button(value="Procesar ⚙️")

    with gr.Row():
        # Inputs
        with gr.Column():
            image = gr.Image(label="Imagen de entrada")

        # Outputs
        with gr.Column():
            ocr_text = gr.Textbox(label="Texto OCR", placeholder="Texto extraído")

    # Run the OCR on the uploaded image when the button is clicked.
    submit_btn.click(fn=process_image, inputs=image, outputs=ocr_text)

demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
Pillow
|
3 |
+
torch
|
4 |
+
numpy
|
5 |
+
transformers
|