Spaces:

fredcaixeta
/

ocr_extractor

Sleeping

App Files Community

fredcaixeta committed on Oct 9

Commit

d043f81

1 Parent(s): 8f90e63

go

Browse files

Files changed (5) hide show

.python-version +1 -0
agent.py +1 -1
app.py +34 -20
main.py +6 -0
pyproject.toml +7 -0

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.12

agent.py CHANGED Viewed

@@ -33,7 +33,7 @@ model = GroqModel(
 )
-def start_convo(user_input, messages_history):
     search_agent = Agent(
         model,
         system_prompt=DEFAULT_SYSTEM_PROMPT

 )
+def 'start_convo(user_input, messages_history):
     search_agent = Agent(
         model,
         system_prompt=DEFAULT_SYSTEM_PROMPT

app.py CHANGED Viewed

@@ -2,45 +2,59 @@ import gradio as gr
 from ocr_script import ocr_tesseract_only
 import uuid
 from agent import start_convo
-# def ocr_tesseract_only(img):  # img pode ser PIL/np/str conforme 'type'
-#     # chamar seu OCR aqui e retornar texto
-#     return "texto extraído"
-def respond(message, history, user_id):
-    return "mock response"
-    # pydantic_history = convert_to_pydantic_history(history)
-    # tools_instance = SearchingTools()
-    # deps = SearchAgentDeps(tools=tools_instance)
-    # agent_config = start_convo(user_input=str(message), messages_history=pydantic_history)
-    # result = agent_config.run_sync(
-    #     str(message),
-    #     deps=deps,
-    #     usage_limits=usage_limits,
-    #     message_history=pydantic_history,
-    # )
     return result.output
 with gr.Blocks() as demo:
     with gr.Tabs():
         with gr.Tab("Text OCR Tesseract only"):
             with gr.Row():
                 img_in = gr.Image(label="Imagem (png, jpg, jpeg)", type="pil")
                 txt_out = gr.Textbox(label="Texto OCR", lines=12)
-            img_in.change(fn=ocr_tesseract_only, inputs=img_in, outputs=txt_out)
         with gr.Tab("Chat"):
             user_id = gr.State(str(uuid.uuid4()))
             gr.ChatInterface(
                 fn=respond,
-                additional_inputs=[user_id],
                 type="messages",
                 title="Chat with AI Agent with Access to Extracted Data",
                 description="Envie perguntas sobre os dados extraídos.",
                 save_history=True,
-                examples =[
                     ["What is the name of the invoice document available?"],
                     ["Which document has the ID aZwfUT2Zs?"]
-                    ],
                 cache_examples=True,
             )
 demo.launch()

 from ocr_script import ocr_tesseract_only
 import uuid
 from agent import start_convo
+import os
+from dotenv import load_dotenv
+from pydantic_ai import Agent, RunContext
+from pydantic_ai.usage import UsageLimits
+from pydantic_ai.models.groq import GroqModel
+load_dotenv()
+api_key = os.getenv("GROQ_API_KEY")
+# Modelo Groq via Pydantic AI
+model = GroqModel(model_name="openai/gpt-oss-120b")
+def respond(message, history, user_id, ocr_text):
+    # Garantir que o system prompt seja o texto OCR atual
+    system_prompt_text = ocr_text or "Nenhum texto OCR disponível."
+    search_agent = Agent(model, system_prompt=system_prompt_text)
+    # Se usar seu start_convo, injete o mesmo prompt no agente interno, ou remova se for redundante
+    # agent_config = start_convo(user_input=str(message), messages_history=history, system_prompt=system_prompt_text)
+    result = search_agent.run_sync(str(message))
     return result.output
 with gr.Blocks() as demo:
     with gr.Tabs():
         with gr.Tab("Text OCR Tesseract only"):
+            ocr_state = gr.State("")  # Armazena o texto OCR para uso no chat
             with gr.Row():
                 img_in = gr.Image(label="Imagem (png, jpg, jpeg)", type="pil")
                 txt_out = gr.Textbox(label="Texto OCR", lines=12)
+            def run_ocr(img):
+                text = ocr_tesseract_only(img)
+                return text, text
+            img_in.change(fn=run_ocr, inputs=img_in, outputs=[txt_out, ocr_state])
         with gr.Tab("Chat"):
             user_id = gr.State(str(uuid.uuid4()))
             gr.ChatInterface(
                 fn=respond,
+                additional_inputs=[user_id, ocr_state],  # injeta o texto OCR no fn
                 type="messages",
                 title="Chat with AI Agent with Access to Extracted Data",
                 description="Envie perguntas sobre os dados extraídos.",
                 save_history=True,
+                examples=[
                     ["What is the name of the invoice document available?"],
                     ["Which document has the ID aZwfUT2Zs?"]
+                ],
                 cache_examples=True,
             )
 demo.launch()

main.py ADDED Viewed

	@@ -0,0 +1,6 @@

+def main():
+    print("Hello from extractor!")
+if __name__ == "__main__":
+    main()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,7 @@

+[project]
+name = "extractor"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = []