"""Gradio demo: extract text from an uploaded image and search it for a keyword."""

import gradio as gr
# NOTE(review): `RAGMultiModalModel` appears to be shipped by the `byaldi`
# package rather than `transformers` -- confirm that this import resolves.
from transformers import RAGMultiModalModel  # Importing the ColPali model

# Initialize the ColPali model once at import time so every request reuses it.
model = RAGMultiModalModel.from_pretrained("vidore/colpali-v1.2")


def find_matching_lines(text: str, keyword) -> list:
    """Return the lines of *text* that contain *keyword*, ignoring case.

    Args:
        text: The text to scan; it is split on line boundaries.
        keyword: The substring to look for. ``None`` or a blank/whitespace-only
            value yields no matches (an empty string is a substring of every
            line, so it would otherwise match everything).

    Returns:
        The matching lines, in their original order and original casing.
    """
    if not keyword or not keyword.strip():
        return []
    needle = keyword.lower()  # Lower-case once instead of once per line.
    return [line for line in text.splitlines() if needle in line.lower()]


def extract_and_search(image, keyword):
    """Extract text from *image* and find the lines that mention *keyword*.

    Args:
        image: The uploaded image (the interface below requests a PIL image).
            ``None`` is treated as "nothing uploaded".
        keyword: The search term; matching is case-insensitive.

    Returns:
        A ``(extracted_text, matching_lines)`` tuple where ``matching_lines``
        is a list of the lines of ``extracted_text`` containing ``keyword``.
        Both are empty when no image was supplied.
    """
    if image is None:
        return "", []

    # Use the model to extract text from the image
    inputs = {"images": [image]}
    # NOTE(review): the result is assumed to be a plain string (it is split
    # into lines below) -- confirm once the real prediction call is wired in.
    extracted_text = model.generate(**inputs)  # Replace with actual prediction method

    # Perform keyword search
    return extracted_text, find_matching_lines(extracted_text, keyword)


def _search_for_display(image, keyword):
    """Adapt ``extract_and_search`` for the UI.

    The "Matching Lines" output is a textbox, so the list of lines is joined
    into one newline-separated string instead of being shown as a list repr.
    """
    extracted_text, matching_lines = extract_and_search(image, keyword)
    return extracted_text, "\n".join(matching_lines)


# Create Gradio interface
interface = gr.Interface(
    fn=_search_for_display,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter Keyword"),
    ],
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Matching Lines"),
    ],
    title="ColPali OCR with Keyword Search",
    description="Upload an image and enter a keyword to search within the extracted text.",
)

# Launch the app only when run as a script, so importing this module (e.g. to
# reuse the helpers) does not start a web server.
if __name__ == "__main__":
    interface.launch(share=True)