"""Gradio demo that answers a question about pasted text or an uploaded file.

Plain text (typed in or uploaded as .txt) is answered by an extractive
question-answering pipeline; images and PDFs are answered by a
document-question-answering pipeline, with PDFs rasterised page by page first.
"""

import os
import shutil
import tempfile

import gradio as gr
import pytesseract
import torch
from pdf2image import convert_from_path
from PIL import Image
from transformers import pipeline

model_name = "deepset/roberta-base-squad2"
text_qna = pipeline("question-answering", model=model_name, tokenizer=model_name)
vision_qna = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
# Vision QnA requires: PyTesseract for OCR. Tesseract executable needs to be installed separately.
# sudo apt install tesseract-ocr (https://tesseract-ocr.github.io/tessdoc/Installation.html)


def _upload_path(file):
    """Return the filesystem path of an uploaded file.

    Uses the object's ``name`` attribute when it has one and otherwise treats
    the value itself as the path, so a plain path string is accepted as well.
    """
    return getattr(file, "name", file)


def _discard_temp_dir(path):
    """Delete the temporary directory that contains *path*, if any.

    *path* is expected to be a file returned by ``save_image`` or ``save_pdf``,
    i.e. a file directly inside a directory created by ``tempfile.mkdtemp()``.
    A falsy *path* is ignored.
    """
    if not path:
        return
    temp_dir = os.path.dirname(path)
    # Safety net: only delete directories that sit directly inside the system
    # temp directory, so an unexpected path can never wipe unrelated files.
    parent = os.path.abspath(os.path.dirname(temp_dir))
    if parent == os.path.abspath(tempfile.gettempdir()):
        shutil.rmtree(temp_dir, ignore_errors=True)


def load_file(file_input, encoding='utf-8'):
    """Return the text content of the file at path *file_input*.

    The file is decoded with *encoding*; if that raises UnicodeDecodeError it
    is read again as 'latin1'.

    Raises:
        FileNotFoundError: if *file_input* does not exist.
    """
    if not os.path.exists(file_input):
        raise FileNotFoundError("The file does not exist.")
    try:
        with open(file_input, 'r', encoding=encoding) as handle:
            return handle.read()
    except UnicodeDecodeError:
        # The first handle is already closed here; retry with 'latin1'.
        with open(file_input, 'r', encoding='latin1') as handle:
            return handle.read()


def save_image(file):
    """Copy an uploaded image into a fresh temporary directory.

    Returns the path of the copy. Any error raised while copying is propagated
    to the caller (after the temporary directory is removed) instead of being
    swallowed, so callers never receive ``None`` in place of a path.
    """
    source = _upload_path(file)
    temp_dir = tempfile.mkdtemp()
    file_path = os.path.join(temp_dir, os.path.basename(source))
    try:
        # Copy the file from the temporary Gradio directory to our temporary directory.
        shutil.copyfile(source, file_path)
    except Exception:
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return file_path


def save_pdf(file):
    """Copy an uploaded PDF to a temporary directory and render it to PNG pages.

    Returns the list of page image paths (``page_0.png``, ``page_1.png``, ...),
    all located in the same temporary directory as the PDF copy. If copying or
    conversion fails the temporary directory is removed and the error is
    re-raised.
    """
    source = _upload_path(file)
    temp_dir = tempfile.mkdtemp()
    try:
        pdf_path = os.path.join(temp_dir, os.path.basename(source))
        # Copy the file from the temporary Gradio directory to our temporary directory.
        shutil.copyfile(source, pdf_path)

        # Convert PDF to images, one PNG per page.
        image_paths = []
        for i, img in enumerate(convert_from_path(pdf_path)):
            image_path = os.path.join(temp_dir, f'page_{i}.png')
            img.save(image_path, 'PNG')
            image_paths.append(image_path)
    except Exception:
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    print(image_paths)
    return image_paths


def qna_text_content(content, question):
    """Run the text question-answering pipeline and return its raw result."""
    return text_qna(question=question, context=content)


def qna_image_content(content, question):
    """Run the document question-answering pipeline on one image.

    *content* is passed to the pipeline as the image argument; the raw
    pipeline result is returned.
    """
    result = vision_qna(content, question)
    print(f"image question: {question}")
    return result


def qna_pdf_content(image_paths, question):
    """Ask *question* of every page image and join the per-page answers.

    Returns the first answer of each page joined with " \\n", or an error
    message string if processing any page raises.
    """
    answers = []
    try:
        for image_path in image_paths:
            result = vision_qna(image=image_path, question=question)
            print(result[0]['answer'], result[0]['score'])
            answers.append(result[0]['answer'])
        return " \n".join(answers)
    except Exception as e:
        return f"An error occurred during processing: {e}"


def answer_the_question_for_doc(text_input, file_input, question):
    """Answer *question* from the uploaded file, or from *text_input* if no file.

    The parameter order must match the ``inputs`` list given to
    ``button.click`` below. Always returns a string: the answer, a prompt for
    missing input, or an error message. Errors are reported to the user rather
    than terminating the process.
    """
    if file_input is None:
        if not text_input or not question:
            return "Please provide both content and a question."
        return qna_text_content(text_input, question)["answer"]

    file_path = _upload_path(file_input)
    print(f"File type: {type(file_input)}")
    print(f"File name: {file_path}")
    file_extension = os.path.splitext(file_path)[1].lower().lstrip('.')

    if file_extension in ('txt',):
        try:
            content = load_file(file_path)
            if not content or not question:
                return "Please provide both content and a question."
            return qna_text_content(content, question)["answer"]
        except Exception as e:
            # Report the failure in the UI; exiting here would take the whole
            # app down for a single bad upload.
            print(e)
            return f"An error occurred during text processing: {e}"

    if file_extension in ('png', 'jpeg', 'jpg'):
        if not question:
            return "Please provide a question."
        img_file_path = None
        try:
            img_file_path = save_image(file_input)
            result = qna_image_content(img_file_path, question)
            print(result)
            return result[0]["answer"]
        except Exception as e:
            return f"An error occurred during vision processing: {e}"
        finally:
            _discard_temp_dir(img_file_path)

    if file_extension in ('pdf',):
        if not question:
            return "Please provide a question."
        image_paths = []
        try:
            image_paths = save_pdf(file_input)
            result = qna_pdf_content(image_paths, question)
            print(result)
            return result
        except Exception as e:
            return f"An error occurred during vision processing: {e}"
        finally:
            # All page images share one temporary directory.
            _discard_temp_dir(image_paths[0] if image_paths else None)

    return "Unsupported file type. Please upload a .txt, .pdf, .png, .jpg, or .jpeg file."


gr.close_all()

with gr.Blocks() as demo:
    gr.Markdown("# QnA System")
    gr.Markdown("This App answers a question based on text content or uploaded file (txt, png, jpeg, pdf).")

    # NOTE(review): the two content inputs are assumed to share the row.
    with gr.Row():
        text_input = gr.Textbox(label="Text Input", placeholder="Enter text content here...")
        # Extensions carry a leading dot; '.jpg' is listed because the handler accepts it.
        file_input = gr.File(label="File Upload", file_types=['.txt', '.png', '.jpeg', '.jpg', '.pdf'])

    question = gr.Textbox(label="Question", placeholder="Enter your question here...")
    output = gr.Textbox(label="Answer", placeholder="The answer will appear here...")

    # Text and file are alternative sources: hide one while the other has a value.
    text_input.change(lambda x: gr.update(visible=not x), inputs=text_input, outputs=file_input)
    file_input.change(lambda x: gr.update(visible=not x), inputs=file_input, outputs=text_input)

    button = gr.Button("Get Answer")
    button.click(answer_the_question_for_doc, inputs=[text_input, file_input, question], outputs=output)

demo.launch()

# print(qna_image_content("https://gradientflow.com/wp-content/uploads/2023/10/newsletter87-RAG-simple.png", "What is the step prior to embedding?"))