dhairyashah committed · Commit 2c8b539 · Parent(s): a0e2927

update

Files changed:
- app.py (+101, -4)
- requirements.txt (+13, -0)

app.py
CHANGED
@@ -1,7 +1,104 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import tqdm
+from PIL import Image
+import torch
+import fitz
 import gradio as gr
-demo.launch()
+import spaces
+import os
+from transformers import AutoModel
+from transformers import AutoTokenizer
+import numpy as np
+
+cache_dir = 'pdf_cache'
+os.makedirs(cache_dir, exist_ok=True)
+
+device = 'cuda'
+
+print("Embedding model loading...")
+model_path = 'RhapsodyAI/minicpm-visual-embedding-v0'
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
+model.eval()
+model.to(device)
+print("Embedding model loaded successfully!")
+
+print("Generation model loading...")
+gen_model_path = 'openbmb/MiniCPM-V-2_6'
+gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, trust_remote_code=True)
+gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
+gen_model.eval()
+gen_model.to(device)
+print("Generation model loaded successfully!")
+
+@spaces.GPU(duration=100)
+def process_pdf(pdf_file, max_pages, progress=gr.Progress()):
+    # pdf_file arrives as raw bytes (gr.File with type="binary"); when a
+    # stream is passed, PyMuPDF treats the first argument as the filetype.
+    doc = fitz.open("pdf", pdf_file)
+    # gr.Number returns a float, so cast before handing it to range().
+    num_pages = min(int(max_pages), len(doc))
+
+    images = []
+    for page_num in progress.tqdm(range(num_pages)):
+        page = doc[page_num]
+        pix = page.get_pixmap(dpi=200)
+        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+        images.append(image)
+
+    return images
+
+@spaces.GPU(duration=50)
+def answer_question(images, question):
+    global gen_model, gen_tokenizer
+    # A Gallery used as an input may deliver (image, caption) pairs, and the
+    # image may be a file path rather than a PIL.Image; normalize both cases.
+    images_ = []
+    for item in images:
+        img = item[0] if isinstance(item, (tuple, list)) else item
+        if isinstance(img, str):
+            img = Image.open(img)
+        images_.append(img.convert('RGB'))
+    msgs = [{'role': 'user', 'content': [question, *images_]}]
+    answer = gen_model.chat(
+        image=None,
+        msgs=msgs,
+        tokenizer=gen_tokenizer
+    )
+    print(answer)
+    return answer
+
+with gr.Blocks() as app:
+    gr.Markdown("# PDF Question Answering with Vision Language Model")
+
+    gr.Markdown("""
+    This application uses a Vision Language Model to answer questions about PDF documents.
+
+    1. Upload a PDF file
+    2. Set the maximum number of pages to process
+    3. Click "Process PDF" to extract the pages
+    4. Enter your question about the PDF content
+    5. Click "Answer Question" to get the model's response
+    """)
+
+    with gr.Row():
+        file_input = gr.File(type="binary", label="Upload PDF")
+        max_pages = gr.Number(value=10, minimum=1, maximum=50, step=1, label="Maximum number of pages to process")
+        process_button = gr.Button("Process PDF")
+
+    with gr.Row():
+        query_input = gr.Text(label="Your Question")
+        answer_button = gr.Button("Answer Question")
+
+    images_output = gr.Gallery(label="Processed PDF Pages", visible=False)
+    gen_model_response = gr.Textbox(label="Model's Answer")
+
+    def process_and_show(pdf_file, max_pages):
+        images = process_pdf(pdf_file, max_pages)
+        # gr.Gallery.update() was removed in Gradio 4.x; gr.update() works in both 3.x and 4.x.
+        return gr.update(value=images, visible=True)
+
+    process_button.click(
+        process_and_show,
+        inputs=[file_input, max_pages],
+        outputs=images_output
+    )
+
+    answer_button.click(
+        answer_question,
+        inputs=[images_output, query_input],
+        outputs=gen_model_response
+    )
+
+app.launch()
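Note: app.py creates cache_dir = 'pdf_cache' but never writes to it. Below is a minimal sketch of how rendered pages could be persisted there so a re-uploaded PDF skips re-rendering; the helper name cache_pages and the hashing scheme are assumptions, not part of this commit:

    import hashlib
    import os

    def cache_pages(pdf_bytes, images, cache_dir='pdf_cache'):
        # Key the cache on the raw document bytes so identical uploads
        # map to the same directory.
        key = hashlib.sha256(pdf_bytes).hexdigest()[:16]
        doc_dir = os.path.join(cache_dir, key)
        os.makedirs(doc_dir, exist_ok=True)
        for i, img in enumerate(images):  # img: PIL.Image from process_pdf
            img.save(os.path.join(doc_dir, f'page_{i:03d}.png'))
        return doc_dir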
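Likewise, the embedding model (RhapsodyAI/minicpm-visual-embedding-v0) is loaded but never queried; presumably the intent is to retrieve the most relevant pages before generation. How the page and query embeddings are produced is model-specific and not shown here, but given such embeddings, retrieval reduces to cosine-similarity ranking. The rank_pages helper below is hypothetical:

    import torch
    import torch.nn.functional as F

    def rank_pages(page_embs, query_emb, top_k=3):
        # page_embs: (num_pages, dim) page embeddings; query_emb: (dim,).
        page_embs = F.normalize(page_embs, dim=-1)
        query_emb = F.normalize(query_emb, dim=-1)
        scores = page_embs @ query_emb        # cosine similarity per page
        k = min(top_k, page_embs.shape[0])
        return torch.topk(scores, k=k)        # (scores, page indices)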
requirements.txt
ADDED
@@ -0,0 +1,13 @@
+PyMuPDF
+tqdm
+gradio
+Pillow==10.1.0
+sentencepiece==0.1.99
+numpy==1.26.0
+transformers==4.40.2
+timm
+
+torch==2.1.2
+torchvision==0.16.2
+
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
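The pinned flash-attn wheel is built for CPython 3.10, torch 2.1, and CUDA 12.3 (the cp310 / torch2.1 / cu123 tags in its filename), so it only installs and imports cleanly when the Space's runtime matches those tags. A small sanity check, offered as a sketch rather than part of the commit:

    import platform
    import torch

    # The wheel filename encodes cp310 / torch2.1 / cu123; confirm the runtime agrees.
    assert platform.python_version_tuple()[:2] == ('3', '10'), 'wheel targets CPython 3.10'
    assert torch.__version__.startswith('2.1'), 'wheel targets torch 2.1'
    print('torch CUDA build:', torch.version.cuda, '| CUDA available:', torch.cuda.is_available())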