p3nguknight committed
Commit 61931a6 · 1 Parent(s): 05954fa

Use preload option

Files changed (2):
  1. README.md +7 -2
  2. app.py +28 -26
README.md CHANGED
@@ -1,6 +1,7 @@
 ---
-title: Question Answering with ColPali & Pixtral
-emoji: 🖺
+title: ColPali & Pixtral
+short_description: Document Question Answering with ColPali & Pixtral
+emoji: 👀
 colorFrom: purple
 colorTo: blue
 sdk: gradio
@@ -8,4 +9,8 @@ sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: apache-2.0
+preload_from_hub:
+- vidore/colpaligemma-3b-pt-448-base config.json,model-00001-of-00002.safetensors,model-00002-of-00002.safetensors,model.safetensors.index.json,preprocessor_config.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json 12c59eb7e23bc4c26876f7be7c17760d5d3a1ffa
+- vidore/colpali-v1.2 adapter_config.json,adapter_model.safetensors,preprocessor_config.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json 2d54d5d3684a4f5ceeefbef95df0c94159fd6a45
+- mistralai/Pixtral-12B-2409 params.json,tekken.json,consolidated.safetensors ba6a661500dabeddb2e531e732f0ca39f82ee694
 ---
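For context: preload_from_hub tells the Space to download the listed files at build time into the standard Hugging Face cache (~/.cache/huggingface/hub), keyed by repo id and the pinned revision. A minimal sketch (not part of the commit) of resolving the resulting snapshot directory at runtime; because the files are already preloaded and the revision is a full commit hash, snapshot_download() is a cache hit that simply returns the local path:

    # Sketch only: resolve the snapshot directory populated by preload_from_hub.
    # Repo id, file list, and revision are taken from the README entry above.
    from huggingface_hub import snapshot_download

    pixtral_dir = snapshot_download(
        repo_id="mistralai/Pixtral-12B-2409",
        revision="ba6a661500dabeddb2e531e732f0ca39f82ee694",
        allow_patterns=["params.json", "tekken.json", "consolidated.safetensors"],
    )
    # e.g. ~/.cache/huggingface/hub/models--mistralai--Pixtral-12B-2409/
    #      snapshots/ba6a661500dabeddb2e531e732f0ca39f82ee694
    print(pixtral_dir)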
app.py CHANGED
@@ -1,13 +1,10 @@
 import base64
-import os
-from pathlib import Path
 from typing import cast
 
 import gradio as gr
 import spaces
 import torch
 from colpali_engine.models.paligemma.colpali import ColPali, ColPaliProcessor
-from huggingface_hub import snapshot_download
 from mistral_common.protocol.instruct.messages import (
     ImageURLChunk,
     TextChunk,
@@ -21,13 +18,20 @@ from pdf2image import convert_from_path
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 
-models_path = Path.home().joinpath("pixtral", "Pixtral")
-models_path.mkdir(parents=True, exist_ok=True)
+PIXTRAL_MODEL_ID = "mistralai--Pixtral-12B-2409"
+PIXTRAL_MODEL_SNAPSHOT = "ba6a661500dabeddb2e531e732f0ca39f82ee694"
+PIXTRAL_MODEL_PATH = (
+    f"~/.cache/huggingface/hub/models--{PIXTRAL_MODEL_ID}/snapshots/{PIXTRAL_MODEL_SNAPSHOT}"
+)
+
 
-snapshot_download(
-    repo_id="mistral-community/pixtral-12b-240910",
-    allow_patterns=["params.json", "consolidated.safetensors", "tekken.json"],
-    local_dir=models_path,
+COLPALI_GEMMA_MODEL_ID = "vidore--colpaligemma-3b-pt-448-base"
+COLPALI_GEMMA_MODEL_SNAPSHOT = "12c59eb7e23bc4c26876f7be7c17760d5d3a1ffa"
+COLPALI_GEMMA_MODEL_PATH = f"~/.cache/huggingface/hub/models--{COLPALI_GEMMA_MODEL_ID}/snapshots/{COLPALI_GEMMA_MODEL_SNAPSHOT}"
+COLPALI_MODEL_ID = "vidore--colpali-v1.2"
+COLPALI_MODEL_SNAPSHOT = "2d54d5d3684a4f5ceeefbef95df0c94159fd6a45"
+COLPALI_MODEL_PATH = (
+    f"~/.cache/huggingface/hub/models--{COLPALI_MODEL_ID}/snapshots/{COLPALI_MODEL_SNAPSHOT}"
 )
 
 
@@ -42,8 +46,8 @@ def model_inference(
     images,
     text,
 ):
-    tokenizer = MistralTokenizer.from_file(f"{models_path}/tekken.json")
-    model = Transformer.from_folder(models_path)
+    tokenizer = MistralTokenizer.from_file(f"{PIXTRAL_MODEL_PATH}/tekken.json")
+    model = Transformer.from_folder(PIXTRAL_MODEL_PATH)
 
     messages = [
         UserMessage(
@@ -73,19 +77,16 @@
 
 @spaces.GPU
 def search(query: str, ds, images, k):
-    model_name = "vidore/colpali-v1.2"
-    token = os.environ.get("HF_TOKEN")
     model = ColPali.from_pretrained(
-        "vidore/colpaligemma-3b-pt-448-base",
+        COLPALI_GEMMA_MODEL_PATH,
         torch_dtype=torch.bfloat16,
         device_map="cuda",
-        token=token,
     ).eval()
 
-    model.load_adapter(model_name)
+    model.load_adapter(COLPALI_MODEL_PATH)
     model = model.eval()
     processor = cast(
-        ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name, token=token)
+        ColPaliProcessor, ColPaliProcessor.from_pretrained(COLPALI_MODEL_PATH)
     )
 
     qs = []
@@ -99,7 +100,7 @@ def search(query: str, ds, images, k):
     top_k_indices = scores.argsort(axis=1)[0][-k:]
     results = []
     for idx in top_k_indices:
-        results.append((images[idx]))  # , f"Page {idx}"
+        results.append((images[idx], f"Page {idx}"))
     del model
     del processor
     torch.cuda.empty_cache()
@@ -123,19 +124,16 @@ def convert_files(files):
 
 @spaces.GPU
 def index_gpu(images, ds):
-    model_name = "vidore/colpali-v1.2"
-    token = os.environ.get("HF_TOKEN")
     model = ColPali.from_pretrained(
-        "vidore/colpaligemma-3b-pt-448-base",
+        COLPALI_GEMMA_MODEL_PATH,
         torch_dtype=torch.bfloat16,
         device_map="cuda",
-        token=token,
     ).eval()
 
-    model.load_adapter(model_name)
+    model.load_adapter(COLPALI_MODEL_PATH)
     model = model.eval()
     processor = cast(
-        ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name, token=token)
+        ColPaliProcessor, ColPaliProcessor.from_pretrained(COLPALI_MODEL_PATH)
    )
 
     # run inference - docs
@@ -173,9 +171,13 @@ css = """
 file = gr.File(file_types=["pdf"], file_count="multiple", label="pdfs")
 query = gr.Textbox(placeholder="Enter your query here", label="query")
 
-with gr.Blocks(title="Question Answering with ColPali & Pixtral", theme=gr.themes.Soft(), css=css) as demo:
+with gr.Blocks(
+    title="Document Question Answering with ColPali & Pixtral",
+    theme=gr.themes.Soft(),
+    css=css,
+) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# Question Answering with ColPali & Pixtral")
+        gr.Markdown("# Document Question Answering with ColPali & Pixtral")
 
         with gr.Row():
             gr.Examples(
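One caveat about the new constants (an observation on the code above, not part of the commit): Python does not expand "~" inside string literals, so ColPali.from_pretrained() and Transformer.from_folder() receive the tilde verbatim. A hedged sketch of normalizing a path first, reusing PIXTRAL_MODEL_PATH as defined in app.py:

    import os

    # Path as defined in app.py above (snapshot layout of the HF cache).
    PIXTRAL_MODEL_PATH = (
        "~/.cache/huggingface/hub/models--mistralai--Pixtral-12B-2409"
        "/snapshots/ba6a661500dabeddb2e531e732f0ca39f82ee694"
    )

    # expanduser() turns the literal "~" into the real home directory;
    # failing fast here surfaces a missing preload before model loading.
    pixtral_dir = os.path.expanduser(PIXTRAL_MODEL_PATH)
    if not os.path.isdir(pixtral_dir):
        raise FileNotFoundError(f"expected preloaded snapshot at {pixtral_dir}")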