Spaces: Running on Zero
Commit · 61931a6
1 Parent(s): 05954fa
Use preload option
README.md CHANGED
@@ -1,6 +1,7 @@
 ---
-title:
-
+title: ColPali & Pixtral
+short_description: Document Question Answering with ColPali & Pixtral
+emoji: ๐
 colorFrom: purple
 colorTo: blue
 sdk: gradio
@@ -8,4 +9,8 @@ sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: apache-2.0
+preload_from_hub:
+- vidore/colpaligemma-3b-pt-448-base config.json,model-00001-of-00002.safetensors,model-00002-of-00002.safetensors,model.safetensors.index.json,preprocessor_config.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json 12c59eb7e23bc4c26876f7be7c17760d5d3a1ffa
+- vidore/colpali-v1.2 adapter_config.json,adapter_model.safetensors,preprocessor_config.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json 2d54d5d3684a4f5ceeefbef95df0c94159fd6a45
+- mistralai/Pixtral-12B-2409 params.json,tekken.json,consolidated.safetensors ba6a661500dabeddb2e531e732f0ca39f82ee694
 ---
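Each preload_from_hub entry names a repo id, a comma-separated file list, and a revision; Spaces downloads those files into the image's Hugging Face cache at build time, so the app can load the models from local disk at runtime. A minimal startup check (an illustration, not part of this commit) could confirm the cache was populated:

from huggingface_hub import try_to_load_from_cache

# try_to_load_from_cache() returns a local file path when the file is cached
# for that exact revision, so a plain string result means the preload worked.
cached = try_to_load_from_cache(
    "vidore/colpali-v1.2",
    "adapter_config.json",
    revision="2d54d5d3684a4f5ceeefbef95df0c94159fd6a45",
)
assert isinstance(cached, str), "preload_from_hub did not populate the cache"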
app.py CHANGED
@@ -1,13 +1,10 @@
 import base64
-import os
-from pathlib import Path
 from typing import cast
 
 import gradio as gr
 import spaces
 import torch
 from colpali_engine.models.paligemma.colpali import ColPali, ColPaliProcessor
-from huggingface_hub import snapshot_download
 from mistral_common.protocol.instruct.messages import (
     ImageURLChunk,
     TextChunk,
@@ -21,13 +18,20 @@ from pdf2image import convert_from_path
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 
-
-
+PIXTRAL_MODEL_ID = "mistralai--Pixtral-12B-2409"
+PIXTRAL_MODEL_SNAPSHOT = "ba6a661500dabeddb2e531e732f0ca39f82ee694"
+PIXTRAL_MODEL_PATH = (
+    f"~/.cache/huggingface/hub/models--{PIXTRAL_MODEL_ID}/{PIXTRAL_MODEL_SNAPSHOT}"
+)
+
 
-
-
-
-
+COLPALI_GEMMA_MODEL_ID = "vidore--colpaligemma-3b-pt-448-base"
+COLPALI_GEMMA_MODEL_SNAPSHOT = "12c59eb7e23bc4c26876f7be7c17760d5d3a1ffa"
+COLPALI_GEMMA_MODEL_PATH = f"~/.cache/huggingface/hub/models--{COLPALI_GEMMA_MODEL_ID}/{COLPALI_GEMMA_MODEL_SNAPSHOT}"
+COLPALI_MODEL_ID = "vidore--colpali-v1.2"
+COLPALI_MODEL_SNAPSHOT = "2d54d5d3684a4f5ceeefbef95df0c94159fd6a45"
+COLPALI_MODEL_PATH = (
+    f"~/.cache/huggingface/hub/models--{COLPALI_MODEL_ID}/{COLPALI_MODEL_SNAPSHOT}"
 )
 
 
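The constants above hand-build paths into the Hub cache directory. A hedged alternative sketch (an assumption, not what the commit does): let huggingface_hub resolve the snapshot path itself, which spares hard-coding the cache layout and any "~" expansion concerns:

from huggingface_hub import snapshot_download

# local_files_only=True never touches the network, so this fails fast if the
# preload_from_hub step did not run; it returns the snapshot directory path.
PIXTRAL_MODEL_PATH = snapshot_download(
    repo_id="mistralai/Pixtral-12B-2409",
    revision="ba6a661500dabeddb2e531e732f0ca39f82ee694",
    local_files_only=True,
)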
@@ -42,8 +46,8 @@ def model_inference(
     images,
     text,
 ):
-    tokenizer = MistralTokenizer.from_file(f"{
-    model = Transformer.from_folder(
+    tokenizer = MistralTokenizer.from_file(f"{PIXTRAL_MODEL_PATH}/tekken.json")
+    model = Transformer.from_folder(PIXTRAL_MODEL_PATH)
 
     messages = [
         UserMessage(
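The two loaders map directly onto the files preloaded for mistralai/Pixtral-12B-2409: tekken.json feeds MistralTokenizer.from_file, while Transformer.from_folder reads params.json and consolidated.safetensors from the same snapshot directory. The rest of model_inference sits outside this hunk; for orientation, a minimal sketch of the usual mistral-inference chat flow these two objects feed into (based on the mistral-inference examples, so the app's exact code may differ):

from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_inference.generate import generate

# image_url stands in for a base64 data URL built from a retrieved page image.
request = ChatCompletionRequest(
    messages=[
        UserMessage(content=[ImageURLChunk(image_url=image_url), TextChunk(text=text)])
    ]
)
encoded = tokenizer.encode_chat_completion(request)
out_tokens, _ = generate(
    [encoded.tokens],
    model,
    images=[encoded.images],
    max_tokens=512,
    temperature=0.35,
    eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id,
)
answer = tokenizer.decode(out_tokens[0])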
@@ -73,19 +77,16 @@ def model_inference(
 
 @spaces.GPU
 def search(query: str, ds, images, k):
-    model_name = "vidore/colpali-v1.2"
-    token = os.environ.get("HF_TOKEN")
     model = ColPali.from_pretrained(
-
+        COLPALI_GEMMA_MODEL_PATH,
         torch_dtype=torch.bfloat16,
         device_map="cuda",
-        token=token,
     ).eval()
 
-    model.load_adapter(
+    model.load_adapter(COLPALI_MODEL_PATH)
     model = model.eval()
     processor = cast(
-        ColPaliProcessor, ColPaliProcessor.from_pretrained(
+        ColPaliProcessor, ColPaliProcessor.from_pretrained(COLPALI_MODEL_PATH)
     )
 
     qs = []
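ColPali v1.2 is published as an adapter over the vidore/colpaligemma-3b-pt-448-base weights, which is why the code first loads the base checkpoint and then calls load_adapter, while the processor comes from the adapter repo. The same sequence is repeated in index_gpu below, so a small helper (hypothetical, not in the commit) would remove the duplication:

def load_colpali() -> tuple[ColPali, ColPaliProcessor]:
    # Base weights come from the preloaded colpaligemma snapshot; the v1.2
    # retrieval adapter and the processor come from the colpali snapshot.
    model = ColPali.from_pretrained(
        COLPALI_GEMMA_MODEL_PATH,
        torch_dtype=torch.bfloat16,
        device_map="cuda",
    ).eval()
    model.load_adapter(COLPALI_MODEL_PATH)
    processor = cast(
        ColPaliProcessor, ColPaliProcessor.from_pretrained(COLPALI_MODEL_PATH)
    )
    return model, processor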
@@ -99,7 +100,7 @@ def search(query: str, ds, images, k):
     top_k_indices = scores.argsort(axis=1)[0][-k:]
     results = []
     for idx in top_k_indices:
-        results.append(images[idx])
+        results.append((images[idx], f"Page {idx}"))
     del model
     del processor
     torch.cuda.empty_cache()
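Returning (image, caption) pairs matches the value format gr.Gallery accepts, so each retrieved page is displayed with its position as a label. One detail worth noting: argsort sorts ascending, so the [-k:] slice lists the top-k pages worst-to-best; reversing it (a suggested tweak, not in the commit) would put the best match first:

top_k_indices = scores.argsort(axis=1)[0][-k:][::-1]  # best-scoring page first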
@@ -123,19 +124,16 @@ def convert_files(files):
 
 @spaces.GPU
 def index_gpu(images, ds):
-    model_name = "vidore/colpali-v1.2"
-    token = os.environ.get("HF_TOKEN")
     model = ColPali.from_pretrained(
-
+        COLPALI_GEMMA_MODEL_PATH,
         torch_dtype=torch.bfloat16,
         device_map="cuda",
-        token=token,
     ).eval()
 
-    model.load_adapter(
+    model.load_adapter(COLPALI_MODEL_PATH)
     model = model.eval()
     processor = cast(
-        ColPaliProcessor, ColPaliProcessor.from_pretrained(
+        ColPaliProcessor, ColPaliProcessor.from_pretrained(COLPALI_MODEL_PATH)
     )
 
     # run inference - docs
@@ -173,9 +171,13 @@ css = """
 file = gr.File(file_types=["pdf"], file_count="multiple", label="pdfs")
 query = gr.Textbox(placeholder="Enter your query here", label="query")
 
-with gr.Blocks(
+with gr.Blocks(
+    title="Document Question Answering with ColPali & Pixtral",
+    theme=gr.themes.Soft(),
+    css=css,
+) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# Question Answering with ColPali & Pixtral")
+        gr.Markdown("# Document Question Answering with ColPali & Pixtral")
 
     with gr.Row():
         gr.Examples(
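In the rebuilt gr.Blocks call, title= sets the browser-tab title, theme=gr.themes.Soft() applies a stock Gradio theme, and css=css injects the stylesheet defined earlier (the #col-container rule the Column relies on). The launch call sits outside this hunk; a typical closing (assumed, not shown in the diff) would be:

if __name__ == "__main__":
    demo.queue(max_size=10).launch(debug=True)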