p3nguknight committed
Commit 61931a6 · 1 Parent(s): 05954fa

Use preload option

Files changed (2):
  1. README.md +7 -2
  2. app.py +28 -26
README.md CHANGED
@@ -1,6 +1,7 @@
 ---
-title: Question Answering with ColPali & Pixtral
-emoji: 🖺
+title: ColPali & Pixtral
+short_description: Document Question Answering with ColPali & Pixtral
+emoji: 👀
 colorFrom: purple
 colorTo: blue
 sdk: gradio
@@ -8,4 +9,8 @@ sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: apache-2.0
+preload_from_hub:
+- vidore/colpaligemma-3b-pt-448-base config.json,model-00001-of-00002.safetensors,model-00002-of-00002.safetensors,model.safetensors.index.json,preprocessor_config.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json 12c59eb7e23bc4c26876f7be7c17760d5d3a1ffa
+- vidore/colpali-v1.2 adapter_config.json,adapter_model.safetensors,preprocessor_config.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json 2d54d5d3684a4f5ceeefbef95df0c94159fd6a45
+- mistralai/Pixtral-12B-2409 params.json,tekken.json,consolidated.safetensors ba6a661500dabeddb2e531e732f0ca39f82ee694
 ---
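For context: preload_from_hub tells the Space to download the listed files at build time into the standard Hugging Face cache (~/.cache/huggingface/hub), keyed by repo id and the pinned revision. A minimal sketch (not part of the commit) of resolving the resulting snapshot directory at runtime; because the files are already preloaded and the revision is a full commit hash, snapshot_download() is a cache hit that simply returns the local path:

    # Sketch only: resolve the snapshot directory populated by preload_from_hub.
    # Repo id, file list, and revision are taken from the README entry above.
    from huggingface_hub import snapshot_download

    pixtral_dir = snapshot_download(
        repo_id="mistralai/Pixtral-12B-2409",
        revision="ba6a661500dabeddb2e531e732f0ca39f82ee694",
        allow_patterns=["params.json", "tekken.json", "consolidated.safetensors"],
    )
    # e.g. ~/.cache/huggingface/hub/models--mistralai--Pixtral-12B-2409/
    #      snapshots/ba6a661500dabeddb2e531e732f0ca39f82ee694
    print(pixtral_dir)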
app.py CHANGED
@@ -1,13 +1,10 @@
 import base64
-import os
-from pathlib import Path
 from typing import cast
 
 import gradio as gr
 import spaces
 import torch
 from colpali_engine.models.paligemma.colpali import ColPali, ColPaliProcessor
-from huggingface_hub import snapshot_download
 from mistral_common.protocol.instruct.messages import (
     ImageURLChunk,
     TextChunk,
@@ -21,13 +18,20 @@ from pdf2image import convert_from_path
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 
-models_path = Path.home().joinpath("pixtral", "Pixtral")
-models_path.mkdir(parents=True, exist_ok=True)
+PIXTRAL_MODEL_ID = "mistralai--Pixtral-12B-2409"
+PIXTRAL_MODEL_SNAPSHOT = "ba6a661500dabeddb2e531e732f0ca39f82ee694"
+PIXTRAL_MODEL_PATH = (
+    f"~/.cache/huggingface/hub/models--{PIXTRAL_MODEL_ID}/snapshots/{PIXTRAL_MODEL_SNAPSHOT}"
+)
+
 
-snapshot_download(
-    repo_id="mistral-community/pixtral-12b-240910",
-    allow_patterns=["params.json", "consolidated.safetensors", "tekken.json"],
-    local_dir=models_path,
+COLPALI_GEMMA_MODEL_ID = "vidore--colpaligemma-3b-pt-448-base"
+COLPALI_GEMMA_MODEL_SNAPSHOT = "12c59eb7e23bc4c26876f7be7c17760d5d3a1ffa"
+COLPALI_GEMMA_MODEL_PATH = f"~/.cache/huggingface/hub/models--{COLPALI_GEMMA_MODEL_ID}/snapshots/{COLPALI_GEMMA_MODEL_SNAPSHOT}"
+COLPALI_MODEL_ID = "vidore--colpali-v1.2"
+COLPALI_MODEL_SNAPSHOT = "2d54d5d3684a4f5ceeefbef95df0c94159fd6a45"
+COLPALI_MODEL_PATH = (
+    f"~/.cache/huggingface/hub/models--{COLPALI_MODEL_ID}/snapshots/{COLPALI_MODEL_SNAPSHOT}"
 )
 
 
@@ -42,8 +46,8 @@ def model_inference(
     images,
     text,
 ):
-    tokenizer = MistralTokenizer.from_file(f"{models_path}/tekken.json")
-    model = Transformer.from_folder(models_path)
+    tokenizer = MistralTokenizer.from_file(f"{PIXTRAL_MODEL_PATH}/tekken.json")
+    model = Transformer.from_folder(PIXTRAL_MODEL_PATH)
 
     messages = [
         UserMessage(
@@ -73,19 +77,16 @@
 
 @spaces.GPU
 def search(query: str, ds, images, k):
-    model_name = "vidore/colpali-v1.2"
-    token = os.environ.get("HF_TOKEN")
     model = ColPali.from_pretrained(
-        "vidore/colpaligemma-3b-pt-448-base",
+        COLPALI_GEMMA_MODEL_PATH,
         torch_dtype=torch.bfloat16,
         device_map="cuda",
-        token=token,
     ).eval()
 
-    model.load_adapter(model_name)
+    model.load_adapter(COLPALI_MODEL_PATH)
     model = model.eval()
     processor = cast(
-        ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name, token=token)
+        ColPaliProcessor, ColPaliProcessor.from_pretrained(COLPALI_MODEL_PATH)
     )
 
     qs = []
@@ -99,7 +100,7 @@ def search(query: str, ds, images, k):
     top_k_indices = scores.argsort(axis=1)[0][-k:]
     results = []
     for idx in top_k_indices:
-        results.append((images[idx]))  # , f"Page {idx}"
+        results.append((images[idx], f"Page {idx}"))
     del model
     del processor
     torch.cuda.empty_cache()
@@ -123,19 +124,16 @@ def convert_files(files):
 
 @spaces.GPU
 def index_gpu(images, ds):
-    model_name = "vidore/colpali-v1.2"
-    token = os.environ.get("HF_TOKEN")
     model = ColPali.from_pretrained(
-        "vidore/colpaligemma-3b-pt-448-base",
+        COLPALI_GEMMA_MODEL_PATH,
         torch_dtype=torch.bfloat16,
         device_map="cuda",
-        token=token,
     ).eval()
 
-    model.load_adapter(model_name)
+    model.load_adapter(COLPALI_MODEL_PATH)
     model = model.eval()
     processor = cast(
-        ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name, token=token)
+        ColPaliProcessor, ColPaliProcessor.from_pretrained(COLPALI_MODEL_PATH)
    )
 
     # run inference - docs
@@ -173,9 +171,13 @@ css = """
 file = gr.File(file_types=["pdf"], file_count="multiple", label="pdfs")
 query = gr.Textbox(placeholder="Enter your query here", label="query")
 
-with gr.Blocks(title="Question Answering with ColPali & Pixtral", theme=gr.themes.Soft(), css=css) as demo:
+with gr.Blocks(
+    title="Document Question Answering with ColPali & Pixtral",
+    theme=gr.themes.Soft(),
+    css=css,
+) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# Question Answering with ColPali & Pixtral")
+        gr.Markdown("# Document Question Answering with ColPali & Pixtral")
 
         with gr.Row():
             gr.Examples(
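One caveat about the new constants (an observation on the code above, not part of the commit): Python does not expand "~" inside string literals, so ColPali.from_pretrained() and Transformer.from_folder() receive the tilde verbatim. A hedged sketch of normalizing a path first, reusing PIXTRAL_MODEL_PATH as defined in app.py:

    import os

    # Path as defined in app.py above (snapshot layout of the HF cache).
    PIXTRAL_MODEL_PATH = (
        "~/.cache/huggingface/hub/models--mistralai--Pixtral-12B-2409"
        "/snapshots/ba6a661500dabeddb2e531e732f0ca39f82ee694"
    )

    # expanduser() turns the literal "~" into the real home directory;
    # failing fast here surfaces a missing preload before model loading.
    pixtral_dir = os.path.expanduser(PIXTRAL_MODEL_PATH)
    if not os.path.isdir(pixtral_dir):
        raise FileNotFoundError(f"expected preloaded snapshot at {pixtral_dir}")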