Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
35a12a7
1
Parent(s):
1d82c63
add og prompt
Browse files
app.py
CHANGED
@@ -37,11 +37,9 @@ class GeneralRetrievalQuery(BaseModel):
|
|
37 |
visual_element_explanation: str
|
38 |
|
39 |
|
40 |
-
def get_retrieval_prompt(prompt_name: str) -> Tuple[str,
|
41 |
-
if prompt_name
|
42 |
-
|
43 |
-
|
44 |
-
prompt = """You are an AI assistant specialized in document retrieval tasks. Given an image of a document page, your task is to generate retrieval queries that someone might use to find this document in a large corpus.
|
45 |
|
46 |
Please generate 3 different types of retrieval queries:
|
47 |
|
@@ -73,15 +71,52 @@ Here is the document image to analyze:
|
|
73 |
<image>
|
74 |
|
75 |
Generate the queries based on this image and provide the response in the specified JSON format."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
|
80 |
# defined like this so we can later add more prompting options
|
81 |
prompt, pydantic_model = get_retrieval_prompt("general")
|
82 |
|
83 |
|
84 |
-
def _prep_data_for_input(image):
|
85 |
messages = [
|
86 |
{
|
87 |
"role": "user",
|
@@ -111,10 +146,10 @@ def _prep_data_for_input(image):
|
|
111 |
|
112 |
|
113 |
@spaces.GPU
|
114 |
-
def generate_response(image):
|
115 |
-
|
116 |
-
inputs =
|
117 |
-
|
118 |
generated_ids = model.generate(**inputs, max_new_tokens=200)
|
119 |
generated_ids_trimmed = [
|
120 |
out_ids[len(in_ids) :]
|
@@ -156,7 +191,14 @@ examples = [
|
|
156 |
|
157 |
demo = gr.Interface(
|
158 |
fn=generate_response,
|
159 |
-
inputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
outputs=gr.Json(),
|
161 |
title=title,
|
162 |
description=description,
|
|
|
37 |
visual_element_explanation: str
|
38 |
|
39 |
|
40 |
+
def get_retrieval_prompt(prompt_name: str) -> Tuple[str, BaseModel]:
|
41 |
+
if prompt_name == "general":
|
42 |
+
prompt = """You are an AI assistant specialized in document retrieval tasks. Given an image of a document page, your task is to generate retrieval queries that someone might use to find this document in a large corpus.
|
|
|
|
|
43 |
|
44 |
Please generate 3 different types of retrieval queries:
|
45 |
|
|
|
71 |
<image>
|
72 |
|
73 |
Generate the queries based on this image and provide the response in the specified JSON format."""
|
74 |
+
return prompt, GeneralRetrievalQuery
|
75 |
+
elif prompt_name == "multimodal_rag":
|
76 |
+
prompt = """You are an assistant specialized in Multimodal RAG tasks.
|
77 |
+
|
78 |
+
The task is the following: given an image from a pdf page, you will have to generate questions that can be asked by a user to retrieve information from a large documentary corpus.
|
79 |
+
|
80 |
+
The question should be relevant to the page, and should not be too specific or too general. The question should be about the subject of the page, and the answer needs to be found in the page.
|
81 |
|
82 |
+
Remember that the question is asked by a user to get some information from a large documentary corpus that contains multimodal data. Generate a question that could be asked by a user without knowing the existence and the content of the corpus.
|
83 |
+
|
84 |
+
Generate as well the answer to the question, which should be found in the page. And the format of the answer should be a list of words answering the question.
|
85 |
+
|
86 |
+
Generate at most THREE pairs of questions and answers per page as JSON with the following format, answer ONLY using JSON, NOTHING ELSE:
|
87 |
+
|
88 |
+
{
|
89 |
+
"questions": [
|
90 |
+
{
|
91 |
+
"question": "XXXXXX",
|
92 |
+
"answer": ["YYYYYY"]
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"question": "XXXXXX",
|
96 |
+
"answer": ["YYYYYY"]
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"question": "XXXXXX",
|
100 |
+
"answer": ["YYYYYY"]
|
101 |
+
}
|
102 |
+
]
|
103 |
+
}
|
104 |
+
|
105 |
+
where XXXXXX is the question and ['YYYYYY'] is the corresponding list of answers that could be as long as needed.
|
106 |
+
|
107 |
+
Note: If there are no questions to ask about the page, return an empty list. Focus on making relevant questions concerning the page.
|
108 |
+
|
109 |
+
Here is the page:"""
|
110 |
+
return prompt, BaseModel
|
111 |
+
else:
|
112 |
+
raise ValueError("Invalid prompt name")
|
113 |
|
114 |
|
115 |
# defined like this so we can later add more prompting options
|
116 |
prompt, pydantic_model = get_retrieval_prompt("general")
|
117 |
|
118 |
|
119 |
+
def _prep_data_for_input(image, prompt):
|
120 |
messages = [
|
121 |
{
|
122 |
"role": "user",
|
|
|
146 |
|
147 |
|
148 |
@spaces.GPU
|
149 |
+
def generate_response(image, prompt_name="general"):
|
150 |
+
prompt, _ = get_retrieval_prompt(prompt_name)
|
151 |
+
inputs = _prep_data_for_input(image, prompt)
|
152 |
+
inputs.to("cuda")
|
153 |
generated_ids = model.generate(**inputs, max_new_tokens=200)
|
154 |
generated_ids_trimmed = [
|
155 |
out_ids[len(in_ids) :]
|
|
|
191 |
|
192 |
demo = gr.Interface(
|
193 |
fn=generate_response,
|
194 |
+
inputs=[
|
195 |
+
gr.Image(type="pil"),
|
196 |
+
gr.Dropdown(
|
197 |
+
choices=["ColPali paper prompt", "retrieval focused prompt"],
|
198 |
+
value="general",
|
199 |
+
label="Prompt Type",
|
200 |
+
),
|
201 |
+
],
|
202 |
outputs=gr.Json(),
|
203 |
title=title,
|
204 |
description=description,
|