manu committed
Commit 6efb913 · verified
1 Parent(s): 10f6f3f

Update app.py

Files changed (1)
  1. app.py +78 -3
app.py CHANGED
@@ -25,9 +25,80 @@ model = ColQwen2.from_pretrained(
     device_map="cuda:0",  # or "mps" if on Apple Silicon
     # attn_implementation="flash_attention_2",  # should work on A100
 ).eval()
-processor = ColQwen2Processor.from_pretrained("manu/colqwen2-v1.0-alpha")
+processor = ColQwen2Processor.from_pretrained("manu/colqwen2-v1.0")
 
 
+def encode_image_to_base64(image):
+    """Encodes a PIL image to a base64 JPEG string."""
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG")
+    return base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+
+def query_gpt4o_mini(query, images):
+    """Calls OpenAI's GPT-4o-mini with the query and the retrieved page images."""
+    from openai import OpenAI
+
+    base64_images = [encode_image_to_base64(image) for image in images]
+    client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))
+    PROMPT = """
+    You are a smart assistant designed to answer questions about a PDF document.
+    You are given relevant information in the form of PDF pages. Use them to construct a response to the question, and cite your sources.
+    If it is not possible to answer using the provided pages, do not attempt to provide an answer; simply say the answer is not present within the documents.
+    Give detailed and extensive answers, containing only information from the pages you are given.
+    Answer in the same language as the query.
+
+    Query: {query}
+    PDF pages:
+    """
+
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": PROMPT.format(query=query)},
+                    # Attach the retrieved pages (at most five) as inline base64 JPEGs.
+                    *[
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
+                        }
+                        for b64 in base64_images[:5]
+                    ],
+                ],
+            }
+        ],
+        max_tokens=500,
+    )
+    return response.choices[0].message.content
+
 
 @spaces.GPU
 def search(query: str, ds, images, k):
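For reference, the new helpers expect plain PIL images: encode_image_to_base64 turns a page into a base64 JPEG string, and query_gpt4o_mini attaches up to five encoded pages to a single multimodal chat message. A minimal usage sketch with hypothetical stand-in pages (not part of the commit):

from PIL import Image

# Hypothetical stand-ins for retrieved PDF pages; the app passes real
# page images from the indexed document.
pages = [Image.new("RGB", (1240, 1754), "white") for _ in range(3)]
answer = query_gpt4o_mini("What does the document conclude?", pages)
print(answer)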
 
@@ -50,7 +121,10 @@ def search(query: str, ds, images, k):
     for idx in top_k_indices:
         results.append((images[idx], f"Page {idx}"))
 
-    return results
+    # Generate response from GPT-4o-mini
+    ai_response = "Activate AI response by forking and adding your GPT-4o key"  # query_gpt4o_mini(query, results)
+
+    return results, ai_response
 
 
 def index(files, ds):
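Note the commit ships with the live call disabled: ai_response is a fixed placeholder and the query_gpt4o_mini call is left commented out. Since results holds (image, caption) tuples, a fork that enables the call should unpack the images first. A hedged sketch of that change, assuming an OPENAI_KEY secret is configured on the Space:

import os

# Sketch only: use the real model when a key is available, otherwise
# fall back to the placeholder. Unpack images from (image, caption) tuples.
if os.environ.get("OPENAI_KEY"):
    ai_response = query_gpt4o_mini(query, [img for img, _ in results])
else:
    ai_response = "Activate AI response by forking and adding your GPT-4o key"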
 
@@ -126,9 +200,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # Define the actions
     search_button = gr.Button("🔍 Search", variant="primary")
     output_gallery = gr.Gallery(label="Retrieved Documents", height=600, show_label=True)
+    output_text = gr.Textbox(label="AI Response", placeholder="Generated response based on retrieved documents")
 
     convert_button.click(index, inputs=[file, embeds], outputs=[message, embeds, imgs])
-    search_button.click(search, inputs=[query, embeds, imgs, k], outputs=[output_gallery])
+    search_button.click(search, inputs=[query, embeds, imgs, k], outputs=[output_gallery, output_text])
 
 if __name__ == "__main__":
     demo.queue(max_size=10).launch(debug=True)
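Because search now returns a (gallery, text) pair, the click handler lists both components in outputs and Gradio maps the return values positionally. A standalone sketch of the same two-output wiring (toy function, not the app's code):

import gradio as gr
from PIL import Image

def toy_search(q):
    # One return value per output component, matched positionally.
    pages = [(Image.new("RGB", (200, 280), "white"), "Page 0")]
    return pages, f"Answer for: {q}"

with gr.Blocks() as sketch:
    q = gr.Textbox(label="Query")
    gallery = gr.Gallery(label="Retrieved Documents")
    answer = gr.Textbox(label="AI Response")
    gr.Button("🔍 Search").click(toy_search, inputs=[q], outputs=[gallery, answer])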