ndurner committed on
Commit
dede9df
·
1 Parent(s): ea00238

PDF support

Browse files
Files changed (2) hide show
  1. app.py +78 -28
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,6 +3,9 @@ import base64
3
  import os
4
  from openai import OpenAI
5
  import json
 
 
 
6
  from settings_mgr import generate_download_settings_js, generate_upload_settings_js
7
 
8
  from doc2json import process_docx
@@ -45,6 +48,79 @@ def encode_image(image_data):
45
 
46
  return f"data:image/{image_type};base64,{base64.b64encode(image_data).decode('utf-8')}"
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def undo(history):
49
  history.pop()
50
  return history
@@ -108,30 +184,7 @@ def bot(message, history, oai_key, system_prompt, seed, temperature, max_tokens,
108
  for human, assi in history:
109
  if human is not None:
110
  if type(human) is tuple:
111
- fn = human[0]
112
- if fn.endswith(".docx"):
113
- content = process_docx(fn)
114
- else:
115
- with open(fn, mode="rb") as f:
116
- content = f.read()
117
-
118
- isImage = False
119
- if isinstance(content, bytes):
120
- try:
121
- # try to add as image
122
- content = encode_image(content)
123
- isImage = True
124
- except:
125
- # not an image, try text
126
- content = content.decode('utf-8', 'replace')
127
- else:
128
- content = str(content)
129
-
130
- if isImage:
131
- user_msg_parts.append({"type": "image_url",
132
- "image_url":{"url": content}})
133
- else:
134
- user_msg_parts.append({"type": "text", "text": content})
135
  else:
136
  user_msg_parts.append({"type": "text", "text": human})
137
 
@@ -146,10 +199,7 @@ def bot(message, history, oai_key, system_prompt, seed, temperature, max_tokens,
146
  user_msg_parts.append({"type": "text", "text": message['text']})
147
  if message['files']:
148
  for file in message['files']:
149
- with open(file['path'], mode="rb") as f:
150
- content = f.read()
151
- user_msg_parts.append({"type": "image_url",
152
- "image_url":{"url": encode_image(content)}})
153
  history_openai_format.append({"role": "user", "content": user_msg_parts})
154
  user_msg_parts = []
155
 
 
3
  import os
4
  from openai import OpenAI
5
  import json
6
+ import fitz
7
+ from PIL import Image
8
+ import io
9
  from settings_mgr import generate_download_settings_js, generate_upload_settings_js
10
 
11
  from doc2json import process_docx
 
48
 
49
  return f"data:image/{image_type};base64,{base64.b64encode(image_data).decode('utf-8')}"
50
 
51
def process_pdf_img(pdf_fn: str, scale: float = 0.6) -> list:
    """Render each page of a PDF to a PNG and return chat message parts.

    For every page, two parts are appended in order: a ``text`` part that
    labels the page (1-based page number and the file name) and an
    ``image_url`` part carrying the rendered page as a base64 PNG data URL
    with ``detail`` set to ``"high"``.

    Args:
        pdf_fn: Path of the PDF file to open.
        scale: Zoom factor applied on both axes when rasterising a page.
            Defaults to 0.6, the value that was previously hard-coded.

    Returns:
        A flat list of message-part dicts, two per page. Empty when the
        document has no pages.
    """
    message_parts = []

    # The zoom matrix is identical for every page, so build it once.
    mat = fitz.Matrix(scale, scale)

    pdf = fitz.open(pdf_fn)
    try:
        for page in pdf.pages():
            # Render the page to an RGB pixmap (no alpha channel).
            pix = page.get_pixmap(matrix=mat, alpha=False)

            # Re-encode the raw samples as PNG via PIL.
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            png_buffer = io.BytesIO()
            img.save(png_buffer, format='PNG')

            base64_encoded = base64.b64encode(png_buffer.getvalue()).decode('utf-8')
            image_url = f"data:image/png;base64,{base64_encoded}"

            # page.number is zero-based; label pages the way a reader
            # counts them so the first page is "Page 1", not "Page 0".
            message_parts.append({
                "type": "text",
                "text": f"Page {page.number + 1} of file '{pdf_fn}'"
            })
            message_parts.append({
                "type": "image_url",
                "image_url": {
                    "url": image_url,
                    "detail": "high"
                }
            })
    finally:
        # Release the document even if rendering a page fails.
        pdf.close()

    return message_parts
92
+
93
def encode_file(fn: str) -> list:
    """Convert an uploaded file into a list of chat message parts.

    Dispatch is by file extension, compared case-insensitively:

    * ``.docx`` -- a single ``text`` part holding the result of
      ``process_docx(fn)``.
    * ``.pdf`` -- the parts produced by ``process_pdf_img(fn)``.
    * anything else -- the file is read as bytes and handed to
      ``encode_image``; if that raises, the bytes are decoded as UTF-8
      (undecodable bytes replaced) and returned as a ``text`` part.

    Args:
        fn: Path of the file to encode.

    Returns:
        A list of message-part dicts.
    """
    user_msg_parts = []
    lowered = fn.lower()  # so ".PDF" / ".DOCX" are recognised too

    if lowered.endswith(".docx"):
        # NOTE(review): assumes process_docx returns a string usable as
        # message text -- confirm against doc2json.
        user_msg_parts.append({"type": "text", "text": process_docx(fn)})
    elif lowered.endswith(".pdf"):
        user_msg_parts.extend(process_pdf_img(fn))
    else:
        with open(fn, mode="rb") as f:
            content = f.read()

        try:
            # Try to add as image first. encode_image is defined elsewhere
            # in this file; presumably it raises for non-image data.
            image_url = encode_image(content)
        except Exception:
            # Not an image: fall back to text. Catching Exception rather
            # than using a bare except lets KeyboardInterrupt/SystemExit
            # propagate.
            user_msg_parts.append(
                {"type": "text", "text": content.decode('utf-8', 'replace')})
        else:
            user_msg_parts.append({"type": "image_url",
                                   "image_url": {"url": image_url}})

    return user_msg_parts
123
+
124
  def undo(history):
125
  history.pop()
126
  return history
 
184
  for human, assi in history:
185
  if human is not None:
186
  if type(human) is tuple:
187
+ user_msg_parts.extend(encode_file(human[0]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  else:
189
  user_msg_parts.append({"type": "text", "text": human})
190
 
 
199
  user_msg_parts.append({"type": "text", "text": message['text']})
200
  if message['files']:
201
  for file in message['files']:
202
+ user_msg_parts.extend(encode_file(file['path']))
 
 
 
203
  history_openai_format.append({"role": "user", "content": user_msg_parts})
204
  user_msg_parts = []
205
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  gradio >= 4.38.1
2
  openai >= 1.0.0
3
- lxml
 
 
1
  gradio >= 4.38.1
2
  openai >= 1.0.0
3
+ lxml
4
+ PyMuPDF