hhhwmws committed on
Commit
9b92a87
·
verified ·
1 Parent(s): 13d2bd0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +331 -109
app.py CHANGED
@@ -1,112 +1,334 @@
1
  import gradio as gr
2
- from io import BytesIO
3
- import fitz
4
- import tempfile
5
- import openai
6
-
7
-
8
class TranslationAgent:
    """Chat-based English-to-Chinese translator with a short rolling memory.

    ``self.memory`` is the message list sent to the chat endpoint. Its first
    entry is always the system prompt; translated chunks are appended as
    user/assistant pairs and the oldest pair is dropped by ``fade_memory``.
    """

    def __init__(self, openai_key):
        """Seed the conversation with the system prompt and set the API key."""
        prompt_parts = (
            "You are a translator from english to Chinese.\n",
            " The only thing you do is to translate.\n",
            " You don't write anything other then the translation of the text you get.\n",
            " The user will only provide the text without asking anything, but what he wants is the translation.\n",
            " Never return the translation of a previously translated part!\n ",
            "The text you will need to translate will often include none sense stuff because it is coming from a text extraction of a pdf file including images and table.\n",
            " Do your best to translate also this messy parts.",
        )
        self.memory = [{"role": "system", "content": "".join(prompt_parts)}]

        openai.api_key = openai_key

    def fade_memory(self):
        """Drop the two entries after the system prompt once 5+ are stored."""
        if len(self.memory) < 5:
            return
        del self.memory[1:3]

    def translate_chunk(self, chunk):
        """Send one text chunk to the model and return its reply.

        The chunk and the reply are both recorded in ``self.memory`` before
        the memory is pruned.
        """
        self.memory.append({"role": "user", "content": chunk})
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=self.memory,
        )
        translated = completion["choices"][0]["message"]["content"]
        self.memory.append({"role": "assistant", "content": translated})
        self.fade_memory()
        return translated
37
-
38
-
39
def extract_text_from_pdf(pdf, start, stop):
    """Return the concatenated text of pages ``start``..``stop`` of a PDF.

    Args:
        pdf: PDF content as bytes; it is wrapped in ``BytesIO`` for ``fitz``.
        start: Index of the first page to include (pages are counted from 0).
        stop: Index of the last page to include (inclusive).
    """
    page_texts = []
    with fitz.open(stream=BytesIO(pdf), filetype='pdf') as doc:
        for page_index, page in enumerate(doc):
            if not (start <= page_index):
                # Still before the requested range.
                continue
            if not (page_index <= stop):
                # Past the requested range; no later page can match.
                break
            page_texts.append(page.get_text())
    return "".join(page_texts)
49
-
50
-
51
def split_text(text, chunk_size=100):
    """Split text into chunks of whole words that end on a sentence boundary.

    A chunk is closed at the first word ending with '.' once it holds at
    least ``chunk_size`` words. Any remaining words form a final chunk.
    Words are re-joined with single spaces.
    """
    tokens = text.split()
    pieces = []
    begin = 0  # index of the first word of the chunk being built

    for end, token in enumerate(tokens, start=1):
        long_enough = end - begin >= chunk_size
        if token.endswith('.') and long_enough:
            pieces.append(' '.join(tokens[begin:end]))
            begin = end

    # Flush the trailing, unterminated chunk if there is one.
    if begin < len(tokens):
        pieces.append(' '.join(tokens[begin:]))

    return pieces
67
-
68
-
69
def translate_pdf(openai_key, pdf, start, stop):
    """Translate a page range of a PDF and save the result to a text file.

    Args:
        openai_key: API key handed to ``TranslationAgent``.
        pdf: PDF content as bytes, or ``None`` when nothing was uploaded.
        start: Index of the first page to translate.
        stop: Index of the last page to translate (inclusive).

    Returns:
        A ``(translated_text, status_message, file_path)`` tuple. On failure
        ``status_message`` carries the error and ``translated_text`` is empty;
        the text file is still created.
    """
    translator = TranslationAgent(openai_key)
    translated_text = ""
    error_message = "Translation Successful"

    try:
        # extract text
        if pdf is not None:
            text = extract_text_from_pdf(pdf, start=start, stop=stop)
            chunks = split_text(text)

            translated_chunks = []
            for chunk in chunks:
                translated_chunk = translator.translate_chunk(chunk)
                translated_chunks.append(translated_chunk + " ")

            translated_text = ' '.join(translated_chunks)
    except Exception as e:
        error_message = f"Translation Failed: {e}"

    # Write through the temporary file's own handle and close it via the
    # context manager. The file is kept on disk (delete=False) so that the
    # caller can serve it by path.
    with tempfile.NamedTemporaryFile(
        mode='w',
        encoding='utf-8',
        delete=False,
        prefix="translatedPDF_",
        suffix=".txt",
    ) as temp:
        temp.write(translated_text)

    return translated_text, error_message, temp.name
97
-
98
-
99
# Input widgets, listed in the positional order of translate_pdf's parameters.
api_key_input = gr.Textbox(
    lines=1,
    label="OpenAI API key",
    placeholder="Enter your OpenAI API key here",
)
pdf_input = gr.File(type="binary", label="PDF file")
start_page_input = gr.Number(label="Starting Page")
final_page_input = gr.Number(label="Final Page")

# Outputs: translated text, status message, downloadable text file.
iface = gr.Interface(
    fn=translate_pdf,
    inputs=[api_key_input, pdf_input, start_page_input, final_page_input],
    outputs=["text", "text", gr.File(label="Translated Text File")],
    title="Pdf Translator: English ==> Chinese",
)

iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os
3
+ import random
4
+ import string
5
+ from PIL import Image
6
+ from src.GameMaster import GameMaster
7
+ from collections import deque
8
+ import time
9
+ import threading
10
+ import queue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ from src.Founder import Founder
13
+
14
+ import os
15
+ # os.environ['HTTP_PROXY'] = 'http://localhost:8234'
16
+ # os.environ['HTTPS_PROXY'] = 'http://localhost:8234'
17
+
18
# Shared service objects used by the handlers in this module.
game_master = GameMaster()
founder_base = Founder()

blank_image_path = "datas/blank_item.jpg"

# Seed the bounded "recent items" history with 12 random entries.
# Each entry is consumed elsewhere as an (image_path, description) pair.
random_data = game_master.random_image_text_data(12)
recent_generated_items = deque(maxlen=12)
recent_generated_items.extend(random_data)
26
+
27
def refresh_contribution_ladder():
    """Build the 20 text entries shown on the contribution ladder.

    For each top-ranked founder, up to five of their items are sampled at
    random and shown by cultivation name when the text database has one,
    otherwise by the raw item keyword. The list is padded with empty strings
    up to 20 entries.
    """
    ranked_founders = founder_base.get_top_rank(top_k=20)
    ladder_lines = []

    for founder_name, items in ranked_founders:
        sample_size = min(5, len(items))
        display_names = []
        for keyword in random.sample(items, sample_size):
            record = game_master.textdb.search_by_en_keyword(keyword)
            if record is None or "name_in_cultivation" not in record:
                display_names.append(keyword)
            else:
                display_names.append(record['name_in_cultivation'])

        joined_names = ', '.join(display_names)
        ladder_lines.append(f"""道友姓名: {founder_name}\n发现了{joined_names}等物品""")

    # Pad so that every ladder textbox receives a value.
    ladder_lines.extend([""] * max(0, 20 - len(ladder_lines)))

    return ladder_lines
50
+
51
+
52
def expensive_generating(save_path, image_feature, backup_results, game_master, founder_name = None):
    """Run the slow identification of an uploaded image and publish the result.

    A placeholder entry is pushed to ``recent_generated_items`` first and is
    replaced by the final description once identification finishes.

    Args:
        save_path: Path of the saved upload; also the key of its entry in
            ``recent_generated_items``.
        image_feature: Feature used for the initial duplicate check. It is
            overwritten by the fresh ``search_with_path`` call below.
        backup_results: Overwritten by the fresh ``search_with_path`` call.
        game_master: Object providing ``imgdb``, ``search_with_path``,
            ``generate_cultivation_data`` and ``minimal_image_threshold``.
        founder_name: Name to credit for a first discovery, or ``None`` to
            skip founder bookkeeping.

    Returns:
        None. Results are published through ``recent_generated_items`` and
        ``founder_base``.
    """
    # Re-run the image search in case the same item was generated while this
    # task was waiting; skip the expensive work if it is now known.
    image_search_result = game_master.imgdb.top_k_search(image_feature, top_k=1)
    if image_search_result and image_search_result[0]['similarity'] > game_master.minimal_image_threshold:
        return

    recent_generated_items.append((save_path, "天机阁长老正在鉴定,道友请耐心等待。。"))

    search_result, backup_results, image_feature = game_master.search_with_path(save_path)
    if search_result is None:
        cultivation_data = game_master.generate_cultivation_data(
            save_path, image_feature, backup_results)
        description = cultivation_data["description_in_cultivation"]
        print("鉴定得到新物品:", description)
    else:
        cultivation_data = search_result
        description = cultivation_data["description_in_cultivation"]
        print("鉴定这是记载过的物品,但是第一次发现:", description)

    suffix = ""
    if founder_name is not None:
        if "translated_word" in cultivation_data:
            translated_word = cultivation_data["translated_word"]
        else:
            translated_word = cultivation_data["name_in_cultivation"]

        # Look the founder up once: an existing founder is credited,
        # otherwise a non-blank submitter becomes the founder.
        known_founder = founder_base.get_founder(translated_word)
        if known_founder is not None:
            suffix = f" 由道友 {known_founder} 发现"
        elif founder_name.strip() != "":
            founder_base.set_founder(translated_word, founder_name)
            suffix = f" 由道友 {founder_name} 发现"

    # Build the final text exactly once so the name/suffix prefix cannot be
    # stacked when more than one entry shares this save_path.
    if "name_in_cultivation" in cultivation_data:
        description = cultivation_data["name_in_cultivation"] + suffix + "--" + description
    final_entry = (save_path, description)

    # Replace the placeholder entry in place; append if it is gone.
    replaced = False
    for index, item in enumerate(recent_generated_items):
        if item[0] == save_path:
            recent_generated_items[index] = final_entry
            replaced = True
    if not replaced:
        recent_generated_items.append(final_entry)

    return
113
+
114
+ ###
115
+
116
# Queue for managing tasks to process expensive operations.
# Each task is a tuple (save_path, image_feature, backup_results, founder_name);
# a task whose save_path is None tells the worker to stop.
task_queue = queue.Queue()


# Function that processes the tasks in the queue
def worker():
    """Process queued identification tasks one at a time until told to stop.

    Every dequeued task is acknowledged with ``task_done()``, including the
    stop sentinel and tasks that fail, so ``task_queue.join()`` cannot hang.
    """
    while True:
        save_path, image_feature, backup_results, founder_name = task_queue.get()
        try:
            if save_path is None:
                break
            expensive_generating(save_path, image_feature, backup_results, game_master, founder_name)
        except Exception as exc:
            # One failing task must not kill the only worker thread, or all
            # later uploads would stay queued forever.
            print("expensive_generating failed:", repr(exc))
        finally:
            task_queue.task_done()


# Start the worker thread (daemon: it does not block interpreter exit).
thread = threading.Thread(target=worker, daemon=True)
thread.start()
131
+
132
+ ###
133
+
134
def similarity2level( sim, max_val , min_val ):
    """Map a similarity score to an integer level between 0 and 3.

    The score is rescaled linearly so that ``min_val`` maps to 0 and
    ``max_val`` maps to 3, truncated to an integer, and clamped to 0..3.
    """
    top_level = 3
    fraction = (sim - min_val) / (max_val - min_val)
    raw_level = int(fraction * top_level)
    if raw_level < 0:
        return 0
    if raw_level > top_level:
        return top_level
    return raw_level
139
+
140
+ from src.get_comments_from_level import get_comments_from_level
141
+
142
def is_empty_name( founder_name ):
    """Return True when the name is missing, blank, or a placeholder name.

    Always returns a real ``bool``: ``False`` for usable names, and ``True``
    for ``None`` as well as for blank or placeholder strings.
    """
    if founder_name is None:
        return True
    empty_names = ("", "测试", "鲁鲁道祖")
    return founder_name.strip() in empty_names
146
+
147
+ # Function to handle image upload and search
148
# Function to handle image upload and search
def process_image(image, founder_name):
    """Identify an uploaded image and build the reply shown to the visitor.

    The image is saved under ``temp_images`` and searched with
    ``game_master.search_with_path``. A sufficiently similar known item is
    reported directly. Otherwise the closest backup result is shown and a
    task is queued on ``task_queue`` for the background worker.

    Args:
        image: Image array accepted by ``Image.fromarray``. A missing or
            unusable value yields the default greeting.
        founder_name: Name typed by the visitor; may be empty or ``None``.

    Returns:
        A ``(name, description, comments)`` tuple of strings.
    """
    # Ensure temp_images directory exists
    os.makedirs('temp_images', exist_ok=True)

    founder_name = (founder_name or "").strip()
    anonymous = bool(is_empty_name(founder_name))

    prefix = ""
    if anonymous:
        prefix = "道友,请在下面留下您的尊姓大名,提供大量珍宝鉴定的道友,天机阁将在后面送出灵泉玉液(珍珠奶茶)。"

    try:
        # Generate a random name for the image
        random_name = ''.join(random.choices(string.ascii_lowercase + string.digits, k=12)) + '.jpg'
        save_path = os.path.join('temp_images', random_name)

        # Convert numpy.ndarray to PIL.Image
        img = Image.fromarray(image)

        # JPEG cannot store an alpha channel or palette; normalise first.
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")

        # Resize image to height not exceeding 480 pixels while maintaining aspect ratio
        img.thumbnail((img.width, 480))
        img.save(save_path)
    except Exception:
        # No image, or one that cannot be converted/saved: show the greeting.
        return "","",prefix + "道友你有什么物品要来鉴定吗?"

    search_result, backup_results, image_feature = game_master.search_with_path(save_path, threshold = 0)

    # NOTE(review): with threshold=0 a hit is presumably always returned;
    # a missing hit is treated as similarity 0 instead of crashing.
    image_similarity = search_result['similarity'] if search_result is not None else 0.0
    print("image_similarity:", image_similarity)

    inbase_similarity_level = similarity2level(image_similarity, max_val=0.99, min_val=0.80)

    suffix = ""

    if search_result is not None and image_similarity > game_master.minimal_image_threshold:
        result = search_result

        # In this branch inbase_similarity_level is usually 2 or 3.
        inlibrary_similarity_level = inbase_similarity_level

        if "translated_word" in search_result:
            translated_word = search_result["translated_word"]
        else:
            translated_word = search_result["name_in_cultivation"]

        find_founder = founder_base.get_founder(translated_word)

        if find_founder is None:
            suffix = "这个物品很早就在天机阁中记载了,道友还有什么物品要鉴定吗?"
        elif find_founder == founder_name:
            suffix = "感谢道友送来这个物品进行鉴定,天机阁的天梯上已经记录了你的贡献。"
        else:
            suffix = f"这个物品是由道友{find_founder}发现的,道友还有什么物品要鉴定吗?"
    else:
        # Default to the lowest level so the name is always bound, even when
        # the backup hit carries no similarity score.
        inlibrary_similarity_level = 0
        if backup_results and "similarity" in backup_results[0]:
            text_similarity = backup_results[0]['similarity']
            print("text_similarity:", text_similarity)
            inlibrary_similarity_level = similarity2level(text_similarity, max_val=0.79, min_val=0.35)

        # Hand the slow identification to the background worker.
        task_queue.put((save_path, image_feature, backup_results, founder_name))

        suffix = "道友请移步天机阁内阁查询长老的鉴定结果。"

        # NOTE(review): placement of this reminder inside the new-item branch
        # is inferred; confirm against the original indentation.
        if anonymous:
            suffix += "另外,只有留下姓名的道友,才能最终被记录在天梯获取天机阁奖励。"

        if not backup_results:
            # Nothing to preview yet; just point the visitor to the results tab.
            return "", "", prefix + suffix

        result = backup_results[0]

    # Get the name and description from the chosen result
    name = result["name_in_cultivation"]
    description_in_cultivation = result["description_in_cultivation"]

    comments = get_comments_from_level( inbase_similarity_level, inlibrary_similarity_level )
    # Fill the {name} placeholder of the comment template.
    comments = prefix + comments.format(name=name) + suffix

    return name, description_in_cultivation, comments
237
+
238
+
239
+ # Function to refresh and display recent items
240
def refresh_recent_items():
    """Return thumbnails and descriptions of the recent items, newest first.

    Returns:
        A flat list: one image (or ``None``) per slot, followed by one
        description string per slot. The number of slots equals the history's
        ``maxlen``, so the result matches a fixed set of output widgets even
        when the history is not full.
    """
    # Snapshot first: the background worker may modify the deque while the
    # images are being loaded.
    snapshot = list(recent_generated_items)
    slot_count = recent_generated_items.maxlen or len(snapshot)

    thumbnails = []
    descriptions = []
    # loop in inverse order
    for img_path, desc in reversed(snapshot):
        try:
            img = Image.open(img_path)
            img.thumbnail((200, 200))
        except OSError:
            # Missing or unreadable file: keep the slot, show no picture.
            img = None
        thumbnails.append(img)
        descriptions.append(desc)

    # Pad unused slots so every output widget receives a value.
    missing = max(0, slot_count - len(snapshot))
    thumbnails.extend([None] * missing)
    descriptions.extend([""] * missing)

    return thumbnails + descriptions
251
+
252
+ # Prepare example images
253
# Collect every .jpg file in the examples directory as a selectable example.
example_images_dir = "datas/example_images"
example_images = []
for example_file in os.listdir(example_images_dir):
    if example_file.endswith('.jpg'):
        example_images.append(os.path.join(example_images_dir, example_file))


# Markdown checklist rendered at the bottom of the first tab.
TODO_list = """
# TODO

- [ ] 增加天梯匹配系统

- [ ] 增加readme

- [ ] 部署到gitee
"""
266
+
267
+ # Gradio Interface
268
# Gradio interface: three tabs (identify, recent items, contribution ladder).
# NOTE(review): widget nesting below is reconstructed; confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown("# 这个系统是智障,错把日常当修仙\n\nby [李鲁鲁](https://github.com/LC1332)")

    # Tab 1: Image processing and identification
    with gr.Tab("鉴定"):
        with gr.Row():
            with gr.Column(scale = 1):
                image_input = gr.Image(label="上传图片")

            with gr.Column(scale = 2):
                comments = gr.Text("道友你有什么物品要来鉴定吗?", label = "")

                name_output = gr.Textbox(label="鉴定物品名称")

                description_output = gr.Textbox(label="物品描述")

                submit_button = gr.Button("鉴定")

        with gr.Row():
            founder_name = gr.Textbox(label="道友尊姓大名?", interactive=True)

        # Both a fresh upload and the button run the same handler.
        image_input.upload(process_image, inputs=[image_input,founder_name], outputs=[name_output, description_output, comments])
        submit_button.click(process_image, inputs=[image_input,founder_name], outputs=[name_output, description_output, comments])

        gr.Examples(examples=example_images, inputs=image_input, label="选择一个示例图片")
        gr.Markdown(TODO_list)

    # Tab 2: Recent items
    with gr.Tab("天机阁最新鉴定"):
        refresh_button = gr.Button("询问长老最新鉴定")
        recent_images = []
        recent_descriptions = []

        # 4 rows x 3 columns = 12 items, each an image with its description.
        rows_in_grid = 4
        columns_per_row = 3
        for row in range(rows_in_grid):
            with gr.Row():
                for col in range(columns_per_row):
                    with gr.Column():
                        # Number the items consecutively from 1 to 12.
                        recent_image = gr.Image(label=f"物品 {row * columns_per_row + col + 1}")
                        recent_description = gr.Textbox(label="描述", interactive=False)
                        recent_images.append(recent_image)
                        recent_descriptions.append(recent_description)

        # On clicking the refresh button, update the recent items
        refresh_button.click(refresh_recent_items, outputs=recent_images + recent_descriptions)

    # Tab 3: Contribution ladder
    with gr.Tab("贡献天梯"):
        refresh_ladder_button = gr.Button("刷新天梯")
        contribution_textboxes = [gr.Textbox(label=f"贡献者 {i + 1}", interactive=False) for i in range(20)]

        refresh_ladder_button.click(refresh_contribution_ladder, outputs=contribution_textboxes)

# Launch the demo
demo.launch(share=True)
334
+