phyloforfun committed on
Commit
e223e6f
·
1 Parent(s): 1cc9cdc

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
Files changed (1) hide show
  1. app.py +122 -112
app.py CHANGED
@@ -249,6 +249,126 @@ def load_gallery(converted_files, uploaded_file):
249
  file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
250
  st.session_state['input_list_small'].append(file_path_small)
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  def content_input_images(col_left, col_right):
253
  st.write('---')
254
  # col1, col2 = st.columns([2,8])
@@ -272,120 +392,10 @@ def content_input_images(col_left, col_right):
272
 
273
  with col_right:
274
  if st.session_state.is_hf:
275
- if uploaded_files:
276
- # Clear input image gallery and input list
277
- clear_image_uploads()
278
-
279
- for uploaded_file in uploaded_files:
280
- # Determine the file type
281
- if uploaded_file.name.lower().endswith('.pdf'):
282
- # Handle PDF files
283
- file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
284
- # Convert each page of the PDF to an image
285
- n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
286
- # Update the input list for each page image
287
- converted_files = os.listdir(st.session_state['dir_uploaded_images'])
288
- for file_name in converted_files:
289
- if file_name.split('.')[1].lower() in ['jpg','jpeg']:
290
- jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
291
- st.session_state['input_list'].append(jpg_file_path)
292
-
293
- # Optionally, create a thumbnail for the gallery
294
- img = Image.open(jpg_file_path)
295
- img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
296
- try:
297
- file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
298
- except:
299
- file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
300
- st.session_state['input_list_small'].append(file_path_small)
301
-
302
- else:
303
- # Handle JPG/JPEG files (existing process)
304
- file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
305
- st.session_state['input_list'].append(file_path)
306
- img = Image.open(file_path)
307
- img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
308
- file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
309
- st.session_state['input_list_small'].append(file_path_small)
310
-
311
- # After processing all files
312
- st.info(f"Processing images from {st.session_state.config['leafmachine']['project']['dir_images_local']}")
313
-
314
- if st.session_state['input_list_small']:
315
- if len(st.session_state['input_list_small']) > MAX_GALLERY_IMAGES:
316
- # Only take the first 100 images from the list
317
- images_to_display = st.session_state['input_list_small'][:MAX_GALLERY_IMAGES]
318
- else:
319
- # If there are less than 100 images, take them all
320
- images_to_display = st.session_state['input_list_small']
321
- show_gallery_small_hf(images_to_display)
322
 
323
  else:
324
- st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
325
-
326
- if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
327
- if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
328
- info_txt = f"Showing {MAX_GALLERY_IMAGES} out of {st.session_state['processing_add_on']} images"
329
- else:
330
- info_txt = f"Showing {st.session_state['processing_add_on']} out of {st.session_state['processing_add_on']} images"
331
- st.info(info_txt)
332
- try:
333
- show_gallery_small()
334
- except:
335
- pass
336
-
337
- elif not st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
338
- pass
339
- elif not st.session_state['view_local_gallery'] and not st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
340
- pass
341
- # elif st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
342
- elif (st.session_state['dir_images_local_TEMP'] != st.session_state.config['leafmachine']['project']['dir_images_local']):
343
- has_pdf = False
344
- clear_image_uploads()
345
-
346
- for input_file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
347
- if input_file.split('.')[1].lower() in ['jpg','jpeg']:
348
- pass
349
- elif input_file.split('.')[1].lower() in ['pdf',]:
350
- has_pdf = True
351
- # Handle PDF files
352
- file_path = save_uploaded_file_local(st.session_state.config['leafmachine']['project']['dir_images_local'], st.session_state['dir_uploaded_images'], input_file)
353
- # Convert each page of the PDF to an image
354
- n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
355
-
356
-
357
- # pdf_files_pattern = os.path.join(st.session_state['dir_uploaded_images'], '*.pdf')
358
- # for pdf_file in glob.glob(pdf_files_pattern):
359
- # os.remove(pdf_file)
360
-
361
- # # Update the input list for each page image
362
- # converted_files = os.listdir(st.session_state['dir_uploaded_images'])
363
- # for file_name in converted_files:
364
- # if file_name.lower().endswith('.jpg'):
365
- # jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
366
- # st.session_state['input_list'].append(jpg_file_path)
367
-
368
- # # Optionally, create a thumbnail for the gallery
369
- # img = Image.open(jpg_file_path)
370
- # img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
371
- # file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images'], st.session_state['dir_uploaded_images_small'], file_name, img)
372
- # st.session_state['input_list_small'].append(file_path_small)
373
-
374
- # st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
375
-
376
- else:
377
- pass
378
- # st.warning("Inputs must be '.PDF' or '.jpg' or '.jpeg'")
379
- if has_pdf:
380
- st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
381
-
382
- dir_images_local = st.session_state.config['leafmachine']['project']['dir_images_local']
383
- count_n_imgs = list_jpg_files(dir_images_local)
384
- st.session_state['processing_add_on'] = count_n_imgs
385
- # print(st.session_state['processing_add_on'])
386
- st.session_state['dir_images_local_TEMP'] = st.session_state.config['leafmachine']['project']['dir_images_local']
387
- print("rerun")
388
- st.rerun()
389
 
390
  def list_jpg_files(directory_path):
391
  jpg_count = 0
 
249
  file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
250
  st.session_state['input_list_small'].append(file_path_small)
251
 
252
def handle_image_upload_and_gallery_hf(uploaded_files):
    """Ingest files uploaded through the hosted UI and render the thumbnail gallery.

    For every entry in ``uploaded_files``:

    * a ``.pdf`` upload is saved to ``st.session_state['dir_uploaded_images']``
      and passed to ``convert_pdf_to_jpg``; every JPG/JPEG then found in that
      directory that is not already registered is added to
      ``st.session_state['input_list']`` and a thumbnail of it is added to
      ``st.session_state['input_list_small']``;
    * any other upload is saved as-is, registered, and thumbnailed the same way.

    Finally, up to ``MAX_GALLERY_IMAGES`` thumbnails are handed to
    ``show_gallery_small_hf``.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing a ``.name``
            attribute (presumably the value returned by ``st.file_uploader``
            -- confirm against the caller). A falsy value skips ingestion and
            only renders whatever thumbnails are already in session state.

    Returns:
        None. The function works purely through side effects on
        ``st.session_state``, the upload directories, and the Streamlit page.
    """
    # NOTE: deliberately NOT decorated with ``@st.cache_data``. On a cache hit
    # Streamlit skips the function body, so the session-state updates and file
    # writes below would silently not happen on reruns with the same uploads.
    if uploaded_files:
        # Clear input image gallery and input list
        clear_image_uploads()

        dir_full = st.session_state['dir_uploaded_images']
        dir_small = st.session_state['dir_uploaded_images_small']
        thumb_size = (GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE)

        for uploaded_file in uploaded_files:
            if uploaded_file.name.lower().endswith('.pdf'):
                # Save the PDF, then convert each page to an image in the same directory.
                file_path = save_uploaded_file(dir_full, uploaded_file)
                convert_pdf_to_jpg(file_path, dir_full, dpi=200)

                for file_name in os.listdir(dir_full):
                    # splitext is safe for names without a dot or with several dots,
                    # unlike ``file_name.split('.')[1]``.
                    if os.path.splitext(file_name)[1].lower() not in ('.jpg', '.jpeg'):
                        continue

                    jpg_file_path = os.path.join(dir_full, file_name)
                    # The directory is re-listed after every PDF; skip images that
                    # were already registered so earlier pages are not duplicated.
                    if jpg_file_path in st.session_state['input_list']:
                        continue
                    st.session_state['input_list'].append(jpg_file_path)

                    # Create a thumbnail for the gallery; the context manager
                    # releases the file handle once the thumbnail is saved.
                    with Image.open(jpg_file_path) as img:
                        img.thumbnail(thumb_size, Image.Resampling.LANCZOS)
                        try:
                            file_path_small = save_uploaded_file(dir_small, file_name, img)
                        except Exception:
                            # Here ``file_name`` is a plain string rather than an
                            # upload object; fall back to the local-path helper
                            # (presumably the one that accepts bare names -- verify).
                            file_path_small = save_uploaded_file_local(dir_small, dir_small, file_name, img)
                    st.session_state['input_list_small'].append(file_path_small)
            else:
                # Handle JPG/JPEG files (existing process)
                file_path = save_uploaded_file(dir_full, uploaded_file)
                st.session_state['input_list'].append(file_path)
                with Image.open(file_path) as img:
                    img.thumbnail(thumb_size, Image.Resampling.LANCZOS)
                    file_path_small = save_uploaded_file(dir_small, uploaded_file, img)
                st.session_state['input_list_small'].append(file_path_small)

        # After processing all files
        st.info(f"Processing images from {st.session_state.config['leafmachine']['project']['dir_images_local']}")

    if st.session_state['input_list_small']:
        # Slicing already copes with lists shorter than the cap.
        images_to_display = st.session_state['input_list_small'][:MAX_GALLERY_IMAGES]
        show_gallery_small_hf(images_to_display)
301
+
302
+
303
def handle_image_upload_and_gallery():
    """Render the local "View Image Gallery" toggle and keep the input folder in sync.

    Two situations are handled:

    * The configured ``dir_images_local`` differs from the last processed one
      (``st.session_state['dir_images_local_TEMP']``): previous uploads are
      cleared, every PDF in the configured folder is copied into
      ``st.session_state['dir_uploaded_images']`` and passed to
      ``convert_pdf_to_jpg``, the configured folder is redirected to the upload
      directory when at least one PDF was found, the JPG count is stored in
      ``st.session_state['processing_add_on']``, and the script is rerun.
    * The folder is unchanged, the toggle is on, and thumbnails exist: an info
      banner is shown and ``show_gallery_small`` is called.

    In every other combination nothing is rendered besides the toggle.

    Returns:
        None. All effects go through ``st.session_state`` and the Streamlit page.
    """
    # NOTE: deliberately NOT decorated with ``@st.cache_data``. The function
    # takes no arguments, so its cache key never changes and the body (including
    # the toggle widget and the rerun) would be skipped on every later run.
    st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)

    project_cfg = st.session_state.config['leafmachine']['project']

    if st.session_state['dir_images_local_TEMP'] != project_cfg['dir_images_local']:
        # The input folder changed: rebuild the upload state from scratch.
        has_pdf = False
        clear_image_uploads()

        source_dir = project_cfg['dir_images_local']
        for input_file in os.listdir(source_dir):
            # Only PDFs need work here; JPG/JPEG files are used in place and any
            # other file type is ignored. splitext avoids the IndexError that
            # ``input_file.split('.')[1]`` raises for names without a dot.
            if os.path.splitext(input_file)[1].lower() != '.pdf':
                continue
            has_pdf = True
            file_path = save_uploaded_file_local(source_dir, st.session_state['dir_uploaded_images'], input_file)
            # Convert each page of the PDF to an image
            convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)

        if has_pdf:
            # Converted pages live in the upload directory, so process from there.
            project_cfg['dir_images_local'] = st.session_state['dir_uploaded_images']

        st.session_state['processing_add_on'] = list_jpg_files(project_cfg['dir_images_local'])
        # Remember the folder so the next run takes the "unchanged" path.
        st.session_state['dir_images_local_TEMP'] = project_cfg['dir_images_local']
        print("rerun")
        st.rerun()

    elif st.session_state['view_local_gallery'] and st.session_state['input_list_small']:
        n_images = st.session_state['processing_add_on']
        st.info(f"Showing {min(MAX_GALLERY_IMAGES, n_images)} out of {n_images} images")
        try:
            show_gallery_small()
        except Exception as exc:
            # The gallery is best-effort: report the failure instead of hiding it,
            # but do not abort the rest of the page.
            st.warning(f"Unable to display image gallery: {exc}")
370
+
371
+
372
  def content_input_images(col_left, col_right):
373
  st.write('---')
374
  # col1, col2 = st.columns([2,8])
 
392
 
393
  with col_right:
394
  if st.session_state.is_hf:
395
+ handle_image_upload_and_gallery_hf(uploaded_files)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
  else:
398
+ handle_image_upload_and_gallery()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  def list_jpg_files(directory_path):
401
  jpg_count = 0