Multimodal-PDF-Chatbot

Running

Multimodal-PDF-Chatbot / utils.py

bug fixes and improvement

e014b81 unverified 6 months ago

1.39 kB

	import pymupdf
	from PIL import Image
	import io
	import gradio as gr
	import base64
	import pandas as pd
	import pymupdf


	def image_to_bytes(image):
	    """Return *image* encoded as PNG and wrapped in base64 text.

	    Args:
	        image: Object exposing ``save(fp, format=...)``; presumably a
	            ``PIL.Image.Image`` -- confirm against callers.

	    Returns:
	        The PNG bytes, base64-encoded and decoded to a UTF-8 ``str``
	        (despite the function name, the result is text, not ``bytes``).
	    """
	    with io.BytesIO() as png_buffer:
	        image.save(png_buffer, format="PNG")
	        raw_png = png_buffer.getvalue()
	    encoded = base64.b64encode(raw_png)
	    return str(encoded, "utf-8")


	def extract_pdfs(docs, doc_collection):
	    """Replace the stored document collection with a fresh upload.

	    Args:
	        docs: Iterable of uploaded document paths (strings using ``/`` as
	            separator), or ``None``/empty when nothing was uploaded.
	        doc_collection: The collection currently held by the caller. It is
	            returned unchanged when ``docs`` is empty or ``None``.

	    Returns:
	        A 3-tuple of:
	        * the document collection (a new list containing ``docs`` when any
	          were supplied, otherwise the ``doc_collection`` passed in),
	        * ``gr.Tabs(selected=1)`` -- presumably switches the UI to the tab
	          with id 1; confirm against the layout definition,
	        * a one-column ``DataFrame`` (``"Filename"``) holding the base name
	          of every path in ``docs``.
	    """
	    # A cleared upload widget may hand us None; normalise once so neither the
	    # collection update nor the filename table crashes on it.
	    uploaded = list(docs) if docs else []

	    if uploaded:
	        # A non-empty upload replaces (does not append to) the old collection.
	        doc_collection = uploaded

	    filenames = [path.split("/")[-1] for path in uploaded]
	    return (
	        doc_collection,
	        gr.Tabs(selected=1),
	        pd.DataFrame(filenames, columns=["Filename"]),
	    )


	def extract_images(docs):
	    """Extract every embedded image from the given documents.

	    Args:
	        docs: Iterable of document paths accepted by ``pymupdf.open``.
	            ``None`` is treated as an empty iterable.

	    Returns:
	        A list of ``PIL.Image.Image`` objects, one per image reference found
	        on each page, in document/page order. Each image is re-encoded as
	        JPEG before being opened with PIL. An image referenced on several
	        pages appears once per reference.
	    """
	    images = []
	    for doc_path in docs or ():
	        # The context manager closes the document (and its file handle) even
	        # if extraction of one image raises.
	        with pymupdf.open(doc_path) as doc:
	            for page in doc:
	                for img in page.get_images():
	                    xref = img[0]  # first item of the image entry is its xref
	                    pix = pymupdf.Pixmap(doc, xref)

	                    # More than 3 colour components (e.g. CMYK): convert to
	                    # RGB before encoding.
	                    if pix.n - pix.alpha > 3:
	                        pix = pymupdf.Pixmap(pymupdf.csRGB, pix)

	                    # The JPEG bytes live in their own buffer, so the PIL
	                    # image stays usable after the document is closed.
	                    images.append(Image.open(io.BytesIO(pix.pil_tobytes("JPEG"))))
	    return images


	def clean_text(text):
	    """Collapse whitespace in *text* to single spaces and trim both ends.

	    Newlines, tabs and runs of spaces are all reduced to one space, so
	    ``"a\\n\\nb"`` becomes ``"a b"`` rather than keeping a double space.

	    Args:
	        text: The string to normalise.

	    Returns:
	        The normalised string; empty if *text* contains only whitespace.
	    """
	    # split() with no argument splits on any whitespace run and drops
	    # leading/trailing whitespace, so a single join does the whole job.
	    return " ".join(text.split())