# Wise_Chat_labs / app.py
# parvezalmuqtadir — Update app.py (commit 126529d, verified)
import os
import fitz # PyMuPDF
from PIL import Image
from gtts import gTTS
import pygame # Import pygame
import gradio as gr
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyPDFLoader
# Load environment variables from .env file (e.g. OPENAI_API_KEY)
load_dotenv()

# Module-level mutable state shared across the Gradio callbacks below.
count = 0          # number of times a PDF has been processed (0 = chain not built yet)
n = 0              # page index of the most recent answer's source document; read by render_file
chat_history = []  # (question, answer) pairs fed back into the retrieval chain
chain = ''         # ConversationalRetrievalChain once built by process_file; '' until then
def set_api_key(api_key):
    """Store *api_key* in the process environment and confirm it was set."""
    os.environ.update({'OPENAI_API_KEY': api_key})
    return 'OpenAI API key is set'
def enable_api_box():
    """Placeholder callback for the API-key input box; produces no update."""
    return None
def add_text(history, text):
    """Append the user's message to *history* with an empty bot-reply slot.

    Raises gr.Error when *text* is empty, so Gradio shows a validation toast.
    """
    if text:
        history.append((text, ''))
        return history
    raise gr.Error('Enter text')
def process_file(file):
    """Load a PDF and build a conversational retrieval chain over its contents.

    Parameters:
        file: uploaded-file object exposing the PDF's path via ``.name``.

    Returns:
        ConversationalRetrievalChain using k=1 retrieval and returning
        source documents (generate_response reads the source page number).

    Raises:
        gr.Error: when no OpenAI API key is present in the environment.
    """
    if os.getenv('OPENAI_API_KEY') is None:
        raise gr.Error('OpenAI API key not found in environment variables or .env file')
    documents = PyPDFLoader(file.name).load()
    # Security fix: the original printed the API key to stdout and redundantly
    # re-set OPENAI_API_KEY to its own value; both removed. OpenAIEmbeddings
    # and ChatOpenAI read the key from the environment directly.
    embeddings = OpenAIEmbeddings()
    pdf_search = Chroma.from_documents(documents, embeddings)
    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0.3),
        retriever=pdf_search.as_retriever(search_kwargs={"k": 1}),
        return_source_documents=True,
    )
    return chain
def generate_response(history, query, btn):
    """Answer *query* against the uploaded PDF and update the chat transcript.

    Parameters:
        history: chatbot transcript; last entry is (query, '') from add_text.
        query: the user's question.
        btn: the upload-button value (the uploaded file); falsy if no PDF yet.

    Returns:
        (updated history, " ") — the blank string clears the textbox.

    Raises:
        gr.Error: when no PDF has been uploaded.
    """
    global count, n, chat_history, chain
    if not btn:
        raise gr.Error(message='Upload a PDF')
    if count == 0:
        # Build the retrieval chain once, on the first question.
        chain = process_file(btn)
        count += 1
    result = chain({"question": query, 'chat_history': chat_history},
                   return_only_outputs=True)
    chat_history.append((query, result["answer"]))
    # Remember which page the top source document came from so render_file
    # can display it. Direct attribute access replaces the original fragile
    # list(document)[1][1]['page'] traversal of the Document object.
    n = result['source_documents'][0].metadata['page']
    # Bug fix: add_text appends a *tuple*, so the original per-character
    # `history[-1][-1] += char` raised TypeError (tuples are immutable).
    # Replace the last entry wholesale instead of mutating it in place.
    user_msg, bot_msg = history[-1]
    history[-1] = (user_msg, bot_msg + result["answer"])
    # Speak the answer aloud (best-effort; no-op on headless hosts).
    generate_speech(result["answer"])
    return history, " "
def render_file(file):
    """Render page *n* (module-global source-page index) of the PDF as a PIL image.

    Parameters:
        file: uploaded-file object exposing the PDF's path via ``.name``.

    Returns:
        PIL.Image of the page rasterized at 300 DPI.
    """
    global n
    # Resource-leak fix: the original never closed the fitz document; the
    # context manager releases the file handle when rendering is done.
    with fitz.open(file.name) as doc:
        page = doc[n]
        # 300/72 scales from PDF points (72 per inch) up to 300 DPI.
        pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
        # frombytes copies the pixel buffer, so the image safely outlives doc.
        return Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
def generate_speech(text):
    """Synthesize *text* as English speech, save it to output.mp3, and play it."""
    audio_path = "output.mp3"
    gTTS(text=text, lang='en').save(audio_path)
    play_sound(audio_path)
def play_sound(file_path):
    """Play an audio file through pygame, blocking until playback finishes.

    Best-effort: when no audio device is available (e.g. a headless Spaces
    host), prints a notice and returns without playing.
    """
    try:
        pygame.mixer.init()
    except pygame.error:
        # Deliberate best-effort fallback, not an error condition.
        print("Unable to initialize audio device. Audio playback will be disabled.")
        return
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()
    # Perf fix: create the Clock once instead of allocating a new one on
    # every iteration of the polling loop.
    clock = pygame.time.Clock()
    while pygame.mixer.music.get_busy():
        clock.tick(10)  # cap the busy-wait at 10 polls per second
def cleanup():
    """Delete the temporary TTS output file if it exists.

    Uses EAFP (try/remove) instead of the original exists()-then-remove()
    pair, which had a check-then-act race if the file vanished in between.
    """
    try:
        os.remove("output.mp3")
    except FileNotFoundError:
        pass  # already gone — nothing to clean up
def create_demo():
    """Build the Gradio Blocks UI for the PDF chatbot.

    Returns:
        (demo, chatbot, show_img, text_input, submit_btn, upload_btn) so the
        caller can wire event handlers onto the components.
    """
    # NOTE(review): the source arrived with flattened indentation; the nesting
    # below (second Row as a sibling of the Column) matches the common layout
    # of this app — confirm against the original deployment.
    with gr.Blocks(title="PDF Chatbot", theme="Soft") as demo:
        with gr.Column():
            with gr.Row():
                # Chat transcript on the left, rendered PDF page on the right.
                chatbot = gr.Chatbot(value=[], elem_id='chatbot', height=680)
                show_img = gr.Image(label='PDF Preview', height=680)
        with gr.Row():
            # Fractional scales (0.60/0.20/0.20) apportion the row's width;
            # this is gradio 3.x-style — newer gradio expects integer scales.
            with gr.Column(scale=0.60):
                text_input = gr.Textbox(
                    show_label=False,
                    placeholder="Ask your pdf?",
                    container=False
                )
            with gr.Column(scale=0.20):
                submit_btn = gr.Button('Send')
            with gr.Column(scale=0.20):
                upload_btn = gr.UploadButton("📁 Upload PDF", file_types=[".pdf"])
    return demo, chatbot, show_img, text_input, submit_btn, upload_btn
if __name__ == '__main__':
    # Create the UI components.
    demo, chatbot, show_img, txt, submit_btn, btn = create_demo()
    # Wire event handlers inside the Blocks context.
    with demo:
        # On PDF upload: render the current page (global n, initially 0).
        btn.upload(render_file, inputs=[btn], outputs=[show_img])
        # On send: append the question, generate the answer, then re-render
        # the PDF at the page the answer was sourced from.
        submit_btn.click(add_text, inputs=[chatbot, txt], outputs=[chatbot], queue=False).\
            success(generate_response, inputs=[chatbot, txt, btn], outputs=[chatbot, txt]).\
            success(render_file, inputs=[btn], outputs=[show_img])
    # Launch the app; always remove the temporary TTS mp3 on shutdown.
    try:
        demo.launch(share=True)
    finally:
        cleanup()