import os

import fitz  # PyMuPDF, used to render PDF pages as images
import gradio as gr
import playsound
from dotenv import load_dotenv
from gtts import gTTS
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from PIL import Image
# Load environment variables from .env file
load_dotenv()
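# A minimal .env file for this app would contain a single line
# (illustrative value shown, not a real key):
# OPENAI_API_KEY=sk-...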
# Global state shared across Gradio callbacks
count = 0          # number of questions asked so far (0 means no chain built yet)
n = 0              # page number of the latest answer's source document
chat_history = []  # (question, answer) pairs passed back to the chain
chain = None       # ConversationalRetrievalChain, built on the first question
# Function to set the OpenAI API key
def set_api_key(api_key):
    os.environ['OPENAI_API_KEY'] = api_key
    return 'OpenAI API key is set'
# Function to enable the API key input box (stub: returns no update)
def enable_api_box():
    return
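# Note: set_api_key and enable_api_box are defined but not wired into the UI
# below; the key is read from the environment (via .env) in process_file.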
# Function to add the user's question to the chat history
def add_text(history, text):
    if not text:
        raise gr.Error('Enter text')
    # Append as a list, not a tuple, so the answer can later be filled in place
    history.append([text, ''])
    return history
# Function to process the PDF file and create a conversation chain
def process_file(file):
    api_key = os.getenv('OPENAI_API_KEY')
    if api_key is None:
        raise gr.Error('OpenAI API key not found in environment variables or .env file')
    # Load the PDF as one document per page
    loader = PyPDFLoader(file.name)
    documents = loader.load()
    # OpenAIEmbeddings and ChatOpenAI read OPENAI_API_KEY from the environment,
    # so the key is not passed explicitly (or printed) here
    embeddings = OpenAIEmbeddings()
    pdf_search = Chroma.from_documents(documents, embeddings)
    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0.3),
        retriever=pdf_search.as_retriever(search_kwargs={"k": 1}),
        return_source_documents=True,
    )
    return chain
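# Note: Chroma.from_documents builds a fresh in-memory index, so each uploaded
# PDF is re-embedded on its first question; search_kwargs={"k": 1} retrieves
# only the single most relevant chunk, which is what lets the app point the
# PDF preview at exactly one source page.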
# Function to generate a response based on the chat history and query
def generate_response(history, query, btn):
    global count, n, chat_history, chain
    if not btn:
        raise gr.Error(message='Upload a PDF')
    # Build the conversation chain once, on the first question
    if count == 0:
        chain = process_file(btn)
        count += 1
    result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
    chat_history.append((query, result["answer"]))
    # Remember which page the top source document came from, for the preview
    n = result['source_documents'][0].metadata['page']
    history[-1][-1] = result["answer"]
    # Generate speech from the answer
    generate_speech(result["answer"])
    # Return the updated history and clear the input textbox
    return history, ""
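# Note: chat_history accumulates every (question, answer) pair for the session
# and is sent to the chain on each call, so long sessions grow the prompt.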
# Function to render a specific page of a PDF file as an image
def render_file(file):
    global n
    doc = fitz.open(file.name)
    # n is the source page of the latest answer (0 before the first question)
    page = doc[n]
    # Render at 300 DPI; PDFs default to 72 DPI, hence the 300/72 scale matrix
    pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
    image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    return image
# Function to generate speech from text
def generate_speech(text):
    tts = gTTS(text=text, lang='en')
    tts.save("output.mp3")
    playsound.playsound("output.mp3")
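# Note: gTTS needs network access (it calls Google's TTS endpoint), and
# playsound plays the MP3 on the machine running this script, so the spoken
# answer is only audible when the app runs locally, not to remote browser users.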
# Additional cleanup to remove the temporary speech file
def cleanup():
    if os.path.exists("output.mp3"):
        os.remove("output.mp3")
def create_demo():
    with gr.Blocks(title="PDF Chatbot", theme=gr.themes.Soft()) as demo:
        with gr.Column():
            with gr.Row():
                chatbot = gr.Chatbot(value=[], elem_id='chatbot', height=680)
                show_img = gr.Image(label='PDF Preview', height=680)
            with gr.Row():
                # Integer scales give the textbox ~3x the width of each button
                with gr.Column(scale=3):
                    text_input = gr.Textbox(
                        show_label=False,
                        placeholder="Ask your PDF a question",
                        container=False,
                    )
                with gr.Column(scale=1):
                    submit_btn = gr.Button('Send')
                with gr.Column(scale=1):
                    upload_btn = gr.UploadButton("📁 Upload PDF", file_types=[".pdf"])
    return demo, chatbot, show_img, text_input, submit_btn, upload_btn
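# Note: create_demo only builds the layout; the event handlers are attached in
# the __main__ block below, so components and callbacks are wired in one place.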
if __name__ == '__main__':
    # Create the UI components
    demo, chatbot, show_img, txt, submit_btn, btn = create_demo()
    # Set up the Gradio event handlers
    with demo:
        # Upload a PDF file and render its first page as an image
        btn.upload(render_file, inputs=[btn], outputs=[show_img])
        # Add the question to the chat, generate a response, then re-render
        # the page the answer was drawn from
        submit_btn.click(add_text, inputs=[chatbot, txt], outputs=[chatbot], queue=False).\
            success(generate_response, inputs=[chatbot, txt, btn], outputs=[chatbot, txt]).\
            success(render_file, inputs=[btn], outputs=[show_img])
    # Launch the app; remove the temporary speech file on shutdown
    try:
        demo.launch(share=True)
    finally:
        cleanup()