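"""PDF chatbot: upload a PDF, ask questions over it through a LangChain
ConversationalRetrievalChain backed by a Chroma vector store, preview the page
each answer was drawn from, and read the answer aloud with gTTS."""
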
import os
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyPDFLoader
import fitz  # PyMuPDF, used to render PDF pages as images
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from PIL import Image
from gtts import gTTS
import playsound  # often pinned to playsound==1.2.2; newer releases misbehave on some platforms
import gradio as gr
from dotenv import load_dotenv
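
# Dependencies inferred from the imports above (the source pins no versions):
# langchain, langchain-community, langchain-openai, chromadb, pymupdf, pillow,
# gtts, playsound, gradio, python-dotenv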

# Load environment variables from .env file
load_dotenv()
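# Expected .env entry (example): OPENAI_API_KEY=sk-...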

# Global state shared across the Gradio callbacks
count = 0          # number of PDFs processed; the chain is built on the first question
n = 0              # page number of the most recently cited source document
chat_history = []  # (question, answer) pairs fed back into the chain
chain = None       # ConversationalRetrievalChain, created lazily by generate_response

# Function to set the OpenAI API key
def set_api_key(api_key):
    os.environ['OPENAI_API_KEY'] = api_key
    return 'OpenAI API key is set'

# Unused stub; the UI below reads the API key from the environment instead
def enable_api_box():
    return

# Function to add text to the chat history
def add_text(history, text):
    if not text:
        raise gr.Error('Enter text')
    # Append a list, not a tuple: generate_response mutates the answer slot in place
    history.append([text, ''])
    return history

# Function to process the PDF file and create a conversation chain
def process_file(file):
    api_key = os.getenv('OPENAI_API_KEY')
    if api_key is None:
        raise gr.Error('OpenAI API key not found in environment variables or .env file')

    loader = PyPDFLoader(file.name)
    documents = loader.load()  # PyPDFLoader yields one Document per page

    # OpenAIEmbeddings and ChatOpenAI read OPENAI_API_KEY from the environment,
    # so nothing more needs to be set here; the key is a secret, so never print it
    embeddings = OpenAIEmbeddings()

    # Embed the pages and index them in an in-memory Chroma vector store
    pdf_search = Chroma.from_documents(documents, embeddings)

    # Retrieve only the best-matching page (k=1) so the preview pane can show
    # exactly one source page per answer
    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0.3),
        retriever=pdf_search.as_retriever(search_kwargs={"k": 1}),
        return_source_documents=True)
    return chain

# Function to generate a response based on the chat history and query
def generate_response(history, query, btn):
    global count, n, chat_history, chain

    if not btn:
        raise gr.Error(message='Upload a PDF')
    if count == 0:
        # Build the retrieval chain once, on the first question
        chain = process_file(btn)
        count += 1

    result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
    chat_history.append((query, result["answer"]))
    n = result['source_documents'][0].metadata['page']  # page cited by the answer, used by render_file

    history[-1][1] = result['answer']  # fill in the answer slot of the latest chat entry

    # Generate speech from the answer
    generate_speech(result["answer"])

    return history, ""  # empty string clears the input textbox

# Function to render a specific page of a PDF file as an image
def render_file(file):
    global n
    doc = fitz.open(file.name)
    page = doc[n]  # n is set by generate_response to the cited page
    # Render at 300 DPI (PyMuPDF's default is 72)
    pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
    image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    return image

# Function to generate speech from text
def generate_speech(text):
    tts = gTTS(text=text, lang='en')
    tts.save("output.mp3")
    playsound.playsound("output.mp3")  # blocks until playback finishes

# Additional cleanup to remove temporary files
def cleanup():
    if os.path.exists("output.mp3"):
        os.remove("output.mp3")


def create_demo():
    with gr.Blocks(title="PDF Chatbot", theme=gr.themes.Soft()) as demo:
        with gr.Column():
            with gr.Row():
                chatbot = gr.Chatbot(value=[], elem_id='chatbot', height=680)
                show_img = gr.Image(label='PDF Preview', height=680)

        with gr.Row():
            with gr.Column(scale=6):  # relative widths; Gradio expects integer scales
                text_input = gr.Textbox(
                    show_label=False,
                    placeholder="Ask your PDF a question",
                    container=False
                )

            with gr.Column(scale=2):
                submit_btn = gr.Button('Send')

            with gr.Column(scale=2):
                upload_btn = gr.UploadButton("📁 Upload PDF", file_types=[".pdf"])

        return demo, chatbot, show_img, text_input, submit_btn, upload_btn
    
if __name__ == '__main__':
    # Create the UI components
    demo, chatbot, show_img, txt, submit_btn, btn = create_demo()

    # Set up the Gradio UI
    with demo:
        # Upload PDF file and render it as an image
        btn.upload(render_file, inputs=[btn], outputs=[show_img])

        # Add text to chat history, generate response, and render file
        submit_btn.click(add_text, inputs=[chatbot, txt], outputs=[chatbot], queue=False).\
            success(generate_response, inputs=[chatbot, txt, btn], outputs=[chatbot, txt]).\
            success(render_file, inputs=[btn], outputs=[show_img])

    # Launch the app with text-to-speech cleanup
    try:
        demo.launch(share=True)
    finally:
        cleanup()
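
# Usage sketch: run this script with Python, open the printed local or share
# URL, upload a PDF with the upload button, then type a question and hit Send.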