File size: 4,975 Bytes
8408dd3
720d97e
8408dd3
 
720d97e
8408dd3
 
720d97e
 
 
 
8408dd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126529d
8408dd3
126529d
 
 
720d97e
 
f85fb1d
 
 
 
 
 
720d97e
 
f85fb1d
720d97e
8408dd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
720d97e
8408dd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os
import fitz  # PyMuPDF
from PIL import Image
from gtts import gTTS
import pygame  # Import pygame
import gradio as gr
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyPDFLoader

# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()

# Global mutable state shared across the Gradio callbacks below.
count = 0          # number of PDFs processed; 0 means the chain is not built yet
n = 0              # page index of the latest answer's source document (read by render_file)
chat_history = []  # (question, answer) pairs fed back into the retrieval chain
chain = ''         # ConversationalRetrievalChain once built; placeholder '' until then

def set_api_key(api_key):
    """Store *api_key* in the OPENAI_API_KEY environment variable and confirm."""
    os.environ.update({'OPENAI_API_KEY': api_key})
    return 'OpenAI API key is set'

def enable_api_box():
    """Callback stub for re-enabling the API-key input box; currently a no-op."""
    return None

def add_text(history, text):
    """Append *text* as a new (user message, empty reply) entry to *history*.

    Raises gr.Error if *text* is empty, so the UI rejects blank submissions.
    """
    if text:
        history.append((text, ''))
        return history
    raise gr.Error('Enter text')

# Function to process the PDF file and create a conversation chain
def process_file(file):
    """Load *file* (an uploaded PDF), embed it into a Chroma vector store,
    and return a ConversationalRetrievalChain over it.

    Raises gr.Error when no OpenAI API key is configured.
    """
    api_key = os.getenv('OPENAI_API_KEY')
    if api_key is None:
        raise gr.Error('OpenAI API key not found in environment variables or .env file')

    loader = PyPDFLoader(file.name)
    documents = loader.load()

    # Re-export the key so downstream LangChain clients pick it up.
    os.environ['OPENAI_API_KEY'] = api_key
    # SECURITY: never print the key itself -- it is a secret and would leak
    # into logs. Only confirm that one was found.
    print("API key loaded from environment")

    # Assuming OpenAIEmbeddings uses the environment variable
    embeddings = OpenAIEmbeddings()

    pdf_search = Chroma.from_documents(documents, embeddings)

    # k=1: retrieve only the single most relevant chunk per question.
    chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.3),
                                                  retriever=pdf_search.as_retriever(search_kwargs={"k": 1}),
                                                  return_source_documents=True)
    return chain

# Function to generate a response based on the chat history and query
def generate_response(history, query, btn):
    """Answer *query* against the uploaded PDF and speak the answer aloud.

    Args:
        history: Chatbot display history; the last entry's empty reply slot
            (added by add_text) is filled with the model's answer.
        query: The user's question.
        btn: The upload-button value (the uploaded PDF); required.

    Returns:
        (updated history, " ") -- the blank string clears the textbox.
    """
    global count, n, chat_history, chain

    if not btn:
        raise gr.Error(message='Upload a PDF')
    # Build the retrieval chain once, on the first question only.
    if count == 0:
        chain = process_file(btn)
        count += 1

    result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
    chat_history.append((query, result["answer"]))
    # Remember which page the answer came from so render_file can display it.
    # Read the Document's metadata directly instead of positionally unpacking
    # the object (fragile); default to page 0 if no page number was recorded.
    n = result['source_documents'][0].metadata.get('page', 0)

    # Fill the pending reply slot. Rebuilding the entry works whether Gradio
    # passes tuples or lists -- tuples do not support the item assignment the
    # old character-by-character loop relied on.
    history[-1] = (history[-1][0], history[-1][1] + result["answer"])

    # Generate speech from the answer
    generate_speech(result["answer"])

    return history, " "

def render_file(file):
    """Rasterize the current source page (global *n*) of *file* as a PIL image."""
    global n
    doc = fitz.open(file.name)
    # 300/72 scales from PDF points (72 dpi) up to a 300 dpi preview.
    zoom = fitz.Matrix(300 / 72, 300 / 72)
    pixmap = doc[n].get_pixmap(matrix=zoom)
    return Image.frombytes('RGB', [pixmap.width, pixmap.height], pixmap.samples)

def generate_speech(text):
    """Synthesize *text* as English speech, save it to output.mp3, and play it."""
    gTTS(text=text, lang='en').save("output.mp3")
    play_sound("output.mp3")

def play_sound(file_path):
    """Play the audio file at *file_path*, blocking until playback completes.

    If the mixer cannot be initialized (e.g. no audio device on the host),
    print a notice and return without playing.
    """
    try:
        pygame.mixer.init()
    except pygame.error:
        print("Unable to initialize audio device. Audio playback will be disabled.")
        return

    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()
    # Poll at ~10 Hz until playback finishes. The Clock is created once and
    # reused; the original allocated a fresh Clock on every loop iteration.
    clock = pygame.time.Clock()
    while pygame.mixer.music.get_busy():
        clock.tick(10)

# Additional cleanup to remove temporary files
def cleanup():
    """Remove the temporary text-to-speech audio file, if present."""
    # EAFP: attempting the removal directly avoids the TOCTOU race between an
    # os.path.exists() check and os.remove().
    try:
        os.remove("output.mp3")
    except FileNotFoundError:
        pass

def create_demo():
    """Build the Gradio Blocks UI for the PDF chatbot.

    Returns:
        (demo, chatbot, show_img, text_input, submit_btn, upload_btn) -- the
        Blocks app plus each component the caller wires event handlers onto.
    """
    with gr.Blocks(title="PDF Chatbot", theme="Soft") as demo:
        with gr.Column():
            with gr.Row():
                # Chat transcript on the left, rendered PDF page on the right.
                chatbot = gr.Chatbot(value=[], elem_id='chatbot', height=680)
                show_img = gr.Image(label='PDF Preview', height=680)

        with gr.Row():
            # NOTE(review): fractional `scale` values work on older Gradio
            # releases; newer versions expect integers -- confirm the pinned
            # gradio version before changing.
            with gr.Column(scale=0.60):
                text_input = gr.Textbox(
                    show_label=False,
                    placeholder="Ask your pdf?",
                    container=False
                )

            with gr.Column(scale=0.20):
                submit_btn = gr.Button('Send')

            with gr.Column(scale=0.20):
                upload_btn = gr.UploadButton("📁 Upload PDF", file_types=[".pdf"])

        return demo, chatbot, show_img, text_input, submit_btn, upload_btn

if __name__ == '__main__':
    # Create the UI components
    demo, chatbot, show_img, txt, submit_btn, btn = create_demo()

    # Set up the Gradio UI
    with demo:
        # Upload PDF file and render it as an image
        btn.upload(render_file, inputs=[btn], outputs=[show_img])

        # Add text to chat history, generate response, and render file.
        # Each .success() stage runs only if the previous one did not raise,
        # so a blank question (add_text raises) never reaches the model.
        submit_btn.click(add_text, inputs=[chatbot, txt], outputs=[chatbot], queue=False).\
            success(generate_response, inputs=[chatbot, txt, btn], outputs=[chatbot, txt]).\
            success(render_file, inputs=[btn], outputs=[show_img])

    # Launch the app with text-to-speech cleanup
    try:
        demo.launch(share=True)
    finally:
        # Always delete the temporary output.mp3, even if launch() raises.
        cleanup()