parvezalmuqtadir committed on
Commit
8408dd3
•
1 Parent(s): 5d43753

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -0
app.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain.embeddings.openai import OpenAIEmbeddings
3
+ from langchain_community.vectorstores import Chroma
4
+ from langchain.chains import ConversationalRetrievalChain
5
+ from langchain_community.chat_models import ChatOpenAI
6
+ from langchain_community.document_loaders import PyPDFLoader
7
+ import fitz
8
+ from PIL import Image
9
+ from gtts import gTTS
10
+ import playsound
11
+ import gradio as gr
12
+ from dotenv import load_dotenv
13
+
14
# Load environment variables from a .env file so that os.getenv() calls
# made later in this module can see them.
load_dotenv()
16
+
17
# Global variables shared between the Gradio callbacks
count = 0          # incremented each time a retrieval chain is built
n = 0              # index of the PDF page rendered in the preview
chat_history = []  # (question, answer) pairs fed back into the chain
chain = None       # conversational retrieval chain; assigned on first question
22
+
23
+ # Function to set the OpenAI API key
24
def set_api_key(api_key):
    """Store the OpenAI API key in the process environment.

    Args:
        api_key: Key to place in the ``OPENAI_API_KEY`` environment variable.

    Returns:
        A fixed confirmation message suitable for display in the UI.
    """
    os.environ['OPENAI_API_KEY'] = api_key
    return 'OpenAI API key is set'
27
+
28
+ # Function to enable the API key input box
29
def enable_api_box():
    """Placeholder callback for enabling the API key input box.

    Currently does nothing and returns ``None``.
    """
    return None
31
+
32
+ # Function to add text to the chat history
33
def add_text(history, text):
    """Append the user's message to the chat history with an empty answer slot.

    Args:
        history: List of (question, answer) rows shown in the chatbot; ``None``
            is treated as an empty history.
        text: The message typed by the user.

    Returns:
        The same history list with ``(text, '')`` appended.

    Raises:
        gr.Error: If ``text`` is empty.
    """
    if not text:
        raise gr.Error('Enter text')
    if history is None:
        history = []
    history.append((text, ''))
    return history
38
+
39
+ # Function to process the PDF file and create a conversation chain
40
def process_file(file):
    """Index an uploaded PDF and build a conversational retrieval chain over it.

    Args:
        file: Uploaded file object; its ``name`` attribute is used as the path
            handed to ``PyPDFLoader``.

    Returns:
        A ``ConversationalRetrievalChain`` that retrieves the single best
        matching chunk (``k=1``) and returns source documents with each answer.

    Raises:
        gr.Error: If ``OPENAI_API_KEY`` is missing or empty.
    """
    # An empty key is as unusable as a missing one. The key itself is never
    # printed or logged, because it is a secret.
    if not os.getenv('OPENAI_API_KEY'):
        raise gr.Error('OpenAI API key not found in environment variables or .env file')

    documents = PyPDFLoader(file.name).load()

    # No key is passed explicitly; OpenAIEmbeddings and ChatOpenAI are assumed
    # to pick up OPENAI_API_KEY from the environment.
    embeddings = OpenAIEmbeddings()
    pdf_search = Chroma.from_documents(documents, embeddings)

    return ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0.3),
        retriever=pdf_search.as_retriever(search_kwargs={"k": 1}),
        return_source_documents=True,
    )
61
+
62
+ # Function to generate a response based on the chat history and query
63
# Path of the PDF the current chain was built from (None until the first build).
_indexed_pdf = None


def generate_response(history, query, btn):
    """Answer ``query`` against the uploaded PDF and fill in the pending chat row.

    Args:
        history: Chatbot rows; the last row is the pending question whose
            answer slot is filled in here.
        query: The user's question.
        btn: Uploaded PDF file object (falsy when nothing has been uploaded).

    Returns:
        A ``(history, " ")`` pair: the updated chatbot rows and a blank string
        used to reset the text box.

    Raises:
        gr.Error: If no PDF has been uploaded.
    """
    global count, n, chat_history, chain, _indexed_pdf

    if not btn:
        raise gr.Error(message='Upload a PDF')

    # Build the chain on first use, and rebuild it when a different PDF is
    # uploaded; otherwise later uploads would still be answered from the first
    # document. The old conversation does not apply to a new document.
    if count == 0 or btn.name != _indexed_pdf:
        chain = process_file(btn)
        _indexed_pdf = btn.name
        chat_history = []
        count += 1

    result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
    answer = result["answer"]
    chat_history.append((query, answer))

    # Remember the page of the top source so the preview can show it. Read the
    # metadata by attribute rather than by positional unpacking, and keep the
    # previous page when no source or page number is available.
    sources = result['source_documents']
    if sources:
        n = sources[0].metadata.get('page', n)

    # Replace the pending row instead of mutating it in place, so this works
    # whether the row is a list or an (immutable) tuple.
    question, partial = history[-1][0], history[-1][1]
    history[-1] = [question, partial + answer]

    # Generate speech from the answer
    generate_speech(answer)

    return history, " "
83
+
84
+ # Function to render a specific page of a PDF file as an image
85
def render_file(file):
    """Render the currently selected page of a PDF as a PIL image.

    The page index comes from the module-level ``n``.

    Args:
        file: Uploaded file object; its ``name`` attribute is the PDF path.

    Returns:
        An RGB ``PIL.Image`` of the page rendered at 300 DPI.
    """
    # Open the document as a context manager so it is closed even if
    # rendering fails.
    with fitz.open(file.name) as doc:
        # ``n`` may still refer to a page of a previously uploaded, longer
        # PDF; clamp it to the pages this document actually has.
        page_index = max(0, min(n, len(doc) - 1))
        # PDF user space is 72 units per inch, so 300/72 scales to 300 DPI.
        pix = doc[page_index].get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
        image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    return image
92
+
93
+ # Function to generate speech from text
94
def generate_speech(text):
    """Convert ``text`` to English speech, save it as ``output.mp3`` and play it.

    Empty or whitespace-only text is skipped, because there is nothing to
    synthesise.

    Args:
        text: The text to speak.
    """
    if not text or not text.strip():
        return
    tts = gTTS(text=text, lang='en')
    tts.save("output.mp3")
    playsound.playsound("output.mp3")
98
+
99
+ # Additional cleanup to remove temporary files
100
def cleanup():
    """Delete the temporary ``output.mp3`` speech file if it exists."""
    # Attempt the removal directly rather than checking first: the file may
    # disappear between an existence check and the delete.
    try:
        os.remove("output.mp3")
    except FileNotFoundError:
        pass
103
+
104
+ import gradio as gr
105
+
106
def create_demo():
    """Build the Gradio layout for the PDF chatbot.

    The top row holds the chatbot next to the PDF page preview; the bottom row
    holds the question box, the send button and the PDF upload button.

    Returns:
        A tuple ``(demo, chatbot, show_img, text_input, submit_btn,
        upload_btn)`` so the caller can attach event handlers.
    """
    with gr.Blocks(title="PDF Chatbot", theme="Soft") as demo:
        with gr.Column():
            with gr.Row():
                chatbot = gr.Chatbot(value=[], elem_id='chatbot', height=680)
                show_img = gr.Image(label='PDF Preview', height=680)

            with gr.Row():
                # Integer scales in a 3:1:1 ratio, equivalent to 60% / 20% / 20%.
                with gr.Column(scale=3):
                    text_input = gr.Textbox(
                        show_label=False,
                        placeholder="Ask your pdf?",
                        container=False,
                    )

                with gr.Column(scale=1):
                    submit_btn = gr.Button('Send')

                with gr.Column(scale=1):
                    upload_btn = gr.UploadButton("📁 Upload PDF", file_types=[".pdf"])

    return demo, chatbot, show_img, text_input, submit_btn, upload_btn
128
+
129
if __name__ == '__main__':
    # Create the UI components
    demo, chatbot, show_img, txt, submit_btn, btn = create_demo()

    # Attach the event handlers inside the Blocks context
    with demo:
        # Upload PDF file and render it as an image
        btn.upload(render_file, inputs=[btn], outputs=[show_img])

        # Add text to chat history, generate response, and render file.
        # Each step runs only if the previous one succeeded.
        (
            submit_btn.click(add_text, inputs=[chatbot, txt], outputs=[chatbot], queue=False)
            .success(generate_response, inputs=[chatbot, txt, btn], outputs=[chatbot, txt])
            .success(render_file, inputs=[btn], outputs=[show_img])
        )

    # Launch the app; remove the text-to-speech temp file when it exits
    try:
        demo.launch(share=True)
    finally:
        cleanup()