shukdevdatta123 committed on
Commit
6648da4
·
verified ·
1 Parent(s): b22d203

Upload 2 files

Browse files
Files changed (2) hide show
  1. generate_answer.py +95 -0
  2. helpers.py +42 -0
generate_answer.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from glob import glob
3
+ import openai
4
+ from dotenv import load_dotenv
5
+
6
+ from langchain.embeddings import OpenAIEmbeddings
7
+ from langchain.vectorstores import Chroma
8
+ from langchain.document_loaders import PyPDFLoader
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+
11
+ from langchain_community.chat_models import ChatOpenAI
12
+ from langchain.chains import RetrievalQA
13
+ from langchain.memory import ConversationBufferMemory
14
+
15
# Load variables from a local .env file (if one exists) into the process
# environment, then hand the OpenAI key (None when unset) to the SDK.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = api_key
18
+
19
+ # Helper function to validate response completeness
20
def is_response_complete(response: str) -> bool:
    """Return True when *response* ends with sentence-final punctuation.

    Trailing whitespace is ignored. A response counts as complete only if
    its last visible character is one of ``.``, ``!`` or ``?``.

    An empty or whitespace-only response is reported as incomplete instead
    of raising ``IndexError`` (which indexing ``[-1]`` on an empty string
    would do).
    """
    # str.endswith accepts a tuple and is safe on the empty string.
    return response.strip().endswith((".", "!", "?"))
22
+
23
+ # Retry mechanism for incomplete responses
24
def retry_response(messages):
    """Request a fresh completion for *messages* and make sure it reads as finished.

    The same messages are sent to ``gpt-3.5-turbo`` once more. If the new
    answer still fails ``is_response_complete``, a fixed closing sentence is
    appended to it before it is returned.
    """
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    answer = completion.choices[0].message['content']
    if is_response_complete(answer):
        return answer
    return answer + " This is the end of the response. Please let me know if you need further clarification."
32
+
33
def base_model_chatbot(messages):
    """Answer a conversation with the plain chat model (no document retrieval).

    A fixed system instruction is placed in front of *messages* and the
    combined list is sent to ``gpt-3.5-turbo``. When the reply does not pass
    ``is_response_complete``, the same combined list is handed to
    ``retry_response`` and that result is returned instead.
    """
    system_turn = {"role": "system", "content": "You are a helpful AI chatbot that provides clear, complete, and coherent responses to User's questions. Ensure your answers are in full sentences and complete the thought or idea."}
    messages = [system_turn] + messages

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    reply = completion.choices[0].message['content']

    # Only fall back to a second request when the first reply looks cut off.
    if is_response_complete(reply):
        return reply
    return retry_response(messages)
46
+
47
class VectorDB:
    """Load the PDFs of one directory and index them in a Chroma vector store."""

    def __init__(self, docs_directory: str):
        # Directory that is searched (non-recursively) for ``*.pdf`` files.
        self.docs_directory = docs_directory

    def create_vector_db(self):
        """Build and return a Chroma store from every PDF in the directory.

        The PDFs are loaded with ``PyPDFLoader``, split into chunks of 1000
        characters with 100 characters of overlap, and embedded with
        ``OpenAIEmbeddings``.
        """
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

        pdf_pattern = os.path.join(self.docs_directory, "*.pdf")
        loaders = [PyPDFLoader(path) for path in glob(pdf_pattern)]

        # Flatten the per-file document lists into a single list.
        documents = [doc for loader in loaders for doc in loader.load()]
        pieces = splitter.split_documents(documents)

        return Chroma.from_documents(pieces, OpenAIEmbeddings())
66
+
67
class ConversationalRetrievalChain:
    """Configure and build a ``RetrievalQA`` chain over a directory of PDFs.

    Args:
        model_name: Chat model passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        docs_directory: Directory whose ``*.pdf`` files are indexed
            (keyword-only; defaults to ``"docs/"``).
    """

    # Instruction placed at the top of the question-answering prompt.
    # ``ChatOpenAI`` has no ``system_prompt`` field, so the text cannot be
    # passed to the model constructor; it is injected through the chain's
    # prompt template instead. It must not contain ``{`` or ``}`` because it
    # becomes part of a format-style template.
    SYSTEM_PROMPT = (
        "You are a knowledgeable AI that answers questions based on provided documents. "
        "Always give responses in clear, complete sentences."
    )

    def __init__(self, model_name="gpt-3.5-turbo", temperature=0, *, docs_directory="docs/"):
        self.model_name = model_name
        self.temperature = temperature
        self.docs_directory = docs_directory

    def create_chain(self):
        """Return a ``RetrievalQA`` chain with buffer memory and a top-2 retriever.

        The retriever is backed by a vector store built from
        ``self.docs_directory`` and returns the 2 most similar chunks.
        """
        # Local import: the prompt class is only needed here and the file's
        # top-level imports do not provide it.
        from langchain.prompts import PromptTemplate

        model = ChatOpenAI(
            model_name=self.model_name,
            temperature=self.temperature,
        )
        # The default "stuff" chain fills ``context`` with the retrieved
        # chunks and ``question`` with the user's query.
        prompt = PromptTemplate(
            input_variables=["context", "question"],
            template=(
                self.SYSTEM_PROMPT
                + "\n\nContext:\n{context}\n\nQuestion: {question}\nAnswer:"
            ),
        )
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        vector_db = VectorDB(self.docs_directory)
        retriever = vector_db.create_vector_db().as_retriever(
            search_type="similarity",
            search_kwargs={"k": 2},
        )
        return RetrievalQA.from_chain_type(
            llm=model,
            retriever=retriever,
            memory=memory,
            chain_type_kwargs={"prompt": prompt},
        )
88
+
89
def with_pdf_chatbot(messages):
    """Answer the most recent message using the PDF-backed retrieval chain.

    Only the ``content`` of the last entry in *messages* is used as the
    query. A new chain is created on every call. If the chain's answer does
    not pass ``is_response_complete``, a fixed closing sentence is appended.
    """
    question = messages[-1]['content'].strip()
    chain = ConversationalRetrievalChain().create_chain()
    answer = chain({"query": question})['result']
    if is_response_complete(answer):
        return answer
    return answer + " This is the end of the response. Let me know if you need further clarification."
helpers.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import streamlit as st
3
+ import os
4
+ import openai
5
+ from dotenv import load_dotenv
6
+ from gtts import gTTS
7
+
8
+ # Function to accept OpenAI API Key as input from the user
9
def get_api_key():
    """Prompt for an OpenAI API key in a Streamlit password field.

    Returns the entered key after registering it on the ``openai`` module,
    or ``None`` when the field is empty.
    """
    entered_key = st.text_input("Enter your OpenAI API Key", type="password")
    if not entered_key:
        return None
    openai.api_key = entered_key
    return entered_key
16
+
17
def speech_to_text(audio_data):
    """Transcribe an audio file to text with OpenAI's ``whisper-1`` model.

    *audio_data* is passed to ``open`` in binary mode, so it must be a path
    to the audio file. Returns the ``"text"`` field of the transcription.
    """
    with open(audio_data, "rb") as stream:
        transcription = openai.Audio.transcribe(model="whisper-1", file=stream)
    return transcription["text"]
25
+
26
def text_to_speech(input_text):
    """Synthesize *input_text* as English speech with gTTS.

    The audio is always saved to ``temp_audio_play.mp3``, and that path is
    returned.
    """
    output_path = "temp_audio_play.mp3"
    gTTS(text=input_text, lang="en").save(output_path)
    return output_path
32
+
33
def autoplay_audio(file_path: str):
    """Embed the audio file at *file_path* in the Streamlit page with autoplay.

    The file's bytes are base64-encoded into a ``data:audio/mp3`` URI inside
    an ``<audio autoplay>`` element, which is rendered through
    ``st.markdown`` with raw HTML enabled.
    """
    with open(file_path, "rb") as audio:
        encoded = base64.b64encode(audio.read()).decode("utf-8")
    md = f"""
    <audio autoplay>
    <source src="data:audio/mp3;base64,{encoded}" type="audio/mp3">
    </audio>
    """
    st.markdown(md, unsafe_allow_html=True)