Abhishek-D7 committed on
Commit
6fd5732
·
verified ·
1 Parent(s): 836c1ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -27
app.py CHANGED
@@ -1,5 +1,7 @@
1
- # Voice-Based Real Estate Assistant - Enhanced Version
2
- # Requirements: faster-whisper, openai, gradio, transformers, torchaudio, langdetect, langchain, langchain-community, langchain-openai, faiss-cpu, datasets
 
 
3
 
4
  import gradio as gr
5
  import torch
@@ -8,17 +10,17 @@ from langdetect import detect
8
  from transformers import pipeline
9
  import os
10
  import traceback
 
11
  from langchain.chains import ConversationalRetrievalChain
12
  from langchain_community.chat_models import ChatOpenAI
13
  from langchain.memory import ConversationBufferMemory
14
  from langchain_community.vectorstores import FAISS
15
  from langchain_openai.embeddings import OpenAIEmbeddings
16
- from langchain_community.document_loaders import TextLoader
17
  from langchain.text_splitter import CharacterTextSplitter
18
- from datasets import load_dataset
19
  import numpy as np
20
 
21
- # SETUP
22
  openai_api_key = os.getenv("OPENAI_API_KEY")
23
 
24
  # STT model
@@ -34,29 +36,28 @@ tts_models = {
34
  llm = ChatOpenAI(api_key=openai_api_key, model="gpt-3.5-turbo")
35
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
36
 
37
- # Load small FAQ dataset for RAG
38
- faq_text = """
39
- Q: What is the average price of a 2BHK in Delhi?
40
- A: The average price is around β‚Ή60–80 lakhs depending on the location.
41
-
42
- Q: Do builders offer possession-linked plans?
43
- A: Yes, many real estate projects offer possession-linked payment plans.
44
-
45
- Q: Are Noida Extension flats RERA approved?
46
- A: Most ongoing projects are RERA approved but always verify on the RERA website.
47
- """
48
- loader = TextLoader("faq.txt")
49
- with open("faq.txt", "w") as f:
50
- f.write(faq_text)
51
- documents = loader.load()
52
- text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
53
- docs = text_splitter.split_documents(documents)
54
- embeddings = OpenAIEmbeddings(api_key=openai_api_key)
55
- vectorstore = FAISS.from_documents(docs, embeddings)
56
  retriever = vectorstore.as_retriever()
57
  qa_chain = ConversationalRetrievalChain.from_llm(llm, retriever, memory=memory)
58
 
59
- #FUNCTIONS
60
  def transcribe_audio(audio_path):
61
  try:
62
  segments, _ = asr_model.transcribe(audio_path, beam_size=5)
@@ -107,7 +108,7 @@ def full_pipeline(audio):
107
  return transcription, reply, None
108
  return transcription, reply, tts_audio
109
 
110
- # GRADIO UI
111
  with gr.Blocks() as demo:
112
  gr.Markdown("# 🏠 Voice-Based Real Estate Assistant (Hindi + English)")
113
 
@@ -115,7 +116,7 @@ with gr.Blocks() as demo:
115
  audio_input = gr.Audio(type="filepath", label="🎀 Speak your real estate question")
116
 
117
  with gr.Row():
118
- transcribed_text = gr.Textbox(label="πŸ“ Transcription")
119
  llm_reply = gr.Textbox(label="πŸ€– Assistant's Reply")
120
 
121
  audio_output = gr.Audio(label="πŸ”Š Assistant's Voice")
@@ -125,3 +126,4 @@ with gr.Blocks() as demo:
125
 
126
  if __name__ == "__main__":
127
  demo.launch()
 
 
1
+ # Voice-Based Real Estate Assistant with Kaggle Dataset Integration
2
+
3
+ # Requirements: faster-whisper, openai, gradio, transformers, torchaudio, langdetect,
4
+ # langchain, langchain-community, langchain-openai, faiss-cpu, datasets, pandas
5
 
6
  import gradio as gr
7
  import torch
 
10
  from transformers import pipeline
11
  import os
12
  import traceback
13
+ import pandas as pd
14
  from langchain.chains import ConversationalRetrievalChain
15
  from langchain_community.chat_models import ChatOpenAI
16
  from langchain.memory import ConversationBufferMemory
17
  from langchain_community.vectorstores import FAISS
18
  from langchain_openai.embeddings import OpenAIEmbeddings
19
+ from langchain.docstore.document import Document
20
  from langchain.text_splitter import CharacterTextSplitter
 
21
  import numpy as np
22
 
23
+ # ---- SETUP ----
24
  openai_api_key = os.getenv("OPENAI_API_KEY")
25
 
26
  # STT model
 
36
  llm = ChatOpenAI(api_key=openai_api_key, model="gpt-3.5-turbo")
37
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
38
 
39
# ---- LOAD OR BUILD VECTORSTORE ----
# Persist the FAISS index under db/ so embeddings are computed only once;
# later runs load the saved index instead of re-embedding the CSV.
INDEX_PATH = "db/real_estate_index"

# One embeddings client serves both the load and the build path.
embeddings = OpenAIEmbeddings(api_key=openai_api_key)

if os.path.exists(INDEX_PATH):
    # FAISS.load_local deserializes a pickle file; recent langchain-community
    # refuses to do so unless this flag is set explicitly. It is safe here
    # because the only index at INDEX_PATH is the one this app wrote itself —
    # never point INDEX_PATH at an untrusted file.
    vectorstore = FAISS.load_local(
        INDEX_PATH, embeddings, allow_dangerous_deserialization=True
    )
else:
    # Build the index from the Kaggle listings CSV. Rows missing a title or
    # description carry no searchable content, so drop them up front.
    df = pd.read_csv("real_estate_data.csv")
    df.dropna(subset=["Property Title", "Description"], inplace=True)
    # One Document per listing; the field labels give the retriever context.
    docs = [
        Document(
            page_content=(
                f"Title: {row['Property Title']}\n"
                f"Price: {row['Price']}\n"
                f"Location: {row['Location']}\n"
                f"Area: {row['Total Area']}\n"
                f"Description: {row['Description']}"
            )
        )
        for _, row in df.iterrows()
    ]
    # 500-char chunks with 50-char overlap keep each listing mostly intact.
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(docs)
    vectorstore = FAISS.from_documents(chunks, embeddings)
    vectorstore.save_local(INDEX_PATH)

retriever = vectorstore.as_retriever()
qa_chain = ConversationalRetrievalChain.from_llm(llm, retriever, memory=memory)
59
 
60
+ # ---- FUNCTIONS ----
61
  def transcribe_audio(audio_path):
62
  try:
63
  segments, _ = asr_model.transcribe(audio_path, beam_size=5)
 
108
  return transcription, reply, None
109
  return transcription, reply, tts_audio
110
 
111
+ # ---- GRADIO UI ----
112
  with gr.Blocks() as demo:
113
  gr.Markdown("# 🏠 Voice-Based Real Estate Assistant (Hindi + English)")
114
 
 
116
  audio_input = gr.Audio(type="filepath", label="🎀 Speak your real estate question")
117
 
118
  with gr.Row():
119
+ transcribed_text = gr.Textbox(label="πŸ“œ Transcription")
120
  llm_reply = gr.Textbox(label="πŸ€– Assistant's Reply")
121
 
122
  audio_output = gr.Audio(label="πŸ”Š Assistant's Voice")
 
126
 
127
# Script entry point: launch the Gradio app only when run directly,
# not when app.py is imported as a module.
# NOTE(review): the previous commit accidentally appended the commit-message
# text "Update app.py" after this guard, which is a SyntaxError; it is removed.
if __name__ == "__main__":
    demo.launch()