barghavani committed on
Commit
8fb982e
·
verified ·
1 Parent(s): fd1ffda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -25
app.py CHANGED
@@ -12,13 +12,22 @@ from dotenv import load_dotenv
12
  import speech_recognition as sr
13
  import sounddevice as sd
14
  import scipy.io.wavfile as wav
 
 
 
15
 
16
  load_dotenv()
17
  os.getenv("GOOGLE_API_KEY")
18
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
19
 
20
 
 
 
21
 
 
 
 
 
22
 
23
 
24
 
@@ -85,37 +94,32 @@ def user_input(user_question):
85
  DURATION = 5 # seconds
86
  SAMPLERATE = 44100 # Hz
87
 
88
- def record_audio():
89
- st.write("Recording for {} seconds...".format(DURATION))
90
- audio = sd.rec(int(DURATION * SAMPLERATE), samplerate=SAMPLERATE, channels=2, dtype='float64')
91
- sd.wait() # Wait until recording is finished
92
- wav.write('temp_audio.wav', SAMPLERATE, audio) # Save as WAV file (optional)
93
- st.write("Recording finished. Processing the audio...")
94
- return 'temp_audio.wav' # Return path to the audio file
95
-
96
 
97
  def main():
98
  st.set_page_config("Chat PDF")
99
- st.header("Chat with PDF using Gemini💁")
100
-
101
  with st.sidebar:
102
  st.title("Menu:")
103
  pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
 
104
  if st.button("Submit & Process"):
105
- with st.spinner("Processing..."):
106
- raw_text = get_pdf_text(pdf_docs)
107
- text_chunks = get_text_chunks(raw_text)
108
- get_vector_store(text_chunks)
109
- st.success("Done")
110
-
111
- user_question = st.text_input("Ask a Question from the PDF Files")
112
- if st.button("Record Question via Microphone"):
113
- audio_path = record_audio()
114
- # Implement audio processing to text or use a service like Google Speech-to-Text here
115
- # user_question = transcribe_audio(audio_path) # You'd need to implement this function
116
-
117
- if user_question:
118
- user_input(user_question)
 
 
119
 
120
  if __name__ == "__main__":
121
- main()
 
12
  import speech_recognition as sr
13
  import sounddevice as sd
14
  import scipy.io.wavfile as wav
15
+ import whisper
16
+
17
+
18
 
19
  load_dotenv()
20
  os.getenv("GOOGLE_API_KEY")
21
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
22
 
23
 
24
+ # Load the Whisper model
25
+ model = whisper.load_model("large")
26
 
27
+ def speech_to_text(audio_path):
28
+ # Load and decode the audio file
29
+ result = model.transcribe(audio_path, language="en",fp16=False)
30
+ return result['text']
31
 
32
 
33
 
 
94
  DURATION = 5 # seconds
95
  SAMPLERATE = 44100 # Hz
96
 
 
 
 
 
 
 
 
 
97
 
98
  def main():
99
  st.set_page_config("Chat PDF")
100
+ st.header("QnA with Multiple PDF files💁")
101
+
102
  with st.sidebar:
103
  st.title("Menu:")
104
  pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
105
+ audio_file = st.file_uploader("Upload your voice query", type=['wav', 'mp3', 'ogg'])
106
  if st.button("Submit & Process"):
107
+ if pdf_docs and audio_file:
108
+ with st.spinner("Processing..."):
109
+ # Handle PDF text extraction and processing
110
+ raw_text = get_pdf_text(pdf_docs)
111
+ text_chunks = get_text_chunks(raw_text)
112
+ get_vector_store(text_chunks)
113
+
114
+ # Handle audio processing
115
+ audio_path = audio_file.name
116
+ with open(audio_path, "wb") as f:
117
+ f.write(audio_file.getbuffer())
118
+ user_question = speech_to_text(audio_path)
119
+ st.write(f"Your question: {user_question}")
120
+ user_input(user_question)
121
+
122
+ st.success("Done")
123
 
124
  if __name__ == "__main__":
125
+ main()