Spaces:

saillab
/

TeacherAssistant

Sleeping

App Files Community

barghavani committed on Apr 11, 2024

Commit

8fb982e

verified ·

1 Parent(s): fd1ffda

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -25

app.py CHANGED Viewed

@@ -12,13 +12,22 @@ from dotenv import load_dotenv
 import speech_recognition as sr
 import sounddevice as sd
 import scipy.io.wavfile as wav
 load_dotenv()
 os.getenv("GOOGLE_API_KEY")
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
@@ -85,37 +94,32 @@ def user_input(user_question):
 DURATION = 5  # seconds
 SAMPLERATE = 44100  # Hz
-def record_audio():
-    st.write("Recording for {} seconds...".format(DURATION))
-    audio = sd.rec(int(DURATION * SAMPLERATE), samplerate=SAMPLERATE, channels=2, dtype='float64')
-    sd.wait()  # Wait until recording is finished
-    wav.write('temp_audio.wav', SAMPLERATE, audio)  # Save as WAV file (optional)
-    st.write("Recording finished. Processing the audio...")
-    return 'temp_audio.wav'  # Return path to the audio file
 def main():
     st.set_page_config("Chat PDF")
-    st.header("Chat with PDF using Gemini💁")
     with st.sidebar:
         st.title("Menu:")
         pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
         if st.button("Submit & Process"):
-            with st.spinner("Processing..."):
-                raw_text = get_pdf_text(pdf_docs)
-                text_chunks = get_text_chunks(raw_text)
-                get_vector_store(text_chunks)
-                st.success("Done")
-    user_question = st.text_input("Ask a Question from the PDF Files")
-    if st.button("Record Question via Microphone"):
-        audio_path = record_audio()
-        # Implement audio processing to text or use a service like Google Speech-to-Text here
-        # user_question = transcribe_audio(audio_path) # You'd need to implement this function
-    if user_question:
-        user_input(user_question)
 if __name__ == "__main__":
-    main()

 import speech_recognition as sr
 import sounddevice as sd
 import scipy.io.wavfile as wav
+import whisper
 load_dotenv()
 os.getenv("GOOGLE_API_KEY")
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+# Load the Whisper model
+model = whisper.load_model("large")
+def speech_to_text(audio_path):
+    # Load and decode the audio file
+    result = model.transcribe(audio_path, language="en",fp16=False)
+    return result['text']
 DURATION = 5  # seconds
 SAMPLERATE = 44100  # Hz
 def main():
     st.set_page_config("Chat PDF")
+    st.header("QnA with Multiple PDF files💁")
     with st.sidebar:
         st.title("Menu:")
         pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
+        audio_file = st.file_uploader("Upload your voice query", type=['wav', 'mp3', 'ogg'])
         if st.button("Submit & Process"):
+            if pdf_docs and audio_file:
+                with st.spinner("Processing..."):
+                    # Handle PDF text extraction and processing
+                    raw_text = get_pdf_text(pdf_docs)
+                    text_chunks = get_text_chunks(raw_text)
+                    get_vector_store(text_chunks)
+                    # Handle audio processing
+                    audio_path = audio_file.name
+                    with open(audio_path, "wb") as f:
+                        f.write(audio_file.getbuffer())
+                    user_question = speech_to_text(audio_path)
+                    st.write(f"Your question: {user_question}")
+                    user_input(user_question)
+                    st.success("Done")
 if __name__ == "__main__":
+    main()