Aditi Tewari committed · Commit ee1587c · 1 Parent(s): fd31c98

Add files
Browse files:
- Dockerfile +24 -0
- requirements.txt +7 -0
- run.sh +1 -0
- soundscripter.html +147 -0
- soundscripter_flaskAPI.py +99 -0
- ss_flaskAPI.py +92 -0
- wave.png +0 -0
Dockerfile
ADDED
@@ -0,0 +1,24 @@
+# Use an official Python runtime as a parent image
+FROM python:3.8-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Install ffmpeg and any needed packages specified in requirements.txt
+RUN apt-get update && \
+    apt-get install -y ffmpeg && \
+    rm -rf /var/lib/apt/lists/* && \
+    pip install --no-cache-dir -r requirements.txt && \
+    pip install python-multipart
+
+# Make port 7860 available to the world outside this container
+EXPOSE 7860
+
+# Define environment variable for FastAPI
+ENV PYTHONUNBUFFERED=1
+
+# Command to run the application (the app is defined in soundscripter_flaskAPI.py, per run.sh)
+CMD ["uvicorn", "soundscripter_flaskAPI:app", "--host", "0.0.0.0", "--port", "7860"]
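The image bundles ffmpeg (needed by pydub for decoding) and serves the FastAPI app on port 7860. Below is a minimal client-side smoke test sketch; the image tag, sample.wav, and the requests package are all assumptions for illustration, not part of the commit.

# Hedged sketch: smoke-testing a locally running container, assuming it was
# started with something like:
#   docker build -t soundscripter . && docker run -p 7860:7860 soundscripter
# sample.wav is a hypothetical local recording; requests is a client-side extra.
import requests

# The root route should return soundscripter.html
resp = requests.get("http://localhost:7860/")
print(resp.status_code)  # expect 200

# POST a recording to the /asr endpoint; the multipart field is named "file"
with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:7860/asr",
        files={"file": ("sample.wav", f, "audio/wav")},
    )
print(resp.json())  # {"text": "..."} on success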
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+fastapi
+uvicorn
+pydub
+SpeechRecognition
+numpy
+librosa
+sounddevice
run.sh
ADDED
@@ -0,0 +1 @@
+python -m uvicorn soundscripter_flaskAPI:app --reload
soundscripter.html
ADDED
@@ -0,0 +1,147 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>SoundScripter</title>
+    <!-- Add Bootstrap CSS link here -->
+    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/5.3.0/css/bootstrap.min.css">
+    <style>
+        * {
+            margin: 10px;
+        }
+    </style>
+</head>
+<body style="background-color:#daeaf7;">
+
+    <nav class="navbar" style="background-color:whitesmoke; height:98px; border-radius:30px">
+        <div>
+            <img src="wave.png" alt="SoundScripter" width="60" height="70" style="margin-top:-5px">
+            <span class="navbar-brand" style="font-size:35px;"><b>SoundScripter</b></span>
+        </div>
+        <h5>Automatic Speech Recognition</h5>
+    </nav>
+
+    <div class="container mt-5">
+        <div class="form-group">
+            <input type="file" class="form-control" id="audioUpload" style="height: 40px; width: 50%; max-width: 300px;">
+
+            <div><h6><b>OR</b></h6></div>
+            <button class="btn" id="recordButton" style="background-color: white; border-radius:30px; height:50px; width:150px;"><h3>Record</h3></button>
+        </div>
+        <div class="form-group" style="text-align: center;">
+            <button class="btn btn-danger" id="submitButton" style="border-radius:30px;"><b>UPLOAD</b></button>
+        </div>
+        <div class="form-group">
+            <textarea class="form-control" id="outputText" rows="5" style="border-radius:15px; margin-top:5%" readonly></textarea>
+        </div>
+    </div>
+
+    <!-- Add Bootstrap JS link and any other required scripts here -->
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.16.0/umd/popper.min.js"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/5.3.0/js/bootstrap.min.js"></script>
+    <script>
+        let isRecording = false;
+        let recordedChunks = [];
+        let uploadedFile;
+        let uploadedArrayBuffer;
+        let recordedAudioBlob;
+        let mediaRecorder;
+
+        const recordButton = document.getElementById("recordButton");
+        const submitButton = document.getElementById("submitButton");
+        const outputText = document.getElementById("outputText");
+        const audioUpload = document.getElementById("audioUpload");
+
+        let mediaStream; // Store the media stream to stop it later
+
+        recordButton.addEventListener("click", function () {
+            if (!isRecording) {
+                navigator.mediaDevices.getUserMedia({ audio: true })
+                    .then(function (stream) {
+                        mediaStream = stream; // Save the stream for stopping later
+                        mediaRecorder = new MediaRecorder(stream);
+                        recordedChunks = [];
+
+                        mediaRecorder.ondataavailable = function (e) {
+                            if (e.data.size > 0) {
+                                recordedChunks.push(e.data);
+                            }
+                        };
+
+                        mediaRecorder.onstop = function () {
+                            recordedAudioBlob = new Blob(recordedChunks, { type: "audio/wav" });
+                        };
+
+                        mediaRecorder.start();
+                        isRecording = true;
+                        recordButton.innerText = "Stop Recording";
+                    })
+                    .catch(function (err) {
+                        console.error("Error accessing microphone", err);
+                    });
+            } else {
+                mediaRecorder.stop();
+                mediaStream.getTracks().forEach(track => track.stop()); // Stop the media stream
+                isRecording = false;
+                recordButton.innerText = "RECORD";
+            }
+        });
+
+        audioUpload.addEventListener("change", function (event) {
+            uploadedFile = event.target.files[0];
+            if (uploadedFile) {
+                console.log("File uploading");
+                const fileReader = new FileReader();
+                fileReader.onload = function () {
+                    uploadedArrayBuffer = fileReader.result;
+                };
+
+                fileReader.readAsArrayBuffer(uploadedFile);
+            }
+        });
+
+        submitButton.addEventListener("click", function () {
+            if (recordedAudioBlob || uploadedFile) {
+                const formData = new FormData();
+
+                // The field name must match the "file" parameter of the /asr endpoint
+                if (recordedAudioBlob) {
+                    formData.append("file", recordedAudioBlob, "recorded_audio.wav");
+                } else if (uploadedFile) {
+                    formData.append("file", uploadedFile, uploadedFile.name);
+                }
+
+                fetch("http://localhost:8000/asr", {
+                    method: "POST",
+                    body: formData,
+                })
+                    .then((response) => response.json())
+                    .then((data) => {
+                        if (data.text) {
+                            outputText.value = data.text;
+                        } else {
+                            outputText.value = "No text recognized.";
+                        }
+                    })
+                    .catch((error) => {
+                        console.error("Error while connecting with backend", error);
+                        outputText.value = "Backend communication failed.";
+                    });
+
+                if (recordedAudioBlob) {
+                    const downloadLink = document.createElement("a");
+                    downloadLink.href = URL.createObjectURL(recordedAudioBlob);
+                    downloadLink.download = "recorded_audio.wav";
+                    downloadLink.style.display = "none";
+                    document.body.appendChild(downloadLink);
+                    downloadLink.click();
+                    URL.revokeObjectURL(downloadLink.href);
+                    document.body.removeChild(downloadLink);
+                }
+            } else {
+                outputText.value = "Please upload an audio file or record.";
+            }
+        });
+    </script>
+</body>
+</html>
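One caveat worth noting: most browsers' MediaRecorder emits WebM/Opus (or Ogg) rather than WAV, even though the blob above is named recorded_audio.wav, and the backend picks its pydub decoder from the filename extension. A hedged sketch of decoding by the real container format instead; the WebM assumption, the saved recorded_audio.wav file, and an ffmpeg install for pydub are all assumptions.

# Sketch: decode a browser recording by its actual container format (WebM is
# an assumption here) rather than trusting the .wav filename extension.
import io
from pydub import AudioSegment

with open("recorded_audio.wav", "rb") as f:  # hypothetical download from the page
    raw = f.read()

audio = AudioSegment.from_file(io.BytesIO(raw), format="webm")  # decode as WebM
wav_bytes = audio.export(format="wav").read()                   # re-encode as WAV
print(f"{len(wav_bytes)} bytes of WAV audio")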
soundscripter_flaskAPI.py
ADDED
@@ -0,0 +1,99 @@
+import io
+import os
+import uvicorn
+import sounddevice as sd
+import numpy as np
+import speech_recognition as sr
+from fastapi import FastAPI, File, UploadFile
+from fastapi.responses import FileResponse, JSONResponse
+from fastapi.responses import HTMLResponse
+from pydub import AudioSegment
+import librosa
+from fastapi.middleware.cors import CORSMiddleware
+
+
+app = FastAPI()
+recognizer = sr.Recognizer()
+
+origins = ["*"]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.get("/", response_class=HTMLResponse)
+async def read_root():
+    # Serve the HTML file containing the front-end code
+    with open("soundscripter.html", "r") as file:
+        html_content = file.read()
+    return HTMLResponse(content=html_content)
+
+
+def convert_audio_format(input_data, input_format, output_format='wav'):
+    # Convert audio data to WAV format
+    audio = AudioSegment.from_file(io.BytesIO(input_data), format=input_format)
+    output_data = audio.export(format=output_format).read()
+    return output_data
+
+def recognize_speech(audio_data, language="hi-IN"):
+    with io.BytesIO(audio_data) as audio_io:
+        with sr.AudioFile(audio_io) as source:
+            audio = recognizer.record(source)
+    try:
+        text = recognizer.recognize_google(audio, language=language)
+        return text
+    except sr.UnknownValueError:
+        return "Speech not recognized."
+    except sr.RequestError as e:
+        return f"API request failed: {e}"
+
+@app.post("/asr")
+async def transcribe_audio(file: UploadFile = File(...)):
+    contents = await file.read()
+
+    # Determine the input audio format (assumes the format is part of the file name)
+    input_format = file.filename.split('.')[-1].lower()
+
+    # Convert the audio to WAV format
+    wav_data = convert_audio_format(contents, input_format)
+
+    # Save the received audio in WAV format for future analysis (optional)
+    wav_file_path = "received_audio.wav"
+    with open(wav_file_path, "wb") as f:
+        f.write(wav_data)
+
+    # Transcribe the audio
+    result = recognize_speech(wav_data)
+
+    return JSONResponse(content={"text": result})
+
+# @app.post("/asr/live")
+# async def transcribe_live_audio():
+#     fs = 16000  # Target sample rate
+#     duration = 3  # seconds
+#     chunks = int(fs * duration)
+
+#     # Record live audio
+#     audio_data = sd.rec(chunks, samplerate=fs, channels=1, dtype=np.float32)
+#     sd.wait()
+
+#     # Resample the audio data to the target sample rate
+#     audio_data_resampled = librosa.resample(audio_data.flatten(), orig_sr=fs, target_sr=16000)
+
+#     # Convert audio data to bytes (use np.int16)
+#     audio_bytes = audio_data_resampled.astype(np.int16).tobytes()
+
+#     # Transcribe the audio
+#     result = recognize_speech(audio_bytes)
+
+#     return {"Text": result}
+
+# Run the FastAPI app
+# if __name__ == "__main__":
+#     uvicorn.run(app, host="127.0.0.1", port=8000)
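The /asr route can also be exercised in-process, without a running server, via FastAPI's TestClient. A minimal sketch, assuming the httpx package is installed (TestClient depends on it) and that sample.wav is a hypothetical local WAV file:

# Hedged sketch of an in-process test for the /asr endpoint; sample.wav and
# the httpx dependency are assumptions, not part of the commit.
from fastapi.testclient import TestClient
from soundscripter_flaskAPI import app

client = TestClient(app)

with open("sample.wav", "rb") as f:
    response = client.post("/asr", files={"file": ("sample.wav", f, "audio/wav")})

print(response.status_code)  # expect 200
print(response.json())       # {"text": "<transcript>"} on success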
ss_flaskAPI.py
ADDED
@@ -0,0 +1,92 @@
+import os
+import shutil
+import subprocess
+import tempfile
+from fastapi import FastAPI, File, UploadFile, Form
+from fastapi.responses import FileResponse, JSONResponse
+from fastapi.responses import HTMLResponse
+from pydub import AudioSegment
+import speech_recognition as sr
+
+r = sr.Recognizer()
+
+app = FastAPI()
+
+def resample_audio(input_path, output_path, target_sample_rate):
+    ffmpeg_cmd = [
+        "ffmpeg",
+        "-i", input_path,
+        "-ar", str(target_sample_rate),
+        output_path
+    ]
+    subprocess.run(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+@app.get("/", response_class=HTMLResponse)
+async def read_root():
+    # Serve the HTML file containing the front-end code
+    with open("soundscripter.html", "r") as file:
+        html_content = file.read()
+    return html_content
+
+def get_sampling_rate(audio_file_path):
+    audio = AudioSegment.from_file(audio_file_path)
+    return audio.frame_rate
+
+@app.post("/process_audio")
+async def process_audio(audio: UploadFile = File(...), language: str = Form(...)):
+    if not audio or not language:
+        return JSONResponse(content={"success": False}, status_code=400)
+
+    # Check that the uploaded file is in WAV format
+    if audio.content_type != "audio/wav":
+        return JSONResponse(content={"success": False, "message": "Audio must be in WAV format."}, status_code=400)
+
+    try:
+        # Save the received audio to a temporary file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            temp_file_path = temp_file.name
+            shutil.copyfileobj(audio.file, temp_file)
+
+        # Print the file path for debugging
+        print(temp_file_path)
+        output_path = tempfile.mktemp(suffix=".wav")
+
+        # Resample the audio to 16000 Hz
+        resample_audio(temp_file_path, output_path, target_sample_rate=16000)
+        print(output_path)
+
+        # Get the sampling rate of the resampled audio
+        sampling_rate = get_sampling_rate(output_path)
+
+        # Verify that the resampling produced 16 kHz audio
+        if sampling_rate != 16000:
+            return JSONResponse(content={"success": False, "message": "Sample rate is not 16000 Hz."}, status_code=500)
+
+    except Exception as e:
+        print("Error processing audio:", e)
+        return JSONResponse(content={"success": False, "message": "Error processing audio."}, status_code=500)
+    # finally:
+    #     # Cleanup: remove the temporary received audio file
+    #     if os.path.exists(temp_file_path):
+    #         os.remove(temp_file_path)
+    return JSONResponse(content={"success": True, "language": calling_asr(output_path, "hi-IN")})
+
+def calling_asr(wav_file, lid):
+    AUDIO_FILE = wav_file
+    # aud_name = AUDIO_FILE.split('/')[-1].split('.')[0]
+    # file = open(wav_file + ".txt", "w")
+    text = "can't read wav file"
+    try:
+        with sr.AudioFile(AUDIO_FILE) as source:
+            audio = r.record(source)
+            text = r.recognize_google(audio, language=lid)
+            # file.write(aud_name + "\t" + text)
+            return text
+    except Exception:
+        # file.write(" " + "Error in segment" + " ")
+        return text
+    # file.close()
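Unlike /asr, the /process_audio route takes two multipart fields: the audio file (which must be posted as audio/wav) and a language form value, although calling_asr is currently invoked with a hard-coded "hi-IN". A minimal client sketch; the port, URL, sample.wav, and the requests package are assumptions, and it presumes this module is served with uvicorn (e.g. ss_flaskAPI:app) on its default port 8000:

# Hedged sketch of posting to /process_audio; localhost:8000 and sample.wav
# are assumptions for illustration.
import requests

with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/process_audio",
        files={"audio": ("sample.wav", f, "audio/wav")},  # content type must be audio/wav
        data={"language": "hi-IN"},
    )
print(resp.json())  # {"success": true, "language": "<transcript>"} on success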
wave.png
ADDED