Spaces:

DataMine
/

Adhan_prep

Sleeping

App Files Files Community

DataMine committed on Oct 10, 2024

Commit

47a9a07

verified ·

1 Parent(s): 5fe22f4

Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
Hafiz muqeem.wav +3 -0
app.py +196 -0
requirements.txt +21 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Hafiz[[:space:]]muqeem.wav filter=lfs diff=lfs merge=lfs -text

Hafiz muqeem.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be0a43c18576c77b164356dbdbf82cdd1f66c1d57b0e18e97720967884efeb57
+size 40542318

app.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import os
+import streamlit as st
+import torch
+import torch.nn.functional as F
+import librosa
+import speech_recognition as sr
+# from transformers import Wav2Vec2Processor, Wav2Vec2Model
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+from groq import Groq
+# # Load pretrained model and processor
+# processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+# model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
+# Initialize Groq client
+groq_client = Groq(api_key="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")
+# Function to transcribe audio into text
+def transcribe_audio(audio_file):
+    recognizer = sr.Recognizer()
+    try:
+        with sr.AudioFile(audio_file) as source:
+            audio_data = recognizer.record(source)  # Read the entire audio file
+            text = recognizer.recognize_google(audio_data, language='ar-SA')  # Arabic transcription
+            return text
+    except sr.UnknownValueError:
+        return None
+    except sr.RequestError:
+        return None
+# Function to convert Arabic text to Romanized text
+def romanize_arabic(text):
+    romanized_mapping = {
+        "الله": "Allahu",
+        "اكبر": "akbar",
+        "اشهد": "Ashhadu",
+        "ان": "an",
+        "لا": "la",
+        "اله": "ilaha",
+        "الا": "illa",
+        "محمد": "Muhammad",
+        "رسول": "Rasul",
+        "حي": "Hayya",
+        "على": "'ala",
+        "الصلاه": "as-salah",
+        "الفلاح": "al-falah",
+        "لا": "la",
+        "الا": "illa",
+    }
+    words = text.split()
+    romanized_text = ' '.join(romanized_mapping.get(word, word) for word in words)
+    return romanized_text
+# Function to convert audio file into embeddings
+import torch
+from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
+import librosa
+# Load pretrained model and processor
+feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
+model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
+# Function to convert audio file into embeddings
+def get_audio_embedding(audio_path):
+    audio, sr = librosa.load(audio_path, sr=16000)
+    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding=True)
+    with torch.no_grad():
+        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
+    return embeddings
+# Function to calculate cosine similarity for embeddings
+def compare_embeddings(embedding_1, embedding_2):
+    similarity = F.cosine_similarity(embedding_1, embedding_2, dim=1)
+    return similarity.item()
+# Function to calculate text similarity using Cosine Similarity
+def compare_text_similarity(text1, text2):
+    vectorizer = CountVectorizer().fit_transform([text1, text2])
+    vectors = vectorizer.toarray()
+    cosine_sim = cosine_similarity(vectors)
+    return cosine_sim[0][1]
+# LLM feedback function using Groq
+def generate_llm_feedback(similarity_score):
+    feedback_prompt = f"""
+    A user has just pronounced part of the Azaan, and the similarity score between their pronunciation and the reference Azaan is {similarity_score:.2f}.
+    Based on this score:
+    - If the score is above 0.9, the pronunciation is excellent.
+    - If the score is between 0.7 and 0.9, the pronunciation is good but may need slight improvement.
+    - If the score is below 0.7, the pronunciation requires significant improvement.
+    Provide detailed feedback for the user about their pronunciation, considering their score of {similarity_score:.2f}.
+    """
+    chat_completion = groq_client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": feedback_prompt,
+            }
+        ],
+        model="llama3-8b-8192",
+    )
+    return chat_completion.choices[0].message.content
+# Custom CSS for styling
+st.markdown(
+    """
+    <style>
+    .main {
+        background-color: #f5f5f5;
+        font-family: 'Arial', sans-serif;
+    }
+    .title {
+        text-align: center;
+        color: #2a9d8f;
+    }
+    .subtitle {
+        text-align: center;
+        color: #264653;
+    }
+    .footer {
+        text-align: center;
+        font-size: 0.8em;
+        color: #555;
+    }
+    .feedback {
+        background-color: #e9c6c6;
+        border-radius: 10px;
+        padding: 20px;
+        margin: 10px;
+        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+    }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+# Streamlit UI
+def main():
+    st.title("🔔 Azaan Pronunciation Evaluation")
+    st.markdown("<h3 class='subtitle'>Welcome to the Azaan Pronunciation Evaluation!</h3>", unsafe_allow_html=True)
+    st.subheader("Upload Your Audio")
+    uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "m4a"])
+    if uploaded_file is not None:
+        st.audio(uploaded_file, format='audio/wav')
+        # Step 1: Transcribe expert audio and user audio
+        expert_audio_path = r"C:\Users\USER\Downloads\azan\Hafiz muqeem.wav" # Change this to the correct path
+        st.write("🎤 Step 1: Checking if the words match...")
+        # Transcribe user audio
+        user_text = transcribe_audio(uploaded_file)
+        expert_text = transcribe_audio(expert_audio_path)
+        if user_text and expert_text:
+            st.write("✅ Transcription successful!")
+            st.write(f"**Expert Azaan Text:** {expert_text}")
+            st.write(f"**Your Azaan Text:** {user_text}")
+            # Step 2: Romanize and compare texts
+            user_romanized = romanize_arabic(user_text)
+            expert_romanized = romanize_arabic(expert_text)
+            text_similarity = compare_text_similarity(user_romanized, expert_romanized)
+            st.write(f"📝 Text Similarity Score: {text_similarity:.2f}")
+            if text_similarity >= 0.1:
+                st.success("✅ Great! Your words match well enough. Now, let's evaluate your pronunciation.")
+                # Step 3: Evaluate pronunciation similarity
+                expert_embedding = get_audio_embedding(expert_audio_path)
+                user_embedding = get_audio_embedding(uploaded_file)
+                pronunciation_similarity = compare_embeddings(expert_embedding, user_embedding)
+                st.write(f"🔊 Pronunciation Similarity Score: {pronunciation_similarity:.2f}")
+                # Get feedback
+                feedback = generate_llm_feedback(pronunciation_similarity)
+                st.markdown(f"<div class='feedback'>{feedback}</div>", unsafe_allow_html=True)
+            else:
+                st.warning("⚠️ Your words do not match sufficiently. Please try again.")
+        else:
+            st.error("❌ There was an error transcribing one or both audio files.")
+    st.markdown("<div class='footer'>© 2024 Azaan Pronunciation Evaluation Tool</div>", unsafe_allow_html=True)
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+cohere==5.11.0
+faiss_cpu==1.8.0.post1
+groq==0.11.0
+gTTS==2.5.3
+langchain_huggingface==0.1.0
+librosa==0.10.2.post1
+matplotlib==3.9.2
+numpy==2.1.2
+protobuf==5.28.2
+PyAudio==0.2.14
+pydub==0.25.1
+Requests==2.32.3
+scikit_learn==1.5.2
+scipy==1.14.1
+sounddevice==0.5.0
+SpeechRecognition==3.10.4
+streamlit==1.38.0
+tensorflow==2.17.0
+tensorflow_intel==2.17.0
+torch==2.4.1
+transformers==4.45.1