DataMine committed
Commit 47a9a07 · verified · 1 Parent(s): 5fe22f4

Upload 3 files

Files changed (4)
  1. .gitattributes +1 -0
  2. Hafiz muqeem.wav +3 -0
  3. app.py +196 -0
  4. requirements.txt +21 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Hafiz[[:space:]]muqeem.wav filter=lfs diff=lfs merge=lfs -text
Hafiz muqeem.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be0a43c18576c77b164356dbdbf82cdd1f66c1d57b0e18e97720967884efeb57
+ size 40542318
app.py ADDED
@@ -0,0 +1,196 @@
+ import os
+ import streamlit as st
+ import torch
+ import torch.nn.functional as F
+ import librosa
+ import speech_recognition as sr
+ from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
+ from sklearn.feature_extraction.text import CountVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+ from groq import Groq
+
+ # Load the pretrained wav2vec 2.0 feature extractor and model once at startup
+ feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
+ model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
+
+ # Initialize Groq client (API key read from the environment rather than hard-coded)
+ groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+
+ # Function to transcribe audio into text
+ def transcribe_audio(audio_file):
+     recognizer = sr.Recognizer()
+     try:
+         with sr.AudioFile(audio_file) as source:
+             audio_data = recognizer.record(source)  # Read the entire audio file
+             text = recognizer.recognize_google(audio_data, language='ar-SA')  # Arabic transcription
+             return text
+     except sr.UnknownValueError:
+         return None
+     except sr.RequestError:
+         return None
+
+ # Function to convert Arabic text to Romanized text
+ def romanize_arabic(text):
+     # Word-level mapping of Azaan phrases to their Roman transliterations
+     romanized_mapping = {
+         "الله": "Allahu",
+         "اكبر": "akbar",
+         "اشهد": "Ashhadu",
+         "ان": "an",
+         "لا": "la",
+         "اله": "ilaha",
+         "الا": "illa",
+         "محمد": "Muhammad",
+         "رسول": "Rasul",
+         "حي": "Hayya",
+         "على": "'ala",
+         "الصلاه": "as-salah",
+         "الفلاح": "al-falah",
+     }
+
+     words = text.split()
+     romanized_text = ' '.join(romanized_mapping.get(word, word) for word in words)
+     return romanized_text
+
+ # Function to convert an audio file into a wav2vec 2.0 embedding
+ def get_audio_embedding(audio_path):
+     audio, sample_rate = librosa.load(audio_path, sr=16000)
+     inputs = feature_extractor(audio, sampling_rate=sample_rate, return_tensors="pt", padding=True)
+     with torch.no_grad():
+         # Mean-pool the last hidden state over time to get one vector per clip
+         embeddings = model(**inputs).last_hidden_state.mean(dim=1)
+     return embeddings
+
+ # Function to calculate cosine similarity for embeddings
+ def compare_embeddings(embedding_1, embedding_2):
+     similarity = F.cosine_similarity(embedding_1, embedding_2, dim=1)
+     return similarity.item()
+
+ # Function to calculate text similarity using Cosine Similarity
+ def compare_text_similarity(text1, text2):
+     vectorizer = CountVectorizer().fit_transform([text1, text2])
+     vectors = vectorizer.toarray()
+     cosine_sim = cosine_similarity(vectors)
+     return cosine_sim[0][1]
+
+ # LLM feedback function using Groq
+ def generate_llm_feedback(similarity_score):
+     feedback_prompt = f"""
+     A user has just pronounced part of the Azaan, and the similarity score between their pronunciation and the reference Azaan is {similarity_score:.2f}.
+
+     Based on this score:
+     - If the score is above 0.9, the pronunciation is excellent.
+     - If the score is between 0.7 and 0.9, the pronunciation is good but may need slight improvement.
+     - If the score is below 0.7, the pronunciation requires significant improvement.
+
+     Provide detailed feedback for the user about their pronunciation, considering their score of {similarity_score:.2f}.
+     """
+
+     chat_completion = groq_client.chat.completions.create(
+         messages=[
+             {
+                 "role": "user",
+                 "content": feedback_prompt,
+             }
+         ],
+         model="llama3-8b-8192",
+     )
+
+     return chat_completion.choices[0].message.content
+
+ # Custom CSS for styling
+ st.markdown(
+     """
+     <style>
+     .main {
+         background-color: #f5f5f5;
+         font-family: 'Arial', sans-serif;
+     }
+     .title {
+         text-align: center;
+         color: #2a9d8f;
+     }
+     .subtitle {
+         text-align: center;
+         color: #264653;
+     }
+     .footer {
+         text-align: center;
+         font-size: 0.8em;
+         color: #555;
+     }
+     .feedback {
+         background-color: #e9c6c6;
+         border-radius: 10px;
+         padding: 20px;
+         margin: 10px;
+         box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+     }
+     </style>
+     """,
+     unsafe_allow_html=True
+ )
+
+ # Streamlit UI
+ def main():
+     st.title("🔔 Azaan Pronunciation Evaluation")
+     st.markdown("<h3 class='subtitle'>Welcome to the Azaan Pronunciation Evaluation!</h3>", unsafe_allow_html=True)
+
+     st.subheader("Upload Your Audio")
+     uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "m4a"])
+
+     if uploaded_file is not None:
+         st.audio(uploaded_file, format='audio/wav')
+         uploaded_file.seek(0)  # Rewind the upload after previewing it
+
+         # Step 1: Transcribe expert audio and user audio
+         expert_audio_path = "Hafiz muqeem.wav"  # Reference Azaan uploaded alongside app.py
+         st.write("🎤 Step 1: Checking if the words match...")
+
+         # Transcribe user audio
+         user_text = transcribe_audio(uploaded_file)
+         expert_text = transcribe_audio(expert_audio_path)
+
+         if user_text and expert_text:
+             st.write("✅ Transcription successful!")
+             st.write(f"**Expert Azaan Text:** {expert_text}")
+             st.write(f"**Your Azaan Text:** {user_text}")
+
+             # Step 2: Romanize and compare texts
+             user_romanized = romanize_arabic(user_text)
+             expert_romanized = romanize_arabic(expert_text)
+
+             text_similarity = compare_text_similarity(user_romanized, expert_romanized)
+             st.write(f"📝 Text Similarity Score: {text_similarity:.2f}")
+
+             if text_similarity >= 0.1:
+                 st.success("✅ Great! Your words match well enough. Now, let's evaluate your pronunciation.")
+
+                 # Step 3: Evaluate pronunciation similarity
+                 expert_embedding = get_audio_embedding(expert_audio_path)
+                 uploaded_file.seek(0)  # Rewind the upload before reading it again
+                 user_embedding = get_audio_embedding(uploaded_file)
+
+                 pronunciation_similarity = compare_embeddings(expert_embedding, user_embedding)
+                 st.write(f"🔊 Pronunciation Similarity Score: {pronunciation_similarity:.2f}")
+
+                 # Get feedback
+                 feedback = generate_llm_feedback(pronunciation_similarity)
+                 st.markdown(f"<div class='feedback'>{feedback}</div>", unsafe_allow_html=True)
+             else:
+                 st.warning("⚠️ Your words do not match sufficiently. Please try again.")
+         else:
+             st.error("❌ There was an error transcribing one or both audio files.")
+
+     st.markdown("<div class='footer'>© 2024 Azaan Pronunciation Evaluation Tool</div>", unsafe_allow_html=True)
+
+ if __name__ == "__main__":
+     main()
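
A quick way to sanity-check the embedding-comparison step outside Streamlit is sketched below; it is not part of the commit. It assumes the same facebook/wav2vec2-base-960h checkpoint used in app.py and a short clip at the hypothetical path "sample.wav"; comparing a clip with itself should score close to 1.0.

import torch
import torch.nn.functional as F
import librosa
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model

# Same checkpoint as app.py; "sample.wav" is a placeholder path
extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
wav2vec = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")

def embed(path):
    # Load at 16 kHz and mean-pool the last hidden state over time
    audio, rate = librosa.load(path, sr=16000)
    inputs = extractor(audio, sampling_rate=rate, return_tensors="pt", padding=True)
    with torch.no_grad():
        return wav2vec(**inputs).last_hidden_state.mean(dim=1)

emb = embed("sample.wav")
print(F.cosine_similarity(emb, emb, dim=1).item())  # expected: ~1.0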
requirements.txt ADDED
@@ -0,0 +1,21 @@
+ cohere==5.11.0
+ faiss_cpu==1.8.0.post1
+ groq==0.11.0
+ gTTS==2.5.3
+ langchain_huggingface==0.1.0
+ librosa==0.10.2.post1
+ matplotlib==3.9.2
+ numpy==2.1.2
+ protobuf==5.28.2
+ PyAudio==0.2.14
+ pydub==0.25.1
+ Requests==2.32.3
+ scikit_learn==1.5.2
+ scipy==1.14.1
+ sounddevice==0.5.0
+ SpeechRecognition==3.10.4
+ streamlit==1.38.0
+ tensorflow==2.17.0
+ tensorflow_intel==2.17.0
+ torch==2.4.1
+ transformers==4.45.1
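
The word-match step (Step 2 in app.py) can be exercised in isolation in the same way. The sketch below is not part of the commit; it mirrors the CountVectorizer / cosine-similarity logic from app.py, and the two phrases are illustrative placeholders rather than transcripts of the committed audio.

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def text_similarity(text1, text2):
    # Bag-of-words vectors for the two phrases, then their cosine similarity
    vectors = CountVectorizer().fit_transform([text1, text2]).toarray()
    return cosine_similarity(vectors)[0][1]

# Illustrative phrases (placeholders, not transcripts of the committed audio);
# the single shared word "Allahu" yields a modest, non-zero score (~0.29)
print(text_similarity("Allahu akbar Allahu akbar", "Ashhadu an la ilaha illa Allahu"))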