"""Streamlit app for Arabic speech-command recognition.

Records audio in the browser, classifies it with a fine-tuned wav2vec2
model from the Hugging Face Hub, and sits behind a streamlit-authenticator
login. Run with: streamlit run <this file>.
"""
import librosa
import pandas as pd
import streamlit as st
import torch
import yaml
from audiorecorder import audiorecorder
from streamlit_authenticator import Authenticate
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
from yaml.loader import SafeLoader

hugging_face_model = "MeshalAlamr/wav2vec2-xls-r-300m-arabic_speech_commands"


# Cache the model and feature extractor so they are loaded once per session.
@st.cache(allow_output_mutation=True)
def load_model():
    feature_extractor = AutoFeatureExtractor.from_pretrained(hugging_face_model)
    model = AutoModelForAudioClassification.from_pretrained(hugging_face_model)
    return model, feature_extractor


model, feature_extractor = load_model()

# streamlit-authenticator configuration: credentials, cookie settings, and
# pre-authorized emails (see the example config.yaml sketch at the bottom).
with open('config.yaml') as file:
    config = yaml.load(file, Loader=SafeLoader)

authenticator = Authenticate(
    config['credentials'],
    config['cookie']['name'],
    config['cookie']['key'],
    config['cookie']['expiry_days'],
    config['preauthorized'],
)

# Render the login form ("تسجيل الدخول" = "log in").
name, authentication_status, username = authenticator.login('تسجيل الدخول', 'main')

# Map the model's English class labels to the Arabic commands shown in the UI.
english_to_arabic = {
    'backward': 'خلف', 'cancel': 'إلغاء', 'close': 'إغلاق', 'digit': 'رقم',
    'direction': 'اتجاه', 'disable': 'تعطيل', 'down': 'أسفل', 'eight': 'ثمانية',
    'enable': 'تفعيل', 'enter': 'إدخال', 'five': 'خمسة', 'forward': 'أمام',
    'four': 'أربعة', 'left': 'يسار', 'move': 'تحريك', 'next': 'التالي',
    'nine': 'تسعة', 'no': 'لا', 'ok': 'موافق', 'one': 'واحد',
    'open': 'فتح', 'options': 'خيارات', 'previous': 'السابق', 'receive': 'استقبال',
    'record': 'تسجيل', 'right': 'يمين', 'rotate': 'تدوير', 'send': 'إرسال',
    'seven': 'سبعة', 'six': 'ستة', 'start': 'ابدأ', 'stop': 'توقف',
    'three': 'ثلاثة', 'two': 'اثنان', 'undo': 'تراجع', 'up': 'أعلى',
    'yes': 'نعم', 'zero': 'صفر', 'zoom in': 'تكبير', 'zoom out': 'تصغير',
}

if st.session_state["authentication_status"]:
    st.write(f'مرحبا، *{st.session_state["name"]}*')  # "Hello, <name>"
    st.title("التعرف على الأوامر العربية")  # "Arabic command recognition"

    # In-browser recorder: "press here to record" / "recording... press to stop".
    audio = audiorecorder("اضغط هنا للتسجيل", "يتم التسجيل... اضغط لإيقاف التسجيل")

    if len(audio) > 0:
        # Play the recording back in the frontend.
        st.audio(audio)
        # Save the recording to a file so librosa can load it.
        with open("temp_audio.wav", "wb") as wav_file:
            wav_file.write(audio.tobytes())

        classify = st.button("اضغط هنا للتعرف")  # "press here to classify"
        if classify:
            # The recorder captures at 48 kHz; the model expects 16 kHz input.
            array, sampling_rate = librosa.load("temp_audio.wav", sr=48000)
            array = librosa.resample(array, orig_sr=sampling_rate, target_sr=16000)
            input_audio = feature_extractor(
                array, sampling_rate=16000, padding=True, return_tensors="pt"
            )
            with torch.no_grad():
                logits = model(input_audio['input_values']).logits
            predicted_id = int(torch.argmax(logits, dim=-1))
            confidence_score = round(torch.max(torch.softmax(logits, dim=-1)).item() * 100, 2)
            # "Predicted word: <Arabic label>" and "confidence: <score>%".
            st.subheader("الكلمة المتوقعة" + ": " + english_to_arabic[model.config.id2label[predicted_id]])
            st.subheader("%" + "نسبة التأكد" + ": " + str(confidence_score))

    # Lay the available commands out as a two-column table when the class
    # count is even; the second header gets a leading space so the two
    # column names stay distinct.
    classes = [english_to_arabic[v] for v in model.config.id2label.values()]
    if len(classes) % 2 == 0:
        half = len(classes) // 2
        df = pd.DataFrame({"الأمر": classes[:half], " الأمر": classes[half:]})
    else:
        df = pd.DataFrame({"الأمر": classes})
hide_table_row_index = """ """ # Inject CSS with Markdown st.markdown(hide_table_row_index, unsafe_allow_html=True) # Display a static table st.header("الأوامر المتوفرة") st.table(df) authenticator.logout('تسجيل الخروج', 'main') elif st.session_state["authentication_status"] == False: st.error('اسم المستخدم أو كلمة المرور خاطئة') elif st.session_state["authentication_status"] == None: st.warning('أدخل اسم المستخدم وكلمة المرور')