"""Gradio text-in/text-out app for transcribing WAV files held in Firebase Storage.

The user supplies the name of a file under the ``wavfiles`` folder of the
configured Firebase Storage bucket. The file is downloaded to a local
scratch path, run through a wav2vec2 CTC model, and the decoded text is
returned.

Importing or running this module has side effects: it downloads the model,
saves a copy of it to ``my_model``, initialises Firebase and launches the
Gradio interface.
"""
import os

import gradio as gr
import pyrebase
import soundfile
import speech_recognition as sr
import torch
import torchaudio
from pydub import AudioSegment
from transformers import AutoProcessor
from transformers import Wav2Vec2ForCTC

# Hugging Face model identifier used for both the processor and the model.
MODEL_ID = "omarelsayeed/wav2vec2_ar_anz2"

# Sampling rate declared to the processor for every input.
TARGET_SAMPLE_RATE = 16_000

# Local scratch file that downloads are written to before transcription.
LOCAL_WAV_PATH = "xd.wav"

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)

# Keep a local copy of the processor and the model weights.
processor.save_pretrained("my_model")
model.save_pretrained("my_model")


def recite_wav(path_name):
    """Transcribe the audio file at *path_name* and return the decoded text.

    Args:
        path_name: Path of an audio file readable by ``torchaudio.load``.

    Returns:
        The first entry of the decoded transcriptions.
    """
    # NOTE: an earlier workaround for a "pcm_16" error re-encoded the file
    # with soundfile (subtype='PCM_16') before loading; it is currently
    # disabled.
    audio, sample_rate = torchaudio.load(path_name)

    # The processor is always told the audio is 16 kHz, so bring the audio to
    # that rate first instead of mislabelling it.
    if sample_rate != TARGET_SAMPLE_RATE:
        audio = torchaudio.functional.resample(
            audio, sample_rate, TARGET_SAMPLE_RATE
        )

    inputs = processor(
        audio,
        return_tensors="pt",
        padding="longest",
        sampling_rate=TARGET_SAMPLE_RATE,
    )

    with torch.no_grad():
        # squeeze(0) drops a leading size-1 dimension before the forward pass
        # -- presumably the extra batch axis added by the processor around
        # the (channels, time) tensor; TODO confirm.
        logits = model(inputs.input_values.squeeze(0))[0]

    # NOTE(review): `.text` assumes batch_decode returns an output object
    # (as LM-backed wav2vec2 processors do) rather than a plain list --
    # confirm against the processor class loaded for MODEL_ID.
    transcription = processor.batch_decode(logits.numpy()).text
    return transcription[0]


def list_all_files():
    """Print the name of every file listed under ``wavfiles`` in storage."""
    for _file in storage.child("wavfiles").list_files():
        print(_file.name)


def download_wav_file(wav_name):
    """Download ``wavfiles/<wav_name>`` from storage to the local scratch file.

    Args:
        wav_name: Name of the file inside the ``wavfiles`` storage folder.
    """
    storage.child("wavfiles").child(wav_name).download(LOCAL_WAV_PATH)


def get_quran_text(wav_file_path):
    """Download the named WAV file, transcribe it and return the text.

    The local scratch copy is removed afterwards, including when
    transcription raises.

    Args:
        wav_file_path: Name of the file inside the ``wavfiles`` storage folder.

    Returns:
        The transcription produced by :func:`recite_wav`.
    """
    download_wav_file(wav_file_path)
    try:
        return recite_wav(LOCAL_WAV_PATH)
    finally:
        # Guarded so a missing file does not mask the original exception.
        if os.path.exists(LOCAL_WAV_PATH):
            os.remove(LOCAL_WAV_PATH)


# NOTE(security): the API key and the service-account key file name are
# committed in source. Consider loading them from the environment or a
# secrets store, and keep the service-account JSON out of version control.
firebaseConfig = {
    "apiKey": "AIzaSyDjgBD762KveE8GBO7jqTTkj_mKhUTDwGM",
    "authDomain": "quran-c5cbe.firebaseapp.com",
    "databaseURL": "quran-c5cbe.firebaseio.com/",
    "projectId": "quran-c5cbe",
    "storageBucket": "quran-c5cbe.appspot.com",
    "serviceAccount": "quran-c5cbe-firebase-adminsdk-jvpbe-cebaf5aaa6.json",
}

firebase = pyrebase.initialize_app(firebaseConfig)
storage = firebase.storage()

# Text in (storage file name) -> text out (transcription).
iface = gr.Interface(fn=get_quran_text, inputs="text", outputs="text")
iface.launch()