"""Gradio demo: transcribe Urdu speech and map it to a predefined command.

A recorded clip is transcribed with a fine-tuned Whisper model.  Depending on
the ``menu_id`` the transcript is either stored as-is through the order API or
fuzzy-matched against the phrase lists loaded from ``tasks.json``.
"""

import json
import os
import socket
import tempfile
from difflib import SequenceMatcher

import gradio as gr
import soundfile as sf
from transformers import pipeline

# Model used when the transcript is matched against the known commands.
asr_pipe = pipeline("automatic-speech-recognition", model="Abdullah17/whisper-small-urdu")
# Model used when the transcript itself is what gets stored.
transcript_pipe = pipeline("automatic-speech-recognition", model="ihanif/whisper-medium-urdu")

# Command phrases, indexed by menu_id.
# NOTE(review): each entry is iterated as a list of phrase lists by
# find_most_similar_command -- confirm against the actual tasks.json schema.
with open("tasks.json", "r", encoding="utf-8") as json_file:
    urdu_data = json.load(json_file)

# Menus whose raw transcript is uploaded without any command matching.
transcript_only = ["1", "3", "4"]
# Menus whose matched command index is uploaded.
match_and_save = ["2"]
# Query-parameter (column) name used by the order API for each menu.
col_names = {'1': "name", '3': "address", '4': "order"}

SAVE_ORDER_URL = 'https://pizzahut.softinfix.tech/api/save_order'
# Upper bound for the order API call so a stalled server cannot hang a request.
REQUEST_TIMEOUT_SECONDS = 30


def get_local_ip():
    """Return the local IP address used for outbound traffic, or None.

    Returns:
        The address reported by ``getsockname()`` after connecting a UDP
        socket to 8.8.8.8:80, or ``None`` when any socket call fails.
    """
    try:
        # The context manager closes the socket even when connect() fails.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            # Connecting a UDP socket sends no packet; it only makes the OS
            # choose the local address it would route from.
            s.connect(("8.8.8.8", 80))
            return s.getsockname()[0]
    except OSError as e:
        print(f"Error getting local IP: {e}")
        return None


def find_most_similar_command(statement, command_list):
    """Find the phrase in ``command_list`` closest to ``statement``.

    Args:
        statement: Text to match (the speech transcript).
        command_list: Iterable of phrase groups; each group is an iterable of
            candidate strings.

    Returns:
        ``(best_match, reply)`` where ``best_match`` is the phrase with the
        highest ``SequenceMatcher`` ratio and ``reply`` is the index of the
        group containing it.  Both are ``None`` when no phrase scores above
        zero (including when ``command_list`` is empty).  On ties the first
        phrase encountered wins.
    """
    best_match = None
    reply = None  # stays None when nothing matches instead of being unbound
    highest_similarity = 0
    for group_index, sub_list in enumerate(command_list):
        for command in sub_list:
            similarity = SequenceMatcher(None, statement, command).ratio()
            print(group_index, "similarity", similarity)
            if similarity > highest_similarity:
                highest_similarity = similarity
                best_match = command
                reply = group_index
    return best_match, reply


def send_data_to_db(menu_id, col_value, order_id):
    """Send one value for an order to the save_order API via a GET request.

    Args:
        menu_id: Key into ``col_names`` selecting the query-parameter name.
        col_value: Value to store; may be a string or a number.
        order_id: Identifier of the order being updated.

    The response status code and body are printed.  When ``menu_id`` has no
    entry in ``col_names`` the upload is skipped and a message is printed.
    """
    import requests

    col_name = col_names.get(menu_id)
    if col_name is None:
        # TODO: match_and_save contains "2" but col_names defines no column
        # for it; add the mapping once the API field name is known.
        print(f"No database column configured for menu_id {menu_id!r}; skipping upload.")
        return

    # Passing params lets requests URL-encode the values and accept
    # non-string values such as the integer command index.
    response = requests.get(
        SAVE_ORDER_URL,
        params={col_name: col_value, "order_id": order_id},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    # Print response
    print(response.status_code)
    print(response.text)


def transcribe_the_command(audio, menu_id, order_id):
    """Transcribe a recording and store or match the result for ``menu_id``.

    Args:
        audio: ``(sample_rate, audio_data)`` pair as delivered by the Gradio
            audio input.
        menu_id: Menu identifier; selects the model and the handling branch.
        order_id: Order identifier forwarded to the order API.

    Returns:
        The index of the matched command group for matching menus, or ``None``
        for transcript-only menus or when no command matches.

    Raises:
        ValueError: If no audio was supplied.
        KeyError: If a matching menu has no entry in ``tasks.json``.
    """
    local_ip = get_local_ip()
    if local_ip:
        print(f"Local IP Address: {local_ip}")
    else:
        print("Local IP could not be determined.")

    if audio is None:
        raise ValueError("No audio was provided.")
    sample_rate, audio_data = audio

    store_transcript = menu_id in transcript_only
    active_pipe = transcript_pipe if store_transcript else asr_pipe

    # Write to a per-request temporary directory so concurrent requests do
    # not overwrite each other's recording; it is removed on exit.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = os.path.join(tmp_dir, "recorded_audio.wav")
        sf.write(file_name, audio_data, sample_rate)
        print(menu_id)
        transcript = active_pipe(file_name)["text"]

    if store_transcript:
        send_data_to_db(menu_id, transcript, order_id)
        print("data uploaded successfully!")
        return None

    save_reply = menu_id in match_and_save
    commands = urdu_data[menu_id]
    if not save_reply:
        print(commands)
    most_similar_command, reply = find_most_similar_command(transcript, commands)
    print(f"Given Statement: {transcript}")
    print(f"Most Similar Command: {most_similar_command}\n")
    print(reply)
    if save_reply:
        send_data_to_db(menu_id, reply, order_id)
    return reply


iface = gr.Interface(
    fn=transcribe_the_command,
    inputs=[
        gr.inputs.Audio(label="Recorded Audio", source="microphone"),
        gr.inputs.Textbox(label="menu_id"),
        gr.inputs.Textbox(label="order_id"),
    ],
    outputs="text",
    title="Whisper Small Urdu Command",
    description=(
        "Realtime demo for Urdu speech recognition using a fine-tuned Whisper small "
        "model and outputting the estimated command on the basis of speech transcript."
    ),
)

iface.launch()