"""Gradio web UI that transcribes microphone audio with Whisper and asks ChatGPT.

The recorded clip is transcribed with the OpenAI Whisper "base" model and the
transcription is sent to ChatGPT through ``pyChatGPT``. Both the transcription
and the ChatGPT reply are shown in the UI.
"""

import os
import time
import warnings
from typing import List, Optional

import gradio as gr
import whisper
from pyChatGPT import ChatGPT

warnings.filterwarnings("ignore")

# SECURITY: the ChatGPT session token used to be hard-coded in this file.
# A credential committed to source control must be considered leaked and
# should be revoked. The token is now read from the environment instead:
#     export CHATGPT_SESSION_TOKEN="<your session token>"
secret_token = os.environ.get("CHATGPT_SESSION_TOKEN", "")

# Loaded once at import time so every request reuses the same model instance.
model = whisper.load_model("base")


def _speech_to_text(audio_path: str) -> str:
    """Return the Whisper transcription of the audio file at *audio_path*."""
    # Load the audio and pad/trim it to fit 30 seconds.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio_path))

    # Make the log-Mel spectrogram and move it to the same device as the model.
    # (The original called the non-existent ``log_mel_spectogram``.)
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # The separate ``detect_language`` pass was dropped: its result was never
    # used. Half precision is only requested off-CPU.
    # NOTE(review): fp16 on CPU is reported to fail on some PyTorch builds;
    # confirm against the deployed environment.
    options = whisper.DecodingOptions(fp16=model.device.type != "cpu")
    return whisper.decode(model, mel, options).text


def _ask_chatgpt(prompt: str) -> str:
    """Send *prompt* to ChatGPT and return the ``'message'`` field of the reply.

    Raises:
        RuntimeError: if no session token has been configured.
    """
    if not secret_token:
        raise RuntimeError(
            "ChatGPT session token is not configured; set the "
            "CHATGPT_SESSION_TOKEN environment variable."
        )
    chatgpt_api = ChatGPT(secret_token)
    return chatgpt_api.send_message(prompt)["message"]


def transcribe(audio: Optional[str]) -> List[str]:
    """Transcribe a recorded clip and forward the text to ChatGPT.

    Args:
        audio: Path of the recorded audio file (the Gradio input is configured
            with ``type="filepath"``), or ``None`` when nothing was recorded.

    Returns:
        A two-item list ``[transcription, chatgpt_reply]``. Both items are
        empty strings when there is no audio; the reply is an empty string
        when the transcription is blank.

    Raises:
        RuntimeError: if the ChatGPT session token is not configured.
    """
    # With ``live=True`` the callback may fire without a recording.
    if audio is None:
        return ["", ""]

    result_text = _speech_to_text(audio)

    # Nothing intelligible was said: skip the ChatGPT round-trip.
    if not result_text.strip():
        return [result_text, ""]

    # Pass the generated text to ChatGPT.
    out_result = _ask_chatgpt(result_text)
    return [result_text, out_result]


output_1 = gr.Textbox(label="Speech to Text")
output_2 = gr.Textbox(label="ChatGPT Output")

# NOTE(review): ``gr.inputs`` is the legacy component namespace; confirm it is
# still available in the installed Gradio version before upgrading.
demo = gr.Interface(
    title="OpenAI Whisper and ChatGPT ASR Gradio Web UI",
    fn=transcribe,
    inputs=[gr.inputs.Audio(source="microphone", type="filepath")],
    outputs=[output_1, output_2],
    live=True,
)

if __name__ == "__main__":
    # The original referenced ``.launch`` without calling it, so the UI never
    # started.
    demo.launch()