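# Voice assistant demo: transcribe speech with Whisper, send the transcript to
# an instruct LLM via the Hugging Face Inference API, and speak the reply back
# with gTTS, all wrapped in a Gradio interface.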
from transformers import pipeline
import gradio as gr
import requests
import asyncio
from gtts import gTTS
from dotenv import load_dotenv
import os

# Expects HUGGING_FACE_TOKEN=<your token> in a local .env file
load_dotenv()
model_id = "sanchit-gandhi/whisper-small-dv"  # update with your model id
pipe = pipeline("automatic-speech-recognition", model=model_id)
hugging_face_token = os.getenv("HUGGING_FACE_TOKEN")
async def query(text, model_id="tiiuae/falcon-7b-instruct"):
    # Send the transcription to a hosted instruct LLM via the HF Inference API
    api_url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {hugging_face_token}"}
    payload = {"inputs": text}

    print(f"Querying...: {text}")
    # requests.post is blocking, so run it in a thread to keep the event loop free
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None, lambda: requests.post(api_url, headers=headers, json=payload)
    )
    result = response.json()
    print(result)
    # Falcon-instruct echoes the prompt on the first line; return the reply line
    lines = result[0]["generated_text"].split("\n")
    return lines[1] if len(lines) > 1 else lines[0]

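# Run Whisper ASR on the audio file, then hand the transcript to the LLM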
async def transcribe_speech(filepath):
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "english",
        },  # update with the language you've fine-tuned on
        chunk_length_s=30,
        batch_size=8,
    )
    return await query(output["text"])


def final(filepath):
    # Bridge Gradio's synchronous callback into the async transcribe/query pipeline
    return asyncio.run(transcribe_speech(filepath))

def main(filepath):
    response = final(filepath)
    print(response)
    # Speak the reply with gTTS; save to a separate MP3 so the input
    # recording is not overwritten
    tts = gTTS(text=response, lang="en", slow=False)
    output_path = "response.mp3"
    tts.save(output_path)
    return output_path

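# Gradio UIs: one tab records from the microphone, the other takes an uploaded file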
mic_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="audio",
)

file_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs="audio",
)


# Combine both interfaces into a single tabbed app
demo = gr.TabbedInterface(
    [mic_transcribe, file_transcribe],
    ["Transcribe Microphone", "Transcribe Audio File"],
)

# share=True creates a temporary public Gradio link in addition to localhost
demo.launch(debug=True, share=True)