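# ASHA Saheli demo: record a spoken question, transcribe it with OpenAI Whisper,
# answer it with a LangChain question-answering chain over a local folder of PDFs
# (OpenAI embeddings + Chroma), and optionally read the answer aloud with Bark.
# The user interface is built with Gradio Blocks.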
# LangChain: document loading, embeddings, vector store, and QA chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

import gradio as gr
import openai
import os

# Bark text-to-speech and audio utilities
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
from pydub import AudioSegment
# from IPython.display import Audio  # only needed for in-notebook playback


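# The OpenAI API key is read from the OPENAI_API_KEY environment variable.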
api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = api_key

# When running in Google Colab, mount Drive to access the source PDFs:
# from google.colab import drive
# drive.mount('/content/gdrive', force_remount=True)
# root_dir = "/content/gdrive/My Drive/"
# data_path = '/content/gdrive/My Drive/CDSS/LLM Demos/ASHA material'

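# Build the retrieval index: load every PDF in the local folder, split it into
# pages, embed the pages with OpenAI, and expose the Chroma store as a retriever.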
pdf_folder_path = 'ASHAmaterial'
loader = PyPDFDirectoryLoader(pdf_folder_path)

pages = loader.load_and_split()
embeddings = OpenAIEmbeddings(openai_api_key=api_key)
docsearch = Chroma.from_documents(pages, embeddings).as_retriever()

# download and cache the Bark TTS models
preload_models()


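# Map UI language names to the ISO-639-1 codes Whisper expects; only English is wired up.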
lang_dict = {
    "English": "en",
}


# Example text prompt for Bark; the one-off generation below is left commented out.
text_prompt = """
     Hello, my name is Suno. And, uh — and I like pizza. [laughs] 
     But I also have other interests such as playing tic tac toe.
"""
#audio_array = generate_audio(text_prompt)

# save audio to disk
#write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)
  
# play text in notebook
#Audio(audio_array, rate=SAMPLE_RATE)

def get_asr_output(audio_path, lang='en'):
    # Re-encode whatever Gradio recorded to WAV, then transcribe it with Whisper.
    audio = AudioSegment.from_file(audio_path)
    audio.export("temp.wav", format="wav")

    with open("temp.wav", "rb") as file:
        transcription = openai.Audio.transcribe("whisper-1", file, language=lang)
    op_text = transcription.text

    # Disabled: route Hindi through a separate ASR pipeline instead of Whisper.
    # if lang == "hi":
    #     op_text = asr_pipe("temp.wav")['text']
    #     print('whisper', transcription)
    #     print('ai4b', op_text)

    return op_text

def greet(audio, lang):
    # Transcribe the recorded audio in the selected language.
    query = get_asr_output(audio, lang_dict[lang])
    return query

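# Retrieval-augmented QA: fetch the most relevant PDF pages for the query and let
# an OpenAI LLM answer from them via a "stuff" chain.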
def greet2(query):
    docs = docsearch.get_relevant_documents(query)
    chain = load_qa_chain(OpenAI(temperature=0, openai_api_key=api_key), chain_type="stuff")
    answer = chain.run(input_documents=docs, question=query)
    return answer

def get_audio2(answer):
    # Synthesise the answer with Bark and also save a copy to disk.
    audio_array = generate_audio(answer)
    write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)
    return SAMPLE_RATE, audio_array
    
# Unused placeholder callbacks kept from earlier experiments.
def hello():
    return "hey"

def dummy(name):
    return "bark_generation.wav"


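# Gradio Blocks UI: choose a language, record a question, transcribe it, then
# submit the transcription to the QA chain.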
with gr.Blocks(title="ASHA Saheli") as demo:
    gr.Image('asha.png', shape=(10, 10))
    lang = gr.Radio(list(lang_dict.keys()), label="Select a Language")
    user_audio = gr.Audio(source="microphone", type="filepath", label="Speak your query")
    text = gr.Textbox(placeholder="Question", label="Question / Voice Transcription", show_label=False)
    output = gr.Textbox(placeholder="The answer will appear here", interactive=False, show_label=False)

    get_text_from_audio = gr.Button("Transcribe")
    get_text_from_audio.click(greet, [user_audio, lang], [text])
    

    submit = gr.Button("Submit")
    submit.click(greet2, [text], [output])

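    # Optional: read the answer back with Bark text-to-speech (left disabled here).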
    #get_audio = gr.Button('Get Audio')
    #audio = gr.Audio()
    #get_audio.click(get_audio2, output, audio)
    
demo.launch()