mukul-wai committed on
Commit
b4eadf1
·
1 Parent(s): 9bd6869

Upload 6 files

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. README.md +5 -5
  3. app.py +124 -0
  4. asha.jpeg +0 -0
  5. asha.png +3 -0
  6. requirements.txt +173 -0
  7. temp.wav +0 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ asha.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: ASHA Assistant
3
- emoji: 📈
4
- colorFrom: yellow
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 3.29.0
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
+ title: Cho Assistant
3
+ emoji: 🌍
4
+ colorFrom: indigo
5
+ colorTo: yellow
6
  sdk: gradio
7
+ sdk_version: 3.28.1
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.embeddings.openai import OpenAIEmbeddings
2
+ from langchain.text_splitter import CharacterTextSplitter
3
+ from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
4
+
5
+ from langchain.chains.question_answering import load_qa_chain
6
+ from langchain.llms import OpenAI
7
+ import gradio as gr
8
+ import openai
9
+ import os
10
+
11
+ from bark import SAMPLE_RATE, generate_audio, preload_models
12
+ from scipy.io.wavfile import write as write_wav
13
+ from IPython.display import Audio
14
+
15
+
16
# Pull the OpenAI credential from the OPENAI_API_KEY environment variable and
# hand the same value to the openai client library.
openai.api_key = api_key = os.getenv('OPENAI_API_KEY')

# Connect your Google Drive. The snippet is wrapped in a bare string literal,
# so it is never executed.
"""from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
root_dir = "/content/gdrive/My Drive/"
data_path = '/content/gdrive/My Drive/CDSS/LLM Demos/ASHA material'
"""
25
+ from langchain.document_loaders import PyPDFDirectoryLoader
26
+ from langchain.indexes import VectorstoreIndexCreator
27
+ from langchain.document_loaders import PyPDFLoader
28
+ from langchain.vectorstores import Chroma
29
+ from langchain.embeddings.openai import OpenAIEmbeddings
30
+ from langchain.document_loaders import UnstructuredPDFLoader
31
+ from langchain.llms import OpenAI
32
+ from langchain.chains.question_answering import load_qa_chain
33
+
34
# Load every PDF found in the knowledge-base folder, split it into pages and
# index the pages in a Chroma vector store exposed as a retriever.
pdf_folder_path = 'ASHAmaterial'
loader = PyPDFDirectoryLoader(pdf_folder_path)

pages = loader.load_and_split()
# SECURITY: a secret key used to be hard-coded on this line. It has been
# published with the commit and must be revoked; the key is now taken from the
# module-level `api_key`, which is read from the OPENAI_API_KEY env variable.
embeddings = OpenAIEmbeddings(openai_api_key=api_key)
docsearch = Chroma.from_documents(pages, embeddings).as_retriever()
40
+ from pydub import AudioSegment
41
+
42
# Download and load all Bark models once, at import time.
preload_models()


# Language names offered in the UI, mapped to the language codes that `greet`
# forwards to the transcription helper.
lang_dict = {
    "English": "en",
}
49
+
50
+
51
# Sample text for generating audio. It is only referenced by the disabled
# lines below, which are kept as a usage reference.
text_prompt = """
Hello, my name is Suno. And, uh — and I like pizza. [laughs]
But I also have other interests such as playing tic tac toe.
"""
# audio_array = generate_audio(text_prompt)

# save audio to disk
# write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)

# play text in notebook
# Audio(audio_array, rate=SAMPLE_RATE)
63
+
64
def get_asr_output(audio_path, lang='English'):
    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

    The recording is first converted to WAV with pydub, then uploaded for
    transcription.

    Args:
        audio_path: Path of the recording to transcribe.
        lang: Either a language name listed in ``lang_dict`` (such as
            ``'English'``) or a language code, which is passed through
            unchanged.

    Returns:
        The transcribed text.
    """
    import tempfile

    # Callers pass codes such as 'en', but the default is a display name;
    # translate known names and leave anything else untouched.
    language = lang_dict.get(lang, lang)

    # A private directory per call keeps concurrent requests from overwriting
    # each other's WAV file, and everything is removed on exit.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_path = os.path.join(work_dir, "temp.wav")
        # export() returns the output file handle still open; close it so the
        # file can be reopened for reading and later deleted.
        AudioSegment.from_file(audio_path).export(wav_path, format="wav").close()
        with open(wav_path, "rb") as wav_file:
            transcription = openai.Audio.transcribe(
                "whisper-1", wav_file, language=language
            )

    return transcription.text
79
+
80
def greet(audio, lang, if_audio=True):
    """Turn the recorded query into text for the question box.

    Args:
        audio: File path of the recording, or ``None`` when nothing was
            recorded.
        lang: Language name chosen in the UI (a key of ``lang_dict``), or
            ``None`` when no option is selected.
        if_audio: Unused; kept so existing callers keep working.

    Returns:
        The transcription, or an empty string when there is no recording.
    """
    if audio is None:
        # "Transcribe" was pressed without a recording; nothing to send.
        return ""

    # Fall back to English when no (or an unknown) language is selected
    # instead of raising KeyError.
    language_code = lang_dict.get(lang, lang_dict["English"])
    return get_asr_output(audio, language_code)
84
+
85
def greet2(query):
    """Answer a question from the indexed PDF material.

    Args:
        query: The user's question as plain text.

    Returns:
        The answer produced by the "stuff" question-answering chain, or an
        empty string when ``query`` is empty or only whitespace.
    """
    if not query or not query.strip():
        # Nothing to look up; skip the retrieval and LLM round-trips.
        return ""

    docs = docsearch.get_relevant_documents(query)
    # SECURITY: the key is taken from the module-level `api_key` (read from the
    # environment) rather than a literal embedded in the source.
    llm = OpenAI(temperature=0, openai_api_key=api_key)
    chain = load_qa_chain(llm, chain_type="stuff")
    return chain.run(input_documents=docs, question=query)
92
+
93
def get_audio2(answer):
    """Synthesize speech for the answer text with Bark.

    The audio is also written to ``bark_generation.wav`` in the working
    directory.

    Args:
        answer: Text to speak.

    Returns:
        A ``(sample_rate, audio_array)`` tuple, or ``None`` when ``answer`` is
        empty or only whitespace.
    """
    if not answer or not answer.strip():
        # No answer yet; do not run the speech model on empty text.
        return None

    audio_array = generate_audio(answer)
    write_wav("bark_generation.wav", SAMPLE_RATE, audio_array)

    # Report the same rate the file was written with instead of a separate
    # hard-coded number.
    return SAMPLE_RATE, audio_array
98
+
99
def hello():
    """Return the fixed greeting string."""
    greeting = "hey"
    return greeting
101
def dummy(name):
    """Return the file name of the generated speech clip; ``name`` is ignored."""
    generated_clip = "bark_generation.wav"
    return generated_clip
103
+
104
# Voice/text question-answering UI: record -> transcribe -> answer -> speak.
# The language selector is created once, inside the Blocks context; the extra
# copy that used to be built before the `with` was shadowed immediately.
with gr.Blocks(title="ASHA Saheli") as demo:
    gr.Image('asha.png', shape=(10, 10))

    language_names = list(lang_dict.keys())
    # Pre-select the first language so transcription never receives None.
    lang = gr.Radio(language_names, value=language_names[0], label="Select a Language")
    user_audio = gr.Audio(source="microphone", type="filepath", label="Speak your query")
    # `label` is the documented Textbox parameter; the previous `name=` was not.
    text = gr.Textbox(
        placeholder="Question",
        label="Question / Voice Transcription",
        show_label=False,
    )
    output = gr.Textbox(
        placeholder="The answer will appear here",
        interactive=False,
        show_label=False,
    )

    # Recording -> text box.
    get_text_from_audio = gr.Button("Transcribe")
    get_text_from_audio.click(greet, [user_audio, lang], [text])

    # Text box -> answer box.
    submit = gr.Button("Submit")
    submit.click(greet2, [text], [output])

    # Answer box -> spoken audio.
    get_audio = gr.Button('Get Audio')
    audio = gr.Audio()
    get_audio.click(get_audio2, output, audio)

demo.launch()
asha.jpeg ADDED
asha.png ADDED

Git LFS Details

  • SHA256: f69f43be10c861ccc7d7283cc706c6a703c0a734d833ab402258bc97b707b805
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
requirements.txt ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.4
3
+ aiosignal==1.3.1
4
+ altair==5.0.0
5
+ anyio==3.6.2
6
+ argilla==1.6.0
7
+ asttokens==2.2.1
8
+ async-timeout==4.0.2
9
+ attrs==23.1.0
10
+ backcall==0.2.0
11
+ backoff==2.2.1
12
+ boto3==1.26.131
13
+ botocore==1.29.131
14
+ certifi==2023.5.7
15
+ cffi==1.15.1
16
+ charset-normalizer==3.1.0
17
+ chromadb==0.3.22
18
+ click==8.1.3
19
+ clickhouse-connect==0.5.23
20
+ cmake==3.26.3
21
+ commonmark==0.9.1
22
+ contourpy==1.0.7
23
+ cryptography==40.0.2
24
+ cycler==0.11.0
25
+ Cython==0.29.34
26
+ dataclasses-json==0.5.7
27
+ decorator==5.1.1
28
+ Deprecated==1.2.13
29
+ duckdb==0.7.1
30
+ einops==0.6.1
31
+ encodec==0.1.1
32
+ et-xmlfile==1.1.0
33
+ executing==1.2.0
34
+ faiss-cpu==1.7.4
35
+ fastapi==0.95.1
36
+ ffmpy==0.3.0
37
+ filelock==3.12.0
38
+ fonttools==4.39.3
39
+ frozenlist==1.3.3
40
+ fsspec==2023.5.0
41
+ funcy==2.0
42
+ gradio==3.29.0
43
+ gradio_client==0.2.2
44
+ greenlet==2.0.2
45
+ h11==0.14.0
46
+ hnswlib==0.7.0
47
+ httpcore==0.16.3
48
+ httptools==0.5.0
49
+ httpx==0.23.3
50
+ huggingface-hub==0.14.1
51
+ idna==3.4
52
+ importlib-metadata==6.6.0
53
+ importlib-resources==5.12.0
54
+ ipython==8.12.2
55
+ jedi==0.18.2
56
+ Jinja2==3.1.2
57
+ jmespath==1.0.1
58
+ joblib==1.2.0
59
+ jsonschema==4.17.3
60
+ kiwisolver==1.4.4
61
+ langchain==0.0.163
62
+ linkify-it-py==2.0.2
63
+ lit==16.0.3
64
+ lxml==4.9.2
65
+ lz4==4.3.2
66
+ Markdown==3.4.3
67
+ markdown-it-py==2.2.0
68
+ MarkupSafe==2.1.2
69
+ marshmallow==3.19.0
70
+ marshmallow-enum==1.5.1
71
+ matplotlib==3.7.1
72
+ matplotlib-inline==0.1.6
73
+ mdit-py-plugins==0.3.3
74
+ mdurl==0.1.2
75
+ monotonic==1.6
76
+ mpmath==1.3.0
77
+ msg-parser==1.2.0
78
+ multidict==6.0.4
79
+ mypy-extensions==1.0.0
80
+ networkx==3.1
81
+ nltk==3.8.1
82
+ numexpr==2.8.4
83
+ numpy==1.23.5
84
+ nvidia-cublas-cu11==11.10.3.66
85
+ nvidia-cuda-cupti-cu11==11.7.101
86
+ nvidia-cuda-nvrtc-cu11==11.7.99
87
+ nvidia-cuda-runtime-cu11==11.7.99
88
+ nvidia-cudnn-cu11==8.5.0.96
89
+ nvidia-cufft-cu11==10.9.0.58
90
+ nvidia-curand-cu11==10.2.10.91
91
+ nvidia-cusolver-cu11==11.4.0.1
92
+ nvidia-cusparse-cu11==11.7.4.91
93
+ nvidia-nccl-cu11==2.14.3
94
+ nvidia-nvtx-cu11==11.7.91
95
+ olefile==0.46
96
+ openai==0.27.6
97
+ openapi-schema-pydantic==1.2.4
98
+ openpyxl==3.1.2
99
+ orjson==3.8.12
100
+ packaging==23.1
101
+ pandas==1.5.3
102
+ parso==0.8.3
103
+ pdfminer.six==20221105
104
+ pexpect==4.8.0
105
+ pickleshare==0.7.5
106
+ Pillow==9.5.0
107
+ pkgutil_resolve_name==1.3.10
108
+ posthog==3.0.1
109
+ prompt-toolkit==3.0.38
110
+ ptyprocess==0.7.0
111
+ pure-eval==0.2.2
112
+ pycparser==2.21
113
+ pydantic==1.10.7
114
+ pydub==0.25.1
115
+ Pygments==2.15.1
116
+ pypandoc==1.11
117
+ pyparsing==3.0.9
118
+ pypdf==3.8.1
119
+ PyPDF2==3.0.1
120
+ pyrsistent==0.19.3
121
+ python-dateutil==2.8.2
122
+ python-docx==0.8.11
123
+ python-dotenv==1.0.0
124
+ python-magic==0.4.27
125
+ python-multipart==0.0.6
126
+ python-pptx==0.6.21
127
+ pytz==2023.3
128
+ PyYAML==6.0
129
+ regex==2023.5.5
130
+ requests==2.30.0
131
+ rfc3986==1.5.0
132
+ rich==13.0.1
133
+ s3transfer==0.6.1
134
+ scikit-learn==1.2.2
135
+ scipy==1.10.1
136
+ semantic-version==2.10.0
137
+ sentence-transformers==2.2.2
138
+ sentencepiece==0.1.99
139
+ six==1.16.0
140
+ sniffio==1.3.0
141
+ SQLAlchemy==2.0.12
142
+ stack-data==0.6.2
143
+ starlette==0.26.1
144
+ suno-bark @ git+https://github.com/suno-ai/bark.git@1ad007171e0c46078eb6d3afb6db4daf0c4f41cd
145
+ sympy==1.11.1
146
+ tenacity==8.2.2
147
+ threadpoolctl==3.1.0
148
+ tiktoken==0.4.0
149
+ tokenizers==0.13.3
150
+ toolz==0.12.0
151
+ torch==2.0.1
152
+ torchaudio==2.0.2
153
+ torchvision==0.15.2
154
+ tqdm==4.65.0
155
+ traitlets==5.9.0
156
+ transformers==4.28.1
157
+ triton==2.0.0
158
+ typing-inspect==0.8.0
159
+ typing_extensions==4.5.0
160
+ tzdata==2023.3
161
+ uc-micro-py==1.0.2
162
+ unstructured==0.6.5
163
+ urllib3==1.26.15
164
+ uvicorn==0.22.0
165
+ uvloop==0.17.0
166
+ watchfiles==0.19.0
167
+ wcwidth==0.2.6
168
+ websockets==11.0.3
169
+ wrapt==1.14.1
170
+ XlsxWriter==3.1.0
171
+ yarl==1.9.2
172
+ zipp==3.15.0
173
+ zstandard==0.21.0
temp.wav ADDED
Binary file (766 kB). View file