DiegoLigtenberg committed on
Commit
f14d11b
1 Parent(s): e711356

Add other file

Browse files
utils/Dockerfile.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt ./requirements.txt
6
+
7
+ RUN apt-get update \
8
+ && apt-get install libportaudio2 libportaudiocpp0 portaudio19-dev libsndfile1-dev -y \
9
+ && pip3 install pyaudio
10
+
11
+ RUN pip install -r requirements.txt
12
+
13
+ EXPOSE 8501
14
+
15
+ WORKDIR /src
16
+ COPY . /src
17
+
18
+ ENTRYPOINT ["streamlit", "run"]
19
+
20
+ CMD ["src/main.py"]
utils/model_names.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ INSERT Hugging Face models
2
+ 1) Insert tokenizer model name
3
+ 2) Insert space
4
+ 3) Insert Hugging Face link to the model
5
+
6
+ speech_to_text
7
+ facebook/wav2vec2-base-960h https://huggingface.co/facebook/wav2vec2-base-960h
utils/model_names.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models that generate text from audio data.
2
+ model_task: # model task
3
+ speech_to_text:
4
+ model_name: # model name
5
+ wav2vec:
6
+ model_size: # model size
7
+ base:
8
+ name: facebook/wav2vec2-base-960h
9
+ url: https://huggingface.co/facebook/wav2vec2-base-960h
10
+ year: 2020
11
+ whisper:
12
+ model_size:
13
+ tiny:
14
+ name: openai/whisper-tiny
15
+ url: https://huggingface.co/openai/whisper-tiny
16
+ year: 2022
17
+ base:
18
+ name: openai/whisper-base
19
+ url: https://huggingface.co/openai/whisper-base
20
+ year: 2022
21
+ medium:
22
+ name: openai/whisper-medium
23
+ url: https://huggingface.co/openai/whisper-medium
24
+ year: 2022
25
+
26
+ # models that generate summaries from text data.
27
+ text_to_summary:
28
+ model_name:
29
+ bert:
30
+ model_size:
31
+ large:
32
+ name: facebook/bart-large-cnn
33
+ url: https://huggingface.co/facebook/bart-large-cnn
34
+ year: 2019
35
+ fbs: 31231
36
+
37
+
38
+
39
+
40
+
41
+
42
+
utils/models.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
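+ # models that generate text from audio data (speech-to-text). NOTE(review): the `task` values below read text_to_speech - confirm which label is intended.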
2
+ wav2vec:
3
+ task: text_to_speech
4
+ url: https://huggingface.co/facebook/wav2vec2-base-960h
5
+
6
+ wav2vec2:
7
+ task: text_to_speech
8
+ url: https://huggingface.co/yongjian/wav2vec2-large-a
9
+
10
+ whisper_tiny:
11
+ task: text_to_speech
12
+ url: https://huggingface.co/openai/whisper-tiny
13
+ description: "this is the smallest whisper model that will be used for cloud deployment"
14
+ year: 2022
15
+
16
+ whisper_base:
17
+ task: text_to_speech
18
+ url: https://huggingface.co/openai/whisper-base
19
+ year: 2022
20
+
21
+ whisper_medium:
22
+ task: text_to_speech
23
+ url: https://huggingface.co/openai/whisper-medium
24
+ year: 2022
25
+
26
+ bart_large:
27
+ task: text_to_summary
28
+ url: https://huggingface.co/facebook/bart-large-cnn
29
+ year: 2022
utils/oldmodel.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ import torch
3
+ import torchaudio
4
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
5
+ import speech_recognition as sr
6
+ import io
7
+ from pydub import AudioSegment
8
+ import librosa
9
+ import whisper
10
+ from scipy.io import wavfile
11
+ from test import record_voice
12
+
13
+ model = Wav2Vec2ForCTC.from_pretrained(r'yongjian/wav2vec2-large-a') # Note: PyTorch Model
14
+ tokenizer = Wav2Vec2Processor.from_pretrained(r'yongjian/wav2vec2-large-a')
15
+
16
+
17
+ r = sr.Recognizer()
18
+
19
+ from transformers import pipeline
20
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
21
+
22
+ with sr.Microphone(sample_rate=16000) as source:
23
+ print("You can start speaking now")
24
+ record_voice()
25
+ x,_ = librosa.load("output.wav")
26
+ model_inputs = tokenizer(x, sampling_rate=16000, return_tensors="pt", padding=True)
27
+ logits = model(model_inputs.input_values, attention_mask=model_inputs.attention_mask).logits.cuda() # use .cuda() for GPU acceleration
28
+ pred_ids = torch.argmax(logits, dim=-1).cpu()
29
+ pred_text = tokenizer.batch_decode(pred_ids)
30
+ print(x[:10],x.shape)
31
+ print('Transcription:', pred_text)
32
+
33
+ model = whisper.load_model("base")
34
+ result = model.transcribe("output.wav")
35
+ print(result["text"])
36
+ summary_input = result["text"]
37
+
38
+ summary_output = (summarizer(summary_input, max_length=30, min_length=20, do_sample=False))
39
+ print(summary_output)
40
+ with open("raw_text.txt",'w',encoding = 'utf-8') as f:
41
+ f.write(summary_input)
42
+ f.close()
43
+ with open("summary_text.txt",'w',encoding = 'utf-8') as f:
44
+ f.write(summary_output[0]["summary_text"])
45
+ f.close()
46
+
47
+ '''