mskov committed on
Commit
546a5e2
1 Parent(s): 4eaeb87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -123
app.py CHANGED
@@ -1,126 +1,29 @@
1
- import os
2
- from pprint import pprint
3
- os.system("pip install git+https://github.com/openai/whisper.git")
4
- import gradio as gr
5
- import whisper
6
- from transformers import pipeline
7
- import torch
8
- from transformers import AutoModelForCausalLM
9
- from transformers import AutoTokenizer
10
- import time
11
- # import streaming.py
12
- # from next_word_prediction import GPT2
13
-
14
-
15
-
16
-
17
- gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
18
- tokenizer = AutoTokenizer.from_pretrained("gpt2")
19
-
20
- ### /code snippet
21
-
22
-
23
- # get gpt2 model
24
- generator = pipeline('text-generation', model='gpt2')
25
-
26
- # whisper model specification
27
- model = whisper.load_model("tiny")
28
 
29
 
30
-
31
- def inference(audio, state=""):
32
 
33
- #time.sleep(2)
34
- #text = p(audio)["text"]
35
- #state += text + " "
36
- # load audio data
37
- audio = whisper.load_audio(audio)
38
- # ensure sample is in correct format for inference
39
- audio = whisper.pad_or_trim(audio)
40
-
41
- # generate a log-mel spectrogram of the audio data
42
- mel = whisper.log_mel_spectrogram(audio).to(model.device)
43
-
44
- _, probs = model.detect_language(mel)
45
-
46
- # decode audio data
47
- options = whisper.DecodingOptions(fp16 = False)
48
- # transcribe speech to text
49
- result = whisper.decode(model, mel, options)
50
- result_len = len(result.text)
51
-
52
- # Added prompt below
53
- input_prompt = "The following is a transcript of someone talking, please predict what they will say next. \n"
54
- ### code
55
- input_total = input_prompt + result.text
56
- input_ids = tokenizer(input_total, return_tensors="pt").input_ids
57
- print("inputs ", input_ids)
58
-
59
- # prompt length
60
- # prompt_length = len(tokenizer.decode(inputs_ids[0]))
61
-
62
- # length penalty for gpt2.generate???
63
- #Prompt
64
- #generated_outputs = gpt2.generate(input_ids, do_sample=True, num_return_sequences=3, output_scores=True, max_length=4)
65
- output = gpt2.generate(input_ids, max_length=5, do_sample=True, top_k=50, top_p=0.95, num_return_sequences=5)
66
- print("output ", output)
67
- #outputs = [output[-4:] for output in output.tolist()]
68
- # print("outputs generated ", generated_outputs[0])
69
- # only use id's that were generated
70
- # gen_sequences has shape [3, 15]
71
-
72
- #gen_sequences = outputs.sequences[:, input_ids.shape[-1]:]
73
- #print("gen sequences: ", gen_sequences)
74
-
75
- # let's stack the logits generated at each step to a tensor and transform
76
- # logits to probs
77
- #probs = torch.stack(generated_outputs.scores, dim=1).softmax(-1) # -> shape [3, 15, vocab_size]
78
-
79
- # now we need to collect the probability of the generated token
80
- # we need to add a dummy dim in the end to make gather work
81
- #gen_probs = torch.gather(probs, 2, gen_sequences[:, :, None]).squeeze(-1)
82
- #print("gen probs result: ", gen_probs)
83
- # now we can do all kinds of things with the probs
84
-
85
- # 1) the probs that exactly those sequences are generated again
86
- # those are normally going to be very small
87
- # unique_prob_per_sequence = gen_probs.prod(-1)
88
-
89
- # 2) normalize the probs over the three sequences
90
- # normed_gen_probs = gen_probs / gen_probs.sum(0)
91
- # assert normed_gen_probs[:, 0].sum() == 1.0, "probs should be normalized"
92
-
93
- # 3) compare normalized probs to each other like in 1)
94
- # unique_normed_prob_per_sequence = normed_gen_probs.prod(-1)
95
-
96
- ### end code
97
- # print audio data as text
98
- # print(result.text)
99
- # prompt
100
- getText = generator(result.text, max_new_tokens=10, num_return_sequences=5)
101
- state = getText
102
- print(state)
103
- gt = [gt['generated_text'] for gt in state]
104
- print(type(gt))
105
- gtTrim = [gt[:reasult_len] for val in gt]
106
-
107
- # result.text
108
- #return getText, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
109
- return result.text, state, gtTrim
110
-
111
-
112
-
113
- # get audio from microphone
114
-
115
- gr.Interface(
116
- fn=inference,
117
- inputs=[
118
- gr.inputs.Audio(source="microphone", type="filepath"),
119
- "state"
120
- ],
121
- outputs=[
122
- "textbox",
123
- "state",
124
- "textbox"
125
- ],
126
- live=True).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
 
 
 
3
 
4
+ '''
5
+ This script calls the ada model from the OpenAI API to predict the next few words.
6
+ '''
7
+ import os
8
+ import openai
9
+ PROMPT = """The following is a transcript of a conversation. Predict a few nouns, verbs, or adjectives that may be used next. Predict the next few words as a list of options.
10
+ A few examples are provided below and then the current transcript is provided.
11
+ Examples:
12
+ Transcript: I'm making spaghetti for dinner
13
+ Next: Tonight, Tomorrow, for us, our neighbors
14
+ Transcript: I would like to order a cheeseburger with a side of
15
+ Next: Fries, Milkshake, Apples
16
+ Current Transcript:
17
+ Transcript: I'm going to the store to buy
18
+ Next:"""
19
+
20
+ openai.api_key = os.environ["Openai_APIkey"]
21
+
22
+ response = openai.Completion.create(
23
+ model="text-ada-001",
24
+ prompt=PROMPT,
25
+ temperature=1,
26
+ max_tokens=4,
27
+ n=4)
28
+ for i in range(4):
29
+ print(response['choices'][i]['text'])