awacke1 committed
Commit 4ad4f3c
1 Parent(s): 3e967dc

Update app.py

Files changed (1)
  1. app.py +64 -94

app.py CHANGED
@@ -20,84 +20,66 @@ import huggingface_hub
 from huggingface_hub import Repository, hf_hub_download, upload_file
 from datetime import datetime
 
+# ---------------------------------------------
 # Dataset and Token links - change awacke1 to your own HF id, and add a HF_TOKEN copy to your repo for write permissions
 # This should allow you to save your results to your own Dataset hosted on HF. ---
-DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/Carddata.csv"
-DATASET_REPO_ID = "awacke1/Carddata.csv"
-DATA_FILENAME = "Carddata.csv"
-DATA_FILE = os.path.join("data", DATA_FILENAME)
-HF_TOKEN = os.environ.get("HF_TOKEN")
-# ---------------------------------------------
-
-SCRIPT = """
-<script>
-if (!window.hasBeenRun) {
-    window.hasBeenRun = true;
-    console.log("should only happen once");
-    document.querySelector("button.submit").click();
-}
-</script>
-"""
-try:
-    hf_hub_download(
-        repo_id=DATASET_REPO_ID,
-        filename=DATA_FILENAME,
-        cache_dir=DATA_DIRNAME,
-        force_filename=DATA_FILENAME
-    )
-except:
-    print("file not found")
-repo = Repository(
-    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
-)
-def generate_html() -> str:
-    with open(DATA_FILE) as csvfile:
-        reader = csv.DictReader(csvfile)
-        rows = []
-        for row in reader:
-            rows.append(row)
-        rows.reverse()
-    if len(rows) == 0:
-        return "no messages yet"
-    else:
-        html = "<div class='chatbot'>"
-        for row in rows:
-            html += "<div>"
-            html += f"<span>{row['inputs']}</span>"
-            html += f"<span class='outputs'>{row['outputs']}</span>"
-            html += "</div>"
-        html += "</div>"
-        return html
-
-
-def store_message(name: str, message: str):
-    if name and message:
-        with open(DATA_FILE, "a") as csvfile:
-            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
-            writer.writerow(
-                {"name": name.strip(), "message": message.strip(), "time": str(datetime.now())}
-            )
-        # uncomment line below to begin saving -
-        commit_url = repo.push_to_hub()
-    return ""
-
-
-iface = gr.Interface(
-    store_message,
-    [
-        inputs.Textbox(placeholder="Your name"),
-        inputs.Textbox(placeholder="Your message", lines=2),
-    ],
-    "html",
-    css="""
-    .message {background-color:cornflowerblue;color:white; padding:4px;margin:4px;border-radius:4px; }
-    """,
-    title="Reading/writing to a HuggingFace dataset repo from Spaces",
-    description=f"This is a demo of how to do simple *shared data persistence* in a Gradio Space, backed by a dataset repo.",
-    article=f"The dataset repo is [{DATASET_REPO_URL}]({DATASET_REPO_URL})",
-)
-
-
+#DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/Carddata.csv"
+#DATASET_REPO_ID = "awacke1/Carddata.csv"
+#DATA_FILENAME = "Carddata.csv"
+#DATA_FILE = os.path.join("data", DATA_FILENAME)
+#HF_TOKEN = os.environ.get("HF_TOKEN")
+#SCRIPT = """
+
+#<script>
+#if (!window.hasBeenRun) {
+#    window.hasBeenRun = true;
+#    console.log("should only happen once");
+#    document.querySelector("button.submit").click();
+#}
+#</script>
+#"""
+
+#try:
+#    hf_hub_download(
+#        repo_id=DATASET_REPO_ID,
+#        filename=DATA_FILENAME,
+#        cache_dir=DATA_DIRNAME,
+#        force_filename=DATA_FILENAME
+#    )
+#except:
+#    print("file not found")
+#repo = Repository(
+#    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
+#)
+
+#def store_message(name: str, message: str):
+#    if name and message:
+#        with open(DATA_FILE, "a") as csvfile:
+#            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
+#            writer.writerow(
+#                {"name": name.strip(), "message": message.strip(), "time": str(datetime.now())}
+#            )
+#        # uncomment line below to begin saving -
+#        commit_url = repo.push_to_hub()
+#    return ""
+
+#iface = gr.Interface(
+#    store_message,
+#    [
+#        inputs.Textbox(placeholder="Your name"),
+#        inputs.Textbox(placeholder="Your message", lines=2),
+#    ],
+#    "html",
+#    css="""
+#    .message {background-color:cornflowerblue;color:white; padding:4px;margin:4px;border-radius:4px; }
+#    """,
+#    title="Reading/writing to a HuggingFace dataset repo from Spaces",
+#    description=f"This is a demo of how to do simple *shared data persistence* in a Gradio Space, backed by a dataset repo.",
+#    article=f"The dataset repo is [{DATASET_REPO_URL}]({DATASET_REPO_URL})",
+#)
+
+
+# main -------------------------
 mname = "facebook/blenderbot-400M-distill"
 model = BlenderbotForConditionalGeneration.from_pretrained(mname)
 tokenizer = BlenderbotTokenizer.from_pretrained(mname)
@@ -132,8 +114,9 @@ def chat(message, history):
     history_useful = add_note_to_history(response, history_useful)
     list_history = history_useful[0].split('</s> <s>')
     history.append((list_history[-2], list_history[-1]))
-    store_message(message, response) # Save to dataset
+    # store_message(message, response) # Save to dataset - uncomment if you uncomment above to save inputs and outputs to your dataset
     return history, history
+
 
 SAMPLE_RATE = 16000
 model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/stt_en_conformer_transducer_xlarge")
@@ -148,26 +131,19 @@ def process_audio_file(file):
     data = librosa.to_mono(data)
     return data
 
-#def transcribe(audio, state = "", im4 = "", file = ""):
-#def transcribe(audio, state = "", im4 = None, file = None):
-def transcribe(audio, state = ""): # two parms - had been testing video and file inputs at same time.
-    # Grant additional context
-    # time.sleep(1)
+
+def transcribe(audio, state = ""):
     if state is None:
         state = ""
     audio_data = process_audio_file(audio)
     with tempfile.TemporaryDirectory() as tmpdir:
-        # Filepath transcribe
         audio_path = os.path.join(tmpdir, f'audio_{uuid.uuid4()}.wav')
         soundfile.write(audio_path, audio_data, SAMPLE_RATE)
         transcriptions = model.transcribe([audio_path])
-        # Direct transcribe
-        # transcriptions = model.transcribe([audio])
-        # if transcriptions form a tuple (from RNNT), extract just "best" hypothesis
        if type(transcriptions) == tuple and len(transcriptions) == 2:
            transcriptions = transcriptions[0]
        transcriptions = transcriptions[0]
-       store_message(transcriptions, state) # Save to dataset
+       # store_message(transcriptions, state) # Save to dataset - uncomment to store into a dataset - hint you will need your HF_TOKEN
        state = state + transcriptions + " "
     return state, state
 
@@ -176,16 +152,10 @@ iface = gr.Interface(
     inputs=[
         gr.Audio(source="microphone", type='filepath', streaming=True),
         "state",
-        #gr.Image(label="Webcam", source="webcam"),
-        #gr.File(label="File"),
     ],
     outputs=[
         "textbox",
         "state",
-        #gr.HighlightedText(label="HighlightedText", color_map={"punc": "pink", "test 0": "blue"}),
-        #gr.HighlightedText(label="HighlightedText", show_legend=True),
-        #gr.JSON(label="JSON"),
-        #gr.HTML(label="HTML"),
     ],
     layout="horizontal",
     theme="huggingface",
@@ -193,6 +163,6 @@ iface = gr.Interface(
     description=f"Live Automatic Speech Recognition (ASR) with Memory💾 Dataset.",
     allow_flagging='never',
     live=True,
-    article=f"Result Output Saved to Memory💾 Dataset: [{DATASET_REPO_URL}]({DATASET_REPO_URL})"
+    # article=f"Result Output Saved to Memory💾 Dataset: [{DATASET_REPO_URL}]({DATASET_REPO_URL})"
 )
 iface.launch()
 
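For anyone who wants to turn persistence back on later, here is a minimal sketch of the path this commit disables. It simply restores the commented-out Repository / store_message code shown in the diff above; the dataset repo URL and a write-enabled HF_TOKEN Space secret are assumptions you must set up yourself (change awacke1 to your own HF id).

import csv
import os
from datetime import datetime

from huggingface_hub import Repository

# Same names as the commented-out block above; point these at your own dataset repo.
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/Carddata.csv"
DATA_FILENAME = "Carddata.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)
HF_TOKEN = os.environ.get("HF_TOKEN")  # write token stored as a Space secret

# Clone the dataset repo into ./data so appended rows can be pushed back to the Hub.
repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)

def store_message(name: str, message: str) -> str:
    # Append one (name, message, time) row to the CSV and push the commit to the Hub.
    if name and message:
        with open(DATA_FILE, "a", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow(
                {"name": name.strip(), "message": message.strip(), "time": str(datetime.now())}
            )
        repo.push_to_hub()
    return ""

With this in place, the commented-out store_message(...) calls in chat() and transcribe() can be uncommented so each chat exchange and transcription is written back to the dataset.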