awacke1 committed on
Commit
795b0f2
1 Parent(s): f55579a

Upload 3 files

Files changed (3)
  1. TriageScript.txt +6 -0
  2. app.py +137 -0
  3. requirements.txt +6 -0
TriageScript.txt ADDED
@@ -0,0 +1,6 @@
+ How do you treat a serious allergic reaction?
+ I take an allergy medication, but it doesn't work as well as I'd like.
+ How do you treat serious shock?
+ I have to take an epi-pen every time I go to the doctor.
+ What do you do if you are confused?
+ I just have to keep an eye on my throat and try not to sneeze.
app.py ADDED
@@ -0,0 +1,137 @@
+ from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
+ import torch
+ import gradio as gr
+
+
+ # PersistDataset -----
+ import os
+ import csv
+ from gradio import inputs, outputs
+ import huggingface_hub
+ from huggingface_hub import Repository, hf_hub_download, upload_file
+ from datetime import datetime
+ DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/Carddata.csv"
+ DATASET_REPO_ID = "awacke1/Carddata.csv"
+ DATA_FILENAME = "Carddata.csv"
+ DATA_DIRNAME = "data"  # local clone directory used by Repository below
+ DATA_FILE = os.path.join(DATA_DIRNAME, DATA_FILENAME)
+ HF_TOKEN = os.environ.get("HF_TOKEN")
+
+ # Auto-submits the form once when embedded in a page (not referenced below).
+ SCRIPT = """
+ <script>
+ if (!window.hasBeenRun) {
+     window.hasBeenRun = true;
+     console.log("should only happen once");
+     document.querySelector("button.submit").click();
+ }
+ </script>
+ """
+
+ # Best effort: the repo is cloned in full just below, so a miss here is not fatal.
+ try:
+     hf_hub_download(
+         repo_id=DATASET_REPO_ID,
+         filename=DATA_FILENAME,
+         cache_dir=DATA_DIRNAME,
+         force_filename=DATA_FILENAME
+     )
+ except Exception:
+     print("file not found")
+ repo = Repository(
+     local_dir=DATA_DIRNAME, clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
+ )
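+
+ # Persistence flow: the dataset repo is cloned into ./data, store_message() appends
+ # rows to Carddata.csv, and repo.push_to_hub() commits them back to the Hub.
+ # generate_html() renders the stored rows but is not wired into either Interface below.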
+
+ def generate_html() -> str:
+     with open(DATA_FILE) as csvfile:
+         reader = csv.DictReader(csvfile)
+         rows = []
+         for row in reader:
+             rows.append(row)
+         rows.reverse()
+     if len(rows) == 0:
+         return "no messages yet"
+     else:
+         html = "<div class='chatbot'>"
+         # column names match the fieldnames written by store_message()
+         for row in rows:
+             html += "<div>"
+             html += f"<span>{row['name']}</span>"
+             html += f"<span class='outputs'>{row['message']}</span>"
+             html += "</div>"
+         html += "</div>"
+         return html
+
+ def store_message(name: str, message: str):
+     if name and message:
+         with open(DATA_FILE, "a") as csvfile:
+             writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
+             writer.writerow(
+                 {"name": name.strip(), "message": message.strip(), "time": str(datetime.now())}
+             )
+         commit_url = repo.push_to_hub()
+     return ""
+
+ iface = gr.Interface(
+     store_message,
+     [
+         inputs.Textbox(placeholder="Your name"),
+         inputs.Textbox(placeholder="Your message", lines=2),
+     ],
+     "html",
+     css="""
+     .message {background-color:cornflowerblue;color:white; padding:4px;margin:4px;border-radius:4px; }
+     """,
+     title="Reading/writing to a HuggingFace dataset repo from Spaces",
+     description="This is a demo of how to do simple *shared data persistence* in a Gradio Space, backed by a dataset repo.",
+     article=f"The dataset repo is [{DATASET_REPO_URL}]({DATASET_REPO_URL})",
+ )
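+
+
+ # Chatbot -----
+ # BlenderBot 400M with a rolling conversation window; each exchange is also
+ # persisted to the dataset repo via store_message().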
+ mname = "facebook/blenderbot-400M-distill"
+ model = BlenderbotForConditionalGeneration.from_pretrained(mname)
+ tokenizer = BlenderbotTokenizer.from_pretrained(mname)
+
+ def take_last_tokens(inputs, note_history, history):
+     """Keep only the last 128 tokens, dropping the oldest turn when the context grows too long."""
+     if inputs['input_ids'].shape[1] > 128:
+         inputs['input_ids'] = torch.tensor([inputs['input_ids'][0][-128:].tolist()])
+         inputs['attention_mask'] = torch.tensor([inputs['attention_mask'][0][-128:].tolist()])
+         note_history = ['</s> <s>'.join(note_history[0].split('</s> <s>')[2:])]
+         history = history[1:]
+     return inputs, note_history, history
+
+ def add_note_to_history(note, note_history):
+     """Add a note to the historical information"""
+     note_history.append(note)
+     note_history = '</s> <s>'.join(note_history)
+     return [note_history]
+
+ title = "Chatbot State of the Art now with Memory Saved to Dataset"
+ description = """Chatbot With Memory"""
+
+ def chat(message, history):
+     history = history or []
+     if history:
+         history_useful = ['</s> <s>'.join([str(a[0])+'</s> <s>'+str(a[1]) for a in history])]
+     else:
+         history_useful = []
+     history_useful = add_note_to_history(message, history_useful)
+     inputs = tokenizer(history_useful, return_tensors="pt")
+     inputs, history_useful, history = take_last_tokens(inputs, history_useful, history)
+     reply_ids = model.generate(**inputs)
+     response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
+     history_useful = add_note_to_history(response, history_useful)
+     list_history = history_useful[0].split('</s> <s>')
+     history.append((list_history[-2], list_history[-1]))
+     store_message(message, response)  # Save the (user message, bot response) pair to the dataset
+     return history, history
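+
+ # This is the Interface that actually launches: free text plus session state in,
+ # chatbot transcript plus updated state out.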
+ gr.Interface(
+     fn=chat,
+     theme="huggingface",
+     css=".footer {display:none !important}",
+     inputs=["text", "state"],
+     outputs=["chatbot", "state"],
+     title=title,
+     allow_flagging="never",
+     description="Gradio chatbot backed by memory in a dataset repository.",
+     article=f"The dataset repo is [{DATASET_REPO_URL}]({DATASET_REPO_URL})"
+ ).launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ transformers
+ torch
+ gradio
+ Werkzeug
+ huggingface_hub
+ Pillow