Brendan King committed on
Commit
ce78cc4
1 Parent(s): 07aa55e

Initial run: example explorer in huggingface

app.py ADDED
@@ -0,0 +1,127 @@
+ import json
+ from typing import List, Dict, Tuple, Union, Any
+
+ import streamlit as st
+ from annotated_text import annotated_text
+
+ sidebar = st.sidebar
+
+
+ def bs_unigram_match_annotated_text(belief_state_example) -> List[Union[str, Tuple]]:
+     """Label each generated unigram by whether it also appears in the gold or input sequence."""
+     gold_set = set(belief_state_example['gold'].split(' '))
+     input_set = set(" ".join(belief_state_example['input']).split(' '))
+     generated = belief_state_example['generated']
+     result = []
+     for word in generated.split(' '):
+         if word in gold_set:
+             result.append((word, 'gold', '#dfd'))  # gold overlap => green label
+         elif word in input_set:
+             result.append((word, 'in', '#eea'))  # input overlap => yellow label
+         else:
+             result.append(word + ' ')  # no overlap => plain text (restore the split-off space)
+     return result
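+
+ # A minimal worked example (hypothetical tokens, not taken from the dataset):
+ #   bs_unigram_match_annotated_text({
+ #       'gold': 'hotel pricerange cheap',
+ #       'input': ['<sos_u> i want a cheap hotel <eos_u>'],
+ #       'generated': 'hotel pricerange expensive'})
+ # returns [('hotel', 'gold', '#dfd'), ('pricerange', 'gold', '#dfd'), 'expensive ']:
+ # 'hotel' and 'pricerange' match the gold set (which is checked before the
+ # input set), while 'expensive' matches neither and passes through unlabeled.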
+
+ # load in data
+ pptod_examples: List[Dict] = []
+
+ models: Dict[str, Dict[str, Any]] = {
+     'pptod-small': {
+         'name': 'pptod-small',
+         'description': 'a T5 model that has been pre-trained on the ToD-BERT dataset **in this data format.** '
+                        'As such, it is familiar with the meaning of these special separator tokens. However, '
+                        'it does not have MultiWoZ training experience, so while it has adapted to the belief '
+                        'state grammar generally, it is unaware of the particular slot name conventions of MultiWoZ.',
+         'output_file': './output/pptod-small-10-percent.jsonl'
+     },
+     't5-small': {
+         'name': 't5-small',
+         'description': 'a T5 model with no dialogue experience. Data input has been transformed to exclude '
+                        'special tokens that the model could not be familiar with.',
+         'output_file': './output/t5-small-10-percent.jsonl'
+     },
+     'bart': {
+         'name': 'bart',
+         'description': 'a BART model with no dialogue experience. Data input has been transformed to exclude '
+                        'special tokens that the model could not be familiar with.',
+         'output_file': './output/bart-100ish-examples.jsonl'
+     },
+     'dialogpt': {
+         'name': 'dialogpt',
+         'description': 'DialoGPT is a (fine-tuned GPT-2) dialogue response generation model for multi-turn '
+                        'conversations, trained on 147M Reddit conversation chains.',
+         'output_file': './output/dialogpt-100ish-examples.jsonl'
+     }
+ }
+
+ for model_def in models.values():
+     model_def['examples'] = []
+     with open(model_def['output_file'], 'r') as f:
+         for line in f:
+             model_def['examples'].append(json.loads(line.strip()))
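+
+ # Note on file shape (inferred from how the fields are used below): each JSONL
+ # line parses to a list whose first element is the example dict, so examples
+ # are indexed as e[0]. The dict carries 'turn_domain', 'turn_num', 'bspn_input',
+ # 'bspn_gen', 'bspn', 'resp_input', 'resp_gen', and 'resp'.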
+
+ model_names = list(models.keys())
+
+ model_name = sidebar.selectbox('Model', model_names)
+ active_model = models[model_name]
+
+ st.write(f"""
+ #### Inputs
+
+ **Selected Model:** `{active_model['name']}`
+
+ {active_model['description']}
+ """)
+ """
+ ### Belief State Prediction
+
+ Below is the predicted belief state as a sequence.
+
+ - `input` denotes the input, which has been transformed into a list for
+   human readability but is presented to the model as a single sequence.
+ - `gold` is the target belief state in sequence form (slot-name slot-value pairs)
+ - `generated` is the model-generated belief state sequence
+ """
+ titles = [f"{i}: {e[0]['turn_domain'][0]} (Turn {e[0]['turn_num']})"
+           for i, e in enumerate(active_model['examples'])]
+ title = sidebar.selectbox("Development Example", titles)
+ # parse the example index back out of the selected title (robust to indices >= 10)
+ active_example = active_model['examples'][int(title.split(':')[0])][0]
+
+ # split the flat input sequence at special-token boundaries for readability
+ active_belief_spans = active_example['bspn_input'].split("> <")
+ active_example_bs = {
+     'input': [('<' if i > 0 else '') +
+               string +
+               ('>' if not string.endswith('>') and len(active_belief_spans) > 1 else '')
+               for i, string in enumerate(active_belief_spans)],
+     'generated': active_example['bspn_gen'],
+     'gold': active_example['bspn']
+ }
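+
+ # For illustration (hypothetical sequence, not from the dataset): a 'bspn_input'
+ # such as '<sos_u> i need a hotel <eos_u> <sos_b>' splits on "> <" into
+ # ['<sos_u> i need a hotel <eos_u', 'sos_b>'], and the comprehension above
+ # restores the stripped brackets, giving
+ # ['<sos_u> i need a hotel <eos_u>', '<sos_b>'].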
+
+ st.write(active_example_bs)
+ """
+ ##### Generated Overlap
+ """
+ annotated_text(*bs_unigram_match_annotated_text(active_example_bs))
+
+ """
+ ---
+
+ ### Response Generation
+
+ Below is the predicted response as a sequence.
+
+ - `input` denotes the input, which has been transformed into a list for
+   human readability but is presented to the model as a single sequence.
+ - `gold` is the target response sequence
+ - `generated` is the model-generated response
+ """
+ # title = st.selectbox("Development Example", titles)
+
+ active_example_resp = {
+     'input': [('<' if i > 0 else '') +
+               string +
+               ('>' if not string.endswith('>') else '')
+               for i, string in enumerate(active_example['resp_input'].split("> <"))],
+     'generated': active_example['resp_gen'],
+     'gold': active_example['resp']
+ }
+
+ st.write(active_example_resp)
+ """
+ ##### Generated Overlap
+ """
+ annotated_text(*bs_unigram_match_annotated_text(active_example_resp))
output/bart-100ish-examples.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
output/dialogpt-100ish-examples.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
output/pptod-small-10-percent.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
output/t5-small-10-percent.jsonl ADDED
The diff for this file is too large to render. See raw diff