alexpap committed on
Commit
24f7c24
1 Parent(s): a896ab1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -1
app.py CHANGED
@@ -44,6 +44,8 @@ if menu == "Introduction":
44
  ''')
45
 
46
  elif menu == "Parsing NLU data into SQuAD 2.0":
 
 
47
  st.markdown('''
48
  Here, we show a small example of how NLU data can be transformed into QANLU data.
49
  The same method can be used to transform [MATIS++](https://github.com/amazon-research/multiatis)
@@ -120,15 +122,54 @@ elif menu == "Parsing NLU data into SQuAD 2.0":
120
  "intent": "restaurant"
121
  },
122
  ... <More questions>
 
 
123
  ````
124
 
125
  There are many tunable parameters when generating the above file, such as how many negative examples to include per question. Follow the same process for training a slot-tagging model.
126
 
127
  ''')
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  elif menu == "Evaluation":
131
- st.header('QANLU Evaluation')
 
 
 
 
 
 
 
132
  tokenizer = AutoTokenizer.from_pretrained("AmazonScience/qanlu", use_auth_token=True)
133
 
134
  model = AutoModelForQuestionAnswering.from_pretrained("AmazonScience/qanlu", use_auth_token=True)
44
  ''')
45
 
46
  elif menu == "Parsing NLU data into SQuAD 2.0":
47
+ st.header('QA-NLU Data Parsing')
48
+
49
  st.markdown('''
50
  Here, we show a small example of how NLU data can be transformed into QANLU data.
51
  The same method can be used to transform [MATIS++](https://github.com/amazon-research/multiatis)
122
  "intent": "restaurant"
123
  },
124
  ... <More questions>
125
+
126
+ ... <More paragraphs>
127
  ````
128
 
129
  There are many tunable parameters when generating the above file, such as how many negative examples to include per question. Follow the same process for training a slot-tagging model.
130
 
131
  ''')
132
 
133
+ elif menu == "Training":
134
+ st.header('QA-NLU Training')
135
+
136
+ st.markdown('''
137
+ To train a QA-NLU model on the data we created, we use the `run_squad.py` script from [huggingface](https://github.com/huggingface/transformers/blob/master/examples/legacy/question-answering/run_squad.py) and a SQuAD-trained QA model as our base. As an example, we can use the `deepset/roberta-base-squad2` model from [here](https://huggingface.co/deepset/roberta-base-squad2) (assuming 8 GPUs are present):
138
+
139
+ ```
140
+ mkdir models
141
+
142
+ python -m torch.distributed.launch --nproc_per_node=8 run_squad.py \
143
+ --model_type roberta \
144
+ --model_name_or_path deepset/roberta-base-squad2 \
145
+ --do_train \
146
+ --do_eval \
147
+ --do_lower_case \
148
+ --train_file data/matis_en_train_squad.json \
149
+ --predict_file data/matis_en_test_squad.json \
150
+ --learning_rate 3e-5 \
151
+ --num_train_epochs 2 \
152
+ --max_seq_length 384 \
153
+ --doc_stride 64 \
154
+ --output_dir models/qanlu/ \
155
+ --per_gpu_train_batch_size 8 \
156
+ --overwrite_output_dir \
157
+ --version_2_with_negative \
158
+ --save_steps 100000 \
159
+ --gradient_accumulation_steps 8 \
160
+ --seed $RANDOM
161
+ ```
162
+ ''')
163
 
164
  elif menu == "Evaluation":
165
+ st.header('QA-NLU Evaluation')
166
+
167
+ st.markdown('''
168
+ To assess the performance of the trained model, we can use the `calculate_pr.py` script from the [QA-NLU Amazon Research repository](https://github.com/amazon-research/question-answering-nlu).
169
+
170
+ Feel free to query the pre-trained QA-NLU model using the buttons below.
171
+ ''')
172
+
173
  tokenizer = AutoTokenizer.from_pretrained("AmazonScience/qanlu", use_auth_token=True)
174
 
175
  model = AutoModelForQuestionAnswering.from_pretrained("AmazonScience/qanlu", use_auth_token=True)