Lisibonny committed
Commit bc75b5c
1 Parent(s): 64dce50

Update app.py

Files changed (1):
  1. app.py +30 -15
app.py CHANGED
@@ -136,28 +136,43 @@ def main():

  text=remove_html_markup(df_answer.loc[i, "resumen"])
  text=remove_URL(text)
+
  inputs = tokenizer(query, text[0:512], return_tensors='tf')
- outputs = qa_model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
- answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
- answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])
+ input_ids = inputs["input_ids"].numpy()[0]
+
+ text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
+ answer_start_scores, answer_end_scores = qa_model(inputs)
+
+ answer_start = tf.argmax(answer_start_scores, axis=1).numpy()[0]
+ answer_end = (tf.argmax(answer_end_scores, axis=1) + 1).numpy()[0]
+
+ answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
+
+ df_answer.loc[x] = answer, max(answer_start_scores.numpy()[0]), 0, 0
+ st.write(df_answer.sort_values(by=['score']).tail(10))
+
+ #outputs = qa_model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
+
+ #answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
+ #answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])
  #answer_start_scores = tf.nn.softmax(outputs.start_logits)
  #answer_end_scores = tf.nn.softmax(outputs.end_logits)
  #######################
- start_probabilities = tf.nn.softmax(outputs.start_logits, axis=-1)[0]
- end_probabilities = tf.nn.softmax(outputs.end_logits, axis=-1)[0]
- scores = start_probabilities[:, None] * end_probabilities[None, :]
- scores = tf.linalg.band_part(scores, 0, -1)
- scores = tf.reshape(scores, [-1])
- st.write(scores)
- max_index = np.argmax(scores)
- st.write(max_index)
- start_index = max_index // scores.shape[1]
- end_index = max_index % scores.shape[1]
+ #start_probabilities = tf.nn.softmax(outputs.start_logits, axis=-1)[0]
+ #end_probabilities = tf.nn.softmax(outputs.end_logits, axis=-1)[0]
+ #scores = start_probabilities[:, None] * end_probabilities[None, :]
+ #scores = tf.linalg.band_part(scores, 0, -1)
+ #scores = tf.reshape(scores, [-1])
+ #st.write(scores)
+ #max_index = np.argmax(scores)
+ #st.write(max_index)
+ #start_index = max_index // scores.shape[1]
+ #end_index = max_index % scores.shape[1]
  #st.write(start_index)
  #st.write(scores[start_index:end_index])
  #######################
- predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
- answer=tokenizer.decode(predict_answer_tokens)


  if (len(answer)>0):
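
The new code path amounts to standard extractive QA with a TensorFlow transformers model: tokenize the question together with the (truncated) context, take the argmax of the start and end logits, and decode the token span between them back into text. The snippet below is a minimal, self-contained sketch of that logic, not the app's actual code: the checkpoint name, the extract_answer helper, and the sample question are stand-ins for illustration, since the model, the df_answer layout, and the surrounding loop in app.py are outside this hunk.

import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForQuestionAnswering

# Stand-in checkpoint for illustration; the model actually loaded in app.py
# is defined outside this hunk.
MODEL_NAME = "distilbert-base-cased-distilled-squad"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
qa_model = TFAutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)

def extract_answer(query, text):
    # Truncate the context to its first 512 characters, as app.py does.
    inputs = tokenizer(query, text[0:512], return_tensors="tf")
    outputs = qa_model(inputs)

    # Most likely start and end token positions, chosen independently.
    start_logits = outputs.start_logits[0]
    end_logits = outputs.end_logits[0]
    answer_start = int(tf.argmax(start_logits))
    answer_end = int(tf.argmax(end_logits)) + 1  # slice end is exclusive

    # Decode the selected token span back into a string.
    input_ids = inputs["input_ids"].numpy()[0]
    answer = tokenizer.decode(input_ids[answer_start:answer_end],
                              skip_special_tokens=True)
    score = float(tf.reduce_max(start_logits))  # unnormalized start score
    return answer, score

print(extract_answer("Who wrote Don Quixote?",
                     "Don Quixote was written by Miguel de Cervantes."))

Picking the start and end positions independently, as both the old and new versions do, can occasionally yield an empty or inverted span; the commented-out block that multiplies start and end probabilities and keeps only the upper triangle via tf.linalg.band_part is the usual way to score start/end pairs jointly.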