Kuaaangwen committed
Commit 917d2f9
1 Parent(s): 411304b

Update app.py

Files changed (1): app.py (+74 -14)
app.py CHANGED
@@ -1,21 +1,30 @@
- ''' To-do
-
- Create a side bar to compare two or upload CSV
-
- In the second tab, allow them to compare all CSV files
-
- '''
-
- import streamlit as st
- import pandas as pd
- from sentence_transformers import SentenceTransformer
- from sklearn.metrics.pairwise import cosine_similarity
-
- model = SentenceTransformer('paraphrase-xlm-r-multilingual-v1')
-
- # Streamlit interface
+ import streamlit as st
+
+ # Library for Sentence Similarity
+ import pandas as pd
+ from sentence_transformers import SentenceTransformer
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ # Library for Entailment
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import torch
+
+ # Library for keyword extraction
+ import yake
+
+ # Load models and tokenisers for both sentence transformers and text classification
+ sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
+
+ tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
+ text_classification_model = AutoModelForSequenceClassification.from_pretrained("roberta-large-mnli")
+
+ ### Streamlit interface ###
 
  st.title("Sentence Similarity")
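Note, not part of the commit: the indices 0, 1, 2 used further down with `text_classification_model.config.id2label[...]` depend on the label order of the `roberta-large-mnli` checkpoint. A minimal sketch to confirm that mapping before trusting the positional indexing (assumes the model files can be downloaded):

```python
from transformers import AutoModelForSequenceClassification

# Illustrative check only: print the label order that the positional
# indexing outputs[0][0], outputs[0][1], outputs[0][2] in the app relies on.
nli_model = AutoModelForSequenceClassification.from_pretrained("roberta-large-mnli")
print(nli_model.config.id2label)
# Typically {0: 'CONTRADICTION', 1: 'NEUTRAL', 2: 'ENTAILMENT'} for this checkpoint.
```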
 
@@ -40,6 +49,10 @@ if sidebar_selectbox == "Compare two sentences":
 
  # If submit_button_compare clicked
  if submit_button_compare:
+
+ print("Comparing sentences...")
+
+ ### Compare Sentence Similarity ###
 
  # Perform calculations
 
@@ -51,14 +64,61 @@ if sidebar_selectbox == "Compare two sentences":
  sentences.append(sentence_2)
 
  # Create embeddings for both sentences
- sentence_embeddings = model.encode(sentences)
+ sentence_embeddings = sentence_transformer_model.encode(sentences)
 
  cos_sim = cosine_similarity(sentence_embeddings[0].reshape(1, -1), sentence_embeddings[1].reshape(1, -1))[0][0]
  cos_sim = round(cos_sim * 100) # Convert to percentage and round-off
 
- st.write('Similarity between {} and {} is {}%'.format(sentence_1,
-                                                        sentence_2, cos_sim))
+ # st.write('Similarity between "{}" and "{}" is {}%'.format(sentence_1,
+ #                                                            sentence_2, cos_sim))
+
+ st.subheader("Similarity")
+ st.write(f"Similarity between the two sentences is {cos_sim}%.")
+
+ ### Text classification - entailment, neutral or contradiction ###
+
+ raw_inputs = [f"{sentence_1}</s></s>{sentence_2}"]
+
+ inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
+
+ # print(inputs)
+
+ outputs = text_classification_model(**inputs)
+
+ outputs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+ # print(outputs)
+
+ # argmax_index = torch.argmax(outputs).item()
+
+ print(text_classification_model.config.id2label[0], ":", round(outputs[0][0].item()*100, 2), "%")
+ print(text_classification_model.config.id2label[1], ":", round(outputs[0][1].item()*100, 2), "%")
+ print(text_classification_model.config.id2label[2], ":", round(outputs[0][2].item()*100, 2), "%")
+
+ st.subheader("Text classification for both sentences:")
+
+ st.write(text_classification_model.config.id2label[1], ":", round(outputs[0][1].item()*100, 2), "%")
+ st.write(text_classification_model.config.id2label[0], ":", round(outputs[0][0].item()*100, 2), "%")
+ st.write(text_classification_model.config.id2label[2], ":", round(outputs[0][2].item()*100, 2), "%")
+
+ ### Extract keywords with YAKE ### (might make more sense with word cloud)
+
+ st.subheader("Keywords:")
+
+ kw_extractor = yake.KeywordExtractor(top=10, stopwords=None)
+ keywords = kw_extractor.extract_keywords(sentence_2)
+
+ # keywords_array = []
+
+ for kw, v in keywords:
+     # print("Keyphrase: ", kw, ": score", v)
+     # keywords_array.append(kw)
+     st.write(kw)
 
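To try the pieces this hunk adds outside Streamlit, the same flow can be condensed into a standalone sketch (embedding similarity, MNLI entailment, YAKE keywords). The two hard-coded sentences and the variable names below are illustrative placeholders, not part of the commit:

```python
import torch
import yake
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForSequenceClassification

sentence_1 = "The cat sat on the mat."      # placeholder input
sentence_2 = "A cat is sitting on a mat."   # placeholder input

# 1. Sentence similarity: embed both sentences, compare with cosine similarity
st_model = SentenceTransformer("all-MiniLM-L6-v2")
emb = st_model.encode([sentence_1, sentence_2])
cos_sim = cosine_similarity(emb[0].reshape(1, -1), emb[1].reshape(1, -1))[0][0]
print(f"Similarity: {round(cos_sim * 100)}%")

# 2. Entailment / neutral / contradiction with roberta-large-mnli
tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
nli_model = AutoModelForSequenceClassification.from_pretrained("roberta-large-mnli")
inputs = tokenizer([f"{sentence_1}</s></s>{sentence_2}"],
                   padding=True, truncation=True, return_tensors="pt")
probs = torch.nn.functional.softmax(nli_model(**inputs).logits, dim=-1)
for i, label in nli_model.config.id2label.items():
    print(label, ":", round(probs[0][i].item() * 100, 2), "%")

# 3. Keyword extraction with YAKE (lower score = more relevant keyword)
kw_extractor = yake.KeywordExtractor(top=10, stopwords=None)
for kw, score in kw_extractor.extract_keywords(sentence_2):
    print("Keyword:", kw)
```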
 
@@ -93,7 +153,7 @@ if sidebar_selectbox == "Bulk upload and mark":
  sentences.append(sentence_reference)
  sentences.append(sentence_comparison)
 
- sentence_embeddings = model.encode(sentences)
+ sentence_embeddings = sentence_transformer_model.encode(sentences)
 
  cos_sim = cosine_similarity(sentence_embeddings[0].reshape(1, -1), sentence_embeddings[1].reshape(1, -1))[0][0]
  cos_sim = round(cos_sim * 100)
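The bulk path touched by this last hunk applies the same encode-and-compare step to each reference/comparison pair. A rough sketch of how that loop might run over an uploaded CSV; the file name and column names are assumptions for illustration, since the commit does not show them:

```python
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

sentence_transformer_model = SentenceTransformer("all-MiniLM-L6-v2")

# "answers.csv", "reference" and "answer" are illustrative names only.
df = pd.read_csv("answers.csv")
scores = []
for _, row in df.iterrows():
    emb = sentence_transformer_model.encode([row["reference"], row["answer"]])
    cos_sim = cosine_similarity(emb[0].reshape(1, -1), emb[1].reshape(1, -1))[0][0]
    scores.append(round(cos_sim * 100))  # percentage, as in the app

df["similarity_pct"] = scores
print(df.head())
```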