Spaces:

mscsasem3
/

CHAAT

Build error

App Files Files Community

mscsasem3 committed on May 12, 2023

Commit

991cb0c

•

1 Parent(s): 5441cfb

Update app.py

Browse files

Files changed (1) hide show

app.py +161 -8

app.py CHANGED Viewed

@@ -1,10 +1,3 @@
-# import gradio as gr
-# def greet(name):
-#     return "Hello " + name + "!!"
-# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-# iface.launch()
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
 import requests
@@ -19,6 +12,24 @@ import gradio as gr
 from skimage.filters import threshold_otsu
 from skimage.util import invert
 import cv2,imageio
 processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
 model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
 plt.switch_backend('Agg')
@@ -305,7 +316,149 @@ def extract(image):
         result=result+" "
     return result
-iface = gr.Interface(fn=extract,
                      inputs=[gr.inputs.Image(type='filepath', label='Ideal Answer'),gr.inputs.Image(type='filepath', label='Ideal Answer Diagram'),gr.inputs.Image(type='filepath', label='Submitted Answer'),gr.inputs.Image(type='filepath', label='Submitted Answer Diagram')],
                      outputs=gr.outputs.Textbox(),)

 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
 import requests
 from skimage.filters import threshold_otsu
 from skimage.util import invert
 import cv2,imageio
+from matplotlib.dates import SU
+from regex import F
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sentence_transformers import SentenceTransformer, util
+from sklearn.metrics.pairwise import cosine_similarity
+import spacy
+import pandas as pd
+from tqdm import tqdm
+import textdistance
+from spacy.lang.en.stop_words import STOP_WORDS
+#import psycopg2
+import os
+from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input, decode_predictions
+from tensorflow.keras.preprocessing import image
+from sklearn.feature_extraction.text import TfidfVectorizer
 processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
 model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
 plt.switch_backend('Agg')
         result=result+" "
     return result
+nlp = spacy.load("en_core_web_md")
+def listToString(s):
+    """Join the items of ``s`` into one string, separated by single spaces."""
+    separator = " "
+    joined = separator.join(s)
+    return joined
+def rm_stop(my_doc):
+    """Return the token texts of ``my_doc`` that are not stop words.
+
+    Every token's text is looked up in ``nlp.vocab``; a word is kept only
+    when the resulting lexeme is not flagged ``is_stop``. Order is preserved.
+    """
+    words = [token.text for token in my_doc]
+    return [word for word in words if not nlp.vocab[word].is_stop]
+def text_processing(sentence):
+    """Return the lower-cased lemmas of the usable tokens in ``sentence``.
+
+    ``sentence`` is parsed with ``nlp``; tokens that are not alphabetic or
+    that are stop words are dropped.
+    """
+    lemmas = []
+    for token in nlp(sentence):
+        if not token.is_alpha or token.is_stop:
+            continue
+        lemmas.append(token.lemma_.lower())
+    return lemmas
+def jaccard_sim(sent1,sent2):
+    """Return the normalized Jaccard similarity of two sentences.
+
+    Both sentences are first reduced to lemma lists by ``text_processing``;
+    the lists are then compared with ``textdistance.jaccard``.
+    """
+    tokens_a, tokens_b = text_processing(sent1), text_processing(sent2)
+    return textdistance.jaccard.normalized_similarity(tokens_a, tokens_b)
+def sim(Ideal_Answer,Submitted_Answer):
+    """Score how similar a submitted answer is to the ideal answer.
+
+    Three measures are computed, each multiplied by 10 and truncated to an
+    int: SBERT cosine similarity, Jaccard similarity of the processed tokens,
+    and spaCy document similarity after stop-word removal.
+
+    Args:
+        Ideal_Answer: reference answer text.
+        Submitted_Answer: answer text to be graded.
+
+    Returns:
+        A ``(report, final_score)`` tuple. ``report`` has the form
+        ``"SBERT:<n>, Jaccard:<n>, Word2Vec:<n>,final_score:<n>"`` and
+        ``final_score`` is ``int(0.8 * sbert + 0.2 * w2v)``.
+    """
+    # Quote characters are stripped from both texts before any comparison.
+    text1 = Ideal_Answer.replace("\"", "").replace("\'", "")
+    text2 = Submitted_Answer.replace("\"", "").replace("\'", "")
+
+    # SBERT embeddings. The local is named sbert_model so it does not shadow
+    # the module-level TrOCR `model`.
+    sbert_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+    embedding_1 = sbert_model.encode(text1, convert_to_tensor=True)
+    embedding_2 = sbert_model.encode(text2, convert_to_tensor=True)
+    # Read the value straight from the single-element tensor instead of
+    # splitting its printed representation, which is rounded for display and
+    # depends on the repr layout.
+    sbert = int(util.pytorch_cos_sim(embedding_1, embedding_2).item() * 10.0)
+
+    # Jaccard
+    jaccard = int(jaccard_sim(text1, text2) * 10.0)
+
+    # spaCy average word vectors: reuse the module-level `nlp` pipeline rather
+    # than reloading "en_core_web_md" on every call.
+    doc1 = listToString(rm_stop(nlp(text1)))
+    doc2 = listToString(rm_stop(nlp(text2)))
+    w2v = int(nlp(doc1).similarity(nlp(doc2)) * 10.0)
+
+    final_score = int(0.8 * sbert + 0.2 * w2v)
+    output = [
+        "SBERT:" + str(sbert) + ",",
+        "Jaccard:" + str(jaccard) + ",",
+        "Word2Vec:" + str(w2v) + ",final_score:" + str(final_score),
+    ]
+    return listToString(output), final_score
+def return_image_embedding(model,img_path):
+    """Run ``model`` on the image at ``img_path`` and return the result as a DataFrame.
+
+    The image is loaded at 224x224, converted to an array, given a leading
+    batch axis and passed through ``preprocess_input`` before prediction.
+    The first prediction is wrapped in a DataFrame and transposed.
+    """
+    loaded = image.load_img(img_path, target_size=(224, 224))
+    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
+    predictions = model.predict(preprocess_input(batch))
+    return pd.DataFrame(predictions[0]).T
+def draw_boxes(image, bounds, color='yellow', width=2):
+    """Draw a closed four-point outline on ``image`` for each entry in ``bounds``.
+
+    The first element of every entry must unpack into four points. The
+    outline is drawn in place and the same ``image`` object is returned.
+    """
+    canvas = ImageDraw.Draw(image)
+    for entry in bounds:
+        c0, c1, c2, c3 = entry[0]
+        # Repeat the first point at the end so the outline is closed.
+        outline = [*c0, *c1, *c2, *c3, *c0]
+        canvas.line(outline, fill=color, width=width)
+    return image
+def inference(img, lang):
+    """Run EasyOCR on ``img`` and save a copy with the detections outlined.
+
+    ``img.name`` is used as the path of the image to read. The annotated
+    image is written to ``result.jpg``.
+
+    Returns:
+        A two-item list: the output file name and a DataFrame of the OCR
+        results with the first column dropped.
+    """
+    ocr_reader = easyocr.Reader(lang)
+    source_path = img.name
+    bounds = ocr_reader.readtext(source_path)
+    annotated = PIL.Image.open(source_path)
+    draw_boxes(annotated, bounds)
+    output_path = 'result.jpg'
+    annotated.save(output_path)
+    detections = pd.DataFrame(bounds).iloc[:, 1:]
+    return [output_path, detections]
+def compute_tfidf_embeddings(documents1, documents2):
+    """Return TF-IDF embeddings for two document lists over a shared vocabulary.
+
+    A single ``TfidfVectorizer`` is fitted on the concatenation of both lists
+    and then used to transform each list separately.
+
+    Returns:
+        ``(embeddings1, embeddings2)`` as produced by ``vectorizer.transform``.
+    """
+    corpus = documents1 + documents2
+    tfidf = TfidfVectorizer()
+    tfidf.fit(corpus)
+    return tfidf.transform(documents1), tfidf.transform(documents2)
+def extract_eval(image1,image2,image3,image4):
+    """Compare a submitted answer (text and diagram) against the ideal answer.
+
+    Args:
+        image1: ideal-answer text image, passed to ``extract``.
+        image2: ideal-answer diagram image, embedded with ResNet50.
+        image3: submitted-answer text image, passed to ``extract``.
+        image4: submitted-answer diagram image, embedded with ResNet50.
+
+    Returns:
+        A string made of the text-similarity report from ``sim`` followed by
+        ``,diagram_similarity:<value>``, so the Gradio Textbox output has
+        something to display.
+    """
+    ideal_text = extract(image1)
+    print("Extracting Ideal Text \n")
+    print(ideal_text)
+    submitted_text = extract(image3)
+    print("Extracting Submitted Text \n")
+    # Print the text that was just extracted; the previous name
+    # `submitted_text_text` was never defined and raised NameError.
+    print(submitted_text)
+    report, _ = sim(ideal_text, submitted_text)
+    print(report)
+    resnet = ResNet50(include_top=False, weights='imagenet', pooling='avg')
+    diagram_1_embed = return_image_embedding(resnet, image2)
+    diagram_2_embed = return_image_embedding(resnet, image4)
+    # Compare the two diagram embeddings computed above (the old code referred
+    # to undefined `embedding_1`/`embedding_2`). `.values` hands the cosine
+    # helper plain arrays instead of DataFrames.
+    diagram_embed_sim_score = util.pytorch_cos_sim(
+        diagram_1_embed.values, diagram_2_embed.values
+    )
+    print("Diagram Embedding Similarity Score \n")
+    print(diagram_embed_sim_score)
+    return report + ",diagram_similarity:" + str(round(diagram_embed_sim_score.item(), 4))
+iface = gr.Interface(fn=extract_eval,
                      inputs=[gr.inputs.Image(type='filepath', label='Ideal Answer'),gr.inputs.Image(type='filepath', label='Ideal Answer Diagram'),gr.inputs.Image(type='filepath', label='Submitted Answer'),gr.inputs.Image(type='filepath', label='Submitted Answer Diagram')],
                      outputs=gr.outputs.Textbox(),)