mscsasem3 committed on
Commit
241c81c
1 Parent(s): cd5b22e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -16
app.py CHANGED
@@ -28,6 +28,7 @@ import os
28
  from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input, decode_predictions
29
  from tensorflow.keras.preprocessing import image
30
  from sklearn.feature_extraction.text import TfidfVectorizer
 
31
 
32
 
33
 
@@ -433,24 +434,29 @@ def inference(img, lang):
433
  #im.save('result.jpg')
434
  return bounds
435
 
436
def compute_tfidf_embeddings(documents1, documents2):
    """Embed two document lists with one shared TF-IDF vectorizer.

    A single ``TfidfVectorizer`` is fitted on the concatenation of both
    inputs, then each input is transformed separately with that fitted
    vectorizer.

    Args:
        documents1: First list of text documents.
        documents2: Second list of text documents.

    Returns:
        A ``(embeddings1, embeddings2)`` tuple: the fitted vectorizer's
        transform of ``documents1`` and of ``documents2``.
    """
    corpus = documents1 + documents2

    tfidf = TfidfVectorizer()
    # Fit once on everything so both transforms use the same vocabulary.
    tfidf.fit(corpus)

    first_embedded = tfidf.transform(documents1)
    second_embedded = tfidf.transform(documents2)
    return first_embedded, second_embedded
 
 
 
 
 
 
 
451
 
452
- import requests
453
- import base64
454
  def extract_eval(Ideal_Answer_Text,Ideal_Answer_Diagram,Submitted_Answer_Text,Submitted_Answer_Diagram):
455
  # print(image1)
456
  # ideal_text=extract(Ideal_Answer_Text)
@@ -470,16 +476,18 @@ def extract_eval(Ideal_Answer_Text,Ideal_Answer_Diagram,Submitted_Answer_Text,Su
470
  print(str(int(float(str(diagram_embed_sim_score).split("[")[2].split("]")[0])*10.0)))
471
  diagram_1_text=inference(Ideal_Answer_Diagram,['en'])
472
  diagram_2_text=inference(Submitted_Answer_Diagram,['en'])
473
- print(diagram_1_text)
474
- print(diagram_2_text)
475
- diagram_1_text_embed,diagram_2_text_embed=compute_tfidf_embeddings(diagram_1_text,diagram_2_text)
476
- diagram_text_similarity=util.pytorch_cos_sim(diagram_1_text_embed,diagram_2_text_embed)
477
  print("Diagram Text Embedding Similarity Score \n")
478
- print(str(int(float(str(diagram_text_similarity).split("[")[2].split("]")[0])*10.0)))
 
479
 
480
 
481
 
482
 
 
 
 
483
  iface = gr.Interface(fn=extract_eval,
484
  inputs=["image","image","image","image"],
485
  outputs=gr.outputs.Textbox(),)
 
28
  from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input, decode_predictions
29
  from tensorflow.keras.preprocessing import image
30
  from sklearn.feature_extraction.text import TfidfVectorizer
31
+ from sklearn.metrics.pairwise import cosine_similarity
32
 
33
 
34
 
 
434
  #im.save('result.jpg')
435
  return bounds
436
 
437
def compute_tfidf_embeddings(words_list1, words_list2):
    """Compute TF-IDF rows for two lists using one shared vectorizer.

    Both inputs are concatenated and passed through a single
    ``TfidfVectorizer.fit_transform`` call; the resulting matrix is then
    sliced back into the rows that came from each input.

    Args:
        words_list1: First list of text entries.
        words_list2: Second list of text entries.

    Returns:
        A ``(tfidf_matrix_list1, tfidf_matrix_list2)`` tuple: the first
        ``len(words_list1)`` rows of the TF-IDF matrix, and the rows after
        them.
    """
    everything = words_list1 + words_list2

    # One fit over the combined input gives both halves the same columns.
    tfidf_rows = TfidfVectorizer().fit_transform(everything)

    # Slice at the length of the first list to separate the two inputs.
    boundary = len(words_list1)
    return tfidf_rows[:boundary], tfidf_rows[boundary:]
452
+
453
def compute_cosine_similarity(tfidf_matrix_list1, tfidf_matrix_list2):
    """Return the cosine similarity between two TF-IDF matrices.

    Thin wrapper that forwards both arguments, in order, to
    ``sklearn.metrics.pairwise.cosine_similarity``.

    Args:
        tfidf_matrix_list1: TF-IDF matrix for the first set of entries.
        tfidf_matrix_list2: TF-IDF matrix for the second set of entries.

    Returns:
        Whatever ``cosine_similarity`` returns for the two inputs
        (a pairwise similarity matrix).
    """
    return cosine_similarity(tfidf_matrix_list1, tfidf_matrix_list2)
458
+
459
 
 
 
460
  def extract_eval(Ideal_Answer_Text,Ideal_Answer_Diagram,Submitted_Answer_Text,Submitted_Answer_Diagram):
461
  # print(image1)
462
  # ideal_text=extract(Ideal_Answer_Text)
 
476
  print(str(int(float(str(diagram_embed_sim_score).split("[")[2].split("]")[0])*10.0)))
477
  diagram_1_text=inference(Ideal_Answer_Diagram,['en'])
478
  diagram_2_text=inference(Submitted_Answer_Diagram,['en'])
479
+ tfidf_matrix_list1, tfidf_matrix_list2 = compute_tfidf_embeddings(diagram_1_text, diagram_2_text)
480
+ similarity_matrix = compute_cosine_similarity(tfidf_matrix_list1, tfidf_matrix_list2)
 
 
481
  print("Diagram Text Embedding Similarity Score \n")
482
+ print(similarity_matrix)
483
+
484
 
485
 
486
 
487
 
488
+ print(similarity_matrix)
489
+
490
+
491
  iface = gr.Interface(fn=extract_eval,
492
  inputs=["image","image","image","image"],
493
  outputs=gr.outputs.Textbox(),)