UNIST-Eunchan committed on
Commit
950ede6
1 Parent(s): dda7218

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -4,7 +4,7 @@ import nltk
4
  from nltk import sent_tokenize
5
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
  import json
7
-
8
  from sentence_transformers import SentenceTransformer
9
 
10
  nltk.download('punkt')
@@ -38,6 +38,14 @@ def infer(input_ids, max_length, temperature, top_k, top_p):
38
  return output_sequences
39
 
40
 
 
 
 
 
 
 
 
 
41
  @st.cache_data
42
  def chunking(book_text):
43
  sentences = sent_tokenize(book_text)
 
4
  from nltk import sent_tokenize
5
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
  import json
7
+ import numpy as np
8
  from sentence_transformers import SentenceTransformer
9
 
10
  nltk.download('punkt')
 
38
  return output_sequences
39
 
40
 
41
def cos_similarity(v1, v2):
    """Return the cosine similarity between two vectors.

    Args:
        v1: First vector (array-like of numbers).
        v2: Second vector (array-like, same length as ``v1``).

    Returns:
        float: Cosine similarity in [-1, 1]. NOTE: if either vector has
        zero magnitude the denominator is 0 and the result is ``nan``
        (NumPy emits a divide warning) — same as the original behavior.
    """
    dot_product = np.dot(v1, v2)
    # np.linalg.norm computes the Euclidean (L2) norm in one C-level call,
    # replacing the hand-rolled sqrt(sum(square(v))) expression.
    l2_norm = np.linalg.norm(v1) * np.linalg.norm(v2)
    return dot_product / l2_norm
47
+
48
+
49
  @st.cache_data
50
  def chunking(book_text):
51
  sentences = sent_tokenize(book_text)