UNIST-Eunchan committed
Commit: e0b483f
Parent(s): 56b6b47

Update app.py

app.py CHANGED
@@ -4,6 +4,11 @@ import nltk
 from nltk import sent_tokenize
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import json
+
+from sentence_transformers import SentenceTransformer
+
+sentence_transformer_model = SentenceTransformer("sentence-transformers/all-roberta-large-v1")
+
 nltk.download('punkt')
 with open('testbook.json') as f:
     test_book = json.load(f)
@@ -63,7 +68,7 @@ def chunking(book_text):
             next_token_len += token_lens[i+t]
             next_pseudo_segment += sentences[i+t]
 
-        embs = 
+        embs = sentence_transformer_model.encode([current_segment, next_pseudo_segment, sentences[i]]) # current, next, sent
         if cos_similarity(embs[1],embs[2]) > cos_similarity(embs[0],embs[2]):
             segments.append(current_segment)
             current_segment = sentences[i]
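For context, the added `encode` call produces the three embeddings consumed by the existing boundary check: the current segment, the tentative next segment, and the sentence under consideration. Below is a minimal standalone sketch of that decision, assuming `cos_similarity` is a plain cosine similarity over the numpy vectors returned by `encode`; its actual implementation in app.py is not shown in this diff, and the example inputs are hypothetical.

import numpy as np
from sentence_transformers import SentenceTransformer

# Same model as in the commit.
sentence_transformer_model = SentenceTransformer("sentence-transformers/all-roberta-large-v1")

def cos_similarity(a, b):
    # Assumed helper: cosine similarity between two 1-D embedding vectors.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Hypothetical stand-ins for the loop state inside chunking().
current_segment = "The model is trained on long documents. It uses a seq2seq backbone."
next_pseudo_segment = "Evaluation is done on the test split. ROUGE scores are reported."
sentence = "Results are measured with ROUGE."

# current, next, sent — same ordering as in the diff.
embs = sentence_transformer_model.encode([current_segment, next_pseudo_segment, sentence])

# If the sentence is more similar to the upcoming text than to the current
# segment, close the current segment and start a new one with this sentence.
if cos_similarity(embs[1], embs[2]) > cos_similarity(embs[0], embs[2]):
    print("boundary: start a new segment with this sentence")
else:
    print("no boundary: keep the sentence in the current segment")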