Spaces:
Build error
Build error
import openai | |
from utils import * | |
import mdforest | |
import pandas as pd | |
import spacy | |
class Insights: | |
EMBEDDING_MAX_TOKENS = 1023 | |
def __init__(self, text: str) -> None:
    """Prepare the input text and load the models this instance uses.

    Args:
        text: Raw input text. It is run through ``mdforest.clean_markdown``
            before any other processing.
    """
    markdown_free = mdforest.clean_markdown(text)

    # Starts out empty; generate_topics() is what fills it.
    self.keywords = []

    # Result of the project's ``preprocess`` helper on the cleaned text.
    self.corpus = preprocess(markdown_free)

    # ``create_nest_sentences`` receives the corpus and the class-level
    # token limit. NOTE(review): presumably it splits the corpus into
    # chunks that fit the limit -- confirm against utils.
    token_limit = self.EMBEDDING_MAX_TOKENS
    self.text = create_nest_sentences(self.corpus, token_limit)

    # Model handles, loaded once per instance.
    self.model = load_keyword_model()
    self.embedder = load_embedder()
def generate_topics(self) -> list:
    """Extract keywords from every chunk in ``self.text``.

    Each element of ``self.text`` is passed to ``generate_keywords``
    together with ``self.model``. The results are concatenated in
    iteration order, stored on ``self.keywords`` and returned.

    The keyword list is rebuilt from scratch on every call, so calling
    this method more than once does not append duplicate copies of the
    earlier results.

    Returns:
        The list stored in ``self.keywords``.
    """
    # Collect into a fresh local list: this avoids re-copying the
    # accumulated list on every iteration and leaves ``self.keywords``
    # untouched if ``generate_keywords`` raises part-way through.
    collected = []
    for chunk in self.text:
        collected.extend(generate_keywords(self.model, chunk))
    self.keywords = collected
    return self.keywords
def generate_embeddings(self) -> list: | |
# generate embeddings for all the sentences | |
nlp = spacy.load("en_core_web_sm") | |
final_embeddings = [] | |
for text in self.text: | |
print(text[0]) | |
doc = nlp(text[0]) | |
sentence_embeddings = [sent.vector for sent in doc.sents] | |
final_embeddings += sentence_embeddings | |