import openai
from utils import *
import mdforest
import pandas as pd
import spacy


class Insights:
    """Extract keywords and sentence embeddings from a markdown document.

    The constructor cleans the markdown, runs the project's ``preprocess``
    helper on it, and splits the result into chunks with
    ``create_nest_sentences``. The two ``generate_*`` methods then operate on
    those chunks.
    """

    # Size limit handed to ``create_nest_sentences`` when chunking the corpus.
    # NOTE(review): presumably a token budget for the embedding model --
    # confirm against ``utils.create_nest_sentences``.
    EMBEDDING_MAX_TOKENS = 1023

    def __init__(self, text: str) -> None:
        """Prepare the document and load the models used by this instance.

        Args:
            text: Raw markdown text to analyse.
        """
        cleaned_text = mdforest.clean_markdown(text)

        # Keywords accumulated by ``generate_topics``.
        self.keywords = []
        # Preprocessed document text, as returned by ``preprocess``.
        self.corpus = preprocess(cleaned_text)
        # Chunks of the corpus; each chunk is passed whole to the keyword
        # model and its first element (``chunk[0]``) is used for embeddings.
        self.text = create_nest_sentences(self.corpus, self.EMBEDDING_MAX_TOKENS)
        self.model = load_keyword_model()
        self.embedder = load_embedder()

    def generate_topics(self) -> list:
        """Generate keywords for every chunk and append them to ``self.keywords``.

        Keywords are *added* to whatever ``self.keywords`` already holds, so
        calling this method more than once appends the same keywords again.

        Returns:
            The updated ``self.keywords`` list.
        """
        new_keywords = []
        for sentence in self.text:
            new_keywords.extend(generate_keywords(self.model, sentence))

        # Rebind once (instead of ``list + list`` per chunk) so previously
        # returned lists are left untouched and no quadratic copying occurs.
        self.keywords = self.keywords + new_keywords
        return self.keywords

    def generate_embeddings(self) -> list:
        """Compute one spaCy vector per sentence across all chunks.

        Each chunk's first element is parsed with the ``en_core_web_sm``
        pipeline and the ``vector`` of every detected sentence is collected.

        Returns:
            A flat list of sentence vectors, in document order.
        """
        # Load the pipeline once per call, outside the chunk loop.
        nlp = spacy.load("en_core_web_sm")

        final_embeddings = []
        for text in self.text:
            doc = nlp(text[0])
            final_embeddings.extend(sent.vector for sent in doc.sents)

        return final_embeddings