File size: 1,117 Bytes
28e14c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import openai 
from utils import *
import mdforest 
import pandas as pd
import spacy

class Insights:
    
    EMBEDDING_MAX_TOKENS = 1023
    
    def __init__(self, text: str) -> None:
        """Prepare the input text and load the models used by this instance.

        The raw ``text`` is passed through ``mdforest.clean_markdown`` and then
        ``preprocess``; the result is stored as ``self.corpus``. The corpus is
        then handed to ``create_nest_sentences`` together with
        ``EMBEDDING_MAX_TOKENS`` and the result is stored as ``self.text``.

        Args:
            text: The source document as a string (passed straight to
                ``mdforest.clean_markdown``).
        """
        markdown_free = mdforest.clean_markdown(text)

        # Accumulator that generate_topics() appends to.
        self.keywords = []

        prepared_corpus = preprocess(markdown_free)
        self.corpus = prepared_corpus

        # NOTE(review): EMBEDDING_MAX_TOKENS is presumably the per-chunk token
        # limit applied by create_nest_sentences -- confirm in utils.
        self.text = create_nest_sentences(
            prepared_corpus,
            self.EMBEDDING_MAX_TOKENS,
        )

        self.model = load_keyword_model()
        self.embedder = load_embedder()
        
        
    def generate_topics(self) -> list:
        for sentence in self.text:
            self.keywords = self.keywords  + generate_keywords(self.model, sentence)
        return self.keywords
    
    def generate_embeddings(self) -> list:
        """Build per-sentence spaCy vectors for the entries of ``self.text``.

        For every entry in ``self.text`` the first element (``entry[0]``) is
        printed, parsed with the spaCy ``en_core_web_sm`` pipeline, and the
        ``.vector`` of each sentence span in the parsed doc is appended to one
        flat list.

        NOTE(review): no ``return`` statement is visible in this part of the
        file even though the signature is annotated ``-> list`` -- confirm
        that the accumulated list is returned.
        """
        # The spaCy pipeline is loaded on every call to this method.
        nlp = spacy.load("en_core_web_sm")
        final_embeddings = []
        for text in self.text:
            # Only index 0 of each entry is used; presumably each entry is a
            # one-element sequence holding the chunk string -- TODO confirm
            # against create_nest_sentences.
            print(text[0])
            doc = nlp(text[0])
            # One vector per sentence span detected by spaCy in this chunk.
            sentence_embeddings = [sent.vector for sent in doc.sents]    
            final_embeddings += sentence_embeddings