nickmuchi committed on
Commit
242f876
1 Parent(s): 0be8860

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +6 -4
functions.py CHANGED
@@ -122,13 +122,13 @@ def load_asr_model(asr_model_name):
122
  return asr_model
123
 
124
  @st.experimental_singleton(suppress_st_warning=True)
125
- def process_corpus(corpus, tok, title, embeddings, chunk_size=200, overlap=50):
126
 
127
  '''Process text for Semantic Search'''
128
 
129
  pinecone.init(api_key=OPEN_AI_KEY, environment="us-west1-gcp")
130
 
131
- tokenizer = tok
132
  text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer,chunk_size=chunk_size,chunk_overlap=overlap,separator='. ')
133
 
134
  texts = text_splitter.split_text(corpus)
@@ -162,16 +162,18 @@ def gen_embeddings(embedding_model):
162
  return embeddings
163
 
164
  @st.experimental_memo(suppress_st_warning=True)
165
- def embed_text(query,corpus,title,embedding_model,emb_tok,chain_type='stuff'):
166
 
167
  '''Embed text and generate semantic search scores'''
168
 
169
  index_id = "earnings-embeddings"
170
 
 
 
171
  embeddings = gen_embeddings(embedding_model)
172
 
173
  title = title[0]
174
- docsearch = process_corpus(corpus,embed_tok,title, embeddings)
175
 
176
  docs = docsearch.similarity_search_with_score(query, k=3, namespace = f'{title}-earnings')
177
 
 
122
  return asr_model
123
 
124
  @st.experimental_singleton(suppress_st_warning=True)
125
+ def process_corpus(corpus, _tok, title, embeddings, chunk_size=200, overlap=50):
126
 
127
  '''Process text for Semantic Search'''
128
 
129
  pinecone.init(api_key=OPEN_AI_KEY, environment="us-west1-gcp")
130
 
131
+ tokenizer = _tok
132
  text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer,chunk_size=chunk_size,chunk_overlap=overlap,separator='. ')
133
 
134
  texts = text_splitter.split_text(corpus)
 
162
  return embeddings
163
 
164
  @st.experimental_memo(suppress_st_warning=True)
165
+ def embed_text(query,corpus,title,embedding_model,_emb_tok,chain_type='stuff'):
166
 
167
  '''Embed text and generate semantic search scores'''
168
 
169
  index_id = "earnings-embeddings"
170
 
171
+
172
+
173
  embeddings = gen_embeddings(embedding_model)
174
 
175
  title = title[0]
176
+ docsearch = process_corpus(corpus,_emb_tok,title, embeddings)
177
 
178
  docs = docsearch.similarity_search_with_score(query, k=3, namespace = f'{title}-earnings')
179