fakezeta committed on
Commit
3536102
1 Parent(s): fbb1222

switching to intfloat/e5-base embedding model

Browse files
Files changed (1) hide show
  1. ingest_data.py +2 -2
ingest_data.py CHANGED
@@ -1,7 +1,7 @@
1
  from langchain.text_splitter import RecursiveCharacterTextSplitter
2
  from langchain.document_loaders import PyPDFLoader
3
  from langchain.vectorstores import Chroma
4
- from langchain.embeddings import TensorflowHubEmbeddings
5
  import os
6
  import time
7
  import streamlit as st
@@ -26,7 +26,7 @@ def embed_doc(filename):
26
 
27
 
28
  start = time.time()
29
- embeddings = TensorflowHubEmbeddings(model_url="https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3")
30
  end = time.time()
31
  st.text("Embedding time: "+str(round(end - start,1)))
32
  start = time.time()
 
1
  from langchain.text_splitter import RecursiveCharacterTextSplitter
2
  from langchain.document_loaders import PyPDFLoader
3
  from langchain.vectorstores import Chroma
4
+ from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
5
  import os
6
  import time
7
  import streamlit as st
 
26
 
27
 
28
  start = time.time()
29
+ embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-base")
30
  end = time.time()
31
  st.text("Embedding time: "+str(round(end - start,1)))
32
  start = time.time()