tdecae committed on
Commit
089299b
1 Parent(s): 20e0c60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -112,12 +112,14 @@ for f in os.listdir("multiple_docs"):
112
  splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
113
  docs = splitter.split_documents(docs)
114
 
115
- # Convert the document chunks to embeddings
116
  embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
117
- embeddings = [embedding_model.encode(doc.page_content) for doc in docs]
 
118
 
119
- # Save the embeddings to the vector store
120
- vectorstore = Chroma.from_embeddings(embeddings=embeddings, documents=docs, persist_directory="./data")
 
121
  vectorstore.persist()
122
 
123
  # Load the Hugging Face model for text generation
@@ -172,3 +174,4 @@ demo.launch(debug=True)
172
 
173
 
174
 
 
 
112
  splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
113
  docs = splitter.split_documents(docs)
114
 
115
+ # Extract the content from documents and create embeddings
116
  embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
117
+ texts = [doc.page_content for doc in docs]
118
+ embeddings = embedding_model.encode(texts)
119
 
120
+ # Create a Chroma vector store and add documents and their embeddings
121
+ vectorstore = Chroma(persist_directory="./data")
122
+ vectorstore.add_texts(texts, embeddings)
123
  vectorstore.persist()
124
 
125
  # Load the Hugging Face model for text generation
 
174
 
175
 
176
 
177
+