Kushwanth Chowday Kandala committed on
Commit
a78496d
1 Parent(s): 7c92df9

TypeError: Value must be iterable index.upsert(vectors=records)

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -196,17 +196,20 @@ def create_embeddings():
196
  pinecone = connect_pinecone()
197
  index = get_pinecone_semantic_index(pinecone)
198
 
199
- # The maximum metadata size per vector is 40KB ~ 40000Bytes ~ each text character is 1 to 2 bytes. so roughly given batch size of 10000 to 40000
200
- batch_size = 10000
201
- for i in tqdm(range(0, len(inputtext), batch_size)):
 
 
 
202
  # find end of batch
203
- end = min(i + batch_size, len(inputtext))
204
  # create ids batch
205
  ids = [str(i) for i in range(i, end)]
206
  # create metadata batch
207
- metadata = [{"text": text} for text in inputtext[i:end]]
208
  # create embeddings
209
- xc = model.encode(inputtext[i:end])
210
  # create records list for upsert
211
  records = zip(ids, xc, metadata)
212
  # upsert records
 
196
  pinecone = connect_pinecone()
197
  index = get_pinecone_semantic_index(pinecone)
198
 
199
+ # The maximum metadata size per vector is 40KB ~ 40000Bytes ~ each text character is 1 to 2 bytes. so roughly given chunk size of 10000 to 40000
200
+ chunk_size = 10000
201
+ batch_size = 2
202
+ chunks = split_into_chunks(inputtext, batch_size)
203
+
204
+ for i in tqdm(range(0, len(chunks), batch_size)):
205
  # find end of batch
206
+ end = min(i + batch_size, len(chunks))
207
  # create ids batch
208
  ids = [str(i) for i in range(i, end)]
209
  # create metadata batch
210
+ metadata = [{"text": text} for text in chunks[i:end]]
211
  # create embeddings
212
+ xc = model.encode(chunks[i:end])
213
  # create records list for upsert
214
  records = zip(ids, xc, metadata)
215
  # upsert records