Umair Khan committed on
Commit
1c2ae90
·
1 Parent(s): b10cf8e

Modify writing to Parquet: build the schema separately and pass only `schema` (no `names`) to `pa.Table.from_arrays`

Browse files
Files changed (1) hide show
  1. app.py +5 -8
app.py CHANGED
@@ -83,14 +83,11 @@ def _save_outputs(adata, emb):
83
  d_model = int(emb.shape[1])
84
  index_arr = pa.array(adata.obs_names.astype(str).tolist(), type=pa.string())
85
  emb_arr = pa.array(emb.tolist(), type=pa.list_(pa.float32(), d_model))
86
- table = pa.Table.from_arrays(
87
- [index_arr, emb_arr],
88
- names=[PARQUET_INDEX_COL, PARQUET_EMB_COL],
89
- schema=pa.schema([
90
- pa.field(PARQUET_INDEX_COL, pa.string()),
91
- pa.field(PARQUET_EMB_COL, pa.list_(pa.float32(), d_model)),
92
- ]),
93
- )
94
  parquet_path = _unique_output("embs.parquet")
95
  pq.write_table(table, parquet_path, compression="zstd", use_dictionary=True)
96
 
 
83
  d_model = int(emb.shape[1])
84
  index_arr = pa.array(adata.obs_names.astype(str).tolist(), type=pa.string())
85
  emb_arr = pa.array(emb.tolist(), type=pa.list_(pa.float32(), d_model))
86
+ schema = pa.schema([
87
+ pa.field(PARQUET_INDEX_COL, pa.string()),
88
+ pa.field(PARQUET_EMB_COL, pa.list_(pa.float32(), d_model)),
89
+ ])
90
+ table = pa.Table.from_arrays([index_arr, emb_arr], schema=schema)
 
 
 
91
  parquet_path = _unique_output("embs.parquet")
92
  pq.write_table(table, parquet_path, compression="zstd", use_dictionary=True)
93