Umair Khan
committed on
Commit
·
1c2ae90
1
Parent(s):
b10cf8e
modify writing to parquet
Browse files
app.py
CHANGED
|
@@ -83,14 +83,11 @@ def _save_outputs(adata, emb):
|
|
| 83 |
d_model = int(emb.shape[1])
|
| 84 |
index_arr = pa.array(adata.obs_names.astype(str).tolist(), type=pa.string())
|
| 85 |
emb_arr = pa.array(emb.tolist(), type=pa.list_(pa.float32(), d_model))
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
pa.field(PARQUET_EMB_COL, pa.list_(pa.float32(), d_model)),
|
| 92 |
-
]),
|
| 93 |
-
)
|
| 94 |
parquet_path = _unique_output("embs.parquet")
|
| 95 |
pq.write_table(table, parquet_path, compression="zstd", use_dictionary=True)
|
| 96 |
|
|
|
|
| 83 |
d_model = int(emb.shape[1])
|
| 84 |
index_arr = pa.array(adata.obs_names.astype(str).tolist(), type=pa.string())
|
| 85 |
emb_arr = pa.array(emb.tolist(), type=pa.list_(pa.float32(), d_model))
|
| 86 |
+
schema = pa.schema([
|
| 87 |
+
pa.field(PARQUET_INDEX_COL, pa.string()),
|
| 88 |
+
pa.field(PARQUET_EMB_COL, pa.list_(pa.float32(), d_model)),
|
| 89 |
+
])
|
| 90 |
+
table = pa.Table.from_arrays([index_arr, emb_arr], schema=schema)
|
|
|
|
|
|
|
|
|
|
| 91 |
parquet_path = _unique_output("embs.parquet")
|
| 92 |
pq.write_table(table, parquet_path, compression="zstd", use_dictionary=True)
|
| 93 |
|