lhoestq HF staff committed on
Commit
1465ec0
1 Parent(s): 983332a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -3
app.py CHANGED
@@ -99,8 +99,7 @@ def _sorted_split_key(split: str) -> str:
99
  @lru_cache(maxsize=128)
100
  def get_parquet_splits(dataset: str, config: str) -> List[str]:
101
  fs = get_parquet_fs(dataset)
102
- all_parts = [path.rsplit(".", 1)[0].split("-") for path in fs.glob(f"{config}/*.parquet")]
103
- return sorted(set(parts[-4] if len(parts) > 3 and parts[-2] == "of" else parts[-1] for parts in all_parts), key=_sorted_split_key)
104
 
105
 
106
  #####################################################
@@ -114,7 +113,7 @@ RowGroupReaders = List[Callable[[], pa.Table]]
114
  @lru_cache(maxsize=128)
115
  def index(dataset: str, config: str, split: str) -> Tuple[np.ndarray, RowGroupReaders, int, Features]:
116
  fs = get_parquet_fs(dataset)
117
- sources = fs.glob(f"{config}/*-{split}.parquet") + fs.glob(f"{config}/*-{split}-*-of-*.parquet")
118
  if not sources:
119
  if config not in get_parquet_configs(dataset):
120
  raise AppError(f"Invalid config {config}. Available configs are: {', '.join(get_parquet_configs(dataset))}.")
 
99
  @lru_cache(maxsize=128)
100
  def get_parquet_splits(dataset: str, config: str) -> List[str]:
101
  fs = get_parquet_fs(dataset)
102
+ return [path for path in fs.ls(config) if fs.isdir(path)]
 
103
 
104
 
105
  #####################################################
 
113
  @lru_cache(maxsize=128)
114
  def index(dataset: str, config: str, split: str) -> Tuple[np.ndarray, RowGroupReaders, int, Features]:
115
  fs = get_parquet_fs(dataset)
116
+ sources = fs.glob(f"{config}/{split}/*.parquet")
117
  if not sources:
118
  if config not in get_parquet_configs(dataset):
119
  raise AppError(f"Invalid config {config}. Available configs are: {', '.join(get_parquet_configs(dataset))}.")