soeren committed on
Commit
5e53983
1 Parent(s): a36c062

space adjusted to new enrichment repo

Browse files
Files changed (3) hide show
  1. Dockerfile +0 -3
  2. prepare.py +0 -29
  3. run.py +18 -30
Dockerfile CHANGED
@@ -9,9 +9,6 @@ RUN pip install pip -U
9
  COPY requirements.txt .
10
  RUN pip install -r requirements.txt
11
 
12
- COPY prepare.py .
13
- RUN python prepare.py
14
-
15
  COPY . .
16
  RUN mkdir -p /code/.cache
17
  RUN chmod -R 777 /code
 
9
  COPY requirements.txt .
10
  RUN pip install -r requirements.txt
11
 
 
 
 
12
  COPY . .
13
  RUN mkdir -p /code/.cache
14
  RUN chmod -R 777 /code
prepare.py DELETED
@@ -1,29 +0,0 @@
1
- import pickle
2
- import datasets
3
- import os
4
- import umap
5
-
6
- #force restart
7
- if __name__ == "__main__":
8
- cache_file = "dataset_cache.pkl"
9
- if os.path.exists(cache_file):
10
- # Load dataset from cache
11
- with open(cache_file, "rb") as file:
12
- dataset = pickle.load(file)
13
- print("Dataset loaded from cache.")
14
- else:
15
- # Load dataset using datasets.load_dataset()
16
- ds_train = datasets.load_dataset("renumics/speech_commands_enriched", split="train")
17
- ds_test = datasets.load_dataset("renumics/speech_commands_enriched", split="test")
18
- ds_validation = datasets.load_dataset("renumics/speech_commands_enriched", split="validation")
19
- joined_dataset_enrichment = datasets.concatenate_datasets([ds_train, ds_validation, ds_test])
20
- print("Dataset loaded using datasets.load_dataset().")
21
-
22
- df = joined_dataset_enrichment.to_pandas()
23
-
24
- # Save dataset to cache
25
- with open(cache_file, "wb") as file:
26
- pickle.dump(df, file)
27
-
28
- print("Dataset saved to cache.")
29
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
run.py CHANGED
@@ -1,34 +1,22 @@
1
- import pickle
2
  from renumics import spotlight
3
  from renumics.spotlight import layout
4
- import os
5
-
6
- if __name__ == "__main__":
7
- cache_file = "dataset_cache.pkl"
8
- if os.path.exists(cache_file):
9
- # Load dataset from cache
10
- with open(cache_file, "rb") as file:
11
- df = pickle.load(file)
12
- print("Dataset loaded from cache.")
13
-
14
-
15
- while True:
16
- dtypes = {
17
- "full_audio": spotlight.Audio,
18
- "embedding_reduced": spotlight.Embedding
19
- }
20
- view = spotlight.show(
21
- df,
22
- dtype=dtypes,
23
- layout= layout.parse("spotlight-layout.json"),
24
- port=7860,
25
- host="0.0.0.0",
26
- allow_filebrowsing=False
27
- )
28
-
29
- view.close()
30
-
31
- else:
32
- print(f"Dataset {cache_file} not found. Please run prepare.py first.")
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
 
 
1
+ import datasets
2
  from renumics import spotlight
3
  from renumics.spotlight import layout
4
+ from renumics.spotlight.layouts import debug_classification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ dataset = datasets.load_dataset("renumics/speech_commands_enrichment_only", "raw_and_enrichment_combined")
7
+ stacked_ds = datasets.concatenate_datasets([dataset["train"], dataset["validation"], dataset["test"]])
8
+ print(dataset)
9
+ while True:
10
+ layout = debug_classification(label='label_string', prediction='prediction', embedding='embedding_reduced',
11
+ features=["label", "prediction", "probability"], inspect={'audio': spotlight.Audio})
12
+ dtypes = {
13
+ "audio": spotlight.Audio,
14
+ "embedding_reduced": spotlight.Embedding
15
+ }
16
+ view = spotlight.show(
17
+ stacked_ds,
18
+ dtype=dtypes,
19
+ layout= layout
20
+ )
21
 
22
+ view.close()