Roman Castagné committed on
Commit
509fb35
1 Parent(s): 7ad5acf

update requirements and change oscar to oscar small

Browse files
Files changed (2) hide show
  1. app.py +1 -3
  2. requirements.txt +2 -0
app.py CHANGED
@@ -3,9 +3,7 @@ import gradio as gr
3
  from transformers import AutoModelForMaskedLM, AutoTokenizer, DataCollatorForLanguageModeling
4
 
5
 
6
- ds = datasets.load_dataset(
7
- "oscar-corpus/OSCAR-2109", "deduplicated_en", streaming=True, use_auth_token=True, split="train"
8
- )
9
  ds = ds.shuffle(buffer_size=1000)
10
  ds = iter(ds)
11
 
 
3
  from transformers import AutoModelForMaskedLM, AutoTokenizer, DataCollatorForLanguageModeling
4
 
5
 
6
+ ds = datasets.load_dataset("nthngdy/oscar-small", "unshuffled_deduplicated_en", streaming=True, split="train")
 
 
7
  ds = ds.shuffle(buffer_size=1000)
8
  ds = iter(ds)
9
 
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  datasets==2.4.0
2
  gradio==3.19.1
3
  transformers==4.22.0
 
 
 
1
  datasets==2.4.0
2
  gradio==3.19.1
3
  transformers==4.22.0
4
+ --extra-index-url https://download.pytorch.org/whl/cpu
5
+ torch==1.12.1