Spaces:
Sleeping
Sleeping
Roman Castagné
committed on
Commit
·
509fb35
1
Parent(s):
7ad5acf
update requirements and change oscar to oscar small
Browse files
- app.py +1 -3
- requirements.txt +2 -0
app.py
CHANGED
@@ -3,9 +3,7 @@ import gradio as gr
|
|
3 |
from transformers import AutoModelForMaskedLM, AutoTokenizer, DataCollatorForLanguageModeling
|
4 |
|
5 |
|
6 |
-
ds = datasets.load_dataset(
|
7 |
-
"oscar-corpus/OSCAR-2109", "deduplicated_en", streaming=True, use_auth_token=True, split="train"
|
8 |
-
)
|
9 |
ds = ds.shuffle(buffer_size=1000)
|
10 |
ds = iter(ds)
|
11 |
|
|
|
3 |
from transformers import AutoModelForMaskedLM, AutoTokenizer, DataCollatorForLanguageModeling
|
4 |
|
5 |
|
6 |
+
ds = datasets.load_dataset("nthngdy/oscar-small", "unshuffled_deduplicated_en", streaming=True, split="train")
|
|
|
|
|
7 |
ds = ds.shuffle(buffer_size=1000)
|
8 |
ds = iter(ds)
|
9 |
|
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
datasets==2.4.0
|
2 |
gradio==3.19.1
|
3 |
transformers==4.22.0
|
|
|
|
|
|
1 |
datasets==2.4.0
|
2 |
gradio==3.19.1
|
3 |
transformers==4.22.0
|
4 |
+
--extra-index-url https://download.pytorch.org/whl/cpu
|
5 |
+
torch==1.12.1
|