Spaces:
Sleeping
Sleeping
Roman Castagné
committed on
Commit
·
509fb35
1
Parent(s):
7ad5acf
update requirements and change oscar to oscar small
Browse files
- app.py +1 -3
- requirements.txt +2 -0
app.py
CHANGED
|
@@ -3,9 +3,7 @@ import gradio as gr
|
|
| 3 |
from transformers import AutoModelForMaskedLM, AutoTokenizer, DataCollatorForLanguageModeling
|
| 4 |
|
| 5 |
|
| 6 |
-
ds = datasets.load_dataset(
|
| 7 |
-
"oscar-corpus/OSCAR-2109", "deduplicated_en", streaming=True, use_auth_token=True, split="train"
|
| 8 |
-
)
|
| 9 |
ds = ds.shuffle(buffer_size=1000)
|
| 10 |
ds = iter(ds)
|
| 11 |
|
|
|
|
| 3 |
from transformers import AutoModelForMaskedLM, AutoTokenizer, DataCollatorForLanguageModeling
|
| 4 |
|
| 5 |
|
| 6 |
+
ds = datasets.load_dataset("nthngdy/oscar-small", "unshuffled_deduplicated_en", streaming=True, split="train")
|
|
|
|
|
|
|
| 7 |
ds = ds.shuffle(buffer_size=1000)
|
| 8 |
ds = iter(ds)
|
| 9 |
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
| 1 |
datasets==2.4.0
|
| 2 |
gradio==3.19.1
|
| 3 |
transformers==4.22.0
|
|
|
|
|
|
|
|
|
| 1 |
datasets==2.4.0
|
| 2 |
gradio==3.19.1
|
| 3 |
transformers==4.22.0
|
| 4 |
+
--extra-index-url https://download.pytorch.org/whl/cpu
|
| 5 |
+
torch==1.12.1
|