Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -7,13 +7,24 @@ from transformers import AutoTokenizer
|
|
7 |
import pickle
|
8 |
import os
|
9 |
|
10 |
-
|
11 |
|
12 |
-
|
13 |
-
pickle.dump(github_url, fp)
|
14 |
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
bloomz_tokenizer = AutoTokenizer.from_pretrained('bigscience/bloomz-1b7')
|
19 |
|
|
|
7 |
import pickle
|
8 |
import os
|
9 |
|
10 |
+
from glob import glob
|
11 |
|
12 |
+
# Collect every HTML file under ./shakespeare at any depth. `**` only
# matches across directory levels when recursive=True is passed; without
# it the pattern behaves like a single `*` (exactly one directory level).
files = glob("./shakespeare/**/*.html", recursive=True)
|
|
|
13 |
|
14 |
+
import shutil
|
15 |
+
import os
|
16 |
+
|
17 |
+
# Create the destination folder; exist_ok=True keeps a re-run or reload
# from crashing with FileExistsError when ./data is already present.
os.makedirs('./data', exist_ok=True)
|
18 |
+
destination_folder = './data/'
|
19 |
+
|
20 |
+
for html_file in files:
|
21 |
+
# Move the file into the destination folder under its own file name.
# os.path.basename/os.path.join handle the platform's path separator,
# unlike splitting on "/" and concatenating strings.
shutil.move(html_file, os.path.join(destination_folder, os.path.basename(html_file)))
|
22 |
+
|
23 |
+
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
|
24 |
+
|
25 |
+
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
|
26 |
+
|
27 |
+
data = bshtml_dir_loader.load()
|
28 |
|
29 |
bloomz_tokenizer = AutoTokenizer.from_pretrained('bigscience/bloomz-1b7')
|
30 |
|