Spaces:
Runtime error
Runtime error
Update shakespeare.pkl
Browse files- shakespeare.pkl +21 -0
shakespeare.pkl
CHANGED
@@ -1 +1,22 @@
|
|
|
|
|
|
|
|
|
|
1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import os
|
3 |
+
import shutil
|
4 |
+
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
|
5 |
|
6 |
+
from glob import glob

# Upstream repository holding the Shakespeare HTML sources.
REPO_URL = "https://github.com/TheMITTech/shakespeare"


def move_html_files(files, destination_folder):
    """Move each path in *files* directly into *destination_folder*.

    Only the final path component (the file name) is kept, so the
    directory layout of the sources is flattened.

    Args:
        files: Iterable of paths to existing files.
        destination_folder: Directory receiving the files; it is created
            if it does not exist yet.

    Returns:
        List of destination paths, in the same order as *files*.
    """
    # exist_ok keeps a second run from failing on an already-present folder.
    os.makedirs(destination_folder, exist_ok=True)

    moved = []
    for html_file in files:
        # Take the file name from the *result* of splitting the path;
        # basename also copes with OS-specific separators.
        target = os.path.join(destination_folder, os.path.basename(html_file))
        # NOTE(review): sources sharing a file name map to the same target
        # and may overwrite one another -- confirm this is intended.
        shutil.move(html_file, target)
        moved.append(target)
    return moved


if __name__ == "__main__":
    # Local import: only the script path needs it.
    import subprocess

    # Plain-Python replacement for the notebook-only "!git clone" shell escape.
    # Skip the clone when a checkout is already present so re-runs succeed.
    if not os.path.isdir("./shakespeare"):
        subprocess.run(["git", "clone", REPO_URL], check=True)

    # NOTE(review): without recursive=True, "**" behaves like "*", so this
    # matches HTML files exactly one directory below ./shakespeare. Pass
    # recursive=True if deeper (or top-level) files should be included.
    files = glob("./shakespeare/**/*.html")

    destination_folder = './data/'
    move_html_files(files, destination_folder)

    # Parse every file in ./data/ with BSHTMLLoader.
    bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)

    data = bshtml_dir_loader.load()

    # Persist the loaded documents for later reuse.
    with open("shakespeare.pkl", "wb") as fp:
        pickle.dump(data, fp)
|