gkrishnan committed on
Commit
c9bf686
1 Parent(s): c0c3c1b

Update shakespeare.pkl

Browse files
Files changed (1) hide show
  1. shakespeare.pkl +21 -0
shakespeare.pkl CHANGED
@@ -1 +1,22 @@
 
 
 
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Build ``shakespeare.pkl`` from the MIT Tech Shakespeare HTML corpus.

Steps: clone the corpus repository, move its HTML files into a flat
``./data/`` folder, load them with LangChain's ``BSHTMLLoader`` through a
``DirectoryLoader``, and pickle the loaded documents to ``shakespeare.pkl``.
"""
import os
import pickle
import shutil
import subprocess
from glob import glob

from langchain.document_loaders import BSHTMLLoader, DirectoryLoader

# Fetch the corpus. The original used the IPython shell escape
# ``!git clone ...``, which is a syntax error outside a notebook; calling
# git through subprocess works in both environments. Skip the clone when
# the checkout already exists so the script can be re-run.
if not os.path.isdir('./shakespeare'):
    subprocess.run(
        ['git', 'clone', 'https://github.com/TheMITTech/shakespeare'],
        check=True,
    )

# Without ``recursive=True`` the ``**`` component behaves like ``*``, so this
# matches HTML files exactly one directory below the clone root.
files = glob("./shakespeare/**/*.html")

destination_folder = './data/'
# exist_ok avoids a FileExistsError when the folder is left from a prior run.
os.makedirs(destination_folder, exist_ok=True)

for html_file in files:
    # Keep only the file name. The original wrote ``split("/"[-1])``, which
    # returns a list and made the path concatenation raise TypeError.
    # NOTE(review): files that share a basename across source directories
    # will collide in this flat folder -- confirm that is acceptable.
    shutil.move(
        html_file,
        os.path.join(destination_folder, os.path.basename(html_file)),
    )

# Parse every file in ./data/ with the BeautifulSoup-based HTML loader.
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)

data = bshtml_dir_loader.load()

# Persist the loaded documents so later runs can skip cloning and parsing.
with open("shakespeare.pkl", "wb") as fp:
    pickle.dump(data, fp)