shakespeare / shakespeare.pkl
gkrishnan's picture
Update shakespeare.pkl
c9bf686
raw
history blame
549 Bytes
import pickle
import os
import shutil
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
# Fetch the TheMITTech/shakespeare repository into ./shakespeare.
# `!git clone ...` is IPython shell syntax and a SyntaxError in plain Python;
# subprocess.run works both in a notebook and as a regular script.
import subprocess

# Skip the clone when the checkout is already present so a re-run does not
# abort on git's "destination path already exists" error.
if not os.path.isdir("./shakespeare"):
    subprocess.run(
        ["git", "clone", "https://github.com/TheMITTech/shakespeare"],
        check=True,  # surface a failed clone instead of continuing silently
    )
from glob import glob
# Collect the HTML pages from the cloned repository.
# NOTE(review): glob() is called without recursive=True, so "**" behaves like
# "*" and only files exactly one directory below ./shakespeare/ are matched.
# Confirm that this is the intended depth.
files = glob("./shakespeare/**/*.html")

# Flatten everything into a single folder for the directory loader.
# exist_ok=True keeps a re-run from failing when ./data is already there.
destination_folder = './data/'
os.makedirs(destination_folder, exist_ok=True)

for html_file in files:
    # Keep only the file name. The previous expression
    # `html_file.split("/"[-1])` indexed the separator string instead of the
    # split result, which produced a list and made the string concatenation
    # raise TypeError.
    # NOTE(review): files that share a name in different source directories
    # map to the same destination path and may overwrite each other.
    shutil.move(html_file, destination_folder + os.path.basename(html_file))
# Build a DirectoryLoader over ./data/ that uses BSHTMLLoader as the per-file
# loader class, then load the documents it finds.
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

# Persist the loaded documents as a pickle in the current working directory.
with open("shakespeare.pkl", "wb") as pickle_file:
    pickle.dump(data, pickle_file)