gkrishnan committed on
Commit
c511b8b
1 Parent(s): 18b0534

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -7,13 +7,24 @@ from transformers import AutoTokenizer
7
  import pickle
8
  import os
9
 
10
- github_url = "https://github.com/TheMITTech/shakespeare"
11
 
12
- with open('shakespeare.pkl', 'wb') as fp:
13
- pickle.dump(github_url, fp)
14
 
15
- with open('shakespeare.pkl', 'rb') as fp:
16
- data = pickle.load(fp)
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  bloomz_tokenizer = AutoTokenizer.from_pretrained('bigscience/bloomz-1b7')
19
 
 
7
  import pickle
8
  import os
9
 
10
+ from glob import glob
11
 
12
+ files = glob("./shakespeare/**/*.html")
 
13
 
14
+ import shutil
15
+ import os
16
+
17
+ os.mkdir('./data')
18
+ destination_folder = './data/'
19
+
20
+ for html_file in files:
21
+ shutil.move(html_file, destination_folder + html_file.split("/")[-1])
22
+
23
+ from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
24
+
25
+ bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
26
+
27
+ data = bshtml_dir_loader.load()
28
 
29
  bloomz_tokenizer = AutoTokenizer.from_pretrained('bigscience/bloomz-1b7')
30