Alexis Palmer committed on
Commit
45dfe77
1 Parent(s): 397ad86

new util version

Browse files
Files changed (1) hide show
  1. util.py +10 -0
util.py CHANGED
@@ -27,6 +27,16 @@ def load_raw_text(corpus_directory: str, file_names=None) -> str:
27
  corpus += (file_contents + "\n")
28
  return corpus
29
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  word_regex = r"[\w|\']+"
32
  def tokenize(text):
 
27
  corpus += (file_contents + "\n")
28
  return corpus
29
 
30
+ def load_single_raw_text_file(file_name):
31
+ """Loads a single text file into one large string"""
32
+
33
+ corpus = ""
34
+ with open(file_name, 'r') as file:
35
+ file_contents = file.read()
36
+ corpus += (file_contents + "\n")
37
+
38
+ return corpus
39
+
40
 
41
  word_regex = r"[\w|\']+"
42
  def tokenize(text):