awacke1 committed on
Commit
5afd5d3
1 Parent(s): b4defb1

Create new file

Browse files
Files changed (1) hide show
  1. utils.py +63 -0
utils.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ utils.py - Utility functions for the project.
3
+ """
4
+
5
+ import re
6
+ from pathlib import Path
7
+
8
+ from natsort import natsorted
9
+
10
+
11
def truncate_word_count(text, max_words=512):
    """
    truncate_word_count - a helper function for the gradio module

    Counts the whitespace-separated words in ``text`` and, when there are
    more than ``max_words``, keeps only the first ``max_words`` of them
    joined by single spaces.

    Parameters
    ----------
    text : str, required, the text to be processed
    max_words : int, optional, the maximum number of words, default=512

    Returns
    -------
    dict, with keys "was_truncated" (bool) and "truncated_text" (str);
    the input text is returned unchanged when it is within the limit
    """
    # str.split() with no arguments splits on runs of whitespace and ignores
    # leading/trailing whitespace, so padded or empty input does not produce
    # empty "words" that would inflate the count.
    words = text.split()
    if len(words) > max_words:
        return {
            "was_truncated": True,
            "truncated_text": " ".join(words[:max_words]),
        }
    return {"was_truncated": False, "truncated_text": text}
32
+
33
+
34
def load_examples(src):
    """
    load_examples - a helper function for the gradio module to load examples

    Creates the ``src`` directory if it does not exist yet, then reads every
    ``*.txt`` file directly inside it in natural sort order.

    Parameters:
        src : str or Path, the directory containing the example text files
    Returns:
        list of list, one row per example file: the file's text followed by
        the fixed values "large", 2, 512, 0.7, 3.5, 3 (presumably the default
        settings shown alongside each example - confirm against the caller)
    """
    src = Path(src)
    # parents=True so a nested examples directory can be created on first run
    src.mkdir(parents=True, exist_ok=True)
    examples = natsorted(list(src.glob("*.txt")))
    # load the examples into a list
    text_examples = []
    for example in examples:
        # explicit encoding: the platform default is not UTF-8 everywhere
        text = example.read_text(encoding="utf-8")
        text_examples.append([text, "large", 2, 512, 0.7, 3.5, 3])

    return text_examples
52
+
53
+
54
def load_example_filenames(example_path: "str | Path") -> "dict[str, Path]":
    """
    load_example_filenames - a helper function for the gradio module to load examples

    Parameters:
        example_path : str or Path, the directory searched (non-recursively)
            for ``*.txt`` files
    Returns:
        dict, the examples (filename: path of that file under example_path)
    """
    # NOTE: the annotation is quoted because the previous ``str or Path`` was
    # evaluated as a boolean expression and collapsed to plain ``str``.
    example_dir = Path(example_path)
    return {txt_file.name: txt_file for txt_file in example_dir.glob("*.txt")}