Spaces:
Sleeping
Sleeping
coroianpetruta
committed on
Commit
•
2f1571b
1
Parent(s):
c9cd7db
Improved random sentence generator
Browse files
app.py
CHANGED
@@ -44,28 +44,34 @@ dataset = load_dataset("daily_dialog", trust_remote_code=True)
|
|
44 |
import re
|
45 |
|
46 |
dialogs = dataset["train"]
|
47 |
-
|
48 |
-
def
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
# Function to randomly select one sentence from the dataset
|
60 |
def get_random_sentence():
    """Return one cleaned sentence picked at random from the dialogues.

    A dialogue is drawn uniformly from ``dialogs['dialog']``, then one of
    its sentences is drawn uniformly and passed through ``clean_sentence``.
    """
    random_dialogue = random.choice(dialogs['dialog'])
    # Select a random sentence from the chosen dialogue
    random_sentence = random.choice(random_dialogue)

    # Clean the sentence
    clean_random_sentence = clean_sentence(random_sentence)
    return clean_random_sentence
|
69 |
|
70 |
|
71 |
|
|
|
44 |
import re
|
45 |
|
46 |
dialogs = dataset["train"]
|
47 |
+
|
48 |
+
def flatten(xss):
    """Flatten one level of nesting.

    Args:
        xss: An iterable of iterables.

    Returns:
        A new list holding every element of every inner iterable, in order.
    """
    return [x for xs in xss for x in xs]
|
50 |
+
|
51 |
+
def split_keep_delimiters(s):
    """Normalise spacing in *s* and split it into sentences.

    Each returned sentence keeps its terminating ``.``, ``!`` or ``?`` and
    has its first character upper-cased. Trailing text that is not closed by
    one of those delimiters is not returned.

    Args:
        s: A string that may contain spaces before punctuation and around
            apostrophes.

    Returns:
        A list of sentence strings (empty if no terminated sentence exists).
    """
    # Remove spaces before dots, exclamation points, commas, and question marks
    s = re.sub(r'\s+([.,!?])', r'\1', s)

    # Remove spaces before and after apostrophes
    s = re.sub(r"\s*['’]\s*", r"'", s)

    # Use re.findall to split by the delimiters while keeping them
    parts = re.findall(r'[^.!?\s][^.!?]*[.!?]', s)

    # Upper-case only the first character: str.capitalize() would also
    # lower-case the rest of the sentence ("I", proper nouns, acronyms).
    return [part[0].upper() + part[1:] for part in parts]
|
61 |
+
|
62 |
+
|
63 |
+
# Flatten all dialogues into a single list of lines, then split every line
# into its individual sentences so one sentence can be sampled directly.
random_sentences = flatten(dialogs["dialog"])
random_sentences_stripped = [
    sentence
    for line in random_sentences
    for sentence in split_keep_delimiters(line)
]
|
69 |
+
|
70 |
+
|
71 |
|
72 |
# Function to randomly select one sentence from the dataset
|
73 |
def get_random_sentence():
    """Return one sentence chosen at random from ``random_sentences_stripped``."""
    return random.choice(random_sentences_stripped)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
|
77 |
|