Spaces:
Runtime error
Runtime error
File size: 1,218 Bytes
bddc905 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import re
import typing as t
def parse_messages_from_str(string: str, names: t.List[str]) -> t.List[str]:
    '''
    Split a raw chat-history string into a list of individual messages.

    A message starts at any line that begins with one of `names` followed by
    a colon (and an optional space), and runs until the next such line or the
    end of the string. Each returned message keeps its speaker prefix and is
    stripped of surrounding whitespace.

    :param string: Raw chat history, possibly holding several messages.
    :param names: Speaker names that can open a message. They are matched
        literally (regex metacharacters are escaped); empty names are ignored.
    :return: The messages in order of appearance. If fewer than two speaker
        prefixes are found (or no usable names were given), the whole stripped
        string is returned as a single-item list. When two or more prefixes
        are found, any text before the first prefix is not included.
    '''
    # An empty alternation would reduce the pattern to `^(): ?`, which treats
    # every line starting with ":" as a new message, so drop empty names and
    # bail out early when nothing usable is left.
    sanitized_names = [re.escape(name) for name in names if name]
    if not sanitized_names:
        return [string.strip()]

    alternation = "|".join(sanitized_names)
    # MULTILINE makes `^` match at the start of every line, not just the
    # start of the string.
    speaker_regex = re.compile(rf"^({alternation}): ?", re.MULTILINE)

    message_start_indexes = [
        match.start() for match in speaker_regex.finditer(string)
    ]
    if len(message_start_indexes) < 2:
        # Single message in the string.
        return [string.strip()]

    # Each message ends where the next one starts; the last one runs to the
    # end of the string.
    message_end_indexes = message_start_indexes[1:] + [len(string)]
    return [
        string[start_idx:end_idx].strip()
        for start_idx, end_idx in zip(message_start_indexes,
                                      message_end_indexes)
    ]
def serialize_chat_history(history: t.List[str]) -> str:
    '''
    Collapse a chat history into a single string.

    The items of `history` are concatenated in order, with one newline
    character between consecutive items.
    '''
    separator = "\n"
    serialized = separator.join(history)
    return serialized
|