Spaces:
No application file
No application file
import json | |
from abc import ABC | |
from typing import List | |
from langchain.docstore.document import Document | |
from langchain.document_loaders.base import BaseLoader | |
class Person:
    """A dialogue participant identified by name, with an optional age."""

    def __init__(self, name, age=None):
        """Store the participant's attributes.

        :param name: display name of the participant.
        :param age: age of the participant; defaults to ``None`` when it is
            not known.
        """
        self.name = name
        self.age = age

    def __repr__(self):
        """Return a constructor-like representation for debugging."""
        return f"{type(self).__name__}(name={self.name!r}, age={self.age!r})"
class Dialogue:
    """In-memory model of a dialogue: an ordered list of speaker turns.

    Turns are either appended directly with ``add_turn`` or read from
    ``file_path`` by ``parse_dialogue``. The remaining methods render the
    collected turns as text, a dictionary, or JSON.
    """

    def __init__(self, file_path: str):
        """Remember the dialogue file location and start with no turns.

        :param file_path: path of the file that ``parse_dialogue`` reads.
        """
        self.file_path = file_path
        self.turns = []

    def add_turn(self, turn):
        """Append one turn to the end of the dialogue.

        :param turn: object exposing ``speaker`` (which has a ``name``) and
            ``message`` attributes.
        :return: None
        """
        self.turns.append(turn)

    @staticmethod
    def _speaker_from_header(line):
        """Return the text before the first ':' of a header line, stripped."""
        # partition() never raises: a header without a colon is taken whole
        # as the speaker name instead of failing with ValueError.
        name, _, _ = line.partition(':')
        # Strip so that "Alice :" and "Alice:" map to the same participant.
        return name.strip()

    def parse_dialogue(self):
        """Read ``self.file_path`` and append one turn per speaker/message pair.

        Blank lines are ignored. The remaining lines are consumed in pairs:
        the first line of a pair is a header whose text before the first
        colon is the speaker name (anything after the colon is discarded),
        and the second line is that speaker's message. One ``Person`` is
        created per distinct speaker name and shared by all of that
        speaker's turns.

        A trailing header with no message line after it produces no turn.
        Turns are appended to ``self.turns``; existing turns are kept, so
        calling this method twice duplicates the parsed turns.

        :raises OSError: if the file cannot be opened.
        """
        participants = {}
        speaker_name = None
        with open(self.file_path, encoding='utf-8') as file:
            for raw_line in file:
                line = raw_line.strip()
                if not line:
                    continue
                if speaker_name is None:
                    # First non-blank line of a pair: the speaker header.
                    speaker_name = self._speaker_from_header(line)
                    continue
                # Second non-blank line of a pair: the message itself.
                if speaker_name not in participants:
                    participants[speaker_name] = Person(speaker_name, None)
                self.add_turn(Turn(participants[speaker_name], line))
                # Reset so the next non-blank line starts a new turn.
                speaker_name = None

    def display(self):
        """Print every turn to stdout as ``<speaker>: <message>``."""
        for turn in self.turns:
            print(f"{turn.speaker.name}: {turn.message}")

    def export_to_file(self, file_path):
        """Write every turn to ``file_path`` as one ``<speaker>: <message>`` line.

        :param file_path: destination path; the file is overwritten.
        """
        with open(file_path, 'w', encoding='utf-8') as file:
            file.writelines(
                f"{turn.speaker.name}: {turn.message}\n" for turn in self.turns
            )

    def to_dict(self):
        """Return the dialogue as ``{"turns": [{"speaker": ..., "message": ...}]}``.

        :return: dictionary with one entry per turn, in dialogue order.
        """
        return {
            "turns": [
                {"speaker": turn.speaker.name, "message": turn.message}
                for turn in self.turns
            ]
        }

    def to_json(self):
        """Return ``to_dict()`` serialized as indented, non-ASCII-escaped JSON.

        :return: JSON string.
        """
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    def participants_to_export(self):
        """Return the distinct speaker names joined with ``', '``.

        Names are listed in order of first appearance so the result is the
        same on every run; joining a ``set`` would make the order depend on
        string hash randomization.

        :return: comma-separated speaker names, ``''`` when there are no turns.
        """
        # dict preserves insertion order, giving an ordered de-duplication.
        unique_names = dict.fromkeys(turn.speaker.name for turn in self.turns)
        return ', '.join(unique_names)
class Turn:
    """One utterance in a dialogue: who spoke and what was said."""

    def __init__(self, speaker: "Person", message: str):
        """Store the speaker and the message of this turn.

        :param speaker: participant who produced the message.
        :param message: text of the utterance.
        """
        self.speaker = speaker
        self.message = message

    def __repr__(self):
        """Return a constructor-like representation for debugging."""
        return (
            f"{type(self).__name__}"
            f"(speaker={self.speaker!r}, message={self.message!r})"
        )
class DialogueLoader(BaseLoader, ABC):
    """Loader that turns a dialogue file into one ``Document`` per turn."""

    def __init__(self, file_path: str):
        """Parse the dialogue file immediately and keep the parsed model.

        :param file_path: path of the dialogue file to parse.
        """
        self.file_path = file_path
        parsed = Dialogue(file_path=file_path)
        parsed.parse_dialogue()
        self.dialogue = parsed

    def load(self) -> List[Document]:
        """Build the documents for the parsed dialogue.

        Each document's content is the turn's message. Its metadata holds a
        single ``"source"`` string combining the dialogue file path, the
        turn's speaker name, and the exported participant list.

        :return: one ``Document`` per turn, in dialogue order.
        """
        dialogue = self.dialogue
        everyone = dialogue.participants_to_export()
        return [
            Document(
                page_content=entry.message,
                metadata={"source": f"Dialogue File:{dialogue.file_path},"
                                    f"speaker:{entry.speaker.name},"
                                    f"participant:{everyone}"},
            )
            for entry in dialogue.turns
        ]