dfa-ui / main.py
DarylT01's picture
fix(trap): add trap state support
7ef0785
# References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
class DFA:
# 5-tuple init
def __init__(
self, alphabets, states, transition_functions, initial_state, final_states
):
self.alphabets = alphabets
self.states = states
self.transitions = transition_functions
self.initial_state = initial_state
self.final_states = final_states
# Processes the input word and returns True if the word is accepted by the DFA
def is_accepting(self, input_string: str):
current_state = self.initial_state
for char in input_string:
current_state = self.transitions.get(current_state, {}).get(
char, len(self.states) - 1
)
return current_state in self.final_states
# Processes the input paragraph and returns a dictionary of accepted words and their positions
def check(self, paragraph: str):
# Reject empty input
if paragraph.strip() == "":
raise ValueError("Empty string provided")
# Normalize the input
paragraph = paragraph.lower().strip()
# Convert the paragraph to a list of characters
chars = list(paragraph)
current_word = ""
accepted_words: dict[str : list[tuple[int, int]]] = (
{}
) # returns: {word: [(start_index, end_index)]}
# Start and end index of the current word
word_start = 0
word_end = 0
# Traverse the characters in the paragraph
for i, char in enumerate(chars):
# If the character is in the alphabet, then it is part of the word
if char in self.alphabets:
current_word += char
word_end = i
# If the character is not in the alphabet or it is the last character in the paragraph
# then the current word is complete
# Check if the current word is accepted by the DFA
# If it is accepted, add it to the accepted_words dictionary
# Reset the current_word to an empty string
if char not in self.alphabets or i == len(chars) - 1:
if current_word != "":
if self.is_accepting(current_word):
accepted_words[current_word] = accepted_words.get(
current_word, []
) + [(word_start, word_end)]
current_word = ""
word_start = i + 1
return accepted_words
# Generate a DFA from a list of words
def generate_dfa(input_strings: list[str]) -> DFA:
alphabets = {
"a",
"b",
"c",
"d",
"e",
"f",
"g",
"h",
"i",
"j",
"k",
"l",
"m",
"n",
"o",
"p",
"q",
"r",
"s",
"t",
"u",
"v",
"w",
"x",
"y",
"z",
}
states = set([0])
transition_functions = {}
final_states = set()
for input_string in [
input_string.lower().strip() for input_string in input_strings
]:
current_state = 0
for i in input_string:
if i not in alphabets:
raise ValueError(f"Invalid character '{i}' in string '{input_string}'")
# Find the upcoming state based on the current state and the input character to reach the next state
upcoming_state = transition_functions.get(current_state)
if upcoming_state is not None:
upcoming_state = upcoming_state.get(i)
# Create a new state if the upcoming state is not found
if upcoming_state is None:
# The new state is the next integer after the maximum state in the set of states
upcoming_state = len(states)
# Add the new state to the set of states
states.add(upcoming_state)
# Add the transition into the transition functions dictionary
if current_state not in transition_functions:
transition_functions[current_state] = {}
transition_functions[current_state][i] = upcoming_state
# Move to the upcoming state
current_state = upcoming_state
# Add the final state to the set of final states
final_states.add(current_state)
# Add the trap state to the set of states
states.add(len(states))
# Add the trap state to the transition functions
for state in states:
if state not in transition_functions:
transition_functions[state] = {}
for alphabet in alphabets:
if alphabet not in transition_functions[state]:
transition_functions[state][alphabet] = len(states) - 1
return DFA(alphabets, states, transition_functions, 0, final_states)
dfa = generate_dfa(["and", "but", "or", "because"])
print(dfa.check("me and you."))