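"""Gradio app that tokenizes Python code with a hand-written DFA.

The DFA recognizes ordinary tokens, split characters (brackets, operators,
commas, newlines), and string literals ("...", '...', '''...''') with an
optional f prefix; the function returns each recognized piece as a token.
"""
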
import gradio as gr

def tokenize(buffer: str):
    # Characters that always split tokens and are emitted as tokens themselves.
    split = {'(', ')', '{', '}', '[', ']', ',', ':', '+', '-', '*', '/', '%', '=', '\n'}
    # Transition table: 'split' matches any character in `split`, 'any' is the fallback.
    # States 0-4 count leading spaces (four spaces form an indent token),
    # 5-6 scan an ordinary token and any trailing spaces, 7 handles an f-string prefix,
    # 8 a double-quoted string, 9-15 single- and triple-quoted strings,
    # 16 follows a closing quote, and -1 is the reject (dead) state.
    DFA_table = {
        -1: {'any': -1},
        0: {' ': 1, 'any': 5, 'split': 17, 'f': 7, '"': 8, "'": 9},
        1: {' ': 2, 'f': 7, 'any': 5},
        2: {' ': 3, 'f': 7, 'any': 5},
        3: {' ': 4, 'f': 7, 'any': 5},
        4: {'any': 18},
        5: {' ': 6, 'any': 5, 'split': 17},
        6: {' ': 6, 'any': 18, 'split': 17},
        7: {'any': 5, '"': 8, "'": 9},
        8: {'"': 16, 'any': 8},
        9: {"'": 11, 'any': 10},
        10: {"'": 16, 'any': 10},
        11: {' ': 16, "'": 12, 'any': -1, 'split': 17},
        12: {"'": 13, 'any': 12},
        13: {"'": 14, 'any': -1},
        14: {"'": 15, 'any': -1},
        15: {' ': 16, 'split': 17, 'any': -1},
        16: {' ': 16, 'any': -1, 'split': 17, '"': 18, "'": 18},
        17: {'any': -1},  # final: consume the split character as a token
        18: {'any': -1},  # final: do not consume the current character
    }
    finals = (17, 18)
    tokens = []
    cursor = 0
    while cursor < len(buffer):
        state = 0
        temp = ''
        # Advance the DFA one character at a time until a final state or end of input.
        while cursor < len(buffer):
            ch = buffer[cursor]
            if ch in split:
                ch = 'split'
            if ch not in DFA_table[state]:
                ch = 'any'
            state = DFA_table[state][ch]
            if state not in finals:
                temp += buffer[cursor]
            else:
                break
            cursor += 1
        if state not in finals and state != 5:
            raise RuntimeError(f"Rejected at state {state}")
        if temp != '':
            # Keep whitespace-only tokens (e.g. a 4-space indent) intact;
            # strip surrounding spaces from everything else.
            tokens.append(temp if temp.isspace() else temp.strip())
        if state == finals[0]:
            # State 17: the split character itself becomes a token.
            tokens.append(buffer[cursor])
            cursor += 1
    return tokens

interface = gr.Interface(
    fn=tokenize,
    title="Tokenizer",
    description="Tokenize Python code",
    theme="compact",
    inputs=gr.TextArea(label="Python code", value="print('Hello World!!')"),
    outputs=gr.TextArea(label="Tokenized output"),
)
interface.launch()