# Page header captured with the file: author avatar "Ethgoin", commit "Update lexer.py" (3f612f7, verified).
import re
from typing import List, Tuple
# A token is a (kind, text) pair: the token kind followed by the exact source
# text that was matched.
Token = Tuple[str, str]
# Reserved words of the language. The lexer emits each of these with the word
# itself as the token kind instead of the generic "IDENTIFIER" kind.
# Lower-case type names come first, then the keywords in alphabetical order,
# one initial letter per line.
RESERVED = {
    "boolean", "float", "int",
    "ACTIVATE_ALARM", "ACTIVATE_SENSOR",
    "BREAK",
    "CALIBRATE", "CHARGE_BATTERY", "CHECK_BATTERY", "CLOSE_DOOR", "CONTINUE", "COPY_FILE",
    "DEACTIVATE_ALARM", "DEACTIVATE_SENSOR", "DECREASE_SPEED", "DELETE_FILE", "DOWNLOAD",
    "ELSE",
    "FALSE", "FOR",
    "IF", "INCREASE_SPEED", "INIT",
    "LOCK", "LOG", "LOW_BATTERY",
    "MOVE_BACKWARD", "MOVE_FORWARD", "MOVE_TO",
    "NULL",
    "OPEN_DOOR",
    "PAUSE", "PRINT",
    "READ_SENSOR", "REBOOT", "RENAME_FILE", "RESET", "RESUME", "REVERSE", "ROTATE",
    "SAVE_FILE", "SCAN", "SET", "SET_SPEED", "SHUTDOWN", "SHUT_OFF", "START", "STOP",
    "STOP_IMMEDIATELY",
    "THEN", "TOGGLE_LIGHT", "TRUE", "TURN_DOWN", "TURN_LEFT", "TURN_RIGHT", "TURN_UP",
    "UNLOCK", "UPLOAD", "UPLOAD_FILE",
    "WAIT", "WHILE",
}
# Ordered lexical rules as (token kind, regular expression) pairs.
#
# Order is significant: the rules are joined into a single alternation, and
# Python's regex engine picks the FIRST alternative that matches at a position,
# not the longest one. A rule whose text starts with another rule's text must
# therefore be listed before that shorter rule.
TOKEN_SPEC = [
    ("COMMENT", r"//[^\n]*"),     # before DIVIDE, otherwise "//" lexes as two "/"
    ("STRING", r'"[^"\n]*"'),     # double-quoted, single line, no escape sequences
    ("FLOAT", r'\d+\.\d+'),       # before INT, otherwise "1.5" is split at the dot
    ("INT", r'\d+'),
    ("EQUAL", r'=='),             # before ASSIGN, otherwise "==" lexes as "=", "="
    ("NOT_EQUAL", r'!='),
    ("ASSIGN", r'='),
    ("PLUS", r'\+'),
    ("MINUS", r'-'),
    ("MULTIPLY", r'\*'),
    ("DIVIDE", r'/'),
    ("GREATER", r'>'),
    ("LESS", r'<'),
    ("OPEN_PAREN", r'\('),
    ("CLOSE_PAREN", r'\)'),
    ("OPEN_BRACE", r'\{'),
    ("CLOSE_BRACE", r'\}'),
    ("SEMICOLON", r';'),
    ("COLON", r':'),
    ("COMMA", r','),
    ("NEWLINE", r'\n'),
    ("SKIP", r'[ \t\r]+'),
    ("IDENTIFIER", r'[a-zA-Z_][a-zA-Z0-9_]*'),
    ("MISMATCH", r'.'),           # catch-all for any other character; must stay last
]
# Master pattern: every rule becomes a named group "(?P<KIND>regex)" and the
# groups are OR-ed together in TOKEN_SPEC order, so a match's `lastgroup`
# reports which rule produced it.
TOK_REGEX = '|'.join("(?P<%s>%s)" % rule for rule in TOKEN_SPEC)
# Compiled once at import time and reused by every lexer() call.
token_re = re.compile(TOK_REGEX)
def lexer(code: str) -> List[Token]:
    """Split *code* into a list of ``(kind, text)`` tokens.

    The input is scanned with ``token_re``. Newlines, whitespace and comments
    are discarded. An identifier that appears in ``RESERVED`` is emitted with
    the word itself as its kind; any other identifier gets the kind
    ``"IDENTIFIER"``. All remaining matches keep the name of the rule that
    matched them.

    Args:
        code: Source text to tokenize.

    Returns:
        The tokens in source order.

    Raises:
        RuntimeError: If a character is matched only by the catch-all
            ``MISMATCH`` rule.
    """
    ignored = ("NEWLINE", "SKIP", "COMMENT")
    collected = []
    for found in token_re.finditer(code):
        label, value = found.lastgroup, found.group()
        if label in ignored:
            continue
        if label == "MISMATCH":
            raise RuntimeError(f"Token no reconocido: {value}")
        if label == "IDENTIFIER" and value in RESERVED:
            # Reserved words act as their own token kind.
            label = value
        collected.append((label, value))
    return collected