Surbhi123's picture
Upload folder using huggingface_hub
64772a4 verified
# cython: infer_types=True, language_level=3, auto_pickle=False
#
# Cython Scanner
#
from __future__ import absolute_import
import cython
cython.declare(make_lexicon=object, lexicon=object,
print_function=object, error=object, warning=object,
os=object, platform=object)
import os
import platform
from unicodedata import normalize
from contextlib import contextmanager
from .. import Utils
from ..Plex.Scanners import Scanner
from ..Plex.Errors import UnrecognizedInput
from .Errors import error, warning, hold_errors, release_errors, CompileError
from .Lexicon import any_string_prefix, make_lexicon, IDENT
from .Future import print_function
debug_scanner = 0
trace_scanner = 0
scanner_debug_flags = 0
scanner_dump_file = None
lexicon = None
def get_lexicon():
global lexicon
if not lexicon:
lexicon = make_lexicon()
return lexicon
#------------------------------------------------------------------
py_reserved_words = [
"global", "nonlocal", "def", "class", "print", "del", "pass", "break",
"continue", "return", "raise", "import", "exec", "try",
"except", "finally", "while", "if", "elif", "else", "for",
"in", "assert", "and", "or", "not", "is", "lambda",
"from", "yield", "with",
]
pyx_reserved_words = py_reserved_words + [
"include", "ctypedef", "cdef", "cpdef",
"cimport", "DEF", "IF", "ELIF", "ELSE"
]
#------------------------------------------------------------------
class CompileTimeScope(object):
def __init__(self, outer=None):
self.entries = {}
self.outer = outer
def declare(self, name, value):
self.entries[name] = value
def update(self, other):
self.entries.update(other)
def lookup_here(self, name):
return self.entries[name]
def __contains__(self, name):
return name in self.entries
def lookup(self, name):
try:
return self.lookup_here(name)
except KeyError:
outer = self.outer
if outer:
return outer.lookup(name)
else:
raise
def initial_compile_time_env():
benv = CompileTimeScope()
names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE', 'UNAME_VERSION', 'UNAME_MACHINE')
for name, value in zip(names, platform.uname()):
benv.declare(name, value)
try:
import __builtin__ as builtins
except ImportError:
import builtins
names = (
'False', 'True',
'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
'list', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
'sum', 'tuple', 'zip',
### defined below in a platform independent way
# 'long', 'unicode', 'reduce', 'xrange'
)
for name in names:
try:
benv.declare(name, getattr(builtins, name))
except AttributeError:
# ignore, likely Py3
pass
# Py2/3 adaptations
from functools import reduce
benv.declare('reduce', reduce)
benv.declare('unicode', getattr(builtins, 'unicode', getattr(builtins, 'str')))
benv.declare('long', getattr(builtins, 'long', getattr(builtins, 'int')))
benv.declare('xrange', getattr(builtins, 'xrange', getattr(builtins, 'range')))
denv = CompileTimeScope(benv)
return denv
#------------------------------------------------------------------
class SourceDescriptor(object):
"""
A SourceDescriptor should be considered immutable.
"""
filename = None
in_utility_code = False
_file_type = 'pyx'
_escaped_description = None
_cmp_name = ''
def __str__(self):
assert False # To catch all places where a descriptor is used directly as a filename
def set_file_type_from_name(self, filename):
name, ext = os.path.splitext(filename)
self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx'
def is_cython_file(self):
return self._file_type in ('pyx', 'pxd')
def is_python_file(self):
return self._file_type == 'py'
def get_escaped_description(self):
if self._escaped_description is None:
esc_desc = \
self.get_description().encode('ASCII', 'replace').decode("ASCII")
# Use forward slashes on Windows since these paths
# will be used in the #line directives in the C/C++ files.
self._escaped_description = esc_desc.replace('\\', '/')
return self._escaped_description
def __gt__(self, other):
# this is only used to provide some sort of order
try:
return self._cmp_name > other._cmp_name
except AttributeError:
return False
def __lt__(self, other):
# this is only used to provide some sort of order
try:
return self._cmp_name < other._cmp_name
except AttributeError:
return False
def __le__(self, other):
# this is only used to provide some sort of order
try:
return self._cmp_name <= other._cmp_name
except AttributeError:
return False
def __copy__(self):
return self # immutable, no need to copy
def __deepcopy__(self, memo):
return self # immutable, no need to copy
class FileSourceDescriptor(SourceDescriptor):
"""
Represents a code source. A code source is a more generic abstraction
for a "filename" (as sometimes the code doesn't come from a file).
Instances of code sources are passed to Scanner.__init__ as the
optional name argument and will be passed back when asking for
the position()-tuple.
"""
def __init__(self, filename, path_description=None):
filename = Utils.decode_filename(filename)
self.path_description = path_description or filename
self.filename = filename
# Prefer relative paths to current directory (which is most likely the project root) over absolute paths.
workdir = os.path.abspath('.') + os.sep
self.file_path = filename[len(workdir):] if filename.startswith(workdir) else filename
self.set_file_type_from_name(filename)
self._cmp_name = filename
self._lines = {}
def get_lines(self, encoding=None, error_handling=None):
# we cache the lines only the second time this is called, in
# order to save memory when they are only used once
key = (encoding, error_handling)
try:
lines = self._lines[key]
if lines is not None:
return lines
except KeyError:
pass
with Utils.open_source_file(self.filename, encoding=encoding, error_handling=error_handling) as f:
lines = list(f)
if key in self._lines:
self._lines[key] = lines
else:
# do not cache the first access, but remember that we
# already read it once
self._lines[key] = None
return lines
def get_description(self):
try:
return os.path.relpath(self.path_description)
except ValueError:
# path not under current directory => use complete file path
return self.path_description
def get_error_description(self):
path = self.filename
cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
if path.startswith(cwd):
return path[len(cwd):]
return path
def get_filenametable_entry(self):
return self.file_path
def __eq__(self, other):
return isinstance(other, FileSourceDescriptor) and self.filename == other.filename
def __hash__(self):
return hash(self.filename)
def __repr__(self):
return "<FileSourceDescriptor:%s>" % self.filename
class StringSourceDescriptor(SourceDescriptor):
"""
Instances of this class can be used instead of a filenames if the
code originates from a string object.
"""
def __init__(self, name, code):
self.name = name
#self.set_file_type_from_name(name)
self.codelines = [x + "\n" for x in code.split("\n")]
self._cmp_name = name
def get_lines(self, encoding=None, error_handling=None):
if not encoding:
return self.codelines
else:
return [line.encode(encoding, error_handling).decode(encoding)
for line in self.codelines]
def get_description(self):
return self.name
get_error_description = get_description
def get_filenametable_entry(self):
return "<stringsource>"
def __hash__(self):
return id(self)
# Do not hash on the name, an identical string source should be the
# same object (name is often defaulted in other places)
# return hash(self.name)
def __eq__(self, other):
return isinstance(other, StringSourceDescriptor) and self.name == other.name
def __repr__(self):
return "<StringSourceDescriptor:%s>" % self.name
#------------------------------------------------------------------
class PyrexScanner(Scanner):
# context Context Compilation context
# included_files [string] Files included with 'include' statement
# compile_time_env dict Environment for conditional compilation
# compile_time_eval boolean In a true conditional compilation context
# compile_time_expr boolean In a compile-time expression context
# put_back_on_failure list or None If set, this records states so the tentatively_scan
# contextmanager can restore it
def __init__(self, file, filename, parent_scanner=None,
scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None):
Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
if filename.is_python_file():
self.in_python_file = True
keywords = py_reserved_words
else:
self.in_python_file = False
keywords = pyx_reserved_words
self.keywords = {keyword: keyword for keyword in keywords}
self.async_enabled = 0
if parent_scanner:
self.context = parent_scanner.context
self.included_files = parent_scanner.included_files
self.compile_time_env = parent_scanner.compile_time_env
self.compile_time_eval = parent_scanner.compile_time_eval
self.compile_time_expr = parent_scanner.compile_time_expr
if parent_scanner.async_enabled:
self.enter_async()
else:
self.context = context
self.included_files = scope.included_files
self.compile_time_env = initial_compile_time_env()
self.compile_time_eval = 1
self.compile_time_expr = 0
if getattr(context.options, 'compile_time_env', None):
self.compile_time_env.update(context.options.compile_time_env)
self.parse_comments = parse_comments
self.source_encoding = source_encoding
self.trace = trace_scanner
self.indentation_stack = [0]
self.indentation_char = None
self.bracket_nesting_level = 0
self.put_back_on_failure = None
self.begin('INDENT')
self.sy = ''
self.next()
def normalize_ident(self, text):
try:
text.encode('ascii') # really just name.isascii but supports Python 2 and 3
except UnicodeEncodeError:
text = normalize('NFKC', text)
self.produce(IDENT, text)
def commentline(self, text):
if self.parse_comments:
self.produce('commentline', text)
def strip_underscores(self, text, symbol):
self.produce(symbol, text.replace('_', ''))
def current_level(self):
return self.indentation_stack[-1]
def open_bracket_action(self, text):
self.bracket_nesting_level += 1
return text
def close_bracket_action(self, text):
self.bracket_nesting_level -= 1
return text
def newline_action(self, text):
if self.bracket_nesting_level == 0:
self.begin('INDENT')
self.produce('NEWLINE', '')
string_states = {
"'": 'SQ_STRING',
'"': 'DQ_STRING',
"'''": 'TSQ_STRING',
'"""': 'TDQ_STRING'
}
def begin_string_action(self, text):
while text[:1] in any_string_prefix:
text = text[1:]
self.begin(self.string_states[text])
self.produce('BEGIN_STRING')
def end_string_action(self, text):
self.begin('')
self.produce('END_STRING')
def unclosed_string_action(self, text):
self.end_string_action(text)
self.error_at_scanpos("Unclosed string literal")
def indentation_action(self, text):
self.begin('')
# Indentation within brackets should be ignored.
#if self.bracket_nesting_level > 0:
# return
# Check that tabs and spaces are being used consistently.
if text:
c = text[0]
#print "Scanner.indentation_action: indent with", repr(c) ###
if self.indentation_char is None:
self.indentation_char = c
#print "Scanner.indentation_action: setting indent_char to", repr(c)
else:
if self.indentation_char != c:
self.error_at_scanpos("Mixed use of tabs and spaces")
if text.replace(c, "") != "":
self.error_at_scanpos("Mixed use of tabs and spaces")
# Figure out how many indents/dedents to do
current_level = self.current_level()
new_level = len(text)
#print "Changing indent level from", current_level, "to", new_level ###
if new_level == current_level:
return
elif new_level > current_level:
#print "...pushing level", new_level ###
self.indentation_stack.append(new_level)
self.produce('INDENT', '')
else:
while new_level < self.current_level():
#print "...popping level", self.indentation_stack[-1] ###
self.indentation_stack.pop()
self.produce('DEDENT', '')
#print "...current level now", self.current_level() ###
if new_level != self.current_level():
self.error_at_scanpos("Inconsistent indentation")
def eof_action(self, text):
while len(self.indentation_stack) > 1:
self.produce('DEDENT', '')
self.indentation_stack.pop()
self.produce('EOF', '')
def next(self):
try:
sy, systring = self.read()
except UnrecognizedInput:
self.error_at_scanpos("Unrecognized character")
return # just a marker, error() always raises
if sy == IDENT:
if systring in self.keywords:
if systring == u'print' and print_function in self.context.future_directives:
self.keywords.pop('print', None)
elif systring == u'exec' and self.context.language_level >= 3:
self.keywords.pop('exec', None)
else:
sy = self.keywords[systring] # intern
systring = self.context.intern_ustring(systring)
if self.put_back_on_failure is not None:
self.put_back_on_failure.append((sy, systring, self.position()))
self.sy = sy
self.systring = systring
if False: # debug_scanner:
_, line, col = self.position()
if not self.systring or self.sy == self.systring:
t = self.sy
else:
t = "%s %s" % (self.sy, self.systring)
print("--- %3d %2d %s" % (line, col, t))
def peek(self):
saved = self.sy, self.systring
saved_pos = self.position()
self.next()
next = self.sy, self.systring
self.unread(self.sy, self.systring, self.position())
self.sy, self.systring = saved
self.last_token_position_tuple = saved_pos
return next
def put_back(self, sy, systring, pos):
self.unread(self.sy, self.systring, self.last_token_position_tuple)
self.sy = sy
self.systring = systring
self.last_token_position_tuple = pos
def error(self, message, pos=None, fatal=True):
if pos is None:
pos = self.position()
if self.sy == 'INDENT':
error(pos, "Possible inconsistent indentation")
err = error(pos, message)
if fatal: raise err
def error_at_scanpos(self, message):
# Like error(fatal=True), but gets the current scanning position rather than
# the position of the last token read.
pos = self.get_current_scan_pos()
self.error(message, pos, True)
def expect(self, what, message=None):
if self.sy == what:
self.next()
else:
self.expected(what, message)
def expect_keyword(self, what, message=None):
if self.sy == IDENT and self.systring == what:
self.next()
else:
self.expected(what, message)
def expected(self, what, message=None):
if message:
self.error(message)
else:
if self.sy == IDENT:
found = self.systring
else:
found = self.sy
self.error("Expected '%s', found '%s'" % (what, found))
def expect_indent(self):
self.expect('INDENT', "Expected an increase in indentation level")
def expect_dedent(self):
self.expect('DEDENT', "Expected a decrease in indentation level")
def expect_newline(self, message="Expected a newline", ignore_semicolon=False):
# Expect either a newline or end of file
useless_trailing_semicolon = None
if ignore_semicolon and self.sy == ';':
useless_trailing_semicolon = self.position()
self.next()
if self.sy != 'EOF':
self.expect('NEWLINE', message)
if useless_trailing_semicolon is not None:
warning(useless_trailing_semicolon, "useless trailing semicolon")
def enter_async(self):
self.async_enabled += 1
if self.async_enabled == 1:
self.keywords['async'] = 'async'
self.keywords['await'] = 'await'
def exit_async(self):
assert self.async_enabled > 0
self.async_enabled -= 1
if not self.async_enabled:
del self.keywords['await']
del self.keywords['async']
if self.sy in ('async', 'await'):
self.sy, self.systring = IDENT, self.context.intern_ustring(self.sy)
@contextmanager
@cython.locals(scanner=Scanner)
def tentatively_scan(scanner):
errors = hold_errors()
try:
put_back_on_failure = scanner.put_back_on_failure
scanner.put_back_on_failure = []
initial_state = (scanner.sy, scanner.systring, scanner.position())
try:
yield errors
except CompileError as e:
pass
finally:
if errors:
if scanner.put_back_on_failure:
for put_back in reversed(scanner.put_back_on_failure[:-1]):
scanner.put_back(*put_back)
# we need to restore the initial state too
scanner.put_back(*initial_state)
elif put_back_on_failure is not None:
# the outer "tentatively_scan" block that we're in might still
# want to undo this block
put_back_on_failure.extend(scanner.put_back_on_failure)
scanner.put_back_on_failure = put_back_on_failure
finally:
release_errors(ignore=True)