# # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. # Use of this file is governed by the BSD 3-clause license that # can be found in the LICENSE.txt file in the project root. # This implementation of {@link TokenStream} loads tokens from a # {@link TokenSource} on-demand, and places the tokens in a buffer to provide # access to any previous token by index. # #
# This token stream ignores the value of {@link Token#getChannel}. If your
# parser requires the token stream to filter tokens to only those on a
# particular channel, such as {@link Token#DEFAULT_CHANNEL} or
# {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such as
# {@link CommonTokenStream}.
from io import StringIO
from antlr4.Token import Token
from antlr4.error.Errors import IllegalStateException

# need forward declaration
Lexer = None


# this is just to keep meaningful parameter types to Parser
class TokenStream(object):

    pass


# Token stream that keeps every token obtained from its token source in a
# list, so that any previously fetched token can be looked up by index.
#
# NOTE(review): several methods below call self.sync(...) and self.fetch(...),
# but neither is defined in the portion of the class visible here. Confirm
# that they exist in the complete file before relying on this class.
class BufferedTokenStream(TokenStream):
    __slots__ = ('tokenSource', 'tokens', 'index', 'fetchedEOF')

    # Create a stream reading from the given token source. Nothing is
    # fetched here; the first fetch happens through lazyInit().
    #
    # @param tokenSource The object tokens for this stream are fetched from.
    def __init__(self, tokenSource:Lexer):
        # The token source from which tokens for this stream are fetched.
        self.tokenSource = tokenSource

        # All tokens fetched from the token source so far. The list is
        # considered a complete view of the input once fetchedEOF is True.
        self.tokens = []

        # The index into self.tokens of the current token (the next token to
        # consume).
        #
        # This field is -1 when the stream is first constructed or after
        # setTokenSource() is called, indicating that the first token has not
        # yet been fetched from the token source; lazyInit() keys off this
        # value.
        self.index = -1

        # Indicates whether the Token.EOF token has been fetched from
        # tokenSource and added to self.tokens.
        #
        # It is assigned here (with the same initial value setTokenSource()
        # uses) because the attribute is declared in __slots__: reading a slot
        # that was never assigned raises AttributeError.
        self.fetchedEOF = False

    # Hook that lets derived classes adjust the target token index of an
    # operation that changes the stream position. This implementation
    # returns i unchanged.
    #
    # For example, {@link CommonTokenStream} overrides this method to ensure
    # that the seek target is always an on-channel token.
    #
    # @param i The target token index.
    # @return The adjusted target token index.
    def adjustSeekIndex(self, i:int):
        return i

    # Run setup() if the stream has not been positioned yet (index is still
    # the initial -1); otherwise do nothing.
    def lazyInit(self):
        if self.index == -1:
            self.setup()

    # Request token 0 through sync() and place the stream at the adjusted
    # index for position 0.
    def setup(self):
        self.sync(0)
        self.index = self.adjustSeekIndex(0)

    # Reset this token stream by setting its token source. The buffer is
    # emptied and the stream returns to its not-yet-initialized state.
    #
    # @param tokenSource The new token source.
    def setTokenSource(self, tokenSource:Lexer):
        self.tokenSource = tokenSource
        self.tokens = []
        self.index = -1
        self.fetchedEOF = False

    # Given a starting index, return the index of the next token on channel.
    # Return i if tokens[i] is on channel. Return the index of the EOF token
    # if there are no tokens on channel between i and EOF.
    #
    # @param i The index to start searching from (inclusive).
    # @param channel The channel number to look for.
    # @return The index of the matching token, or of the EOF token.
    def nextTokenOnChannel(self, i:int, channel:int):
        self.sync(i)
        if i >= len(self.tokens):
            # Still past the end of the buffer after sync(): answer with the
            # last buffered index (presumably the EOF token - confirm against
            # sync()/fetch()).
            return len(self.tokens) - 1
        token = self.tokens[i]
        while token.channel != channel:
            if token.type == Token.EOF:
                return i
            i += 1
            # Make sure the next position is buffered before reading it.
            self.sync(i)
            token = self.tokens[i]
        return i

    # Given a starting index, return the index of the previous token on
    # channel. Return i if tokens[i] is on channel. Return -1 if there are no
    # tokens on channel between i and 0.
    #
    # @param i The index to start searching from (inclusive), walking left.
    # @param channel The channel number to look for.
    # @return The index of the matching token, or -1.
    def previousTokenOnChannel(self, i:int, channel:int):
        while i >= 0 and self.tokens[i].channel != channel:
            i -= 1
        return i

    # Collect all tokens on specified channel to the right of
    # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
    # EOF. If channel is -1, find any non default channel token.
    #
    # @param tokenIndex Index of the token to look to the right of.
    # @param channel Channel to collect, or -1 for any non-default channel.
    # @return A list of the matching tokens, or None when there are none.
    # @raises Exception if tokenIndex is outside the buffered tokens.
    def getHiddenTokensToRight(self, tokenIndex:int, channel:int=-1):
        self.lazyInit()
        if tokenIndex < 0 or tokenIndex >= len(self.tokens):
            raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
        # Imported here rather than at module level; the module-level name
        # Lexer is only a placeholder set to None.
        from antlr4.Lexer import Lexer
        nextOnChannel = self.nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL)
        from_ = tokenIndex + 1
        # if none onchannel to right, nextOnChannel=-1 so set to = last token
        # NOTE(review): nextTokenOnChannel() as defined in this class does not
        # return -1 for a non-empty buffer; the branch is kept in case an
        # override does.
        if nextOnChannel == -1:
            to = len(self.tokens) - 1
        else:
            to = nextOnChannel
        return self.filterForChannel(from_, to, channel)

    # Collect all tokens on specified channel to the left of
    # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
    # If channel is -1, find any non default channel token.
    #
    # @param tokenIndex Index of the token to look to the left of.
    # @param channel Channel to collect, or -1 for any non-default channel.
    # @return A list of the matching tokens, or None when there are none.
    # @raises Exception if tokenIndex is outside the buffered tokens.
    def getHiddenTokensToLeft(self, tokenIndex:int, channel:int=-1):
        self.lazyInit()
        if tokenIndex < 0 or tokenIndex >= len(self.tokens):
            raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
        # Imported here rather than at module level; the module-level name
        # Lexer is only a placeholder set to None.
        from antlr4.Lexer import Lexer
        prevOnChannel = self.previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL)
        if prevOnChannel == tokenIndex - 1:
            # The immediate left neighbour is already on the default channel,
            # so nothing lies in between.
            return None
        # if none on channel to left, prevOnChannel=-1 then from=0
        from_ = prevOnChannel + 1
        to = tokenIndex - 1
        return self.filterForChannel(from_, to, channel)

    # Return the buffered tokens with indexes left..right (both inclusive)
    # that are on the requested channel.
    #
    # @param left First index to examine.
    # @param right Last index to examine (inclusive).
    # @param channel Channel to keep, or -1 to keep every token that is not
    #                on Lexer.DEFAULT_TOKEN_CHANNEL.
    # @return A list of the matching tokens, or None when nothing matched.
    def filterForChannel(self, left:int, right:int, channel:int):
        anyNonDefault = channel == -1
        if anyNonDefault:
            # Resolve the default channel once instead of re-importing on
            # every loop iteration.
            from antlr4.Lexer import Lexer
            defaultChannel = Lexer.DEFAULT_TOKEN_CHANNEL
        hidden = []
        for i in range(left, right + 1):
            t = self.tokens[i]
            if anyNonDefault:
                if t.channel != defaultChannel:
                    hidden.append(t)
            elif t.channel == channel:
                hidden.append(t)
        # An empty result is reported as None, not as an empty list.
        return hidden or None

    # Return whatever the token source reports as its source name.
    def getSourceName(self):
        return self.tokenSource.getSourceName()

    # Get the text of all tokens in this buffer, or of the tokens between
    # start and stop (both inclusive). The whole input is buffered first via
    # fill().
    #
    # @param start First token index, or a Token whose tokenIndex is used;
    #              None means 0.
    # @param stop Last token index, or a Token whose tokenIndex is used;
    #             None or an index past the buffer means the last token.
    # @return The concatenated token text, stopping before an EOF token;
    #         "" when the range is negative or empty.
    def getText(self, start:int=None, stop:int=None):
        self.lazyInit()
        self.fill()
        if isinstance(start, Token):
            start = start.tokenIndex
        elif start is None:
            start = 0
        if isinstance(stop, Token):
            stop = stop.tokenIndex
        elif stop is None or stop >= len(self.tokens):
            stop = len(self.tokens) - 1
        if start < 0 or stop < 0 or stop < start:
            return ""
        with StringIO() as buf:
            for i in range(start, stop + 1):
                t = self.tokens[i]
                if t.type == Token.EOF:
                    # The EOF token contributes no text and ends the range.
                    break
                buf.write(t.text)
            return buf.getvalue()

    # Get all tokens from lexer until EOF. Tokens are requested in batches
    # of 1000; a batch that comes back short ends the loop.
    def fill(self):
        self.lazyInit()
        while self.fetch(1000) == 1000:
            pass