# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2009,
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions.  The grammar is specified by supplying the BNF inside
# Python documentation strings.  The inspiration for this technique was
# borrowed from John Aycock's Spark parsing system.  PLY might be viewed as a
# cross between Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.
LALR(1) 48*4882a593Smuzhiyun# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), 49*4882a593Smuzhiyun# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, 50*4882a593Smuzhiyun# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced 51*4882a593Smuzhiyun# by the more efficient DeRemer and Pennello algorithm. 52*4882a593Smuzhiyun# 53*4882a593Smuzhiyun# :::::::: WARNING ::::::: 54*4882a593Smuzhiyun# 55*4882a593Smuzhiyun# Construction of LR parsing tables is fairly complicated and expensive. 56*4882a593Smuzhiyun# To make this module run fast, a *LOT* of work has been put into 57*4882a593Smuzhiyun# optimization---often at the expensive of readability and what might 58*4882a593Smuzhiyun# consider to be good Python "coding style." Modify the code at your 59*4882a593Smuzhiyun# own risk! 60*4882a593Smuzhiyun# ---------------------------------------------------------------------------- 61*4882a593Smuzhiyun 62*4882a593Smuzhiyun__version__ = "3.3" 63*4882a593Smuzhiyun__tabversion__ = "3.2" # Table version 64*4882a593Smuzhiyun 65*4882a593Smuzhiyun#----------------------------------------------------------------------------- 66*4882a593Smuzhiyun# === User configurable parameters === 67*4882a593Smuzhiyun# 68*4882a593Smuzhiyun# Change these to modify the default behavior of yacc (if you wish) 69*4882a593Smuzhiyun#----------------------------------------------------------------------------- 70*4882a593Smuzhiyun 71*4882a593Smuzhiyunyaccdebug = 0 # Debugging mode. 
If set, yacc generates a 72*4882a593Smuzhiyun # a 'parser.out' file in the current directory 73*4882a593Smuzhiyun 74*4882a593Smuzhiyundebug_file = 'parser.out' # Default name of the debugging file 75*4882a593Smuzhiyuntab_module = 'parsetab' # Default name of the table module 76*4882a593Smuzhiyundefault_lr = 'LALR' # Default LR table generation method 77*4882a593Smuzhiyun 78*4882a593Smuzhiyunerror_count = 3 # Number of symbols that must be shifted to leave recovery mode 79*4882a593Smuzhiyun 80*4882a593Smuzhiyunyaccdevel = 0 # Set to True if developing yacc. This turns off optimized 81*4882a593Smuzhiyun # implementations of certain functions. 82*4882a593Smuzhiyun 83*4882a593Smuzhiyunresultlimit = 40 # Size limit of results when running in debug mode. 84*4882a593Smuzhiyun 85*4882a593Smuzhiyunpickle_protocol = 0 # Protocol to use when writing pickle files 86*4882a593Smuzhiyun 87*4882a593Smuzhiyunimport re, types, sys, os.path 88*4882a593Smuzhiyun 89*4882a593Smuzhiyun# Compatibility function for python 2.6/3.0 90*4882a593Smuzhiyunif sys.version_info[0] < 3: 91*4882a593Smuzhiyun def func_code(f): 92*4882a593Smuzhiyun return f.func_code 93*4882a593Smuzhiyunelse: 94*4882a593Smuzhiyun def func_code(f): 95*4882a593Smuzhiyun return f.__code__ 96*4882a593Smuzhiyun 97*4882a593Smuzhiyun# Compatibility 98*4882a593Smuzhiyuntry: 99*4882a593Smuzhiyun MAXINT = sys.maxint 100*4882a593Smuzhiyunexcept AttributeError: 101*4882a593Smuzhiyun MAXINT = sys.maxsize 102*4882a593Smuzhiyun 103*4882a593Smuzhiyun# Python 2.x/3.0 compatibility. 104*4882a593Smuzhiyundef load_ply_lex(): 105*4882a593Smuzhiyun if sys.version_info[0] < 3: 106*4882a593Smuzhiyun import lex 107*4882a593Smuzhiyun else: 108*4882a593Smuzhiyun import ply.lex as lex 109*4882a593Smuzhiyun return lex 110*4882a593Smuzhiyun 111*4882a593Smuzhiyun# This object is a stand-in for a logging object created by the 112*4882a593Smuzhiyun# logging module. 
# PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    """Minimal logger mimicking the logging-module interface.

    Writes %-formatted messages to the file-like object *f*.  debug/info
    emit the message verbatim; warning/error add a level prefix.
    """
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + "\n")
    info = debug          # info is an alias for debug

    def warning(self, msg, *args, **kwargs):
        self.f.write("WARNING: " + (msg % args) + "\n")

    def error(self, msg, *args, **kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug      # critical is an alias for debug

# Null logger is used when no output is generated.  Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        # Any attribute access returns the logger itself, so arbitrary
        # logger-style call chains are silently absorbed.
        return self
    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

def format_result(r):
    """Format the result message the parser produces in debug mode.

    Multi-line reprs are flattened, and anything longer than the module
    global *resultlimit* is truncated with a trailing " ...".
    """
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + " ..."
    result = "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str)
    return result

def format_stack_entry(r):
    """Format stack entries when the parser is running in debug mode.

    Short (< 16 char) reprs are shown verbatim; anything else collapses
    to a "<type @ address>" placeholder.
    """
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return "<%s @ 0x%x>" % (type(r).__name__, id(r))

#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type
    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s         # List of YaccSymbols for the rule being reduced
        self.stack = stack     # Reference to the parser's symbol stack
        self.lexer = None      # Lexer in use (filled in by the parse engine)
        self.parser = None     # Owning parser (filled in by the parse engine)

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [self[i] for i in range(*(n.indices(len(self.slice))))]
        if n >= 0:
            return self.slice[n].value
        else:
            # Negative indices reach back into the parser's symbol stack
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 slicing protocol (ignored on Python 3)
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        """Return the line number of item n, or 0 if not defined."""
        return getattr(self.slice[n], "lineno", 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        """Return (startline, endline) for item n."""
        startline = getattr(self.slice[n], "lineno", 0)
        endline = getattr(self.slice[n], "endlineno", startline)
        return startline, endline

    def lexpos(self, n):
        """Return the lex position of item n, or 0 if not defined."""
        return getattr(self.slice[n], "lexpos", 0)

    def lexspan(self, n):
        """Return (lexpos, endlexpos) for item n."""
        startpos = getattr(self.slice[n], "lexpos", 0)
        endpos = getattr(self.slice[n], "endlexpos", startpos)
        return startpos, endpos

    def error(self):
        # Grammar rules call this to force entry into error recovery
        raise SyntaxError
# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions   # Grammar production list
        self.action = lrtab.lr_action             # LR action table
        self.goto = lrtab.lr_goto                 # LR goto table
        self.errorfunc = errorf                   # User-supplied p_error() callback

    def errok(self):
        # Called (possibly from a user error handler) to signal that
        # error recovery succeeded.
        self.errorok = 1

    def restart(self):
        # Reset the parser to its start state, discarding both stacks.
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self, input=None, lexer=None, debug=0, tracking=0, tokenfunc=None):
        """Parse *input*, dispatching to the appropriate engine variant.

        The three variants are behaviorally identical except for
        debug/tracking instrumentation.
        """
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.
For the non-debugging version, 275*4882a593Smuzhiyun # copy this code to a method parseopt() and delete all of the sections 276*4882a593Smuzhiyun # enclosed in: 277*4882a593Smuzhiyun # 278*4882a593Smuzhiyun # #--! DEBUG 279*4882a593Smuzhiyun # statements 280*4882a593Smuzhiyun # #--! DEBUG 281*4882a593Smuzhiyun # 282*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 283*4882a593Smuzhiyun 284*4882a593Smuzhiyun def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): 285*4882a593Smuzhiyun lookahead = None # Current lookahead symbol 286*4882a593Smuzhiyun lookaheadstack = [ ] # Stack of lookahead symbols 287*4882a593Smuzhiyun actions = self.action # Local reference to action table (to avoid lookup on self.) 288*4882a593Smuzhiyun goto = self.goto # Local reference to goto table (to avoid lookup on self.) 289*4882a593Smuzhiyun prod = self.productions # Local reference to production list (to avoid lookup on self.) 290*4882a593Smuzhiyun pslice = YaccProduction(None) # Production object passed to grammar rules 291*4882a593Smuzhiyun errorcount = 0 # Used during error recovery 292*4882a593Smuzhiyun 293*4882a593Smuzhiyun # --! DEBUG 294*4882a593Smuzhiyun debug.info("PLY: PARSE DEBUG START") 295*4882a593Smuzhiyun # --! 
DEBUG 296*4882a593Smuzhiyun 297*4882a593Smuzhiyun # If no lexer was given, we will try to use the lex module 298*4882a593Smuzhiyun if not lexer: 299*4882a593Smuzhiyun lex = load_ply_lex() 300*4882a593Smuzhiyun lexer = lex.lexer 301*4882a593Smuzhiyun 302*4882a593Smuzhiyun # Set up the lexer and parser objects on pslice 303*4882a593Smuzhiyun pslice.lexer = lexer 304*4882a593Smuzhiyun pslice.parser = self 305*4882a593Smuzhiyun 306*4882a593Smuzhiyun # If input was supplied, pass to lexer 307*4882a593Smuzhiyun if input is not None: 308*4882a593Smuzhiyun lexer.input(input) 309*4882a593Smuzhiyun 310*4882a593Smuzhiyun if tokenfunc is None: 311*4882a593Smuzhiyun # Tokenize function 312*4882a593Smuzhiyun get_token = lexer.token 313*4882a593Smuzhiyun else: 314*4882a593Smuzhiyun get_token = tokenfunc 315*4882a593Smuzhiyun 316*4882a593Smuzhiyun # Set up the state and symbol stacks 317*4882a593Smuzhiyun 318*4882a593Smuzhiyun statestack = [ ] # Stack of parsing states 319*4882a593Smuzhiyun self.statestack = statestack 320*4882a593Smuzhiyun symstack = [ ] # Stack of grammar symbols 321*4882a593Smuzhiyun self.symstack = symstack 322*4882a593Smuzhiyun 323*4882a593Smuzhiyun pslice.stack = symstack # Put in the production 324*4882a593Smuzhiyun errtoken = None # Err token 325*4882a593Smuzhiyun 326*4882a593Smuzhiyun # The start state is assumed to be (0,$end) 327*4882a593Smuzhiyun 328*4882a593Smuzhiyun statestack.append(0) 329*4882a593Smuzhiyun sym = YaccSymbol() 330*4882a593Smuzhiyun sym.type = "$end" 331*4882a593Smuzhiyun symstack.append(sym) 332*4882a593Smuzhiyun state = 0 333*4882a593Smuzhiyun while 1: 334*4882a593Smuzhiyun # Get the next symbol on the input. If a lookahead symbol 335*4882a593Smuzhiyun # is already set, we just use that. Otherwise, we'll pull 336*4882a593Smuzhiyun # the next token off of the lookaheadstack or from the lexer 337*4882a593Smuzhiyun 338*4882a593Smuzhiyun # --! 
DEBUG 339*4882a593Smuzhiyun debug.debug('') 340*4882a593Smuzhiyun debug.debug('State : %s', state) 341*4882a593Smuzhiyun # --! DEBUG 342*4882a593Smuzhiyun 343*4882a593Smuzhiyun if not lookahead: 344*4882a593Smuzhiyun if not lookaheadstack: 345*4882a593Smuzhiyun lookahead = get_token() # Get the next token 346*4882a593Smuzhiyun else: 347*4882a593Smuzhiyun lookahead = lookaheadstack.pop() 348*4882a593Smuzhiyun if not lookahead: 349*4882a593Smuzhiyun lookahead = YaccSymbol() 350*4882a593Smuzhiyun lookahead.type = "$end" 351*4882a593Smuzhiyun 352*4882a593Smuzhiyun # --! DEBUG 353*4882a593Smuzhiyun debug.debug('Stack : %s', 354*4882a593Smuzhiyun ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 355*4882a593Smuzhiyun # --! DEBUG 356*4882a593Smuzhiyun 357*4882a593Smuzhiyun # Check the action table 358*4882a593Smuzhiyun ltype = lookahead.type 359*4882a593Smuzhiyun t = actions[state].get(ltype) 360*4882a593Smuzhiyun 361*4882a593Smuzhiyun if t is not None: 362*4882a593Smuzhiyun if t > 0: 363*4882a593Smuzhiyun # shift a symbol on the stack 364*4882a593Smuzhiyun statestack.append(t) 365*4882a593Smuzhiyun state = t 366*4882a593Smuzhiyun 367*4882a593Smuzhiyun # --! DEBUG 368*4882a593Smuzhiyun debug.debug("Action : Shift and goto state %s", t) 369*4882a593Smuzhiyun # --! 
DEBUG 370*4882a593Smuzhiyun 371*4882a593Smuzhiyun symstack.append(lookahead) 372*4882a593Smuzhiyun lookahead = None 373*4882a593Smuzhiyun 374*4882a593Smuzhiyun # Decrease error count on successful shift 375*4882a593Smuzhiyun if errorcount: errorcount -=1 376*4882a593Smuzhiyun continue 377*4882a593Smuzhiyun 378*4882a593Smuzhiyun if t < 0: 379*4882a593Smuzhiyun # reduce a symbol on the stack, emit a production 380*4882a593Smuzhiyun p = prod[-t] 381*4882a593Smuzhiyun pname = p.name 382*4882a593Smuzhiyun plen = p.len 383*4882a593Smuzhiyun 384*4882a593Smuzhiyun # Get production function 385*4882a593Smuzhiyun sym = YaccSymbol() 386*4882a593Smuzhiyun sym.type = pname # Production name 387*4882a593Smuzhiyun sym.value = None 388*4882a593Smuzhiyun 389*4882a593Smuzhiyun # --! DEBUG 390*4882a593Smuzhiyun if plen: 391*4882a593Smuzhiyun debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) 392*4882a593Smuzhiyun else: 393*4882a593Smuzhiyun debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) 394*4882a593Smuzhiyun 395*4882a593Smuzhiyun # --! DEBUG 396*4882a593Smuzhiyun 397*4882a593Smuzhiyun if plen: 398*4882a593Smuzhiyun targ = symstack[-plen-1:] 399*4882a593Smuzhiyun targ[0] = sym 400*4882a593Smuzhiyun 401*4882a593Smuzhiyun # --! TRACKING 402*4882a593Smuzhiyun if tracking: 403*4882a593Smuzhiyun t1 = targ[1] 404*4882a593Smuzhiyun sym.lineno = t1.lineno 405*4882a593Smuzhiyun sym.lexpos = t1.lexpos 406*4882a593Smuzhiyun t1 = targ[-1] 407*4882a593Smuzhiyun sym.endlineno = getattr(t1,"endlineno",t1.lineno) 408*4882a593Smuzhiyun sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) 409*4882a593Smuzhiyun 410*4882a593Smuzhiyun # --! TRACKING 411*4882a593Smuzhiyun 412*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 413*4882a593Smuzhiyun # The code enclosed in this section is duplicated 414*4882a593Smuzhiyun # below as a performance optimization. 
Make sure 415*4882a593Smuzhiyun # changes get made in both locations. 416*4882a593Smuzhiyun 417*4882a593Smuzhiyun pslice.slice = targ 418*4882a593Smuzhiyun 419*4882a593Smuzhiyun try: 420*4882a593Smuzhiyun # Call the grammar rule with our special slice object 421*4882a593Smuzhiyun del symstack[-plen:] 422*4882a593Smuzhiyun del statestack[-plen:] 423*4882a593Smuzhiyun p.callable(pslice) 424*4882a593Smuzhiyun # --! DEBUG 425*4882a593Smuzhiyun debug.info("Result : %s", format_result(pslice[0])) 426*4882a593Smuzhiyun # --! DEBUG 427*4882a593Smuzhiyun symstack.append(sym) 428*4882a593Smuzhiyun state = goto[statestack[-1]][pname] 429*4882a593Smuzhiyun statestack.append(state) 430*4882a593Smuzhiyun except SyntaxError: 431*4882a593Smuzhiyun # If an error was set. Enter error recovery state 432*4882a593Smuzhiyun lookaheadstack.append(lookahead) 433*4882a593Smuzhiyun symstack.pop() 434*4882a593Smuzhiyun statestack.pop() 435*4882a593Smuzhiyun state = statestack[-1] 436*4882a593Smuzhiyun sym.type = 'error' 437*4882a593Smuzhiyun lookahead = sym 438*4882a593Smuzhiyun errorcount = error_count 439*4882a593Smuzhiyun self.errorok = 0 440*4882a593Smuzhiyun continue 441*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 442*4882a593Smuzhiyun 443*4882a593Smuzhiyun else: 444*4882a593Smuzhiyun 445*4882a593Smuzhiyun # --! TRACKING 446*4882a593Smuzhiyun if tracking: 447*4882a593Smuzhiyun sym.lineno = lexer.lineno 448*4882a593Smuzhiyun sym.lexpos = lexer.lexpos 449*4882a593Smuzhiyun # --! TRACKING 450*4882a593Smuzhiyun 451*4882a593Smuzhiyun targ = [ sym ] 452*4882a593Smuzhiyun 453*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 454*4882a593Smuzhiyun # The code enclosed in this section is duplicated 455*4882a593Smuzhiyun # above as a performance optimization. Make sure 456*4882a593Smuzhiyun # changes get made in both locations. 
457*4882a593Smuzhiyun 458*4882a593Smuzhiyun pslice.slice = targ 459*4882a593Smuzhiyun 460*4882a593Smuzhiyun try: 461*4882a593Smuzhiyun # Call the grammar rule with our special slice object 462*4882a593Smuzhiyun p.callable(pslice) 463*4882a593Smuzhiyun # --! DEBUG 464*4882a593Smuzhiyun debug.info("Result : %s", format_result(pslice[0])) 465*4882a593Smuzhiyun # --! DEBUG 466*4882a593Smuzhiyun symstack.append(sym) 467*4882a593Smuzhiyun state = goto[statestack[-1]][pname] 468*4882a593Smuzhiyun statestack.append(state) 469*4882a593Smuzhiyun except SyntaxError: 470*4882a593Smuzhiyun # If an error was set. Enter error recovery state 471*4882a593Smuzhiyun lookaheadstack.append(lookahead) 472*4882a593Smuzhiyun symstack.pop() 473*4882a593Smuzhiyun statestack.pop() 474*4882a593Smuzhiyun state = statestack[-1] 475*4882a593Smuzhiyun sym.type = 'error' 476*4882a593Smuzhiyun lookahead = sym 477*4882a593Smuzhiyun errorcount = error_count 478*4882a593Smuzhiyun self.errorok = 0 479*4882a593Smuzhiyun continue 480*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 481*4882a593Smuzhiyun 482*4882a593Smuzhiyun if t == 0: 483*4882a593Smuzhiyun n = symstack[-1] 484*4882a593Smuzhiyun result = getattr(n,"value",None) 485*4882a593Smuzhiyun # --! DEBUG 486*4882a593Smuzhiyun debug.info("Done : Returning %s", format_result(result)) 487*4882a593Smuzhiyun debug.info("PLY: PARSE DEBUG END") 488*4882a593Smuzhiyun # --! DEBUG 489*4882a593Smuzhiyun return result 490*4882a593Smuzhiyun 491*4882a593Smuzhiyun if t is None: 492*4882a593Smuzhiyun 493*4882a593Smuzhiyun # --! DEBUG 494*4882a593Smuzhiyun debug.error('Error : %s', 495*4882a593Smuzhiyun ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 496*4882a593Smuzhiyun # --! DEBUG 497*4882a593Smuzhiyun 498*4882a593Smuzhiyun # We have some kind of parsing error here. 
To handle 499*4882a593Smuzhiyun # this, we are going to push the current token onto 500*4882a593Smuzhiyun # the tokenstack and replace it with an 'error' token. 501*4882a593Smuzhiyun # If there are any synchronization rules, they may 502*4882a593Smuzhiyun # catch it. 503*4882a593Smuzhiyun # 504*4882a593Smuzhiyun # In addition to pushing the error token, we call call 505*4882a593Smuzhiyun # the user defined p_error() function if this is the 506*4882a593Smuzhiyun # first syntax error. This function is only called if 507*4882a593Smuzhiyun # errorcount == 0. 508*4882a593Smuzhiyun if errorcount == 0 or self.errorok: 509*4882a593Smuzhiyun errorcount = error_count 510*4882a593Smuzhiyun self.errorok = 0 511*4882a593Smuzhiyun errtoken = lookahead 512*4882a593Smuzhiyun if errtoken.type == "$end": 513*4882a593Smuzhiyun errtoken = None # End of file! 514*4882a593Smuzhiyun if self.errorfunc: 515*4882a593Smuzhiyun global errok,token,restart 516*4882a593Smuzhiyun errok = self.errok # Set some special functions available in error recovery 517*4882a593Smuzhiyun token = get_token 518*4882a593Smuzhiyun restart = self.restart 519*4882a593Smuzhiyun if errtoken and not hasattr(errtoken,'lexer'): 520*4882a593Smuzhiyun errtoken.lexer = lexer 521*4882a593Smuzhiyun tok = self.errorfunc(errtoken) 522*4882a593Smuzhiyun del errok, token, restart # Delete special functions 523*4882a593Smuzhiyun 524*4882a593Smuzhiyun if self.errorok: 525*4882a593Smuzhiyun # User must have done some kind of panic 526*4882a593Smuzhiyun # mode recovery on their own. 
The 527*4882a593Smuzhiyun # returned token is the next lookahead 528*4882a593Smuzhiyun lookahead = tok 529*4882a593Smuzhiyun errtoken = None 530*4882a593Smuzhiyun continue 531*4882a593Smuzhiyun else: 532*4882a593Smuzhiyun if errtoken: 533*4882a593Smuzhiyun if hasattr(errtoken,"lineno"): lineno = lookahead.lineno 534*4882a593Smuzhiyun else: lineno = 0 535*4882a593Smuzhiyun if lineno: 536*4882a593Smuzhiyun sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) 537*4882a593Smuzhiyun else: 538*4882a593Smuzhiyun sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) 539*4882a593Smuzhiyun else: 540*4882a593Smuzhiyun sys.stderr.write("yacc: Parse error in input. EOF\n") 541*4882a593Smuzhiyun return 542*4882a593Smuzhiyun 543*4882a593Smuzhiyun else: 544*4882a593Smuzhiyun errorcount = error_count 545*4882a593Smuzhiyun 546*4882a593Smuzhiyun # case 1: the statestack only has 1 entry on it. If we're in this state, the 547*4882a593Smuzhiyun # entire parse has been rolled back and we're completely hosed. The token is 548*4882a593Smuzhiyun # discarded and we just keep going. 549*4882a593Smuzhiyun 550*4882a593Smuzhiyun if len(statestack) <= 1 and lookahead.type != "$end": 551*4882a593Smuzhiyun lookahead = None 552*4882a593Smuzhiyun errtoken = None 553*4882a593Smuzhiyun state = 0 554*4882a593Smuzhiyun # Nuke the pushback stack 555*4882a593Smuzhiyun del lookaheadstack[:] 556*4882a593Smuzhiyun continue 557*4882a593Smuzhiyun 558*4882a593Smuzhiyun # case 2: the statestack has a couple of entries on it, but we're 559*4882a593Smuzhiyun # at the end of the file. nuke the top entry and generate an error token 560*4882a593Smuzhiyun 561*4882a593Smuzhiyun # Start nuking entries on the stack 562*4882a593Smuzhiyun if lookahead.type == "$end": 563*4882a593Smuzhiyun # Whoa. We're really hosed here. 
Bail out 564*4882a593Smuzhiyun return 565*4882a593Smuzhiyun 566*4882a593Smuzhiyun if lookahead.type != 'error': 567*4882a593Smuzhiyun sym = symstack[-1] 568*4882a593Smuzhiyun if sym.type == 'error': 569*4882a593Smuzhiyun # Hmmm. Error is on top of stack, we'll just nuke input 570*4882a593Smuzhiyun # symbol and continue 571*4882a593Smuzhiyun lookahead = None 572*4882a593Smuzhiyun continue 573*4882a593Smuzhiyun t = YaccSymbol() 574*4882a593Smuzhiyun t.type = 'error' 575*4882a593Smuzhiyun if hasattr(lookahead,"lineno"): 576*4882a593Smuzhiyun t.lineno = lookahead.lineno 577*4882a593Smuzhiyun t.value = lookahead 578*4882a593Smuzhiyun lookaheadstack.append(lookahead) 579*4882a593Smuzhiyun lookahead = t 580*4882a593Smuzhiyun else: 581*4882a593Smuzhiyun symstack.pop() 582*4882a593Smuzhiyun statestack.pop() 583*4882a593Smuzhiyun state = statestack[-1] # Potential bug fix 584*4882a593Smuzhiyun 585*4882a593Smuzhiyun continue 586*4882a593Smuzhiyun 587*4882a593Smuzhiyun # Call an error function here 588*4882a593Smuzhiyun raise RuntimeError("yacc: internal parser error!!!\n") 589*4882a593Smuzhiyun 590*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 591*4882a593Smuzhiyun # parseopt(). 592*4882a593Smuzhiyun # 593*4882a593Smuzhiyun # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. 594*4882a593Smuzhiyun # Edit the debug version above, then copy any modifications to the method 595*4882a593Smuzhiyun # below while removing #--! DEBUG sections. 596*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
597*4882a593Smuzhiyun 598*4882a593Smuzhiyun 599*4882a593Smuzhiyun def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): 600*4882a593Smuzhiyun lookahead = None # Current lookahead symbol 601*4882a593Smuzhiyun lookaheadstack = [ ] # Stack of lookahead symbols 602*4882a593Smuzhiyun actions = self.action # Local reference to action table (to avoid lookup on self.) 603*4882a593Smuzhiyun goto = self.goto # Local reference to goto table (to avoid lookup on self.) 604*4882a593Smuzhiyun prod = self.productions # Local reference to production list (to avoid lookup on self.) 605*4882a593Smuzhiyun pslice = YaccProduction(None) # Production object passed to grammar rules 606*4882a593Smuzhiyun errorcount = 0 # Used during error recovery 607*4882a593Smuzhiyun 608*4882a593Smuzhiyun # If no lexer was given, we will try to use the lex module 609*4882a593Smuzhiyun if not lexer: 610*4882a593Smuzhiyun lex = load_ply_lex() 611*4882a593Smuzhiyun lexer = lex.lexer 612*4882a593Smuzhiyun 613*4882a593Smuzhiyun # Set up the lexer and parser objects on pslice 614*4882a593Smuzhiyun pslice.lexer = lexer 615*4882a593Smuzhiyun pslice.parser = self 616*4882a593Smuzhiyun 617*4882a593Smuzhiyun # If input was supplied, pass to lexer 618*4882a593Smuzhiyun if input is not None: 619*4882a593Smuzhiyun lexer.input(input) 620*4882a593Smuzhiyun 621*4882a593Smuzhiyun if tokenfunc is None: 622*4882a593Smuzhiyun # Tokenize function 623*4882a593Smuzhiyun get_token = lexer.token 624*4882a593Smuzhiyun else: 625*4882a593Smuzhiyun get_token = tokenfunc 626*4882a593Smuzhiyun 627*4882a593Smuzhiyun # Set up the state and symbol stacks 628*4882a593Smuzhiyun 629*4882a593Smuzhiyun statestack = [ ] # Stack of parsing states 630*4882a593Smuzhiyun self.statestack = statestack 631*4882a593Smuzhiyun symstack = [ ] # Stack of grammar symbols 632*4882a593Smuzhiyun self.symstack = symstack 633*4882a593Smuzhiyun 634*4882a593Smuzhiyun pslice.stack = symstack # Put in the production 635*4882a593Smuzhiyun 
errtoken = None # Err token 636*4882a593Smuzhiyun 637*4882a593Smuzhiyun # The start state is assumed to be (0,$end) 638*4882a593Smuzhiyun 639*4882a593Smuzhiyun statestack.append(0) 640*4882a593Smuzhiyun sym = YaccSymbol() 641*4882a593Smuzhiyun sym.type = '$end' 642*4882a593Smuzhiyun symstack.append(sym) 643*4882a593Smuzhiyun state = 0 644*4882a593Smuzhiyun while 1: 645*4882a593Smuzhiyun # Get the next symbol on the input. If a lookahead symbol 646*4882a593Smuzhiyun # is already set, we just use that. Otherwise, we'll pull 647*4882a593Smuzhiyun # the next token off of the lookaheadstack or from the lexer 648*4882a593Smuzhiyun 649*4882a593Smuzhiyun if not lookahead: 650*4882a593Smuzhiyun if not lookaheadstack: 651*4882a593Smuzhiyun lookahead = get_token() # Get the next token 652*4882a593Smuzhiyun else: 653*4882a593Smuzhiyun lookahead = lookaheadstack.pop() 654*4882a593Smuzhiyun if not lookahead: 655*4882a593Smuzhiyun lookahead = YaccSymbol() 656*4882a593Smuzhiyun lookahead.type = '$end' 657*4882a593Smuzhiyun 658*4882a593Smuzhiyun # Check the action table 659*4882a593Smuzhiyun ltype = lookahead.type 660*4882a593Smuzhiyun t = actions[state].get(ltype) 661*4882a593Smuzhiyun 662*4882a593Smuzhiyun if t is not None: 663*4882a593Smuzhiyun if t > 0: 664*4882a593Smuzhiyun # shift a symbol on the stack 665*4882a593Smuzhiyun statestack.append(t) 666*4882a593Smuzhiyun state = t 667*4882a593Smuzhiyun 668*4882a593Smuzhiyun symstack.append(lookahead) 669*4882a593Smuzhiyun lookahead = None 670*4882a593Smuzhiyun 671*4882a593Smuzhiyun # Decrease error count on successful shift 672*4882a593Smuzhiyun if errorcount: errorcount -=1 673*4882a593Smuzhiyun continue 674*4882a593Smuzhiyun 675*4882a593Smuzhiyun if t < 0: 676*4882a593Smuzhiyun # reduce a symbol on the stack, emit a production 677*4882a593Smuzhiyun p = prod[-t] 678*4882a593Smuzhiyun pname = p.name 679*4882a593Smuzhiyun plen = p.len 680*4882a593Smuzhiyun 681*4882a593Smuzhiyun # Get production function 682*4882a593Smuzhiyun sym = 
YaccSymbol() 683*4882a593Smuzhiyun sym.type = pname # Production name 684*4882a593Smuzhiyun sym.value = None 685*4882a593Smuzhiyun 686*4882a593Smuzhiyun if plen: 687*4882a593Smuzhiyun targ = symstack[-plen-1:] 688*4882a593Smuzhiyun targ[0] = sym 689*4882a593Smuzhiyun 690*4882a593Smuzhiyun # --! TRACKING 691*4882a593Smuzhiyun if tracking: 692*4882a593Smuzhiyun t1 = targ[1] 693*4882a593Smuzhiyun sym.lineno = t1.lineno 694*4882a593Smuzhiyun sym.lexpos = t1.lexpos 695*4882a593Smuzhiyun t1 = targ[-1] 696*4882a593Smuzhiyun sym.endlineno = getattr(t1,"endlineno",t1.lineno) 697*4882a593Smuzhiyun sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) 698*4882a593Smuzhiyun 699*4882a593Smuzhiyun # --! TRACKING 700*4882a593Smuzhiyun 701*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 702*4882a593Smuzhiyun # The code enclosed in this section is duplicated 703*4882a593Smuzhiyun # below as a performance optimization. Make sure 704*4882a593Smuzhiyun # changes get made in both locations. 705*4882a593Smuzhiyun 706*4882a593Smuzhiyun pslice.slice = targ 707*4882a593Smuzhiyun 708*4882a593Smuzhiyun try: 709*4882a593Smuzhiyun # Call the grammar rule with our special slice object 710*4882a593Smuzhiyun del symstack[-plen:] 711*4882a593Smuzhiyun del statestack[-plen:] 712*4882a593Smuzhiyun p.callable(pslice) 713*4882a593Smuzhiyun symstack.append(sym) 714*4882a593Smuzhiyun state = goto[statestack[-1]][pname] 715*4882a593Smuzhiyun statestack.append(state) 716*4882a593Smuzhiyun except SyntaxError: 717*4882a593Smuzhiyun # If an error was set. 
Enter error recovery state 718*4882a593Smuzhiyun lookaheadstack.append(lookahead) 719*4882a593Smuzhiyun symstack.pop() 720*4882a593Smuzhiyun statestack.pop() 721*4882a593Smuzhiyun state = statestack[-1] 722*4882a593Smuzhiyun sym.type = 'error' 723*4882a593Smuzhiyun lookahead = sym 724*4882a593Smuzhiyun errorcount = error_count 725*4882a593Smuzhiyun self.errorok = 0 726*4882a593Smuzhiyun continue 727*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 728*4882a593Smuzhiyun 729*4882a593Smuzhiyun else: 730*4882a593Smuzhiyun 731*4882a593Smuzhiyun # --! TRACKING 732*4882a593Smuzhiyun if tracking: 733*4882a593Smuzhiyun sym.lineno = lexer.lineno 734*4882a593Smuzhiyun sym.lexpos = lexer.lexpos 735*4882a593Smuzhiyun # --! TRACKING 736*4882a593Smuzhiyun 737*4882a593Smuzhiyun targ = [ sym ] 738*4882a593Smuzhiyun 739*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 740*4882a593Smuzhiyun # The code enclosed in this section is duplicated 741*4882a593Smuzhiyun # above as a performance optimization. Make sure 742*4882a593Smuzhiyun # changes get made in both locations. 743*4882a593Smuzhiyun 744*4882a593Smuzhiyun pslice.slice = targ 745*4882a593Smuzhiyun 746*4882a593Smuzhiyun try: 747*4882a593Smuzhiyun # Call the grammar rule with our special slice object 748*4882a593Smuzhiyun p.callable(pslice) 749*4882a593Smuzhiyun symstack.append(sym) 750*4882a593Smuzhiyun state = goto[statestack[-1]][pname] 751*4882a593Smuzhiyun statestack.append(state) 752*4882a593Smuzhiyun except SyntaxError: 753*4882a593Smuzhiyun # If an error was set. 
Enter error recovery state 754*4882a593Smuzhiyun lookaheadstack.append(lookahead) 755*4882a593Smuzhiyun symstack.pop() 756*4882a593Smuzhiyun statestack.pop() 757*4882a593Smuzhiyun state = statestack[-1] 758*4882a593Smuzhiyun sym.type = 'error' 759*4882a593Smuzhiyun lookahead = sym 760*4882a593Smuzhiyun errorcount = error_count 761*4882a593Smuzhiyun self.errorok = 0 762*4882a593Smuzhiyun continue 763*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 764*4882a593Smuzhiyun 765*4882a593Smuzhiyun if t == 0: 766*4882a593Smuzhiyun n = symstack[-1] 767*4882a593Smuzhiyun return getattr(n,"value",None) 768*4882a593Smuzhiyun 769*4882a593Smuzhiyun if t is None: 770*4882a593Smuzhiyun 771*4882a593Smuzhiyun # We have some kind of parsing error here. To handle 772*4882a593Smuzhiyun # this, we are going to push the current token onto 773*4882a593Smuzhiyun # the tokenstack and replace it with an 'error' token. 774*4882a593Smuzhiyun # If there are any synchronization rules, they may 775*4882a593Smuzhiyun # catch it. 776*4882a593Smuzhiyun # 777*4882a593Smuzhiyun # In addition to pushing the error token, we call call 778*4882a593Smuzhiyun # the user defined p_error() function if this is the 779*4882a593Smuzhiyun # first syntax error. This function is only called if 780*4882a593Smuzhiyun # errorcount == 0. 781*4882a593Smuzhiyun if errorcount == 0 or self.errorok: 782*4882a593Smuzhiyun errorcount = error_count 783*4882a593Smuzhiyun self.errorok = 0 784*4882a593Smuzhiyun errtoken = lookahead 785*4882a593Smuzhiyun if errtoken.type == '$end': 786*4882a593Smuzhiyun errtoken = None # End of file! 
787*4882a593Smuzhiyun if self.errorfunc: 788*4882a593Smuzhiyun global errok,token,restart 789*4882a593Smuzhiyun errok = self.errok # Set some special functions available in error recovery 790*4882a593Smuzhiyun token = get_token 791*4882a593Smuzhiyun restart = self.restart 792*4882a593Smuzhiyun if errtoken and not hasattr(errtoken,'lexer'): 793*4882a593Smuzhiyun errtoken.lexer = lexer 794*4882a593Smuzhiyun tok = self.errorfunc(errtoken) 795*4882a593Smuzhiyun del errok, token, restart # Delete special functions 796*4882a593Smuzhiyun 797*4882a593Smuzhiyun if self.errorok: 798*4882a593Smuzhiyun # User must have done some kind of panic 799*4882a593Smuzhiyun # mode recovery on their own. The 800*4882a593Smuzhiyun # returned token is the next lookahead 801*4882a593Smuzhiyun lookahead = tok 802*4882a593Smuzhiyun errtoken = None 803*4882a593Smuzhiyun continue 804*4882a593Smuzhiyun else: 805*4882a593Smuzhiyun if errtoken: 806*4882a593Smuzhiyun if hasattr(errtoken,"lineno"): lineno = lookahead.lineno 807*4882a593Smuzhiyun else: lineno = 0 808*4882a593Smuzhiyun if lineno: 809*4882a593Smuzhiyun sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) 810*4882a593Smuzhiyun else: 811*4882a593Smuzhiyun sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) 812*4882a593Smuzhiyun else: 813*4882a593Smuzhiyun sys.stderr.write("yacc: Parse error in input. EOF\n") 814*4882a593Smuzhiyun return 815*4882a593Smuzhiyun 816*4882a593Smuzhiyun else: 817*4882a593Smuzhiyun errorcount = error_count 818*4882a593Smuzhiyun 819*4882a593Smuzhiyun # case 1: the statestack only has 1 entry on it. If we're in this state, the 820*4882a593Smuzhiyun # entire parse has been rolled back and we're completely hosed. The token is 821*4882a593Smuzhiyun # discarded and we just keep going. 
822*4882a593Smuzhiyun 823*4882a593Smuzhiyun if len(statestack) <= 1 and lookahead.type != '$end': 824*4882a593Smuzhiyun lookahead = None 825*4882a593Smuzhiyun errtoken = None 826*4882a593Smuzhiyun state = 0 827*4882a593Smuzhiyun # Nuke the pushback stack 828*4882a593Smuzhiyun del lookaheadstack[:] 829*4882a593Smuzhiyun continue 830*4882a593Smuzhiyun 831*4882a593Smuzhiyun # case 2: the statestack has a couple of entries on it, but we're 832*4882a593Smuzhiyun # at the end of the file. nuke the top entry and generate an error token 833*4882a593Smuzhiyun 834*4882a593Smuzhiyun # Start nuking entries on the stack 835*4882a593Smuzhiyun if lookahead.type == '$end': 836*4882a593Smuzhiyun # Whoa. We're really hosed here. Bail out 837*4882a593Smuzhiyun return 838*4882a593Smuzhiyun 839*4882a593Smuzhiyun if lookahead.type != 'error': 840*4882a593Smuzhiyun sym = symstack[-1] 841*4882a593Smuzhiyun if sym.type == 'error': 842*4882a593Smuzhiyun # Hmmm. Error is on top of stack, we'll just nuke input 843*4882a593Smuzhiyun # symbol and continue 844*4882a593Smuzhiyun lookahead = None 845*4882a593Smuzhiyun continue 846*4882a593Smuzhiyun t = YaccSymbol() 847*4882a593Smuzhiyun t.type = 'error' 848*4882a593Smuzhiyun if hasattr(lookahead,"lineno"): 849*4882a593Smuzhiyun t.lineno = lookahead.lineno 850*4882a593Smuzhiyun t.value = lookahead 851*4882a593Smuzhiyun lookaheadstack.append(lookahead) 852*4882a593Smuzhiyun lookahead = t 853*4882a593Smuzhiyun else: 854*4882a593Smuzhiyun symstack.pop() 855*4882a593Smuzhiyun statestack.pop() 856*4882a593Smuzhiyun state = statestack[-1] # Potential bug fix 857*4882a593Smuzhiyun 858*4882a593Smuzhiyun continue 859*4882a593Smuzhiyun 860*4882a593Smuzhiyun # Call an error function here 861*4882a593Smuzhiyun raise RuntimeError("yacc: internal parser error!!!\n") 862*4882a593Smuzhiyun 863*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 864*4882a593Smuzhiyun # parseopt_notrack(). 
865*4882a593Smuzhiyun # 866*4882a593Smuzhiyun # Optimized version of parseopt() with line number tracking removed. 867*4882a593Smuzhiyun # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove 868*4882a593Smuzhiyun # code in the #--! TRACKING sections 869*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 870*4882a593Smuzhiyun 871*4882a593Smuzhiyun def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): 872*4882a593Smuzhiyun lookahead = None # Current lookahead symbol 873*4882a593Smuzhiyun lookaheadstack = [ ] # Stack of lookahead symbols 874*4882a593Smuzhiyun actions = self.action # Local reference to action table (to avoid lookup on self.) 875*4882a593Smuzhiyun goto = self.goto # Local reference to goto table (to avoid lookup on self.) 876*4882a593Smuzhiyun prod = self.productions # Local reference to production list (to avoid lookup on self.) 877*4882a593Smuzhiyun pslice = YaccProduction(None) # Production object passed to grammar rules 878*4882a593Smuzhiyun errorcount = 0 # Used during error recovery 879*4882a593Smuzhiyun 880*4882a593Smuzhiyun # If no lexer was given, we will try to use the lex module 881*4882a593Smuzhiyun if not lexer: 882*4882a593Smuzhiyun lex = load_ply_lex() 883*4882a593Smuzhiyun lexer = lex.lexer 884*4882a593Smuzhiyun 885*4882a593Smuzhiyun # Set up the lexer and parser objects on pslice 886*4882a593Smuzhiyun pslice.lexer = lexer 887*4882a593Smuzhiyun pslice.parser = self 888*4882a593Smuzhiyun 889*4882a593Smuzhiyun # If input was supplied, pass to lexer 890*4882a593Smuzhiyun if input is not None: 891*4882a593Smuzhiyun lexer.input(input) 892*4882a593Smuzhiyun 893*4882a593Smuzhiyun if tokenfunc is None: 894*4882a593Smuzhiyun # Tokenize function 895*4882a593Smuzhiyun get_token = lexer.token 896*4882a593Smuzhiyun else: 897*4882a593Smuzhiyun get_token = tokenfunc 898*4882a593Smuzhiyun 899*4882a593Smuzhiyun # Set up the state and symbol stacks 
900*4882a593Smuzhiyun 901*4882a593Smuzhiyun statestack = [ ] # Stack of parsing states 902*4882a593Smuzhiyun self.statestack = statestack 903*4882a593Smuzhiyun symstack = [ ] # Stack of grammar symbols 904*4882a593Smuzhiyun self.symstack = symstack 905*4882a593Smuzhiyun 906*4882a593Smuzhiyun pslice.stack = symstack # Put in the production 907*4882a593Smuzhiyun errtoken = None # Err token 908*4882a593Smuzhiyun 909*4882a593Smuzhiyun # The start state is assumed to be (0,$end) 910*4882a593Smuzhiyun 911*4882a593Smuzhiyun statestack.append(0) 912*4882a593Smuzhiyun sym = YaccSymbol() 913*4882a593Smuzhiyun sym.type = '$end' 914*4882a593Smuzhiyun symstack.append(sym) 915*4882a593Smuzhiyun state = 0 916*4882a593Smuzhiyun while 1: 917*4882a593Smuzhiyun # Get the next symbol on the input. If a lookahead symbol 918*4882a593Smuzhiyun # is already set, we just use that. Otherwise, we'll pull 919*4882a593Smuzhiyun # the next token off of the lookaheadstack or from the lexer 920*4882a593Smuzhiyun 921*4882a593Smuzhiyun if not lookahead: 922*4882a593Smuzhiyun if not lookaheadstack: 923*4882a593Smuzhiyun lookahead = get_token() # Get the next token 924*4882a593Smuzhiyun else: 925*4882a593Smuzhiyun lookahead = lookaheadstack.pop() 926*4882a593Smuzhiyun if not lookahead: 927*4882a593Smuzhiyun lookahead = YaccSymbol() 928*4882a593Smuzhiyun lookahead.type = '$end' 929*4882a593Smuzhiyun 930*4882a593Smuzhiyun # Check the action table 931*4882a593Smuzhiyun ltype = lookahead.type 932*4882a593Smuzhiyun t = actions[state].get(ltype) 933*4882a593Smuzhiyun 934*4882a593Smuzhiyun if t is not None: 935*4882a593Smuzhiyun if t > 0: 936*4882a593Smuzhiyun # shift a symbol on the stack 937*4882a593Smuzhiyun statestack.append(t) 938*4882a593Smuzhiyun state = t 939*4882a593Smuzhiyun 940*4882a593Smuzhiyun symstack.append(lookahead) 941*4882a593Smuzhiyun lookahead = None 942*4882a593Smuzhiyun 943*4882a593Smuzhiyun # Decrease error count on successful shift 944*4882a593Smuzhiyun if errorcount: errorcount -=1 
945*4882a593Smuzhiyun continue 946*4882a593Smuzhiyun 947*4882a593Smuzhiyun if t < 0: 948*4882a593Smuzhiyun # reduce a symbol on the stack, emit a production 949*4882a593Smuzhiyun p = prod[-t] 950*4882a593Smuzhiyun pname = p.name 951*4882a593Smuzhiyun plen = p.len 952*4882a593Smuzhiyun 953*4882a593Smuzhiyun # Get production function 954*4882a593Smuzhiyun sym = YaccSymbol() 955*4882a593Smuzhiyun sym.type = pname # Production name 956*4882a593Smuzhiyun sym.value = None 957*4882a593Smuzhiyun 958*4882a593Smuzhiyun if plen: 959*4882a593Smuzhiyun targ = symstack[-plen-1:] 960*4882a593Smuzhiyun targ[0] = sym 961*4882a593Smuzhiyun 962*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 963*4882a593Smuzhiyun # The code enclosed in this section is duplicated 964*4882a593Smuzhiyun # below as a performance optimization. Make sure 965*4882a593Smuzhiyun # changes get made in both locations. 966*4882a593Smuzhiyun 967*4882a593Smuzhiyun pslice.slice = targ 968*4882a593Smuzhiyun 969*4882a593Smuzhiyun try: 970*4882a593Smuzhiyun # Call the grammar rule with our special slice object 971*4882a593Smuzhiyun del symstack[-plen:] 972*4882a593Smuzhiyun del statestack[-plen:] 973*4882a593Smuzhiyun p.callable(pslice) 974*4882a593Smuzhiyun symstack.append(sym) 975*4882a593Smuzhiyun state = goto[statestack[-1]][pname] 976*4882a593Smuzhiyun statestack.append(state) 977*4882a593Smuzhiyun except SyntaxError: 978*4882a593Smuzhiyun # If an error was set. Enter error recovery state 979*4882a593Smuzhiyun lookaheadstack.append(lookahead) 980*4882a593Smuzhiyun symstack.pop() 981*4882a593Smuzhiyun statestack.pop() 982*4882a593Smuzhiyun state = statestack[-1] 983*4882a593Smuzhiyun sym.type = 'error' 984*4882a593Smuzhiyun lookahead = sym 985*4882a593Smuzhiyun errorcount = error_count 986*4882a593Smuzhiyun self.errorok = 0 987*4882a593Smuzhiyun continue 988*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
989*4882a593Smuzhiyun 990*4882a593Smuzhiyun else: 991*4882a593Smuzhiyun 992*4882a593Smuzhiyun targ = [ sym ] 993*4882a593Smuzhiyun 994*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 995*4882a593Smuzhiyun # The code enclosed in this section is duplicated 996*4882a593Smuzhiyun # above as a performance optimization. Make sure 997*4882a593Smuzhiyun # changes get made in both locations. 998*4882a593Smuzhiyun 999*4882a593Smuzhiyun pslice.slice = targ 1000*4882a593Smuzhiyun 1001*4882a593Smuzhiyun try: 1002*4882a593Smuzhiyun # Call the grammar rule with our special slice object 1003*4882a593Smuzhiyun p.callable(pslice) 1004*4882a593Smuzhiyun symstack.append(sym) 1005*4882a593Smuzhiyun state = goto[statestack[-1]][pname] 1006*4882a593Smuzhiyun statestack.append(state) 1007*4882a593Smuzhiyun except SyntaxError: 1008*4882a593Smuzhiyun # If an error was set. Enter error recovery state 1009*4882a593Smuzhiyun lookaheadstack.append(lookahead) 1010*4882a593Smuzhiyun symstack.pop() 1011*4882a593Smuzhiyun statestack.pop() 1012*4882a593Smuzhiyun state = statestack[-1] 1013*4882a593Smuzhiyun sym.type = 'error' 1014*4882a593Smuzhiyun lookahead = sym 1015*4882a593Smuzhiyun errorcount = error_count 1016*4882a593Smuzhiyun self.errorok = 0 1017*4882a593Smuzhiyun continue 1018*4882a593Smuzhiyun # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1019*4882a593Smuzhiyun 1020*4882a593Smuzhiyun if t == 0: 1021*4882a593Smuzhiyun n = symstack[-1] 1022*4882a593Smuzhiyun return getattr(n,"value",None) 1023*4882a593Smuzhiyun 1024*4882a593Smuzhiyun if t is None: 1025*4882a593Smuzhiyun 1026*4882a593Smuzhiyun # We have some kind of parsing error here. To handle 1027*4882a593Smuzhiyun # this, we are going to push the current token onto 1028*4882a593Smuzhiyun # the tokenstack and replace it with an 'error' token. 1029*4882a593Smuzhiyun # If there are any synchronization rules, they may 1030*4882a593Smuzhiyun # catch it. 
1031*4882a593Smuzhiyun # 1032*4882a593Smuzhiyun # In addition to pushing the error token, we call call 1033*4882a593Smuzhiyun # the user defined p_error() function if this is the 1034*4882a593Smuzhiyun # first syntax error. This function is only called if 1035*4882a593Smuzhiyun # errorcount == 0. 1036*4882a593Smuzhiyun if errorcount == 0 or self.errorok: 1037*4882a593Smuzhiyun errorcount = error_count 1038*4882a593Smuzhiyun self.errorok = 0 1039*4882a593Smuzhiyun errtoken = lookahead 1040*4882a593Smuzhiyun if errtoken.type == '$end': 1041*4882a593Smuzhiyun errtoken = None # End of file! 1042*4882a593Smuzhiyun if self.errorfunc: 1043*4882a593Smuzhiyun global errok,token,restart 1044*4882a593Smuzhiyun errok = self.errok # Set some special functions available in error recovery 1045*4882a593Smuzhiyun token = get_token 1046*4882a593Smuzhiyun restart = self.restart 1047*4882a593Smuzhiyun if errtoken and not hasattr(errtoken,'lexer'): 1048*4882a593Smuzhiyun errtoken.lexer = lexer 1049*4882a593Smuzhiyun tok = self.errorfunc(errtoken) 1050*4882a593Smuzhiyun del errok, token, restart # Delete special functions 1051*4882a593Smuzhiyun 1052*4882a593Smuzhiyun if self.errorok: 1053*4882a593Smuzhiyun # User must have done some kind of panic 1054*4882a593Smuzhiyun # mode recovery on their own. 
The 1055*4882a593Smuzhiyun # returned token is the next lookahead 1056*4882a593Smuzhiyun lookahead = tok 1057*4882a593Smuzhiyun errtoken = None 1058*4882a593Smuzhiyun continue 1059*4882a593Smuzhiyun else: 1060*4882a593Smuzhiyun if errtoken: 1061*4882a593Smuzhiyun if hasattr(errtoken,"lineno"): lineno = lookahead.lineno 1062*4882a593Smuzhiyun else: lineno = 0 1063*4882a593Smuzhiyun if lineno: 1064*4882a593Smuzhiyun sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) 1065*4882a593Smuzhiyun else: 1066*4882a593Smuzhiyun sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) 1067*4882a593Smuzhiyun else: 1068*4882a593Smuzhiyun sys.stderr.write("yacc: Parse error in input. EOF\n") 1069*4882a593Smuzhiyun return 1070*4882a593Smuzhiyun 1071*4882a593Smuzhiyun else: 1072*4882a593Smuzhiyun errorcount = error_count 1073*4882a593Smuzhiyun 1074*4882a593Smuzhiyun # case 1: the statestack only has 1 entry on it. If we're in this state, the 1075*4882a593Smuzhiyun # entire parse has been rolled back and we're completely hosed. The token is 1076*4882a593Smuzhiyun # discarded and we just keep going. 1077*4882a593Smuzhiyun 1078*4882a593Smuzhiyun if len(statestack) <= 1 and lookahead.type != '$end': 1079*4882a593Smuzhiyun lookahead = None 1080*4882a593Smuzhiyun errtoken = None 1081*4882a593Smuzhiyun state = 0 1082*4882a593Smuzhiyun # Nuke the pushback stack 1083*4882a593Smuzhiyun del lookaheadstack[:] 1084*4882a593Smuzhiyun continue 1085*4882a593Smuzhiyun 1086*4882a593Smuzhiyun # case 2: the statestack has a couple of entries on it, but we're 1087*4882a593Smuzhiyun # at the end of the file. nuke the top entry and generate an error token 1088*4882a593Smuzhiyun 1089*4882a593Smuzhiyun # Start nuking entries on the stack 1090*4882a593Smuzhiyun if lookahead.type == '$end': 1091*4882a593Smuzhiyun # Whoa. We're really hosed here. 
Bail out 1092*4882a593Smuzhiyun return 1093*4882a593Smuzhiyun 1094*4882a593Smuzhiyun if lookahead.type != 'error': 1095*4882a593Smuzhiyun sym = symstack[-1] 1096*4882a593Smuzhiyun if sym.type == 'error': 1097*4882a593Smuzhiyun # Hmmm. Error is on top of stack, we'll just nuke input 1098*4882a593Smuzhiyun # symbol and continue 1099*4882a593Smuzhiyun lookahead = None 1100*4882a593Smuzhiyun continue 1101*4882a593Smuzhiyun t = YaccSymbol() 1102*4882a593Smuzhiyun t.type = 'error' 1103*4882a593Smuzhiyun if hasattr(lookahead,"lineno"): 1104*4882a593Smuzhiyun t.lineno = lookahead.lineno 1105*4882a593Smuzhiyun t.value = lookahead 1106*4882a593Smuzhiyun lookaheadstack.append(lookahead) 1107*4882a593Smuzhiyun lookahead = t 1108*4882a593Smuzhiyun else: 1109*4882a593Smuzhiyun symstack.pop() 1110*4882a593Smuzhiyun statestack.pop() 1111*4882a593Smuzhiyun state = statestack[-1] # Potential bug fix 1112*4882a593Smuzhiyun 1113*4882a593Smuzhiyun continue 1114*4882a593Smuzhiyun 1115*4882a593Smuzhiyun # Call an error function here 1116*4882a593Smuzhiyun raise RuntimeError("yacc: internal parser error!!!\n") 1117*4882a593Smuzhiyun 1118*4882a593Smuzhiyun# ----------------------------------------------------------------------------- 1119*4882a593Smuzhiyun# === Grammar Representation === 1120*4882a593Smuzhiyun# 1121*4882a593Smuzhiyun# The following functions, classes, and variables are used to represent and 1122*4882a593Smuzhiyun# manipulate the rules that make up a grammar. 
# -----------------------------------------------------------------------------

import re

# Regex matching grammar symbol names.  Note that '-' is accepted in addition
# to the characters legal in a Python identifier.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len       - Length of the production (number of symbols on right hand side)
#       usyms     - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    """Raw information about a single grammar rule (see module comment above)."""
    # Count of how many times this production has been reduced; shared class
    # default, overwritten per instance during table construction.
    reduced = 0
    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func            # Name of the reduce action function
        self.callable = None            # Resolved via bind() before parsing
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len  = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        # (order-preserving; linear scan is fine for short rule bodies)
        self.usyms = [ ]
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation, e.g. "expr -> expr PLUS term"
        if self.prod:
            self.str = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Python 2 truth-value hook: a Production is always truthy, even when
        # its right-hand side is empty (len() would otherwise make it falsy).
        return 1

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    # NOTE(review): reads self.Prodnames, which is never set in __init__ —
    # presumably attached to the class/instance externally before lr_item()
    # is called during table construction; confirm against the caller.
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = LRItem(self,n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        try:
            p.lr_after = self.Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal standin for Production objects when
# reading table data from files.   It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    """Minimal stand-in for Production when tables are loaded from files.

    Carries only the fields the LR engine reads (name, len, callable) plus
    debugging metadata (str, func, file, line).
    """

    def __init__(self, str, name, len, func, file, line):
        # Parameter names shadow builtins, but the positional signature is
        # part of the table-loading interface and must stay as-is.
        self.str      = str       # Display form, e.g. "expr -> expr PLUS term"
        self.name     = name      # Left-hand-side nonterminal
        self.len      = len       # Number of right-hand-side symbols
        self.func     = func      # Name of the reduce action function
        self.callable = None      # Filled in by bind()
        self.file     = file      # Where the action function was defined
        self.line     = line

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(" + self.str + ")"

    def bind(self, pdict):
        """Resolve the stored action-function name against *pdict*."""
        if self.func:
            self.callable = pdict[self.func]


# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here
# basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next      Next LR item. Example, if we are ' expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads  - LALR lookahead symbols for this item
#       len         - Length of the production (number of symbols on right hand side)
#       lr_after    - List of all productions that immediately follow
#       lr_before   - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    """One production from *p* with a parse dot placed at index *n*."""

    def __init__(self, p, n):
        self.name       = p.name
        self.number     = p.number
        self.lr_index   = n
        self.lookaheads = { }
        self.usyms      = p.usyms
        # Build the dotted right-hand side: a copy of p.prod with "." at n.
        rhs = list(p.prod)
        rhs.insert(n, ".")
        self.prod = tuple(rhs)
        self.len  = len(self.prod)

    def __str__(self):
        if not self.prod:
            return "%s -> <empty>" % self.name
        return "%s -> %s" % (self.name, " ".join(self.prod))

    def __repr__(self):
        return "LRItem(%s)" % str(self)

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    """Return the last symbol of *symbols* that appears in *terminals*.

    Returns None when no symbol on the right-hand side is a terminal.
    """
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None

# -----------------------------------------------------------------------------
# === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError): pass

class Grammar(object):
    def __init__(self, terminals):
        """Create an empty grammar over the given terminal names."""
        # All productions.  Slot 0 is reserved for the augmented grammar's
        # start production, installed later by set_start().
        self.Productions = [None]

        # Nonterminal name -> list of its Production objects.
        self.Prodnames = {}

        # "name -> syms" string -> Production; used only to detect duplicates.
        self.Prodmap = {}

        # Terminal name -> list of production numbers that reference it.
        self.Terminals = {}
        for term in terminals:
            self.Terminals[term] = []
        self.Terminals['error'] = []      # 'error' is always a valid terminal

        # Nonterminal name -> list of production numbers that reference it.
        self.Nonterminals = {}

        self.First = {}                   # Precomputed FIRST(x) sets
        self.Follow = {}                  # Precomputed FOLLOW(x) sets

        # Terminal -> ('left'|'right'|'nonassoc', level) precedence rules.
        self.Precedence = {}

        # Precedence rules actually referenced by the grammar.  Only consulted
        # when warning about unused precedence declarations.
        self.UsedPrecedence = {}

        self.Start = None                 # Grammar start symbol

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
1364*4882a593Smuzhiyun # 1365*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1366*4882a593Smuzhiyun 1367*4882a593Smuzhiyun def set_precedence(self,term,assoc,level): 1368*4882a593Smuzhiyun assert self.Productions == [None],"Must call set_precedence() before add_production()" 1369*4882a593Smuzhiyun if term in self.Precedence: 1370*4882a593Smuzhiyun raise GrammarError("Precedence already specified for terminal '%s'" % term) 1371*4882a593Smuzhiyun if assoc not in ['left','right','nonassoc']: 1372*4882a593Smuzhiyun raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 1373*4882a593Smuzhiyun self.Precedence[term] = (assoc,level) 1374*4882a593Smuzhiyun 1375*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1376*4882a593Smuzhiyun # add_production() 1377*4882a593Smuzhiyun # 1378*4882a593Smuzhiyun # Given an action function, this function assembles a production rule and 1379*4882a593Smuzhiyun # computes its precedence level. 1380*4882a593Smuzhiyun # 1381*4882a593Smuzhiyun # The production rule is supplied as a list of symbols. For example, 1382*4882a593Smuzhiyun # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 1383*4882a593Smuzhiyun # symbols ['expr','PLUS','term']. 1384*4882a593Smuzhiyun # 1385*4882a593Smuzhiyun # Precedence is determined by the precedence of the right-most non-terminal 1386*4882a593Smuzhiyun # or the precedence of a terminal specified by %prec. 1387*4882a593Smuzhiyun # 1388*4882a593Smuzhiyun # A variety of error checks are performed to make sure production symbols 1389*4882a593Smuzhiyun # are valid and that %prec is used correctly. 
    # -----------------------------------------------------------------------------

    def add_production(self,prodname,syms,func=None,file='',line=0):
        """Register one grammar rule prodname -> syms.

        syms is a list of symbol names; it is modified in place (quoted
        literals are replaced by their character, and a trailing
        ['%prec', name] pair is removed).  func is the action function,
        file/line identify where the rule was defined (used in error
        messages).  Raises GrammarError on any validation failure.
        Returns 0 on success.
        """
        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                 try:
                     # eval turns the quoted source text into the literal
                     # character (handles escapes).  Input here is the
                     # developer's own grammar spec, not external data.
                     c = eval(s)
                     if (len(c) > 1):
                         raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
                     # Implicitly declare the literal as a terminal.
                     if not c in self.Terminals:
                         self.Terminals[c] = []
                     syms[n] = c
                     continue
                 except SyntaxError:
                     # Malformed quoting: fall through to the identifier check.
                     pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            # '%prec NAME' is only legal as the final two symbols.
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname,None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            else:
                # Remember that this precedence declaration was exercised
                # (consulted later by unused_precedence()).
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            prec

name = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        # NOTE(review): 'map' shadows the builtin of the same name; kept
        # unchanged here since it is purely local.
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber  = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
        return 0

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
1473*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1474*4882a593Smuzhiyun 1475*4882a593Smuzhiyun def set_start(self,start=None): 1476*4882a593Smuzhiyun if not start: 1477*4882a593Smuzhiyun start = self.Productions[1].name 1478*4882a593Smuzhiyun if start not in self.Nonterminals: 1479*4882a593Smuzhiyun raise GrammarError("start symbol %s undefined" % start) 1480*4882a593Smuzhiyun self.Productions[0] = Production(0,"S'",[start]) 1481*4882a593Smuzhiyun self.Nonterminals[start].append(0) 1482*4882a593Smuzhiyun self.Start = start 1483*4882a593Smuzhiyun 1484*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1485*4882a593Smuzhiyun # find_unreachable() 1486*4882a593Smuzhiyun # 1487*4882a593Smuzhiyun # Find all of the nonterminal symbols that can't be reached from the starting 1488*4882a593Smuzhiyun # symbol. Returns a list of nonterminals that can't be reached. 1489*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1490*4882a593Smuzhiyun 1491*4882a593Smuzhiyun def find_unreachable(self): 1492*4882a593Smuzhiyun 1493*4882a593Smuzhiyun # Mark all symbols that are reachable from a symbol s 1494*4882a593Smuzhiyun def mark_reachable_from(s): 1495*4882a593Smuzhiyun if reachable[s]: 1496*4882a593Smuzhiyun # We've already reached symbol s. 
1497*4882a593Smuzhiyun return 1498*4882a593Smuzhiyun reachable[s] = 1 1499*4882a593Smuzhiyun for p in self.Prodnames.get(s,[]): 1500*4882a593Smuzhiyun for r in p.prod: 1501*4882a593Smuzhiyun mark_reachable_from(r) 1502*4882a593Smuzhiyun 1503*4882a593Smuzhiyun reachable = { } 1504*4882a593Smuzhiyun for s in list(self.Terminals) + list(self.Nonterminals): 1505*4882a593Smuzhiyun reachable[s] = 0 1506*4882a593Smuzhiyun 1507*4882a593Smuzhiyun mark_reachable_from( self.Productions[0].prod[0] ) 1508*4882a593Smuzhiyun 1509*4882a593Smuzhiyun return [s for s in list(self.Nonterminals) 1510*4882a593Smuzhiyun if not reachable[s]] 1511*4882a593Smuzhiyun 1512*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1513*4882a593Smuzhiyun # infinite_cycles() 1514*4882a593Smuzhiyun # 1515*4882a593Smuzhiyun # This function looks at the various parsing rules and tries to detect 1516*4882a593Smuzhiyun # infinite recursion cycles (grammar rules where there is no possible way 1517*4882a593Smuzhiyun # to derive a string of only terminals). 
1518*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1519*4882a593Smuzhiyun 1520*4882a593Smuzhiyun def infinite_cycles(self): 1521*4882a593Smuzhiyun terminates = {} 1522*4882a593Smuzhiyun 1523*4882a593Smuzhiyun # Terminals: 1524*4882a593Smuzhiyun for t in self.Terminals: 1525*4882a593Smuzhiyun terminates[t] = 1 1526*4882a593Smuzhiyun 1527*4882a593Smuzhiyun terminates['$end'] = 1 1528*4882a593Smuzhiyun 1529*4882a593Smuzhiyun # Nonterminals: 1530*4882a593Smuzhiyun 1531*4882a593Smuzhiyun # Initialize to false: 1532*4882a593Smuzhiyun for n in self.Nonterminals: 1533*4882a593Smuzhiyun terminates[n] = 0 1534*4882a593Smuzhiyun 1535*4882a593Smuzhiyun # Then propagate termination until no change: 1536*4882a593Smuzhiyun while 1: 1537*4882a593Smuzhiyun some_change = 0 1538*4882a593Smuzhiyun for (n,pl) in self.Prodnames.items(): 1539*4882a593Smuzhiyun # Nonterminal n terminates iff any of its productions terminates. 1540*4882a593Smuzhiyun for p in pl: 1541*4882a593Smuzhiyun # Production p terminates iff all of its rhs symbols terminate. 1542*4882a593Smuzhiyun for s in p.prod: 1543*4882a593Smuzhiyun if not terminates[s]: 1544*4882a593Smuzhiyun # The symbol s does not terminate, 1545*4882a593Smuzhiyun # so production p does not terminate. 1546*4882a593Smuzhiyun p_terminates = 0 1547*4882a593Smuzhiyun break 1548*4882a593Smuzhiyun else: 1549*4882a593Smuzhiyun # didn't break from the loop, 1550*4882a593Smuzhiyun # so every symbol s terminates 1551*4882a593Smuzhiyun # so production p terminates. 1552*4882a593Smuzhiyun p_terminates = 1 1553*4882a593Smuzhiyun 1554*4882a593Smuzhiyun if p_terminates: 1555*4882a593Smuzhiyun # symbol n terminates! 1556*4882a593Smuzhiyun if not terminates[n]: 1557*4882a593Smuzhiyun terminates[n] = 1 1558*4882a593Smuzhiyun some_change = 1 1559*4882a593Smuzhiyun # Don't need to consider any more productions for this n. 
1560*4882a593Smuzhiyun break 1561*4882a593Smuzhiyun 1562*4882a593Smuzhiyun if not some_change: 1563*4882a593Smuzhiyun break 1564*4882a593Smuzhiyun 1565*4882a593Smuzhiyun infinite = [] 1566*4882a593Smuzhiyun for (s,term) in terminates.items(): 1567*4882a593Smuzhiyun if not term: 1568*4882a593Smuzhiyun if not s in self.Prodnames and not s in self.Terminals and s != 'error': 1569*4882a593Smuzhiyun # s is used-but-not-defined, and we've already warned of that, 1570*4882a593Smuzhiyun # so it would be overkill to say that it's also non-terminating. 1571*4882a593Smuzhiyun pass 1572*4882a593Smuzhiyun else: 1573*4882a593Smuzhiyun infinite.append(s) 1574*4882a593Smuzhiyun 1575*4882a593Smuzhiyun return infinite 1576*4882a593Smuzhiyun 1577*4882a593Smuzhiyun 1578*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1579*4882a593Smuzhiyun # undefined_symbols() 1580*4882a593Smuzhiyun # 1581*4882a593Smuzhiyun # Find all symbols that were used the grammar, but not defined as tokens or 1582*4882a593Smuzhiyun # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol 1583*4882a593Smuzhiyun # and prod is the production where the symbol was used. 
1584*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1585*4882a593Smuzhiyun def undefined_symbols(self): 1586*4882a593Smuzhiyun result = [] 1587*4882a593Smuzhiyun for p in self.Productions: 1588*4882a593Smuzhiyun if not p: continue 1589*4882a593Smuzhiyun 1590*4882a593Smuzhiyun for s in p.prod: 1591*4882a593Smuzhiyun if not s in self.Prodnames and not s in self.Terminals and s != 'error': 1592*4882a593Smuzhiyun result.append((s,p)) 1593*4882a593Smuzhiyun return result 1594*4882a593Smuzhiyun 1595*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1596*4882a593Smuzhiyun # unused_terminals() 1597*4882a593Smuzhiyun # 1598*4882a593Smuzhiyun # Find all terminals that were defined, but not used by the grammar. Returns 1599*4882a593Smuzhiyun # a list of all symbols. 1600*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1601*4882a593Smuzhiyun def unused_terminals(self): 1602*4882a593Smuzhiyun unused_tok = [] 1603*4882a593Smuzhiyun for s,v in self.Terminals.items(): 1604*4882a593Smuzhiyun if s != 'error' and not v: 1605*4882a593Smuzhiyun unused_tok.append(s) 1606*4882a593Smuzhiyun 1607*4882a593Smuzhiyun return unused_tok 1608*4882a593Smuzhiyun 1609*4882a593Smuzhiyun # ------------------------------------------------------------------------------ 1610*4882a593Smuzhiyun # unused_rules() 1611*4882a593Smuzhiyun # 1612*4882a593Smuzhiyun # Find all grammar rules that were defined, but not used (maybe not reachable) 1613*4882a593Smuzhiyun # Returns a list of productions. 
1614*4882a593Smuzhiyun # ------------------------------------------------------------------------------ 1615*4882a593Smuzhiyun 1616*4882a593Smuzhiyun def unused_rules(self): 1617*4882a593Smuzhiyun unused_prod = [] 1618*4882a593Smuzhiyun for s,v in self.Nonterminals.items(): 1619*4882a593Smuzhiyun if not v: 1620*4882a593Smuzhiyun p = self.Prodnames[s][0] 1621*4882a593Smuzhiyun unused_prod.append(p) 1622*4882a593Smuzhiyun return unused_prod 1623*4882a593Smuzhiyun 1624*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1625*4882a593Smuzhiyun # unused_precedence() 1626*4882a593Smuzhiyun # 1627*4882a593Smuzhiyun # Returns a list of tuples (term,precedence) corresponding to precedence 1628*4882a593Smuzhiyun # rules that were never used by the grammar. term is the name of the terminal 1629*4882a593Smuzhiyun # on which precedence was applied and precedence is a string such as 'left' or 1630*4882a593Smuzhiyun # 'right' corresponding to the type of precedence. 1631*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1632*4882a593Smuzhiyun 1633*4882a593Smuzhiyun def unused_precedence(self): 1634*4882a593Smuzhiyun unused = [] 1635*4882a593Smuzhiyun for termname in self.Precedence: 1636*4882a593Smuzhiyun if not (termname in self.Terminals or termname in self.UsedPrecedence): 1637*4882a593Smuzhiyun unused.append((termname,self.Precedence[termname][0])) 1638*4882a593Smuzhiyun 1639*4882a593Smuzhiyun return unused 1640*4882a593Smuzhiyun 1641*4882a593Smuzhiyun # ------------------------------------------------------------------------- 1642*4882a593Smuzhiyun # _first() 1643*4882a593Smuzhiyun # 1644*4882a593Smuzhiyun # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 1645*4882a593Smuzhiyun # 1646*4882a593Smuzhiyun # During execution of compute_first1, the result may be incomplete. 
1647*4882a593Smuzhiyun # Afterward (e.g., when called from compute_follow()), it will be complete. 1648*4882a593Smuzhiyun # ------------------------------------------------------------------------- 1649*4882a593Smuzhiyun def _first(self,beta): 1650*4882a593Smuzhiyun 1651*4882a593Smuzhiyun # We are computing First(x1,x2,x3,...,xn) 1652*4882a593Smuzhiyun result = [ ] 1653*4882a593Smuzhiyun for x in beta: 1654*4882a593Smuzhiyun x_produces_empty = 0 1655*4882a593Smuzhiyun 1656*4882a593Smuzhiyun # Add all the non-<empty> symbols of First[x] to the result. 1657*4882a593Smuzhiyun for f in self.First[x]: 1658*4882a593Smuzhiyun if f == '<empty>': 1659*4882a593Smuzhiyun x_produces_empty = 1 1660*4882a593Smuzhiyun else: 1661*4882a593Smuzhiyun if f not in result: result.append(f) 1662*4882a593Smuzhiyun 1663*4882a593Smuzhiyun if x_produces_empty: 1664*4882a593Smuzhiyun # We have to consider the next x in beta, 1665*4882a593Smuzhiyun # i.e. stay in the loop. 1666*4882a593Smuzhiyun pass 1667*4882a593Smuzhiyun else: 1668*4882a593Smuzhiyun # We don't have to consider any further symbols in beta. 1669*4882a593Smuzhiyun break 1670*4882a593Smuzhiyun else: 1671*4882a593Smuzhiyun # There was no 'break' from the loop, 1672*4882a593Smuzhiyun # so x_produces_empty was true for all x in beta, 1673*4882a593Smuzhiyun # so beta produces empty as well. 
1674*4882a593Smuzhiyun result.append('<empty>') 1675*4882a593Smuzhiyun 1676*4882a593Smuzhiyun return result 1677*4882a593Smuzhiyun 1678*4882a593Smuzhiyun # ------------------------------------------------------------------------- 1679*4882a593Smuzhiyun # compute_first() 1680*4882a593Smuzhiyun # 1681*4882a593Smuzhiyun # Compute the value of FIRST1(X) for all symbols 1682*4882a593Smuzhiyun # ------------------------------------------------------------------------- 1683*4882a593Smuzhiyun def compute_first(self): 1684*4882a593Smuzhiyun if self.First: 1685*4882a593Smuzhiyun return self.First 1686*4882a593Smuzhiyun 1687*4882a593Smuzhiyun # Terminals: 1688*4882a593Smuzhiyun for t in self.Terminals: 1689*4882a593Smuzhiyun self.First[t] = [t] 1690*4882a593Smuzhiyun 1691*4882a593Smuzhiyun self.First['$end'] = ['$end'] 1692*4882a593Smuzhiyun 1693*4882a593Smuzhiyun # Nonterminals: 1694*4882a593Smuzhiyun 1695*4882a593Smuzhiyun # Initialize to the empty set: 1696*4882a593Smuzhiyun for n in self.Nonterminals: 1697*4882a593Smuzhiyun self.First[n] = [] 1698*4882a593Smuzhiyun 1699*4882a593Smuzhiyun # Then propagate symbols until no change: 1700*4882a593Smuzhiyun while 1: 1701*4882a593Smuzhiyun some_change = 0 1702*4882a593Smuzhiyun for n in self.Nonterminals: 1703*4882a593Smuzhiyun for p in self.Prodnames[n]: 1704*4882a593Smuzhiyun for f in self._first(p.prod): 1705*4882a593Smuzhiyun if f not in self.First[n]: 1706*4882a593Smuzhiyun self.First[n].append( f ) 1707*4882a593Smuzhiyun some_change = 1 1708*4882a593Smuzhiyun if not some_change: 1709*4882a593Smuzhiyun break 1710*4882a593Smuzhiyun 1711*4882a593Smuzhiyun return self.First 1712*4882a593Smuzhiyun 1713*4882a593Smuzhiyun # --------------------------------------------------------------------- 1714*4882a593Smuzhiyun # compute_follow() 1715*4882a593Smuzhiyun # 1716*4882a593Smuzhiyun # Computes all of the follow sets for every non-terminal symbol. 
The 1717*4882a593Smuzhiyun # follow set is the set of all symbols that might follow a given 1718*4882a593Smuzhiyun # non-terminal. See the Dragon book, 2nd Ed. p. 189. 1719*4882a593Smuzhiyun # --------------------------------------------------------------------- 1720*4882a593Smuzhiyun def compute_follow(self,start=None): 1721*4882a593Smuzhiyun # If already computed, return the result 1722*4882a593Smuzhiyun if self.Follow: 1723*4882a593Smuzhiyun return self.Follow 1724*4882a593Smuzhiyun 1725*4882a593Smuzhiyun # If first sets not computed yet, do that first. 1726*4882a593Smuzhiyun if not self.First: 1727*4882a593Smuzhiyun self.compute_first() 1728*4882a593Smuzhiyun 1729*4882a593Smuzhiyun # Add '$end' to the follow list of the start symbol 1730*4882a593Smuzhiyun for k in self.Nonterminals: 1731*4882a593Smuzhiyun self.Follow[k] = [ ] 1732*4882a593Smuzhiyun 1733*4882a593Smuzhiyun if not start: 1734*4882a593Smuzhiyun start = self.Productions[1].name 1735*4882a593Smuzhiyun 1736*4882a593Smuzhiyun self.Follow[start] = [ '$end' ] 1737*4882a593Smuzhiyun 1738*4882a593Smuzhiyun while 1: 1739*4882a593Smuzhiyun didadd = 0 1740*4882a593Smuzhiyun for p in self.Productions[1:]: 1741*4882a593Smuzhiyun # Here is the production set 1742*4882a593Smuzhiyun for i in range(len(p.prod)): 1743*4882a593Smuzhiyun B = p.prod[i] 1744*4882a593Smuzhiyun if B in self.Nonterminals: 1745*4882a593Smuzhiyun # Okay. 
We got a non-terminal in a production 1746*4882a593Smuzhiyun fst = self._first(p.prod[i+1:]) 1747*4882a593Smuzhiyun hasempty = 0 1748*4882a593Smuzhiyun for f in fst: 1749*4882a593Smuzhiyun if f != '<empty>' and f not in self.Follow[B]: 1750*4882a593Smuzhiyun self.Follow[B].append(f) 1751*4882a593Smuzhiyun didadd = 1 1752*4882a593Smuzhiyun if f == '<empty>': 1753*4882a593Smuzhiyun hasempty = 1 1754*4882a593Smuzhiyun if hasempty or i == (len(p.prod)-1): 1755*4882a593Smuzhiyun # Add elements of follow(a) to follow(b) 1756*4882a593Smuzhiyun for f in self.Follow[p.name]: 1757*4882a593Smuzhiyun if f not in self.Follow[B]: 1758*4882a593Smuzhiyun self.Follow[B].append(f) 1759*4882a593Smuzhiyun didadd = 1 1760*4882a593Smuzhiyun if not didadd: break 1761*4882a593Smuzhiyun return self.Follow 1762*4882a593Smuzhiyun 1763*4882a593Smuzhiyun 1764*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 1765*4882a593Smuzhiyun # build_lritems() 1766*4882a593Smuzhiyun # 1767*4882a593Smuzhiyun # This function walks the list of productions and builds a complete set of the 1768*4882a593Smuzhiyun # LR items. The LR items are stored in two ways: First, they are uniquely 1769*4882a593Smuzhiyun # numbered and placed in the list _lritems. Second, a linked list of LR items 1770*4882a593Smuzhiyun # is built for each production. For example: 1771*4882a593Smuzhiyun # 1772*4882a593Smuzhiyun # E -> E PLUS E 1773*4882a593Smuzhiyun # 1774*4882a593Smuzhiyun # Creates the list 1775*4882a593Smuzhiyun # 1776*4882a593Smuzhiyun # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . 
    # ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        """Build LRItem objects for every production.

        For each production p this creates the dotted items for every dot
        position, stores them in p.lr_items, and threads them together via
        lr_next (p.lr_next is the first item; the last item's lr_next is
        None).  Each item also gets lr_after (productions of the symbol
        right after the dot, [] if none) and lr_before (symbol just before
        the dot, None at position 0).
        """
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while 1:
                # NOTE(review): len(p) here is presumably len(p.prod) via a
                # __len__ on Production (defined elsewhere) — confirm.  The
                # loop runs one step past the last dot position to append the
                # None terminator to the lr_next chain.
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                # Link the previous item (or the production itself, for the
                # very first item) to this one.
                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items

# -----------------------------------------------------------------------------
# == Class LRTable ==
#
# This basic class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here.  They are defined
# in the derived class LRGeneratedTable.
# -----------------------------------------------------------------------------

class VersionError(YaccError): pass

class LRTable(object):
    """Container for LR parsing tables loaded from a module or pickle file."""

    def __init__(self):
        self.lr_action = None       # Action table
        self.lr_goto = None         # Goto table
        self.lr_productions = None  # List of MiniProduction objects
        self.lr_method = None       # Table-generation method ('SLR'/'LALR')

    def read_table(self,module):
        """Load the tables from a parsetab module (object or importable name).

        Returns the stored grammar signature.  Raises VersionError when the
        module's _tabversion does not match this PLY version.
        """
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            # Import by name; Python 2 allows exec to bind in the local
            # scope, Python 3 needs an explicit namespace dict.
            # NOTE(review): the module name is exec'd — callers should only
            # pass trusted table-module names.
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self,filename):
        """Load the tables from a pickle file written by the table generator.

        Returns the stored grammar signature.  Raises VersionError on a
        version mismatch.  NOTE(review): pickle.load executes arbitrary
        code from the file — only load table files you generated yourself.
        """
        try:
            import cPickle as pickle     # Python 2 fast path
        except ImportError:
            import pickle

        in_f = open(filename,"rb")

        # Records are read back in the exact order the generator wrote them.
        tabversion = pickle.load(in_f)
        if tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")
        self.lr_method = pickle.load(in_f)
        signature = pickle.load(in_f)
        self.lr_action = pickle.load(in_f)
        self.lr_goto = pickle.load(in_f)
        productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        in_f.close()
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        for p in self.lr_productions:
            p.bind(pdict)

# -----------------------------------------------------------------------------
# === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X            - An input set
#          R            - A relation
#          FP           - Set-valued function
#
# NOTE(review): this appears to be the DeRemer & Pennello SCC-based digraph
# algorithm commonly used for LALR lookahead computation — confirm.
# ------------------------------------------------------------------------------

def digraph(X,R,FP):
    # N[x] == 0 means x has not been visited yet.
    N = { }
    for x in X:
        N[x] = 0
    stack = []
    F = { }
    for x in X:
        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
    return F

def traverse(x,N,stack,F,X,R,FP):
    """Depth-first visit of x; merges F(y) into F(x) for every y with x R y.

    Nodes in the same strongly connected component end up sharing the same
    F value, and their N entries are set to MAXINT to mark them finished.
    """
    stack.append(x)
    d = len(stack)                 # depth of x on the stack
    N[x] = d
    F[x] = FP(x)        # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        # x is the root of its component: pop the whole component and give
        # every member x's final F value.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()

class LALRError(YaccError): pass

# -----------------------------------------------------------------------------
# == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
        """Generate parsing tables for *grammar* using 'SLR' or 'LALR'.

        Raises LALRError for any other method.  Table construction happens
        eagerly at the end of __init__ via lr_parse_table().
        """
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action     = {}        # Action table
        self.lr_goto       = {}        # Goto table
        self.lr_productions  = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}        # Cache of computed gotos
        self.lr0_cidhash   = {}        # Cache of closures

        self._add_count    = 0         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict   = 0
        self.rr_conflict   = 0
        self.conflicts     = []        # List of conflicts

        self.sr_conflicts  = []
        self.rr_conflicts  = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self,I):
        """Return the LR(0) closure of the item set I (a list of LRItems).

        Uses the per-run _add_count stamp on productions to avoid adding
        the same production's first item twice.
        """
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = 1
        while didadd:
            didadd = 0
            for j in J:
                for x in j.lr_after:
                    if getattr(x,"lr0_added",0) == self._add_count: continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = 1

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.   This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.
    def lr0_goto(self,I,x):
        """Compute the LR(0) goto set goto(I,x).

        I is a list of LR(0) items and x is a grammar symbol.  The result
        is cached two ways: by (id(I),x) for repeat queries, and through a
        trie keyed on the item objects so that the same goto set is always
        returned as the *same* Python object.  That identity guarantee is
        what later allows fast set comparison via id(obj) instead of
        element-wise comparison.
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        # s walks a trie rooted at lr_goto_cache[x]; each level is keyed by
        # id() of the next kernel item, so a given sequence of kernel items
        # always ends at the same trie node.
        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]                      # kernel items of the goto set
        for p in I:
            n = p.lr_next             # item with the dot advanced one place
            if n and n.lr_before == x:
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        # '$end' marks the trie node's payload: the closed goto set (or the
        # empty kernel list when there were no matching items).
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g

    # Compute the LR(0) sets of item function
    def lr0_items(self):
        """Compute the canonical collection C of LR(0) item sets.

        Starts from the closure of the augmented start production and
        repeatedly applies lr0_goto over every symbol appearing after a
        dot, appending newly discovered states to C.  Each state is
        registered in lr0_cidhash (keyed by id(), relying on lr0_goto's
        uniqueness guarantee) so states are numbered by discovery order.
        """
        C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        # (C grows while we iterate; the while loop picks up new states)
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = { }
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I,x)
                if not g:  continue
                if id(g) in self.lr0_cidhash: continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C

    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennelo (1982).
    #
    # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
    # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    # Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    #      McGraw-Hill Book Company, (1985).
2085*4882a593Smuzhiyun # 2086*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2087*4882a593Smuzhiyun 2088*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2089*4882a593Smuzhiyun # compute_nullable_nonterminals() 2090*4882a593Smuzhiyun # 2091*4882a593Smuzhiyun # Creates a dictionary containing all of the non-terminals that might produce 2092*4882a593Smuzhiyun # an empty production. 2093*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2094*4882a593Smuzhiyun 2095*4882a593Smuzhiyun def compute_nullable_nonterminals(self): 2096*4882a593Smuzhiyun nullable = {} 2097*4882a593Smuzhiyun num_nullable = 0 2098*4882a593Smuzhiyun while 1: 2099*4882a593Smuzhiyun for p in self.grammar.Productions[1:]: 2100*4882a593Smuzhiyun if p.len == 0: 2101*4882a593Smuzhiyun nullable[p.name] = 1 2102*4882a593Smuzhiyun continue 2103*4882a593Smuzhiyun for t in p.prod: 2104*4882a593Smuzhiyun if not t in nullable: break 2105*4882a593Smuzhiyun else: 2106*4882a593Smuzhiyun nullable[p.name] = 1 2107*4882a593Smuzhiyun if len(nullable) == num_nullable: break 2108*4882a593Smuzhiyun num_nullable = len(nullable) 2109*4882a593Smuzhiyun return nullable 2110*4882a593Smuzhiyun 2111*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2112*4882a593Smuzhiyun # find_nonterminal_trans(C) 2113*4882a593Smuzhiyun # 2114*4882a593Smuzhiyun # Given a set of LR(0) items, this functions finds all of the non-terminal 2115*4882a593Smuzhiyun # transitions. These are transitions in which a dot appears immediately before 2116*4882a593Smuzhiyun # a non-terminal. Returns a list of tuples of the form (state,N) where state 2117*4882a593Smuzhiyun # is the state number and N is the nonterminal symbol. 2118*4882a593Smuzhiyun # 2119*4882a593Smuzhiyun # The input C is the set of LR(0) items. 
2120*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2121*4882a593Smuzhiyun 2122*4882a593Smuzhiyun def find_nonterminal_transitions(self,C): 2123*4882a593Smuzhiyun trans = [] 2124*4882a593Smuzhiyun for state in range(len(C)): 2125*4882a593Smuzhiyun for p in C[state]: 2126*4882a593Smuzhiyun if p.lr_index < p.len - 1: 2127*4882a593Smuzhiyun t = (state,p.prod[p.lr_index+1]) 2128*4882a593Smuzhiyun if t[1] in self.grammar.Nonterminals: 2129*4882a593Smuzhiyun if t not in trans: trans.append(t) 2130*4882a593Smuzhiyun state = state + 1 2131*4882a593Smuzhiyun return trans 2132*4882a593Smuzhiyun 2133*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2134*4882a593Smuzhiyun # dr_relation() 2135*4882a593Smuzhiyun # 2136*4882a593Smuzhiyun # Computes the DR(p,A) relationships for non-terminal transitions. The input 2137*4882a593Smuzhiyun # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 2138*4882a593Smuzhiyun # 2139*4882a593Smuzhiyun # Returns a list of terminals. 
2140*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2141*4882a593Smuzhiyun 2142*4882a593Smuzhiyun def dr_relation(self,C,trans,nullable): 2143*4882a593Smuzhiyun dr_set = { } 2144*4882a593Smuzhiyun state,N = trans 2145*4882a593Smuzhiyun terms = [] 2146*4882a593Smuzhiyun 2147*4882a593Smuzhiyun g = self.lr0_goto(C[state],N) 2148*4882a593Smuzhiyun for p in g: 2149*4882a593Smuzhiyun if p.lr_index < p.len - 1: 2150*4882a593Smuzhiyun a = p.prod[p.lr_index+1] 2151*4882a593Smuzhiyun if a in self.grammar.Terminals: 2152*4882a593Smuzhiyun if a not in terms: terms.append(a) 2153*4882a593Smuzhiyun 2154*4882a593Smuzhiyun # This extra bit is to handle the start state 2155*4882a593Smuzhiyun if state == 0 and N == self.grammar.Productions[0].prod[0]: 2156*4882a593Smuzhiyun terms.append('$end') 2157*4882a593Smuzhiyun 2158*4882a593Smuzhiyun return terms 2159*4882a593Smuzhiyun 2160*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2161*4882a593Smuzhiyun # reads_relation() 2162*4882a593Smuzhiyun # 2163*4882a593Smuzhiyun # Computes the READS() relation (p,A) READS (t,C). 
2164*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2165*4882a593Smuzhiyun 2166*4882a593Smuzhiyun def reads_relation(self,C, trans, empty): 2167*4882a593Smuzhiyun # Look for empty transitions 2168*4882a593Smuzhiyun rel = [] 2169*4882a593Smuzhiyun state, N = trans 2170*4882a593Smuzhiyun 2171*4882a593Smuzhiyun g = self.lr0_goto(C[state],N) 2172*4882a593Smuzhiyun j = self.lr0_cidhash.get(id(g),-1) 2173*4882a593Smuzhiyun for p in g: 2174*4882a593Smuzhiyun if p.lr_index < p.len - 1: 2175*4882a593Smuzhiyun a = p.prod[p.lr_index + 1] 2176*4882a593Smuzhiyun if a in empty: 2177*4882a593Smuzhiyun rel.append((j,a)) 2178*4882a593Smuzhiyun 2179*4882a593Smuzhiyun return rel 2180*4882a593Smuzhiyun 2181*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2182*4882a593Smuzhiyun # compute_lookback_includes() 2183*4882a593Smuzhiyun # 2184*4882a593Smuzhiyun # Determines the lookback and includes relations 2185*4882a593Smuzhiyun # 2186*4882a593Smuzhiyun # LOOKBACK: 2187*4882a593Smuzhiyun # 2188*4882a593Smuzhiyun # This relation is determined by running the LR(0) state machine forward. 2189*4882a593Smuzhiyun # For example, starting with a production "N : . A B C", we run it forward 2190*4882a593Smuzhiyun # to obtain "N : A B C ." We then build a relationship between this final 2191*4882a593Smuzhiyun # state and the starting state. These relationships are stored in a dictionary 2192*4882a593Smuzhiyun # lookdict. 2193*4882a593Smuzhiyun # 2194*4882a593Smuzhiyun # INCLUDES: 2195*4882a593Smuzhiyun # 2196*4882a593Smuzhiyun # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 2197*4882a593Smuzhiyun # 2198*4882a593Smuzhiyun # This relation is used to determine non-terminal transitions that occur 2199*4882a593Smuzhiyun # inside of other non-terminal transition states. 
    # (p,A) INCLUDES (p', B) if the following holds:
    #
    #       B -> LAT, where T -> epsilon and p' -L-> p
    #
    # L is essentially a prefix (which may be empty), T is a suffix that must be
    # able to derive an empty string.  State p' must lead to state p with the string L.
    #
    # -----------------------------------------------------------------------------

    def compute_lookback_includes(self,C,trans,nullable):
        """Compute the LOOKBACK and INCLUDES relations.

        C        -- list of LR(0) item sets
        trans    -- list of (state, N) nonterminal transitions
        nullable -- dict of nullable nonterminals

        Returns (lookdict, includedict):
          lookdict maps each (state, N) transition to a list of (state,
          production) lookback pairs; includedict maps a transition to the
          list of transitions that INCLUDE it.
        """
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions (for O(1) membership tests)
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j,t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals: break      # No forget it
                            if not p.prod[li] in nullable: break
                            li = li + 1
                        else:
                            # while/else: only runs if no break, i.e. the whole
                            # remainder of the production derives empty.
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j,t))

                    g = self.lr0_goto(C[j],t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g),-1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name: continue
                    if r.len != p.len:   continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]: break
                        i = i + 1
                    else:
                        lookb.append((j,r))
            for i in includes:
                if not i in includedict: includedict[i] = []
                includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict

    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
2275*4882a593Smuzhiyun # 2276*4882a593Smuzhiyun # Inputs: C = Set of LR(0) items 2277*4882a593Smuzhiyun # ntrans = Set of nonterminal transitions 2278*4882a593Smuzhiyun # nullable = Set of empty transitions 2279*4882a593Smuzhiyun # 2280*4882a593Smuzhiyun # Returns a set containing the read sets 2281*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2282*4882a593Smuzhiyun 2283*4882a593Smuzhiyun def compute_read_sets(self,C, ntrans, nullable): 2284*4882a593Smuzhiyun FP = lambda x: self.dr_relation(C,x,nullable) 2285*4882a593Smuzhiyun R = lambda x: self.reads_relation(C,x,nullable) 2286*4882a593Smuzhiyun F = digraph(ntrans,R,FP) 2287*4882a593Smuzhiyun return F 2288*4882a593Smuzhiyun 2289*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2290*4882a593Smuzhiyun # compute_follow_sets() 2291*4882a593Smuzhiyun # 2292*4882a593Smuzhiyun # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 2293*4882a593Smuzhiyun # and an include set, this function computes the follow sets 2294*4882a593Smuzhiyun # 2295*4882a593Smuzhiyun # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 2296*4882a593Smuzhiyun # 2297*4882a593Smuzhiyun # Inputs: 2298*4882a593Smuzhiyun # ntrans = Set of nonterminal transitions 2299*4882a593Smuzhiyun # readsets = Readset (previously computed) 2300*4882a593Smuzhiyun # inclsets = Include sets (previously computed) 2301*4882a593Smuzhiyun # 2302*4882a593Smuzhiyun # Returns a set containing the follow sets 2303*4882a593Smuzhiyun # ----------------------------------------------------------------------------- 2304*4882a593Smuzhiyun 2305*4882a593Smuzhiyun def compute_follow_sets(self,ntrans,readsets,inclsets): 2306*4882a593Smuzhiyun FP = lambda x: readsets[x] 2307*4882a593Smuzhiyun R = lambda x: inclsets.get(x,[]) 2308*4882a593Smuzhiyun F = digraph(ntrans,R,FP) 2309*4882a593Smuzhiyun return F 2310*4882a593Smuzhiyun 2311*4882a593Smuzhiyun 
    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead symbols to grammar rules.
    #
    # Inputs:    lookbacks         -  Set of lookback relations
    #            followset         -  Computed follow set
    #
    # This function directly attaches the lookaheads to productions contained
    # in the lookbacks set
    # -----------------------------------------------------------------------------

    def add_lookaheads(self,lookbacks,followset):
        """Attach per-state lookahead terminals to productions.

        For each transition in lookbacks, copies that transition's follow
        set onto p.lookaheads[state] for every (state, production) lookback
        pair, avoiding duplicates.
        """
        for trans,lb in lookbacks.items():
            # Loop over productions in lookback
            for state,p in lb:
                if not state in p.lookaheads:
                    p.lookaheads[state] = []
                f = followset.get(trans,[])
                for a in f:
                    if a not in p.lookaheads[state]: p.lookaheads[state].append(a)

    # -----------------------------------------------------------------------------
    # add_lalr_lookaheads()
    #
    # This function does all of the work of adding lookahead information for use
    # with LALR parsing
    # -----------------------------------------------------------------------------

    def add_lalr_lookaheads(self,C):
        """Run the full DeRemer/Pennello pipeline to attach LALR lookaheads.

        C is the canonical collection of LR(0) item sets produced by
        lr0_items().
        """
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C,trans,nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C,trans,nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans,readsets,included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd,followsets)

    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Build the LR parsing tables (action and goto) state by state.

        Fills in self.lr_action and self.lr_goto, records shift/reduce and
        reduce/reduce conflicts in self.sr_conflicts / self.rr_conflicts,
        and writes a human-readable trace to self.log.
        """
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action       # Action array
        log    = self.log             # Logger for output

        actionp = { }                 # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]              # List of actions
            st_action  = { }
            st_actionp = { }
            st_goto    = { }
            log.info("")
            log.info("state %d", st)
            log.info("")
            for p in I:
                log.info("    (%d) %s", p.number, str(p))
            log.info("")

            for p in I:
                # Action table encoding: 0 = accept, negative = reduce by
                # rule -n, positive = shift to state n, None = error
                # (nonassoc conflict).
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action["$end"] = 0
                        st_actionp["$end"] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec,slevel = Productions[st_actionp[a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict.   In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp,rejectp = pp,oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp,rejectp = oldp,pp
                                    self.rr_conflicts.append((st,chosenp,rejectp))
                                    log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I,a)
                        j = self.lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError("Shift/shift conflict in state %d" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift
                                    rprec,rlevel = Productions[st_actionp[a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))

                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1
            log.info("")
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            not_used = 1
                            _actprint[(a,m)] = 1
            if not_used:
                log.debug("")

            # Construct the goto table for this state

            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1

    # -----------------------------------------------------------------------------
    # write()
    #
    # This function writes the LR parsing tables to a file
    # -----------------------------------------------------------------------------

    def write_table(self,modulename,outputdir='',signature=""):
        """Write the LR tables as an importable Python module.

        modulename -- dotted module name; only the last component is used
                      for the output filename.
        outputdir  -- directory for the generated .py file.
        signature  -- grammar signature embedded for staleness checks.

        On IOError, prints a message to stderr and returns without raising.
        """
        basemodulename = modulename.split(".")[-1]
        filename = os.path.join(outputdir,basemodulename) + ".py"
        try:
            f = open(filename,"w")

            f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    """ % (filename, __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller
            # (one entry per terminal with parallel state/value lists,
            # instead of one entry per (state, terminal) pair)
            if smaller:
                items = { }

                for s,nd in self.lr_action.items():
                    for name,v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([],[])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_action_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action:  _lr_action[_x] = { }
      _lr_action[_x][_k] = _y
del _lr_action_items
""")

            else:
                f.write("\n_lr_action = { ");
                for k,v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            if smaller:
                # Factor out names to try and make smaller
                items = { }

                for s,nd in self.lr_goto.items():
                    for name,v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([],[])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_goto_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = { }
       _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
            else:
                f.write("\n_lr_goto = { ");
                for k,v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            # Write production table
            f.write("_lr_productions = [\n")
            for p in self.lr_productions:
                if p.func:
                    f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
                else:
                    f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
            f.write("]\n")
            f.close()

        except IOError:
            e = sys.exc_info()[1]
            sys.stderr.write("Unable to create '%s'\n" % filename)
            sys.stderr.write(str(e)+"\n")
            return


    # -----------------------------------------------------------------------------
    # pickle_table()
    #
    # This function pickles the LR parsing tables to a supplied file object
    # -----------------------------------------------------------------------------

    def pickle_table(self,filename,signature=""):
        """Pickle the LR tables to *filename*.

        Dumps, in order: table version, parsing method, signature, action
        table, goto table, and a flattened production list.  Uses cPickle
        when available (Python 2), the pickle module otherwise.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        outf = open(filename,"wb")
        pickle.dump(__tabversion__,outf,pickle_protocol)
        pickle.dump(self.lr_method,outf,pickle_protocol)
        pickle.dump(signature,outf,pickle_protocol)
        pickle.dump(self.lr_action,outf,pickle_protocol)
        pickle.dump(self.lr_goto,outf,pickle_protocol)

        outp = []
        for p in self.lr_productions:
            if p.func:
                outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
            else:
                outp.append((str(p),p.name,p.len,None,None,None))
        pickle.dump(outp,outf,pickle_protocol)
        outf.close()

#
----------------------------------------------------------------------------- 2690*4882a593Smuzhiyun# === INTROSPECTION === 2691*4882a593Smuzhiyun# 2692*4882a593Smuzhiyun# The following functions and classes are used to implement the PLY 2693*4882a593Smuzhiyun# introspection features followed by the yacc() function itself. 2694*4882a593Smuzhiyun# ----------------------------------------------------------------------------- 2695*4882a593Smuzhiyun 2696*4882a593Smuzhiyun# ----------------------------------------------------------------------------- 2697*4882a593Smuzhiyun# get_caller_module_dict() 2698*4882a593Smuzhiyun# 2699*4882a593Smuzhiyun# This function returns a dictionary containing all of the symbols defined within 2700*4882a593Smuzhiyun# a caller further down the call stack. This is used to get the environment 2701*4882a593Smuzhiyun# associated with the yacc() call if none was provided. 2702*4882a593Smuzhiyun# ----------------------------------------------------------------------------- 2703*4882a593Smuzhiyun 2704*4882a593Smuzhiyundef get_caller_module_dict(levels): 2705*4882a593Smuzhiyun try: 2706*4882a593Smuzhiyun raise RuntimeError 2707*4882a593Smuzhiyun except RuntimeError: 2708*4882a593Smuzhiyun e,b,t = sys.exc_info() 2709*4882a593Smuzhiyun f = t.tb_frame 2710*4882a593Smuzhiyun while levels > 0: 2711*4882a593Smuzhiyun f = f.f_back 2712*4882a593Smuzhiyun levels -= 1 2713*4882a593Smuzhiyun ldict = f.f_globals.copy() 2714*4882a593Smuzhiyun if f.f_globals != f.f_locals: 2715*4882a593Smuzhiyun ldict.update(f.f_locals) 2716*4882a593Smuzhiyun 2717*4882a593Smuzhiyun return ldict 2718*4882a593Smuzhiyun 2719*4882a593Smuzhiyun# ----------------------------------------------------------------------------- 2720*4882a593Smuzhiyun# parse_grammar() 2721*4882a593Smuzhiyun# 2722*4882a593Smuzhiyun# This takes a raw grammar rule string and parses it into production data 2723*4882a593Smuzhiyun# ----------------------------------------------------------------------------- 
def parse_grammar(doc,file,line):
    # Convert the doc string of a p_rule() function into a list of
    # (file, dline, prodname, syms) tuples, one tuple per production.
    # *line* is the line number of the def itself, so numbering begins on
    # the following line.
    grammar = []
    # Split the doc string into lines
    pstrings = doc.splitlines()
    lastp = None
    dline = line
    for ps in pstrings:
        dline += 1
        p = ps.split()
        if not p: continue
        try:
            if p[0] == '|':
                # This is a continuation of a previous rule
                if not lastp:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
                prodname = lastp
                syms = p[1:]
            else:
                prodname = p[0]
                lastp = prodname
                syms   = p[2:]
                assign = p[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))

            grammar.append((file,dline,prodname,syms))
        except SyntaxError:
            raise
        except Exception:
            # Anything unexpected (e.g. a rule with no ':' at all) is reported
            # as a syntax error in the offending line.
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))

    return grammar

# -----------------------------------------------------------------------------
# ParserReflect()
#
# This class represents information extracted for building a parser including
# start symbol, error function, tokens, precedence list, action functions,
# etc.
# -----------------------------------------------------------------------------
class ParserReflect(object):
    def __init__(self,pdict,log=None):
        self.pdict      = pdict      # Module dictionary being introspected
        self.start      = None       # Start symbol ('start' variable)
        self.error_func = None       # p_error() handler, if any
        self.tokens     = None       # Token list
        self.files      = {}         # Files in which rules were defined
        self.grammar    = []         # Collected (funcname, production) pairs
        self.error      = 0          # Set to 1 if any validation fails

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        # The digest covers start symbol, precedence, tokens, and every rule
        # doc string; it is compared against saved tables to detect changes.
        try:
            import hashlib
        except ImportError:
            raise RuntimeError("Unable to import hashlib")
        try:
            sig = hashlib.new('MD5', usedforsecurity=False)
        except TypeError:
            # Some configurations don't appear to support two arguments
            sig = hashlib.new('MD5')
        try:
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(" ".join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    sig.update(f[3].encode('latin-1'))
        except (TypeError,ValueError):
            pass
        return sig.digest()

    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file. Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work). Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_files(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for filename in self.files.keys():
            base,ext = os.path.splitext(filename)
            # No idea what a non-.py file is. Assume it's okay and move on to
            # the next file (was "return 1", which skipped all remaining files).
            if ext != '.py': continue

            try:
                f = open(filename)
                lines = f.readlines()
                f.close()
            except IOError:
                continue

            counthash = { }
            for linen,l in enumerate(lines):
                linen += 1
                m = fre.match(l)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start,str):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        if self.error_func:
            if isinstance(self.error_func,types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = 1
                return

            eline = func_code(self.error_func).co_firstlineno
            efile = func_code(self.error_func).co_filename
            self.files[efile] = 1

            # A bound method carries an implicit self, hence the +ismethod
            if (func_code(self.error_func).co_argcount != 1+ismethod):
                self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
                self.error = 1

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get("tokens",None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = 1
            return

        terminals = {}
        for n in self.tokens:
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get("precedence",None)

    # Validate and parse the precedence map
    def validate_precedence(self):
        preclist = []
        if self.prec:
            if not isinstance(self.prec,(list,tuple)):
                self.log.error("precedence must be a list or tuple")
                self.error = 1
                return
            for level,p in enumerate(self.prec):
                if not isinstance(p,(list,tuple)):
                    self.log.error("Bad precedence table")
                    self.error = 1
                    return

                if len(p) < 2:
                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
                    self.error = 1
                    return
                assoc = p[0]
                if not isinstance(assoc,str):
                    self.log.error("precedence associativity must be a string")
                    self.error = 1
                    return
                for term in p[1:]:
                    if not isinstance(term,str):
                        self.log.error("precedence items must be strings")
                        self.error = 1
                        return
                    # Levels are 1-based; higher level == tighter binding
                    preclist.append((term,assoc,level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if name[:2] != 'p_': continue
            if name == 'p_error': continue
            if isinstance(item,(types.FunctionType,types.MethodType)):
                line = func_code(item).co_firstlineno
                file = func_code(item).co_filename
                p_functions.append((line,file,name,item.__doc__))

        # Sort all of the actions by line number
        p_functions.sort()
        self.pfuncs = p_functions


    # Validate all of the p_functions
    def validate_pfunctions(self):
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, file, name, doc in self.pfuncs:
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
                self.error = 1
            elif not func.__doc__:
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc,file,line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.files[file] = 1

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.
3018*4882a593Smuzhiyun 3019*4882a593Smuzhiyun for n,v in self.pdict.items(): 3020*4882a593Smuzhiyun if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue 3021*4882a593Smuzhiyun if n[0:2] == 't_': continue 3022*4882a593Smuzhiyun if n[0:2] == 'p_' and n != 'p_error': 3023*4882a593Smuzhiyun self.log.warning("'%s' not defined as a function", n) 3024*4882a593Smuzhiyun if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or 3025*4882a593Smuzhiyun (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): 3026*4882a593Smuzhiyun try: 3027*4882a593Smuzhiyun doc = v.__doc__.split(" ") 3028*4882a593Smuzhiyun if doc[1] == ':': 3029*4882a593Smuzhiyun self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", 3030*4882a593Smuzhiyun func_code(v).co_filename, func_code(v).co_firstlineno,n) 3031*4882a593Smuzhiyun except Exception: 3032*4882a593Smuzhiyun pass 3033*4882a593Smuzhiyun 3034*4882a593Smuzhiyun self.grammar = grammar 3035*4882a593Smuzhiyun 3036*4882a593Smuzhiyun# ----------------------------------------------------------------------------- 3037*4882a593Smuzhiyun# yacc(module) 3038*4882a593Smuzhiyun# 3039*4882a593Smuzhiyun# Build a parser 3040*4882a593Smuzhiyun# ----------------------------------------------------------------------------- 3041*4882a593Smuzhiyun 3042*4882a593Smuzhiyundef yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 3043*4882a593Smuzhiyun check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', 3044*4882a593Smuzhiyun debuglog=None, errorlog = None, picklefile=None): 3045*4882a593Smuzhiyun 3046*4882a593Smuzhiyun global parse # Reference to the parsing method of the last built parser 3047*4882a593Smuzhiyun 3048*4882a593Smuzhiyun # If pickling is enabled, table files are not created 3049*4882a593Smuzhiyun 3050*4882a593Smuzhiyun if picklefile: 3051*4882a593Smuzhiyun write_tables = 0 3052*4882a593Smuzhiyun 
3053*4882a593Smuzhiyun if errorlog is None: 3054*4882a593Smuzhiyun errorlog = PlyLogger(sys.stderr) 3055*4882a593Smuzhiyun 3056*4882a593Smuzhiyun # Get the module dictionary used for the parser 3057*4882a593Smuzhiyun if module: 3058*4882a593Smuzhiyun _items = [(k,getattr(module,k)) for k in dir(module)] 3059*4882a593Smuzhiyun pdict = dict(_items) 3060*4882a593Smuzhiyun else: 3061*4882a593Smuzhiyun pdict = get_caller_module_dict(2) 3062*4882a593Smuzhiyun 3063*4882a593Smuzhiyun # Collect parser information from the dictionary 3064*4882a593Smuzhiyun pinfo = ParserReflect(pdict,log=errorlog) 3065*4882a593Smuzhiyun pinfo.get_all() 3066*4882a593Smuzhiyun 3067*4882a593Smuzhiyun if pinfo.error: 3068*4882a593Smuzhiyun raise YaccError("Unable to build parser") 3069*4882a593Smuzhiyun 3070*4882a593Smuzhiyun # Check signature against table files (if any) 3071*4882a593Smuzhiyun signature = pinfo.signature() 3072*4882a593Smuzhiyun 3073*4882a593Smuzhiyun # Read the tables 3074*4882a593Smuzhiyun try: 3075*4882a593Smuzhiyun lr = LRTable() 3076*4882a593Smuzhiyun if picklefile: 3077*4882a593Smuzhiyun read_signature = lr.read_pickle(picklefile) 3078*4882a593Smuzhiyun else: 3079*4882a593Smuzhiyun read_signature = lr.read_table(tabmodule) 3080*4882a593Smuzhiyun if optimize or (read_signature == signature): 3081*4882a593Smuzhiyun try: 3082*4882a593Smuzhiyun lr.bind_callables(pinfo.pdict) 3083*4882a593Smuzhiyun parser = LRParser(lr,pinfo.error_func) 3084*4882a593Smuzhiyun parse = parser.parse 3085*4882a593Smuzhiyun return parser 3086*4882a593Smuzhiyun except Exception: 3087*4882a593Smuzhiyun e = sys.exc_info()[1] 3088*4882a593Smuzhiyun errorlog.warning("There was a problem loading the table file: %s", repr(e)) 3089*4882a593Smuzhiyun except VersionError: 3090*4882a593Smuzhiyun e = sys.exc_info() 3091*4882a593Smuzhiyun errorlog.warning(str(e)) 3092*4882a593Smuzhiyun except Exception: 3093*4882a593Smuzhiyun pass 3094*4882a593Smuzhiyun 3095*4882a593Smuzhiyun if debuglog is None: 
3096*4882a593Smuzhiyun if debug: 3097*4882a593Smuzhiyun debuglog = PlyLogger(open(debugfile,"w")) 3098*4882a593Smuzhiyun else: 3099*4882a593Smuzhiyun debuglog = NullLogger() 3100*4882a593Smuzhiyun 3101*4882a593Smuzhiyun debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) 3102*4882a593Smuzhiyun 3103*4882a593Smuzhiyun 3104*4882a593Smuzhiyun errors = 0 3105*4882a593Smuzhiyun 3106*4882a593Smuzhiyun # Validate the parser information 3107*4882a593Smuzhiyun if pinfo.validate_all(): 3108*4882a593Smuzhiyun raise YaccError("Unable to build parser") 3109*4882a593Smuzhiyun 3110*4882a593Smuzhiyun if not pinfo.error_func: 3111*4882a593Smuzhiyun errorlog.warning("no p_error() function is defined") 3112*4882a593Smuzhiyun 3113*4882a593Smuzhiyun # Create a grammar object 3114*4882a593Smuzhiyun grammar = Grammar(pinfo.tokens) 3115*4882a593Smuzhiyun 3116*4882a593Smuzhiyun # Set precedence level for terminals 3117*4882a593Smuzhiyun for term, assoc, level in pinfo.preclist: 3118*4882a593Smuzhiyun try: 3119*4882a593Smuzhiyun grammar.set_precedence(term,assoc,level) 3120*4882a593Smuzhiyun except GrammarError: 3121*4882a593Smuzhiyun e = sys.exc_info()[1] 3122*4882a593Smuzhiyun errorlog.warning("%s",str(e)) 3123*4882a593Smuzhiyun 3124*4882a593Smuzhiyun # Add productions to the grammar 3125*4882a593Smuzhiyun for funcname, gram in pinfo.grammar: 3126*4882a593Smuzhiyun file, line, prodname, syms = gram 3127*4882a593Smuzhiyun try: 3128*4882a593Smuzhiyun grammar.add_production(prodname,syms,funcname,file,line) 3129*4882a593Smuzhiyun except GrammarError: 3130*4882a593Smuzhiyun e = sys.exc_info()[1] 3131*4882a593Smuzhiyun errorlog.error("%s",str(e)) 3132*4882a593Smuzhiyun errors = 1 3133*4882a593Smuzhiyun 3134*4882a593Smuzhiyun # Set the grammar start symbols 3135*4882a593Smuzhiyun try: 3136*4882a593Smuzhiyun if start is None: 3137*4882a593Smuzhiyun grammar.set_start(pinfo.start) 3138*4882a593Smuzhiyun else: 3139*4882a593Smuzhiyun grammar.set_start(start) 
3140*4882a593Smuzhiyun except GrammarError: 3141*4882a593Smuzhiyun e = sys.exc_info()[1] 3142*4882a593Smuzhiyun errorlog.error(str(e)) 3143*4882a593Smuzhiyun errors = 1 3144*4882a593Smuzhiyun 3145*4882a593Smuzhiyun if errors: 3146*4882a593Smuzhiyun raise YaccError("Unable to build parser") 3147*4882a593Smuzhiyun 3148*4882a593Smuzhiyun # Verify the grammar structure 3149*4882a593Smuzhiyun undefined_symbols = grammar.undefined_symbols() 3150*4882a593Smuzhiyun for sym, prod in undefined_symbols: 3151*4882a593Smuzhiyun errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) 3152*4882a593Smuzhiyun errors = 1 3153*4882a593Smuzhiyun 3154*4882a593Smuzhiyun unused_terminals = grammar.unused_terminals() 3155*4882a593Smuzhiyun if unused_terminals: 3156*4882a593Smuzhiyun debuglog.info("") 3157*4882a593Smuzhiyun debuglog.info("Unused terminals:") 3158*4882a593Smuzhiyun debuglog.info("") 3159*4882a593Smuzhiyun for term in unused_terminals: 3160*4882a593Smuzhiyun errorlog.warning("Token '%s' defined, but not used", term) 3161*4882a593Smuzhiyun debuglog.info(" %s", term) 3162*4882a593Smuzhiyun 3163*4882a593Smuzhiyun # Print out all productions to the debug log 3164*4882a593Smuzhiyun if debug: 3165*4882a593Smuzhiyun debuglog.info("") 3166*4882a593Smuzhiyun debuglog.info("Grammar") 3167*4882a593Smuzhiyun debuglog.info("") 3168*4882a593Smuzhiyun for n,p in enumerate(grammar.Productions): 3169*4882a593Smuzhiyun debuglog.info("Rule %-5d %s", n, p) 3170*4882a593Smuzhiyun 3171*4882a593Smuzhiyun # Find unused non-terminals 3172*4882a593Smuzhiyun unused_rules = grammar.unused_rules() 3173*4882a593Smuzhiyun for prod in unused_rules: 3174*4882a593Smuzhiyun errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) 3175*4882a593Smuzhiyun 3176*4882a593Smuzhiyun if len(unused_terminals) == 1: 3177*4882a593Smuzhiyun errorlog.warning("There is 1 unused token") 3178*4882a593Smuzhiyun if len(unused_terminals) > 1: 
3179*4882a593Smuzhiyun errorlog.warning("There are %d unused tokens", len(unused_terminals)) 3180*4882a593Smuzhiyun 3181*4882a593Smuzhiyun if len(unused_rules) == 1: 3182*4882a593Smuzhiyun errorlog.warning("There is 1 unused rule") 3183*4882a593Smuzhiyun if len(unused_rules) > 1: 3184*4882a593Smuzhiyun errorlog.warning("There are %d unused rules", len(unused_rules)) 3185*4882a593Smuzhiyun 3186*4882a593Smuzhiyun if debug: 3187*4882a593Smuzhiyun debuglog.info("") 3188*4882a593Smuzhiyun debuglog.info("Terminals, with rules where they appear") 3189*4882a593Smuzhiyun debuglog.info("") 3190*4882a593Smuzhiyun terms = list(grammar.Terminals) 3191*4882a593Smuzhiyun terms.sort() 3192*4882a593Smuzhiyun for term in terms: 3193*4882a593Smuzhiyun debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) 3194*4882a593Smuzhiyun 3195*4882a593Smuzhiyun debuglog.info("") 3196*4882a593Smuzhiyun debuglog.info("Nonterminals, with rules where they appear") 3197*4882a593Smuzhiyun debuglog.info("") 3198*4882a593Smuzhiyun nonterms = list(grammar.Nonterminals) 3199*4882a593Smuzhiyun nonterms.sort() 3200*4882a593Smuzhiyun for nonterm in nonterms: 3201*4882a593Smuzhiyun debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) 3202*4882a593Smuzhiyun debuglog.info("") 3203*4882a593Smuzhiyun 3204*4882a593Smuzhiyun if check_recursion: 3205*4882a593Smuzhiyun unreachable = grammar.find_unreachable() 3206*4882a593Smuzhiyun for u in unreachable: 3207*4882a593Smuzhiyun errorlog.warning("Symbol '%s' is unreachable",u) 3208*4882a593Smuzhiyun 3209*4882a593Smuzhiyun infinite = grammar.infinite_cycles() 3210*4882a593Smuzhiyun for inf in infinite: 3211*4882a593Smuzhiyun errorlog.error("Infinite recursion detected for symbol '%s'", inf) 3212*4882a593Smuzhiyun errors = 1 3213*4882a593Smuzhiyun 3214*4882a593Smuzhiyun unused_prec = grammar.unused_precedence() 3215*4882a593Smuzhiyun for term, assoc in unused_prec: 3216*4882a593Smuzhiyun 
errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) 3217*4882a593Smuzhiyun errors = 1 3218*4882a593Smuzhiyun 3219*4882a593Smuzhiyun if errors: 3220*4882a593Smuzhiyun raise YaccError("Unable to build parser") 3221*4882a593Smuzhiyun 3222*4882a593Smuzhiyun # Run the LRGeneratedTable on the grammar 3223*4882a593Smuzhiyun if debug: 3224*4882a593Smuzhiyun errorlog.debug("Generating %s tables", method) 3225*4882a593Smuzhiyun 3226*4882a593Smuzhiyun lr = LRGeneratedTable(grammar,method,debuglog) 3227*4882a593Smuzhiyun 3228*4882a593Smuzhiyun if debug: 3229*4882a593Smuzhiyun num_sr = len(lr.sr_conflicts) 3230*4882a593Smuzhiyun 3231*4882a593Smuzhiyun # Report shift/reduce and reduce/reduce conflicts 3232*4882a593Smuzhiyun if num_sr == 1: 3233*4882a593Smuzhiyun errorlog.warning("1 shift/reduce conflict") 3234*4882a593Smuzhiyun elif num_sr > 1: 3235*4882a593Smuzhiyun errorlog.warning("%d shift/reduce conflicts", num_sr) 3236*4882a593Smuzhiyun 3237*4882a593Smuzhiyun num_rr = len(lr.rr_conflicts) 3238*4882a593Smuzhiyun if num_rr == 1: 3239*4882a593Smuzhiyun errorlog.warning("1 reduce/reduce conflict") 3240*4882a593Smuzhiyun elif num_rr > 1: 3241*4882a593Smuzhiyun errorlog.warning("%d reduce/reduce conflicts", num_rr) 3242*4882a593Smuzhiyun 3243*4882a593Smuzhiyun # Write out conflicts to the output file 3244*4882a593Smuzhiyun if debug and (lr.sr_conflicts or lr.rr_conflicts): 3245*4882a593Smuzhiyun debuglog.warning("") 3246*4882a593Smuzhiyun debuglog.warning("Conflicts:") 3247*4882a593Smuzhiyun debuglog.warning("") 3248*4882a593Smuzhiyun 3249*4882a593Smuzhiyun for state, tok, resolution in lr.sr_conflicts: 3250*4882a593Smuzhiyun debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) 3251*4882a593Smuzhiyun 3252*4882a593Smuzhiyun already_reported = {} 3253*4882a593Smuzhiyun for state, rule, rejected in lr.rr_conflicts: 3254*4882a593Smuzhiyun if (state,id(rule),id(rejected)) in already_reported: 
3255*4882a593Smuzhiyun continue 3256*4882a593Smuzhiyun debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 3257*4882a593Smuzhiyun debuglog.warning("rejected rule (%s) in state %d", rejected,state) 3258*4882a593Smuzhiyun errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 3259*4882a593Smuzhiyun errorlog.warning("rejected rule (%s) in state %d", rejected, state) 3260*4882a593Smuzhiyun already_reported[state,id(rule),id(rejected)] = 1 3261*4882a593Smuzhiyun 3262*4882a593Smuzhiyun warned_never = [] 3263*4882a593Smuzhiyun for state, rule, rejected in lr.rr_conflicts: 3264*4882a593Smuzhiyun if not rejected.reduced and (rejected not in warned_never): 3265*4882a593Smuzhiyun debuglog.warning("Rule (%s) is never reduced", rejected) 3266*4882a593Smuzhiyun errorlog.warning("Rule (%s) is never reduced", rejected) 3267*4882a593Smuzhiyun warned_never.append(rejected) 3268*4882a593Smuzhiyun 3269*4882a593Smuzhiyun # Write the table file if requested 3270*4882a593Smuzhiyun if write_tables: 3271*4882a593Smuzhiyun lr.write_table(tabmodule,outputdir,signature) 3272*4882a593Smuzhiyun 3273*4882a593Smuzhiyun # Write a pickled version of the tables 3274*4882a593Smuzhiyun if picklefile: 3275*4882a593Smuzhiyun lr.pickle_table(picklefile,signature) 3276*4882a593Smuzhiyun 3277*4882a593Smuzhiyun # Build the parser 3278*4882a593Smuzhiyun lr.bind_callables(pinfo.pdict) 3279*4882a593Smuzhiyun parser = LRParser(lr,pinfo.error_func) 3280*4882a593Smuzhiyun 3281*4882a593Smuzhiyun parse = parser.parse 3282*4882a593Smuzhiyun return parser 3283