/** A module which extends the {@link module:Base Base module}
* and supports creating scanners and SLR(1) parsers from BNF grammars.
* Parsers employ the [observer pattern](https://en.wikipedia.org/wiki/Observer_pattern)
* to be traced and to call actions written in JavaScript.
*
* The fundamental object created by this module is a {@linkcode module:BNF~Grammar Grammar}
* which could be prepared from grammar rules using the syntax defined
* {@link module:BNF~Grammar#bnf here}.
* Grammars are written as ordered pairs and can define precedences.
*
* The state table is created from sets of positions in rules,
* and conflicts are resolved using the lookahead and follow sets,
* and precedences, if available,
* i.e., it is a [simplified implementation of LR(1)](https://en.wikipedia.org/wiki/Simple_LR_parser).
* The table is not optimized.
*
* An {@link module:EBNF~Grammar EBNF grammar},
* optionally with precedences and `$error`,
* can be imported for SLR(1) parsing,
* but the {@link module:BNF BNF} and {@link module:EBNF EBNF} modules
* only depend on the {@link module:Base Base module} and not on each other.
*
* The grammar rules, precedence levels, and states
* are represented using a number of classes summarized below.
* {@linkcode module:BNF~Grammar#check check()} creates a state table
* which controls a {@linkcode module:BNF~Parser Parser}.
* A {@linkcode module:Base~Scanner Scanner} is created from the grammar's terminals.
*
* Objects are created using factory methods which check parameters
* and maintain inventories. All factory methods are defined in {@linkcode module:BNF~Grammar Grammar}
* and the factory methods for all but {@linkcode module:BNF~Message Message} have the same name as the classes
* (with lower-case initials).
*
* All properties have getters, very few have setters, i.e., there is
* no assignment outside class boundaries.
| class | main properties | main methods |
| ----- | --------------- | ------------ |
| {@linkcode module:BNF~Grammar Grammar}: {@linkcode module:Base~Factory Factory} | `config`, `ebnf`, `sr`, `rr`, `rules`: `Array<`{@linkcode module:BNF~Rule Rule}`>`,<br>`states`: `Array<`{@linkcode module:BNF~State State}`>` | {@linkcode module:BNF~Grammar#check check()}, {@linkcode module:BNF~Grammar#parser parser()}, {@linkcode module:BNF~Parser#build build()}, {@linkcode module:BNF~Parser#trace trace()} |
| {@linkcode module:BNF~Rule Rule} | `nt`, `symbols`: `Array<`{@linkcode module:Base~Symbol Symbol}`>`, `prec`, `empty`, `reached`, `finite`, `first`, `reduced` | |
| {@linkcode module:BNF~NT NT}: {@linkcode module:Base~NT NT}: {@linkcode module:Base~Symbol Symbol} | `name`, `ord`, `rules`: `Array<`{@linkcode module:BNF~Rule Rule}`>`, `empty`, `reached`, `finite`, `first`, `follow` | |
| {@linkcode module:BNF~Lit Lit}: {@linkcode module:Base~Lit Lit}: {@linkcode module:Base~T T}: {@linkcode module:Base~Symbol Symbol} | `name`, `value`, `ord`, `prec`, `used`, `first` | {@linkcode module:BNF~Lit#unescape unescape(s)} |
| {@linkcode module:BNF~Token Token}: {@linkcode module:Base~Token Token}: {@linkcode module:Base~T T}: {@linkcode module:Base~Symbol Symbol} | `name`, `pat`, `ord`, `prec`, `used`, `first` | |
| {@linkcode module:BNF~State State} | `marks`: `Array<`{@linkcode module:BNF~Mark Mark}`>`, `core`,<br>`messages`: `Object<ord,` {@linkcode module:BNF~Message Message}`>` | {@linkcode module:BNF~State#advance advance(state)}, {@linkcode module:BNF~State#equals equals(core)} |
| {@linkcode module:BNF~Mark Mark} | `rule`, `position`, `complete` | {@linkcode module:BNF~Mark#advance advance()}, {@linkcode module:BNF~Mark#equals equals(mark)} |
| {@linkcode module:BNF~Message Message} | `message`, `symbol`, `info` | |
| {@linkcode module:BNF~Parser Parser} | `grammar`, `stack[]`, `state`: {@linkcode module:BNF~State State}, `values[]` | {@linkcode module:BNF~Parser#parse parse(input)} |
@module BNF
@see module:Base
@author © 2023 Axel T. Schreiner <axel@schreiner-family.net>
@version 2024-07-25
*/
import * as Base from './base.js';
/** @mixin
@property {number} ord - global index; set in {@linkcode module:BNF~Grammar#check check()}.
@property {Object.<number, module:Base~T>} first - maps `ord` to `this`.
*/
const T = (superclass) => class extends superclass {
#ord = undefined;
get ord () { return this.#ord; }
set ord (value) { typeof this.#ord == 'undefined' && (this.#ord = value); }
#first = { };
get first () { return this.#first; }
/** Displays ordinal number, if any, and description of terminal.
@returns {string}
@memberof module:BNF~T
@instance
*/
dump () { return (this.ord >= 0 ? this.ord : '?') + ': ' + super.dump(); }
}
/** Represents a literal symbol for BNF.
@mixes module:BNF~T
@property {number} ord - global index; set in {@linkcode module:BNF~Grammar#check check()}.
@property {Object.<number, module:Base~T>} first - maps `ord` to `this`.
@extends module:Base~Lit
@property {string} name - representation for a literal.
Empty string is reserved for `$eof`, the end of input.
@property {Object} prec - precedence.
@property {string} [prec.assoc] - associativity, `'%left'`, `'%right'`, or `'%nonassoc'`, if any.
@property {number} [prec.level] - precedence level, from 0, if any.
@property {string} value - (unquoted) value for the literal; empty string for `$eof`, too.
@property {boolean} [screened] - set true only during scanner construction
if literal value matches a token pattern.
*/
class Lit extends T(Base.Lit) {
/** Creates a literal symbol for BNF;
see factory method {@linkcode module:BNF~Grammar#lit grammar.lit()}.
@param {string} [name] - a (quoted) representation for the literal.
*/
constructor (name) { super(name); }
}
/** Represents a token symbol for BNF.
mixes module:BNF~T
@property {number} ord - global index; set in {@linkcode module:BNF~Grammar#check check()}.
@property {Object.<number, module:Base~T>} first - maps `ord` to `this`.
@extends module:Base~Token
@property {string} name - name for the token.
Empty string is reserved for `$error`, can be something unexpected.
@property {Object} prec - precedence.
@property {string} [prec.assoc] - associativity, `'%left'`, `'%right'`, or `'%nonassoc'`, if any.
@property {number} [prec.level] - precedence level, from 0, if any.
@property {RegExp} pat - pattern for token; empty `RegExp` for `$error`.
@property {Array<Lit>} [screen] - contains literals with values matching the pattern, if any.
*/
class Token extends T(Base.Token) {
/** Creates a token symbol for BNF;
see factory method {@linkcode module:BNF~Grammar#token grammar.token()}.
@param {string} name - token name.
@param {RegExp} pat - for a token.
*/
constructor (name, pat) { super(name, pat); }
}
/** Represents a non-terminal symbol for BNF.
@property {number} index - non-terminal's index in `.nts`.
@property {number} ord - non-terminal's global index;
set in {@linkcode module:BNF~Grammar#check check()}.
@property {module:BNF~Rule[]} rules - defining `this`.
@property {boolean} empty - true if no input can be accepted.
@property {boolean} reached - true if this can be reached from rule zero.
@property {boolean} finite - true if there is a non-recursive expansion.
@property {Object.<number, module:BNF~T>} first - terminals at front,
maps ord to {@link module:BNF~T}.
@property {Object.<number, module:BNF~T>} follow - terminals following,
maps ord to {@link module:BNF~T}.
@extends module:Base~NT
@property {string} name - name for the non-terminal.
Empty string is reserved for `$accept`, can be left-hand side of a start rule.
*/
class NT extends Base.NT {
#index;
get index () { return this.#index; }
#ord = undefined;
get ord () { return this.#ord; }
set ord (value) { typeof this.#ord == 'undefined' && (this.#ord = value); }
#rules = [];
get rules () { return this.#rules; }
#empty;
get empty () { return this.#empty; }
set empty (_) { this.#empty = true; } // cannot unset it
#reached = false;
get reached () { return this.#reached; }
set reached (_) { this.#reached = true; } // cannot unset it
#finite = false;
get finite () { return this.#finite; }
set finite (_) { this.#finite = true; } // cannot unset it
#first = {};
get first () { return this.#first; }
#follow = {};
get follow () { return this.#follow; }
/** Creates a non-terminal symbol for BNF;
see factory method {@linkcode module:BNF~Grammar#nt grammar.nt()}.
@param {string} name - non-terminal's name.
@param {number} index - non-terminal's index in `.nts`.
*/
constructor (name, index) {
super(name);
this.#index = index;
}
/** Displays index or ord and name and contents of all sets.
@returns {string}
*/
dump () {
const result = [
(' ' + (this.ord ? this.ord : this.index)).substr(-3) + ': ' + this.toString()
];
if (this.empty) result.push('empty: true');
for (let set in { first:0, follow:0 }) {
const r = Object.values(this[set]).map(t => t.toString()).join(' ');
if (r.length) result.push(set + ': ' + r);
}
return result.join('\n\t');
}
}
/** Represents a BNF rule, i.e., an ordered pair.
@property {module:BNF~NT} nt - rule's non-terminal (left-hand side).
@property {Array<module:Base~Symbol>} symbols - rule's right-hand side.
@property {number} index - rule's index in {@linkcode module:BNF~Grammar grammar.rules}.
@property {boolean} empty - computed from `.symbols`.
@property {boolean} reached - true if this can be reached from rule zero.
@property {boolean} finite - true if all non-terminals in the
right-hand side have {@link module:BNF~NT}`.finite` set.
@property {Object.<number, module:BNF~T>} first - terminals at front,
maps ord to {@link module:BNF~T}.
@property {boolean} reduced - true if this rule has been reduced.
@property {?Object} prec - precedence.
@property {string} prec.assoc - associativity, `'%left'`, `'%right'` or `'%nonassoc'`
if any.
@property {number} prec.level - precedence level, from 0.
@property {module:BNF~T} prec.t - terminal providing the precedence.
*/
class Rule {
#nt;
get nt () { return this.#nt; }
#symbols;
get symbols () { return this.#symbols; }
#index;
get index () { return this.#index; }
get empty () { return !this.symbols.length; }
#reached = false;
get reached () { return this.#reached; }
set reached (_) { this.#reached = true; } // cannot unset it
#finite = false;
get finite () { return this.#finite; }
set finite (_) { this.#finite = true; } // cannot unset it
#first = {};
get first () { return this.#first; }
#reduced = false;
get reduced () { return this.#reduced; }
set reduced (_) { this.#reduced = true; } // cannot unset it ??
#prec;
get prec () { return this.#prec; }
/** Creates a BNF rule;
see {@linkcode module:BNF~Grammar#rule rule()} factory method.
@param {module:BNF~NT} nt - left-hand side, non-terminal.
@param {module:Base~Symbol[]} symbols right-hand side.
@param {number} index - rule's index in {@link module:BNF~Grammar}`.rules`.
@param {?Object} [prec] - precedence, if any.
*/
constructor (nt, symbols, index, prec) {
this.#nt = nt;
this.#symbols = symbols;
this.#index = index;
this.#prec = prec;
}
/** Displays a rule in BNF notation.
@param {number} [mark] - precedes a symbol on the right-hand side if it is in range.
@returns {string}
*/
toString (mark) {
let result = this.nt + ': ' +
this.symbols.map((symbol, n) => (n == mark ? '● ' : '') + symbol).join(' ');
if (this.symbols.length == mark) result += (this.symbols.length ? ' ●' : '●');
if (this.prec) result += ' %prec ' + this.prec.t;
return result + ';';
}
/** Displays index, rule, `empty`, and content of `first`.
@returns {string}
*/
dump () {
const result = [
(this.index >= 0 ? (' ' + this.index).substr(-3) + ': ' : '') + this.toString()
];
if (this.empty) result.push('empty: true');
const r = Object.values(this.first).map(t => t.toString()).join(' ');
if (r.length) result.push('first: ' + r);
return result.join('\n\t');
}
}
/** Represents a message of the parser automaton.
@property {string} verb - one of `'accept'`, `'error'`, `'goto'`, `'reduce'`, or `'shift'`.
@property {module:Base~Symbol} symbol - symbol on which to message.
@property {Number|module:BNF~Rule} info - additional information, if any.
*/
class Message {
#verb;
get verb () { return this.#verb; }
#symbol;
get symbol () { return this.#symbol; }
#info;
get info () { return this.#info; }
set info (info) { this.#info = info; }
/** Creates a new message;
see factory methods {@linkcode module:BNF~Grammar#accept grammar.accept()},
{@linkcode module:BNF~Grammar#reduce grammar.reduce()},
{@linkcode module:BNF~Grammar#shift_or_goto grammar.shift_or_goto()},
and {@linkcode module:BNF~Parser#observe parser.observe()}.
@param {string} verb - one of `'accept'`, `'error'`, `'goto'`, `'reduce'`, or `'shift'`.
@param {module:BNF~T|module:BNF~NT} symbol - symbol on which to send message.
@param {Number|module:BNF~Rule} info - additional information, if any.
*/
constructor (verb, symbol, info) {
this.#verb = verb;
this.#symbol = symbol;
this.#info = info;
}
/** Displays symbol, message, and additional information if any.
@returns {string}
*/
toString () {
let result = (this.symbol + ' ').substr(0, 13) + this.verb;
switch (this.verb) {
case 'goto':
case 'shift': result += ' ' + this.info; break;
case 'reduce': result += ' (' + this.info + ')'; break;
}
return result;
};
}
/** Represents a mark in a rule.
@property {function()} assert - bound to {@linkcode module:BNF~Grammar#assert grammar.assert()}.
@property {function()} mark - bound to {@linkcode module:BNF~Grammar#mark grammar.mark()}.
@property {module:BNF~Rule} rule - rule to mark.
@property {number} position - position in rule, before a symbol or after all.
@property {boolean} complete - true if position is after all symbols.
*/
class Mark {
#assert;
get assert () { return this.#assert; }
#mark;
get mark () { return this.#mark; }
#rule;
get rule () { return this.#rule; }
#position;
get position () { return this.#position; }
#complete;
get complete () { return this.#complete; }
/** Creates a new mark for a rule;
see factory method {@linkcode module:BNF~Grammar#mark grammar.mark()}.
@param {module:BNF~Grammar} grammar - supplies factory method.
@param {module:BNF~Rule} rule - rule to mark.
@param {number} position - position in rule, before a symbol or after all.
*/
constructor (grammar, rule, position) {
// "inherit" assert() and mark()
this.#assert = grammar.constructor.prototype.assert.bind(grammar);
this.#mark = grammar.constructor.prototype.mark.bind(grammar);
this.#rule = rule;
this.#position = position;
this.#complete = position == this.rule.symbols.length;
}
/** Displays the marked rule.
@returns {string}
*/
toString () { return this.rule.toString(this.position); }
/** Compares two marks.
@param {module:BNF~Mark} c to compare to `this`.
@returns true if same rule and same position.
*/
equals (c) {
this.assert(c instanceof Mark, 'equals():', c, 'not a marked rule');
return this.rule === c.rule && this.position == c.position;
}
/** Advances the mark.
@returns {module:BNF~Mark} a new configuration with the mark moved right,
i.e., `position` increased by 1.
*/
advance () { return this.mark(this.rule, this.position + 1); }
}
/** Represents a state of the automaton.
@property {module:BNF~Grammar} grammar - owner of this state.
@property {module:BNF~Mark[]} marks - core and closure defining this state.
@property {number} core - number of marked rules in the core.
@property {Object.<number, module:BNF~Message>} messages - maps possible next symbols to messages.
@property {string[]} errors - errors detected in this state, if any.
*/
class State {
#grammar;
get grammar () { return this.#grammar; }
#marks;
get marks () { return this.#marks; }
#core;
get core () { return this.#core; }
#messages;
get messages () { return this.#messages; }
#errors = [];
get errors () { return this.#errors; }
/** Creates a new state of the automaton;
see factory method {@linkcode module:BNF~Grammar#state grammar.state()}.
@param {module:BNF~Grammar} grammar - owner of this state.
@param {module:BNF~Mark[]} marks - core and closure defining this state.
@param {number} core - number of marked rules in the core.
@param {Object.<number, module:BNF~Message>} messages - maps possible next symbols to `null`.
*/
constructor (grammar, marks, core, messages) {
this.#grammar = grammar;
this.#marks = marks;
this.#core = core;
this.#messages = messages;
}
/** Displays core configurations and messages.
@returns {string}
*/
toString () { return this.dump(true); }
/** Displays all marked rules and messages.
@param {boolean} core if true, only displays core configurations.
@returns {string}
*/
dump (core) {
return ' ' +
(core ? this.marks.slice(0, this.core) : this.marks).
map(mark => mark.toString()).
concat('', ... Object.values(this.messages).
map(a => (a ? a.toString() : 'null'))).join('\n ') +
(this.errors.length ? '\n' + this.errors.join('\n') : '');
}
/** Compares a core of marked rules to this state's core.
@param {module:BNF~Mark[]} core - to compare to `this`.
@returns true if this state has the same core (in any order).
*/
equals (core) {
this.grammar.assert(core instanceof Array && core.every(mark => mark instanceof Mark),
'equals():', core, 'not a list of marked rules');
// same core sizes?
return this.core == core.length &&
// same elements?
this.marks.slice(0, this.core).every(a => core.some(b => a.equals(b)));
}
/** Populates the `.messages` table. Fills in `reduce` for
complete rules, `shift` for terminals, `accept`
for the end of input terminal, and `goto` for non-terminals.
@param {number} stateNumber - this state's number for error messages.
*/
advance (stateNumber) {
this.grammar.assert(typeof stateNumber == 'number' && this.grammar.states[stateNumber] === this,
'advance():', stateNumber, 'invalid state number');
// following construction, messages maps every literal/token/non-terminal
// which can follow this state to null.
const error = (... s) =>
(this.errors.push(s.join(' ')), this.grammar.message('state', stateNumber + ':', ... s));
// create reduce messages for complete rules
this.marks.forEach(mark => {
if (mark.complete) {
const rule = mark.rule; // rule we are in
// for each terminal which can follow the rule in the grammar
for (let t in rule.nt.follow) { // ordinal number
const f = rule.nt.follow[t]; // terminal which can follow
if (!(t in this.messages)) { // can it follow in this state?
// if t is not in messages it cannot follow this state -> reduce
rule.reduced = true;
this.messages[t] = this.grammar.reduce(f, rule);
} else if (this.messages[t] == null) {
// if (t, null) is in messages, depending on precedences we might have a s/r conflict
if (rule.prec && f.prec.assoc) { // rule and terminal have defined precedence
if (rule.prec.level > f.prec.level) { // rule's precedence is higher -> reduce
rule.reduced = true;
this.messages[t] = this.grammar.reduce(f, rule);
} else if (rule.prec.level < f.prec.level) {
// rule's precedence is lower -> shift (below)
} else // equal precedence
switch (rule.prec.assoc) {
case '%left': // left-associative -> reduce
rule.reduced = true;
this.messages[t] = this.grammar.reduce(f, rule);
case '%right': // right-associative -> shift (below)
break;
case '%nonassoc': // non-associative -> error action
rule.reduced = true; // avoid message
delete this.messages[t]; // i.e. f as input would be an error
}
} else { // no precedence available
++ this.grammar.sr;
error('shift/reduce conflict between',
f.toString(), 'and rule', '(' + rule + ')');
} // resolved as a shift (below)
} else {
// t is in messages and messages[t] is already set as a reduce
const r2 = this.messages[t].info; // the conflict
++ this.grammar.rr;
error('for', f.toString(), 'reduce/reduce conflict between',
'(' + rule + ')', 'and', '(' + r2 + ')');
// resolve for rule which is first in the grammar
if (rule.index < r2.index) this.messages[t].info = rule;
} // done with this t
} // done with every t which can follow
} // done with every complete mark
}, this);
// create accept/shift messages for each next symbol which has none
for (let a in this.messages) {
if (this.messages[a] == null) {
if (a == this.grammar.lit().ord) {
// special case: $eof
this.messages[a] = this.grammar.accept();
this.grammar.rules[0].reduced = true;
} else {
// create next core by advancing marker over one symbol
const next = [ ];
let symbol = null;
this.marks.forEach(mark => {
// find a as next symbol in all marks
if (!mark.complete && a == mark.rule.symbols[mark.position].ord) {
// remember symbol and push mark after symbol
symbol = mark.rule.symbols[mark.position];
next.push(mark.advance());
}
}, this);
// add new state with next as core, if any
// shift/goto existent or new state
if (!this.grammar.states.some((state, s) => state.equals(next) ?
(this.messages[a] = this.grammar.shift_or_goto(symbol, s), true) : false
, this)) {
this.messages[a] = this.grammar.shift_or_goto(symbol, this.grammar.states.length);
this.grammar.states.push(this.grammar.state(next));
}
}
} // done with all symbols w/out a message
} // done with all symbols
}
}
/**
* Represents a context-free grammar to create SLR(1) parsers.
* Contains factory methods to create objects to represent the grammar as a tree
* and to represent the parsers' state table.
* <p>A `Grammar` object can be asked to generate
* {@link module:Base~Factory#scanner scanners} and [parsers](#parser)
* to process input sentences conforming to the grammar.
* If a parser is called with suitable {@link module:Base~Action actions}
* it can transform input.
* @property {?module:EBNF~Grammar} ebnf - set only if created from EBNF Grammar.
* @property {Array<module:BNF~Rule>} rules - list of grammar rules, can be pushed.
* @property {Array<module:BNF~State>} states - list of possible states for parser.
* @property {number} sr - number of shift/reduce conflicts.
* @property {number} rr - number of reduce/reduce conflicts.
*
* @extends module:Base~Factory
* @property {Object.<string, Object>} config - maps names to configurable values.
* @property {function(string[])} config.log - function to print strings, by default `console.log`.
* @property {RegExp} config.lits - restricts literal representation, by default single-quoted;
* must be anchored.
* @property {RegExp} config.tokens - restricts token names, by default alphanumeric;
* must be anchored.
* @property {RegExp} config.nts - restricts non-terminal names, by default alphanumeric;
* must be anchored.
* @property {string} config.uniq - prefix for unique non-terminal names, by default `$-`.
*
* @property {boolean} [config.error] - if true, insert `$error` when translating `some`.
* @property {boolean} [config.lookahead] - if true, trace lookahead when parsing.
* @property {RegEx} [config.trace] - if set, observe with
* {@linkcode module:BNF~Parser#trace grammar.trace( , config.trace)};
* only affects {@linkcode module:BNF~Grammar#parser grammar.parser()}.
* @property {boolean} [config.build] - if set, build with
* {@linkcode module:BNF~Parser#build grammar.build()};
* only affects {@linkcode module:BNF~Grammar#parser grammar.parser()}.
*
* @property {Array<module:Base~Lit>} lits - list of unique literals, can be pushed.
* @property {Object.<string, module:Base~Lit>} litsByName - maps `'x'` to unique literal.
* @property {Array<module:Base~Token>} tokens - list of unique tokens, can be pushed.
* @property {Object.<string, module:Base~Token>} tokensByName - maps name to unique token.
* @property {Array<module:Base~Precedence>} levels - list of precedence levels, can be pushed.
* @property {Array<module:Base~NT>} nts - list of unique non-terminals, can be pushed.
* @property {Object.<string, module:Base~NT>} ntsByName - maps name to unique non-terminal.
* @property {number} errors - incremented by {@linkcode module:Base~Factory#error error()} method.
*
* @example <caption> LL(1) recursive descent parsing </caption>
* const e = new EBNF.Grammar(' ... grammar ... ')
* e.parser(/ ... skip .../)(' ... input ... ')
* e.parser(/ ... skip .../)(' ... input ... ', { ... actions ... })
* @example <caption> equivalent SLR(1) stack-based parsing </caption>
* const b = BNF.Grammar.fromEBNF(e)
* b.parser(/ ... skip .../)(' ... input ... ')
* b.parser(/ ... skip .../)(' ... input ... ', { ... actions ... })
* @example <caption> details </caption>
* const b = BNF.Grammar.fromEBNF(e)
* const s = b.scanner(/ ... skip ... /)
* new BNF.Parser(b).parse(s.scan(' ... input ... ').concat(null), b.build())
* new BNF.Parser(b).parse(s.scan(' ... input ... ').concat(null), b.build({ ... actions ... }))
*/
class Grammar extends Base.Factory {
#ebnf = null;
get ebnf () { return this.#ebnf; }
set ebnf (value) { this.#ebnf = value; }
#rules = [];
get rules () { return this.#rules; }
#states = [];
get states () { return this.#states; }
#sr = 0;
get sr () { return this.#sr; }
set sr (value) { this.#sr = value; }
#rr = 0;
get rr () { return this.#rr; }
set rr (value) { this.#rr = value; }
/** Creates a grammar representation.
Creates the `$accept` non-terminal, `$eof` *end of input* literal, and `$error` token,
and reserves rule zero: `$accept:` *start* `$eof;`.
Defines tokens, if any.
@param {?string} [grammar] - the grammar to represent,
using the {@link module:BNF~Grammar.bnf BNF grammar}
and {@link module:BNF~Grammar.terminals BNF token and literal notation}.
This can be omitted to construct the rules directly using the factory methods.
@param {?Object.<string, RegExp>} [tokens] - maps token names, if any,
in the new grammar to their patterns
which must not accept empty input, must not use `d`, `g`, or `y` flag,
should not be anchored, and should use `(:? )` rather than `( )` for grouping.
`tokens` can map the empty string to a skip pattern
which will be used to interpret the grammar string.
@param {Object.<string, Object>} [config] - overwrites configurable values' defaults;
loaded first but can only be the third parameter.
@throws {Error} an error for bad token definitions or syntax errors in the grammar.
*/
constructor (grammar = '', tokens = {}, config = {}) {
super();
// create rule zero and the singletons.
this.rule(this.nt()); // reserve $accept and rule zero
this.lit().used = true; // reserve $eof (used in rule zero)
this.token(); // reserve $error
// load configuration, if any
if (typeof config == 'object' && config !== null)
Object.assign(this.config, config);
// compile grammar into this?
if (typeof grammar == 'string') {
// skip pattern
let skip = /\s+/;
// load tokens, [''] is skip
if (typeof tokens == 'object' && tokens !== null) {
if ('' in tokens) {
skip = (tokens['']);
delete tokens[''];
}
Object.entries(tokens).forEach(kv => this.token(kv[0], kv[1]), this);
}
// need to send output to the new config
Grammar.grammar.config.log = this.config.log;
// compile grammar
Grammar.grammar.parser(skip).parse(grammar, new Actions(this));
// only load tokens (from grammar)
} else if (typeof grammar == 'object' && grammar !== null) {
delete grammar['']; // if any
this.add(grammar); // i.e., the token definitions if any
}
}
/** Completes the grammar representation and reports if there are errors;
call exactly once.
Creates rule zero: `$accept:` *start* `$eof;`.
Sets ordinal number for literals, then tokens, then non-terminals.
Checks that all rules are reached and all non-terminals can reduce to a terminal.
Computes first and follow sets.
Creates state table.
Checks that all rules can be reduced.
@param {module:BNF~NT} start - the *start* non-terminal.
*/
check (start) {
this.assert(!this.rules[0].symbols.length, 'check():', this, 'can only check a grammar once');
this.assert(start instanceof NT, 'check():', start, 'start must be a non-terminal');
// create rule zero
this.rules[0].symbols.push(start, this.lit());
// set ord, create first for literals
this.lits.forEach((lit, n) => lit.first[lit.ord = n] = lit);
// set ord after literals, create first for tokens
this.tokens.forEach((token, n) => token.first[token.ord = n + this.lits.length] = token, this);
// set ord after literals and tokens
this.nts.forEach((nt, n) => nt.ord = n + this.tokens.length + this.lits.length, this);
// check that all non-terminals are defined
this.nts.forEach(nt => !nt.rules.length && this.error(nt.name, 'is undefined'), this);
// set and check recursively that all non-terminals can be reached from rule 0
const setReached = rule =>
rule.reached || (
rule.reached = rule.nt.reached = true,
rule.symbols.forEach(nt => nt instanceof NT && nt.rules.forEach(setReached)));
setReached(this.rules[0]);
this.nts.forEach(
nt => nt.reached || this.error(nt.name, 'cannot be reached from rule zero'), this);
// check that all non-terminals have a finite expansion
let changed;
do {
changed = false; // until no more changes
this.rules.forEach(rule => { // check rule
if (!rule.nt.finite && // undecided?
rule.symbols.some(sym => sym instanceof Base.T || sym.finite)) // has T or finite NT
changed = rule.finite = rule.nt.finite = true; // changed: finite!
});
} while (changed);
this.nts.forEach(nt => nt.finite || this.error(nt.name, 'is not finite'), this);
// add elements in bs to as, return true if change
const merge = (as, bs) => Object.entries(bs).reduce(
(result, kv) => kv[0] in as ? result : (as[kv[0]] = kv[1], true), false);
// compute first for non-terminals and rules
do {
changed = false; // until no more changes
// for each rule with non-empty rhs
this.rules.forEach((rule, r) => {
if (rule.symbols.length) {
// for each symbol in rhs, as long as they accept empty
if (rule.symbols.every(sym => {
// add symbol's first to rule's first
if (merge(rule.first, sym.first)) changed = true;
// continue only if nt accepts empty
return sym instanceof NT ? sym.empty : false;
}))
// if all accept empty, so does rule
if (!rule.empty) changed = rule.empty = true;
// add rule's first to nt's first, ditto empty
if (merge(rule.nt.first, rule.first)) changed = true;
if (!rule.nt.empty && rule.empty) changed = rule.nt.empty = true;
}
});
} while (changed);
// compute follow for non-terminals
do {
changed = false; // until no more changes
// for each non-empty rule
this.rules.forEach(rule => rule.symbols.length &&
// over rhs in reverse order
rule.symbols.reduceRight((follow, last) => {
if (last instanceof NT) {
if (merge(last.follow, follow)) changed = true;
if (last.empty) // if empty, pass follow further forward
return Object.assign({}, last.first, follow);
}
// otherwise, pass first forward
return last.first;
}, rule.nt.follow));
} while (changed);
// create state[0]: mark rule 0 in position 0
this.states.push(this.state([this.mark(this.rules[0], 0)]));
// tell each state to advance; this creates new states which are also advanced
for (let s = 0; s < this.states.length; ++ s)
this.states[s].advance(s);
// check that all rules can be reduced
this.rules.forEach((rule, r) => rule.reduced ||
this.error('rule', r, '(' + rule + ')', 'is never reduced'), this);
if (this.errors) this.message('errors: ' + this.errors);
if (this.sr) this.message('shift/reduce conflicts: ' + this.sr);
if (this.rr) this.message('reduce/reduce conflicts: ' + this.rr);
}
/** Factory method to create a unique literal symbol, maintains `.lits` and `.litsByName`
@param {string} [literal] - literal's representation conforming to `.config.lits`.
If omitted represents the `$eof` literal terminal.
@param {boolean} [used] - if `true` mark literal as used.
@returns {module:BNF~Lit} a unique literal.
*/
lit (literal = '', used) {
// return existing literal?
let lit = this.litsByName[literal];
if (! lit) {
// create new literal
lit = new Lit(literal);
this.add(lit);
}
if (used) lit.used = true;
return lit;
}
/** Factory method to create a unique token symbol, maintains `.tokens` and `.tokensByName`.
@param {string} [name] - token's name conforming to `.config.tokens`; error if a non-terminal.
If omitted represents the `$error` token with an empty `RegExp`.
@param {RegExp} [pat] - pattern to match values representing the token in input;
used only when the token is created,
must not accept empty input, must not use `d`, `g`, or `y` flag,
should not be anchored, should use `(:? )` rather than `( )` for grouping.
@param {boolean} [used] - if `true` mark token as used.
@returns {module:BNF~Token} a unique token.
*/
token (name = '', pat, used) {
// return existing token?
let token = this.tokensByName[name];
if (! token) {
// don't allow non-terminal
if (name != '' && name in this.ntsByName)
this.error(name, 'is already defined as a non-terminal');
// create new token
token = new Token(name, name.length ? pat : new RegExp());
this.add(token);
}
if (used) token.used = true;
return token;
}
/** Factory method to create a unique non-terminal symbol, maintains `.nts` and `.ntsByName`.
@param {string} [name] - non-terminal's name conforming to `config.nts`; error if a token.
If omitted represents the `$accept` non-terminal,
if not a string creates a unique name (intended for EBNF translation).
@returns {module:BNF~NT} a unique non-terminal.
*/
nt (name = '') {
// unique name?
if (typeof name != 'string') name = this.config.uniq + this.nts.length;
// return existing non-terminal?
let nt = this.ntsByName[name];
if (! nt) {
// don't allow token
if (name != '' && name in this.tokensByName)
this.error(name, 'is already defined as a token');
// create new non-terminal
nt = new NT(name, this.nts.length);
this.add(nt);
}
return nt;
}
/** Factory method to create a rule representation for BNF.
Maintains rule's non-terminal's `.rules` and `this.rules`.
Maintains rule's non-terminal's `.empty`.
Precedence levels have to be defined prior to using this method.
@param {module:BNF~NT} nt - left-hand side, non-terminal.
@param {module:Base~Symbol[]} [symbols] - right-hand side, list of symbols.
@param {?module:Base~T} [terminal] - can define rule's precedence,
by default the precedence of the rightmost terminal, if any.
@returns {module:BNF~Rule} a new rule representation.
*/
rule (nt, symbols = [], terminal = null) {
this.assert(nt instanceof NT, 'rule():', nt, 'not a non-terminal');
this.assert(symbols instanceof Array &&
symbols.every(s => s instanceof Lit || s instanceof Token || s instanceof NT),
'rule():', symbols, 'not an array of terminals and non-terminals');
if (terminal === null) // implicit precedence from last terminal with precedence if any
symbols.forEach(s => { if (!(s instanceof NT) && s.prec.assoc) terminal = s; });
else { // explicit precedence
this.assert(terminal instanceof Base.T, 'rule():', terminal, 'not a terminal');
if (!terminal.prec.assoc) {
this.error(terminal + ' has undefined precedence');
terminal = null;
}
}
// create new rule
const rule = new Rule(nt, symbols, this.rules.length,
terminal === null ? null : Object.assign({}, { t: terminal }, terminal.prec));
// check for empty unless rule zero
if (this.rules.length && !symbols.length) nt.empty = true;
// add new rule to rule's nt's rules and this.rules
rule.nt.rules.push(rule);
this.rules.push(rule);
return rule;
}
/** Factory method to create the `accept` message for `$eof`.
@returns {module:BNF~Message} an object representing the message.
*/
accept () {
return new Message('accept', this.lit(), '');
}
/** Factory method to create a `reduce` message.
@param {module:Base~T} t - terminal on which to send message.
@param {module:BNF~Rule} rule - rule to reduce.
@returns {module:BNF~Message} an object representing the message.
*/
reduce (t, rule) {
this.assert(t instanceof Base.T, 'reduce():', t, 'not a terminal symbol');
this.assert(rule instanceof Rule, 'reduce():', rule, 'not a Rule');
return new Message('reduce', t, rule);
}
/** Factory method to create a `shift` or `goto` message.
@param {module:Base~Symbol} symbol - symbol on which to send message.
@param {number} state - state to shift to.
@returns {module:BNF~Message} an object representing the message.
*/
shift_or_goto (symbol, state) {
this.assert(symbol instanceof Base.Symbol, 'shift_or_goto():', symbol, 'not a symbol');
this.assert(typeof state == 'number' && state >= 0 && state <= this.states.length,
'shift_or_goto():', state, 'invalid state');
return new Message(symbol instanceof Base.T ? 'shift' : 'goto', symbol, state);
}
/** Factory method to represent a mark in a rule.
@param {module:BNF~Rule} rule - rule to mark.
@param {number} position - position in rule, before a symbol or after all.
@returns {module:BNF~Mark} an object representing the marked rule.
*/
mark (rule, position) {
this.assert(rule instanceof Rule, 'mark():', rule, 'not a Rule');
this.assert(typeof position == 'number' && position >= 0 && position <= rule.symbols.length,
'mark():', position, 'invalid position');
return new Mark(this, rule, position);
}
/** Factory method to represent a state of the parser automaton.
The state is created from the core marks; rules in the closure are added.
@param {module:BNF~Mark[]} core - list of marks in the core, closure is added.
@returns {module:BNF~State} an object representing the state.
*/
state (core) {
this.assert(core instanceof Array && core.every(mark => mark instanceof Mark),
'state():', core, 'not an array of marked rules');
const coreLength = core.length; // for efficient state comparison
const messages = {}; // initialized to map all possible next symbols to null
// compute closure: loop over core and added marks
for (let c = 0; c < core.length; ++ c)
// for each incomplete mark
if (!core[c].complete) {
// next symbol in a mark
const s = core[c].rule.symbols[core[c].position];
if (s instanceof NT && !(s.ord in messages))
// add all rules for a new non-terminal, marked at 0
s.rules.forEach(rule => core.push(this.mark(rule, 0)), this);
// map this next terminal or non-terminal to null
messages[s.ord] = null;
}
return new State(this, core, coreLength, messages);
}
/** Factory method to create a parser to recognize and process input.
@param {RegEx} [skip] - a pattern to define ignorable character sequences,
by default white space,
must not accept empty input, must not use flags, must not be anchored,
should use `(:? )`rather than `( )` for grouping.
@returns {module:BNF~Parser} the parser.
*/
parser (skip = new RegExp('\\s+')) { // /\s+/ crashes jsdoc
this.assert(skip instanceof RegExp, 'parser():', skip, 'not a regular expression');
return new Parser(this, skip);
}
/** Displays description of grammar and number of errors if any.
@param {boolean} [states] - if true, also displays state table.
@returns {string}
*/
toString (states) {
const result = [];
if (this.ebnf) result.push('EBNF:', '', this.ebnf.toString(), '', 'BNF:', '');
if (this.levels.length)
result.push(... this.levels.map(level => ' ' + level.toString()), '');
if (this.rules.length)
result.push(... this.rules.map((rule, n) => (' ' + n).substr(-3) + ' ' + rule.toString()), '');
result.push('literals: ' + this.lits.filter(lit => lit.used).join(', '));
result.push('tokens: ' + this.tokens.filter(token => token.used).
map(token => token + ' ' + token.pat).join(', '));
result.push('non-terminals: ' + this.nts.join(', '));
if (states && this.states.length) result.push('',
this.states.map((state, n) => 'state ' + n + '\n' + state.toString()).join('\n\n'));
if (this.errors + this.sr + this.rr) result.push('');
if (this.errors) result.push('errors: ' + this.errors);
if (this.sr) result.push('shift/reduce conflicts: ' + this.sr);
if (this.rr) result.push('reduce/reduce conflicts: ' + this.rr);
return result.join('\n');
}
/** Displays grammar, terminals, and non-terminals with name and contents of all sets.
Displays number of errors if any.
@param {?Object} a - with one argument (kludge!) acts as a static method and displays the argument
converting nested arrays to a string –
useful because `console.debug` only reaches 3 levels.
@param {boolean} states - if true also displays states.
@returns {string}
*/
dump (a, states) {
// kludge part
if (arguments.length == 1) return super.dump(a);
const result = [];
if (this.ebnf) result.push('EBNF:', '', this.ebnf.toString(), '', 'BNF:', '');
if (this.levels.length)
result.push(... this.levels.map(level => ' ' + level.toString()), '');
if (this.rules.length)
result.push(... this.rules.map((rule, n) => rule.dump()), '');
result.push('literals: ' + this.lits.join(', '));
result.push('tokens: ' + this.tokens.map(token => token + ' ' + token.pat).join(', '));
result.push('non-terminals:', ... this.nts.map(nt => nt.dump()));
if (states && this.states.length) result.push('',
this.states.map((state, n) => 'state ' + n + '\n' + state.dump()).join('\n\n'));
if (this.errors + this.sr + this.rr) result.push('');
if (this.errors) result.push('errors: ' + this.errors);
if (this.sr) result.push('shift/reduce conflicts: ' + this.sr);
if (this.rr) result.push('reduce/reduce conflicts: ' + this.rr);
return result.join('\n');
}
}
/** Wraps a method {@linkcode module:BNF~Parser#parse parser.parse()}
which recognizes input and calls on an observer, if any.
Unlike {@linkcode module:BNF~Grammar#parser grammar.parser().parse()},
here input can be presented in pieces, i.e., the method throws `true`
if it should be called with more input.
@property {?module:Base~Scanner} scanner - tokenizes input.
@property {boolean} parsing - true while recognition is in progress.
@property {boolean} building - true if recognition calls {@linkcode module:BNF~Parser#build build()}.
@property {Array.<number>} stack - stack of state numbers.
@property {module:BNF~State} state - current state.
@property {Array.<object>} values - parallels state stack, nested lists or action results.
@property {?(function|Array.<module:Base~Tuple>|boolean)} input - provides input as tuples.
`null`, an empty array, or a `null` array element act as `$eof`.
`false` is set to request more input.
@property {?module:Base~Tuple} current - current input tuple, if any.
@property {Array.<module:Base~Tuple>} tuples - available tuples.
@property {number} index - index of next tuple.
@extends module:Base~Parser
@property {module:BNF~Grammar} grammar - represents the grammar and states, counts errors;
concurrent recognition will trash error counting.
@property {?Object} actions - maps rule names to action methods during recognition.
*/
class Parser extends Base.Parser {
#scanner;
get scanner () { return this.#scanner; }
#parsing = false;
get parsing () { return this.#parsing; }
#building;
get building () { return this.#building; }
#stack;
get stack () { return this.#stack; }
#state;
get state () { return this.grammar.states[this.stack.at(-1)]; }
#values;
get values () { return this.#values; }
#input;
get input () { return this.#input; }
#current = null;
get current () { return this.#current; }
#tuples = [];
get tuples () { return this.#tuples; }
#index = 0;
get index () { return this.#index; }
/** Creates a parser;
see {@linkcode module:BNF~Grammar#parser parser()} factory method.
@param {module:BNF~Grammar} grammar - represents grammar and states.
@param {RegExp} [skip] - a pattern to define ignorable character sequences,
by default white space,
must not accept empty input, must not use `d`, `g`, or `y` flag,
should not be anchored, should use `(:? )`rather than `( )` for grouping.
*/
constructor (grammar, skip) {
super(grammar);
this.#scanner = grammar.scanner(skip);
}
/** Parses (some more) input. `actions` can only be supplied with the first input;
however, the function is serially reusable.
Resets and reports `.errors` for the grammar.
@param {?(string|Array.<module:Base~Tuple>|function)} input - a string is scanned into tuples
with `null` appended as end of input; can be a function which returns an array of tuples.
`null`, an empty array, or a `null` array element act like a tuple containing `$eof`.
@param {Function|Object} [actions] - a function is assumed to be a class
and a singleton is created with `this` as constructor argument.
The object maps rule names to action methods.
@param {Object} arg - used as further constructor arguments.
@returns {Object} result from observer, if any; ends one recognition.
@throws {boolean|Error|string} `true` for more input,
`Error` or error message otherwise — terminates one recognition.
*/
parse (input, actions, ...arg) {
this.grammar.assert(input === null || typeof input == 'string' || typeof input == 'function' ||
(input instanceof Array && input.every(elt => elt === null || elt instanceof Base.Tuple)),
'parse():', input, 'is not a string, a Tuple array, a function, or null');
// initialize?
if (this.parsing)
this.grammar.assert(arguments.length == 1, 'parse(): cannot accept actions while parsing');
else {
// to start parsing, create stack in state 0
this.#parsing = true;
this.#stack = [ 0 ];
// reset error count
this.grammar.errors = 0;
// set up actions, if any
this.grammar.assert(!actions || actions instanceof Object || actions instanceof Function,
'parse():', actions, 'actions has no methods');
super.parse(actions, ...arg);
// set up list building
this.#building = this.grammar.config.build || // configured to build lists
this.grammar.ebnf || // fromEBNF grammar implicitly builds
this.actions; // actions require lists
// create a value stack only for building
if (this.building)
this.#values = [ [ ] ]
else
delete this.values;
// headline trace if any
if (this.grammar.config.trace)
this.grammar.message('STATE TUPLE MESSAGE RETURNS');
}
// initialize for next()
this.#input = typeof input == 'string' ? this.scanner.scan(input).concat(null) : input;
this.#current = null;
this.#tuples = [];
this.#index = 0;
// call/cc: throw [ result ] to return result from the parser
try {
while (true) {
this.current || this.next();
if (this.current.t && this.current.t.ord in this.state.messages) {
// expected input
if (this.process(this.current))
this.next(); // consumed
} else
// illegal character or unexpected input
this.recover();
}
} catch (outcome) {
this.grammar.assert(outcome === true ||
(outcome instanceof Array && outcome.length == 1) ||
typeof outcome == 'string' || outcome instanceof Error,
'parse():', outcome, 'is not true, [ return-value ], Error, or error message');
if (outcome !== true) {
this.#parsing = false;
if (this.grammar.errors) // from actions, maybe
this.grammar.message('parse():', this.grammar.errors, this.grammar.errors > 1 ? 'errors' : 'error');
}
if (outcome instanceof Array) return outcome[0];
if (outcome instanceof Error) console.trace(outcome.message);
throw outcome;
}
}
/** Part of {@linkcode module:BNF~Parser#parse parse()}.
Sets `.current` to a new tuple, but not past `$eof`.
@private
@throws `true` to ask for more input.
*/
next () {
this.grammar.assert(this.tuples instanceof Array &&
this.tuples.every(t => t === null || t instanceof Base.Tuple),
'next():', this.tuples, 'not an array of Tuple');
if (this.index >= this.tuples.length) { // no tuples left
if (typeof this.input == 'function') // call for input
this.#tuples = this.input();
else if (this.input === false) // throw true for more input
throw true;
else // consume input
(this.#tuples = this.input, this.#input = false);
this.grammar.assert(this.tuples === null || (this.tuples instanceof Array &&
this.tuples.every(t => t === null || t instanceof Base.Tuple)),
'next():', this.tuples, 'not null or an array of Tuple');
if (!this.tuples || this.tuples.length == 0) // none? arrange for end of input tuple
this.#tuples = [ null ];
this.#index = 0;
}
// use 'index' tuple
if ((this.#current = this.tuples[this.index]) == null) // null? set end of input tuple
this.#current = this.tuples[this.index] = this.grammar.tuple(0, this.grammar.lit());
// advance index (to later tokenize more) but don't run past end of input tuple
if (this.current.t !== this.grammar.lit()) ++ this.#index;
if (this.grammar.config.lookahead) this.grammar.message(this.current.toString());
}
/** Part of {@linkcode module:BNF~Parser#parse parse()}.
processes an expected input: sends message to {@linkcode module:BNF~Parser#observe observe()}
and handles the state stack and the result, if any.
@private
@param {module:Base~Tuple} tuple - to be processed.
@returns {boolean} `true` if tuple is consumed (shift), `false` if not (reduce).
@throws {string|Object[]} fatal error message or `[ result ]` if `accept` message.
*/
process (tuple) {
this.grammar.assert(tuple instanceof Base.Tuple, 'process():', tuple, 'not a Tuple');
// get message and inform observer
const verb = this.state.messages[tuple.t.ord].verb,
info = this.state.messages[tuple.t.ord].info,
result = this.observe(tuple, verb, info);
// process
switch (verb) {
default: // should not happen
throw 'fatal error: process(): ' + verb + ' not accept, shift, or reduce';
case 'accept':
throw [ result ]; // parse ends with success
case 'shift':
this.grammar.assert(typeof info == 'number' && info >= 0 && info < this.grammar.states.length,
'process():', info, 'not a state number for shift');
this.stack.push(info); // shift to new state
return true; // tuple consumed
case 'reduce': // reduce a rule
this.grammar.assert(info instanceof Rule, 'process():', info, 'not a Rule for reduce');
// pop the stack by the length of the rule, uncover state
this.stack.length -= info.symbols.length;
// there has to be a goto for the non-terminal
const g = this.state.messages[info.nt.ord];
this.grammar.assert(g instanceof Message && g.verb == 'goto',
'process():', g.verb, 'reduce expects goto');
this.grammar.assert(typeof g.info == 'number' && g.info >= 0 && g.info < this.grammar.states.length,
'process():', g.info, 'not a state number for goto');
this.observe(tuple, g.verb, g.info); // observe the goto
this.stack.push(g.info); // goto to new state
return false; // tuple still available
}
}
/** Recognition observer, part of {@linkcode module:BNF~Parser#parse parse()}.
Calls {@linkcode module:BNF~Parser#build build()} to create a result, if any;
calls {@linkcode module:BNF~Parser#trace trace()} if configured;
reports `info` from an `error` message, if any,
@private
@param {module:Base~Tuple} tuple - current input.
@param {string} verb - of message.
@param {module:BNF~Rule|number|string} info - of message.
@returns {Object} the result.
*/
observe (tuple, verb, info) {
this.grammar.assert(tuple instanceof Base.Tuple, 'observe():', tuple, 'not a Tuple');
this.grammar.assert(/^(accept|shift|reduce|goto|error)$/.test(verb),
'observe():', verb, 'not accept, shift, reduce, or error');
const result = this.building ? this.build(tuple, verb, info) : null;
if (this.grammar.config.trace instanceof RegExp && this.grammar.config.trace.test(verb))
this.trace(tuple, verb, info, result);
if (verb == 'error' && info && info.length)
this.error(info);
return result;
}
/** Formats and displays trace, part of {@linkcode module:BNF~Parser#parse parse()}.
@private
@param {module:Base~Tuple} tuple - current input.
@param {string} verb - of message.
@param {module:BNF~Rule|number|string} info - of message.
@param {Object} the result.
*/
trace (tuple, verb, info, result) {
const w = (' '+ this.stack.at(-1)) .substr(-3);
const t = (tuple + ' ') .substr(0, 15);
const v = (verb + ' ') .substr(0, 6);
const i = (verb == 'error' ? (info === null ? 'null' : '-message- ')
: info + ' ').padEnd(39) .substr(0, 39);
const r = this.grammar.dump(result) .substr(0, 39);
this.grammar.message(w + ' ' + t + ' '+ v + ' ' + i + ' '+ r);
// 3 2 15 2 6 2 39 2 39 -> 110 max
}
/** List builder, part of {@linkcode module:BNF~Parser#parse parse()}.
Manages the value stack to collect nested lists of all terminal values and applies
{@link module:Base~Action action methods} matching rule names which can
restructure the list of values collected for a rule.
<p>If the grammar is created from an {@link module:EBNF~Grammar EBNF grammar}
the generated rules `$-#: $-# other;`
and `$-#: ;` have implicit actions to support transparency for actions.
<p>An action should throw an `Error` to abort recognition
or a string to report an error and return `null` as result.
| verb | effect |
| ------- | ------ |
| `'shift'` | pushes the tuple's terminal's value onto the value stack; returns `null`. |
| `'reduce'` | pops one value per symbol, presents the list to an {@link module:Base~Action action function} if any; returns the list or the result. |
| `'goto'` | the result of the preceding `'reduce'` is on top of the value stack; returns `null`. |
| `'accept'` | returns the top-level value. |
| `'error'` | if there is no `info` pops one value and returns ` null`. |
@private
@param {module:Base~Tuple} tuple - current input.
@param {string} verb - `'shift'`, `'reduce'`, `'goto'`, `'accept'`, or `'error'`.
@param {module:BNF~Rule|module:Base~T|number|string} info - a reference to the relevant rule,
terminal, or state number, or an error message.
@returns {object} anything on success.
@throws {Error} with an error message to abort recognition.
*/
build (tuple, verb, info) {
switch (verb) {
case 'shift':
this.values.push(tuple.value);
return null;
case 'reduce':
const len = info.symbols.length;
let result = this.values.splice(- len, len); // can be []
if (this.grammar.ebnf && info.nt.name.startsWith(this.grammar.config.uniq)) {
if (len == 2 && info.symbols[0] === info.nt)
result.splice(0, 1, ...result[0]);
else if (!len)
result = null;
} else
try {
result = this.act(info.nt.name, result); // apply action, if any
} catch (e) {
if (e instanceof Error) { console.trace(e); throw e; }
this.error(e);
result = null;
}
// perform upcoming 'goto' message
this.values.push(result);
return result;
case 'goto':
return null;
case 'accept':
return this.values.pop();
case 'error':
if (!info) this.values.pop();
return null;
}
}
/** Part of {@linkcode module:BNF~Parser#parse parse()}. Attempts error recovery
within the current input sequence. Sends one 'error' with an error message,
works on input and stack until a `shift` `$error` and `shift` `current` are done.
Sends 'error' with null message every time the stack is popped.
Returns when `current` was consumed to resume normal processing.
@private
@throws `true` to ask for more input, i.e., abandons recovery.
@throws `[ result ]` for `$accept`.
@throws `'irrecoverable error'` if the stack is empty.
*/
recover () {
const error = this.grammar.token(), eof = this.grammar.lit();
// produce error message listing expected symbols in state table
// and run the message past the observer
this.observe(this.current, 'error',
`${this.current} is not allowed\nexpecting:` +
Object.entries(this.state.messages).reduce((s, kv) => s +=
kv[1].symbol instanceof Base.T && kv[1].symbol !== error ?
' ' + kv[1].symbol : '', ''));
// drop any illegal character sent by the scanner
while (!this.current.t)
this.next(); // next tuple, throws true for more input
// now at tuple with $eof or actual input
console.assert(this.current instanceof Base.Tuple && this.current.t instanceof Base.T,
'recover expects Tuple');
pop: while (this.stack.length > 0) {
if (!(error.ord in this.state.messages)) { // $error unexpected
this.observe(this.current, 'error', null); // pop value stack
this.stack.pop(); // pop state stack
continue;
} // else $error expected
if (this.process(this.grammar.tuple(this.current.lineno, error)))
while (true) // did shift $error, now search for input
if (this.current.t?.ord in this.state.messages) {
if (this.process(this.current)) { // did shift current
this.#current = null; // consume
return; // recovery should be complete
} // else did reduce+goto before current, retry
} else if (this.current.t === eof) { // end of input
this.grammar.message('terminating at ' + this.current);
break pop; // cannot recover
} else { // current is not expected: discard
this.grammar.message('discarding ' + this.current);
this.next();
}
// else did reduce before $error, process $error again or pop
}
throw 'irrecoverable error';
}
/** Displays the current state stack and value stack, if any.
@returns {string}
*/
toString () {
return this.stack ?
'stack (length ' + this.stack.length +'):\n' +
this.stack.map((state, n) =>
(' ' + (state + ' ').substr(0, 4) + (this.building ? this.grammar.dump(this.values[n]) : '')
).substr(0, 79), this).join('\n') :
'no stack available';
}
/** Displays a message; lets grammar count it as an error.
@param {object[]} s - message, to be displayed; prefixed by `.current` and joined by blanks.
@return {string} the message.
*/
error (... s) {
return this.grammar.error(this.index >= 0 ? 'at' + ' ' +
(this.current ? this.current.toString() : 'end of input') + ':' : '', ... s);
}
}
/**
* Grammar describing the BNF notation accepted by {@linkcode module:BNF~Grammar new Grammar()}:
* <p> A *grammar* consists of an optional sequence of *precedence levels*
* followed by one or more rules.
* <p> Each *precedence level* consists of an associativity followed by one or more literals
* or token names and terminated with a semicolon. Precedence levels are increasing.
* <p> Each *rule* consists of a non-terminal name on the left-hand side,
* a colon, a *symbol sequence* on the right-hand side, and a semicolon.
* Rules with the same names are alternatives.
* <p> A *symbol sequence* contains zero or more items, such as a non-terminal name,
* a self-defining {@link module:BNF~Grammar.terminals literal}, or
* the name of a {@link module:BNF~Grammar.terminals token}.
* @example <caption> BNF grammars' grammar </caption>
* grammar: precedences rules;
* precedences: ;
* precedences: precedences '%left' terminals ';';
* precedences: precedences '%right' terminals ';';
* precedences: precedences '%nonassoc' terminals ';';
* terminals: terminal;
* terminals: terminals terminal;
* terminal: lit;
* terminal: name;
* rules: rule;
* rules: rules rule;
* rule: Name ':' symbols ';';
* rule: Name ':' symbols '%prec' terminal ';';
* symbols: ;
* symbols: symbols name;
* symbols: symbols lit;
* name: Name;
* lit: Lit;
* @constant {string}
*/
Grammar.bnf = [
"grammar: precedences rules;",
"precedences: ;",
"precedences: precedences '%left' terminals ';';",
"precedences: precedences '%right' terminals ';';",
"precedences: precedences '%nonassoc' terminals ';';",
"terminals: terminal;",
"terminals: terminals terminal;",
"terminal: lit;",
"terminal: name;",
"rules: rule;",
"rules: rules rule;",
"rule: Name ':' symbols ';';",
"rule: Name ':' symbols '%prec' terminal ';';",
"symbols: ;",
"symbols: symbols name;",
"symbols: symbols lit;",
"name: Name;",
"lit: Lit;"
].join('\n');
/**
* Token definitions for `Lit` and `Name`
* in {@linkcode module:BNF~Grammar#bnf Grammar#bnf}.
* <p> *Literals* represent themselves and are single-quoted strings
* using `\` only to escape single quotes and `\` itself.
* <p> A *Name* either represents a non-terminal or a *token*.
* <p> *Tokens* represent sets of inputs, such as names or numbers,
* and are alphanumeric names which must start with a letter
* and may include underscores.
* <p>`$error` is a special token to control error recovery.
* @example <caption> BNF grammars' tokens </caption>
* {
* Lit: /'(?:[^'\\]|\\['\\])+'/,
* Name: /[A-Za-z][A-Za-z0-9_]*|\$error/
* }
* @see {@linkcode module:BNF~Parser#recover recover()}
* @see {@linkcode module:BNF~Grammar.grammar Grammar.grammar}
* @constant {Object<string,RegExp>}
*/
Grammar.terminals = {
Lit: /'(?:[^'\\]|\\['\\])+'/,
Name: /[A-Za-z][A-Za-z0-9_]*|\$error/
};
/**
* The BNF grammars' grammar; created when the module is loaded
* and used internally in {@linkcode module:BNF~Grammar new Grammar()}.
* @see {@linkcode module:BNF~Actions Actions}
* @constant {module:BNF~Grammar}
* @private
*/
Grammar.grammar = new Grammar(Grammar.terminals); {
// grammar: precedences rules;
Grammar.grammar.rule(Grammar.grammar.nt('grammar'), [
Grammar.grammar.nt('precedences'),
Grammar.grammar.nt('rules')
]);
// precedences: ;
Grammar.grammar.rule(Grammar.grammar.nt('precedences'), [
]);
// precedences: precedences '%left' terminals ';';
Grammar.grammar.rule(Grammar.grammar.nt('precedences'), [
Grammar.grammar.nt('precedences'),
Grammar.grammar.lit("'%left'"),
Grammar.grammar.nt('terminals'),
Grammar.grammar.lit("';'")
]);
// precedences: precedences '%right' terminals ';';
Grammar.grammar.rule(Grammar.grammar.nt('precedences'), [
Grammar.grammar.nt('precedences'),
Grammar.grammar.lit("'%right'"),
Grammar.grammar.nt('terminals'),
Grammar.grammar.lit("';'")
]);
// precedences: precedences '%nonassoc' terminals ';';
Grammar.grammar.rule(Grammar.grammar.nt('precedences'), [
Grammar.grammar.nt('precedences'),
Grammar.grammar.lit("'%nonassoc'"),
Grammar.grammar.nt('terminals'),
Grammar.grammar.lit("';'")
]);
// terminals: terminal;
Grammar.grammar.rule(Grammar.grammar.nt('terminals'), [
Grammar.grammar.nt('terminal')
]);
// terminals: terminals terminal;
Grammar.grammar.rule(Grammar.grammar.nt('terminals'), [
Grammar.grammar.nt('terminals'),
Grammar.grammar.nt('terminal')
]);
// terminal: lit;
Grammar.grammar.rule(Grammar.grammar.nt('terminal'), [
Grammar.grammar.nt('lit')
]);
// terminal: name;
Grammar.grammar.rule(Grammar.grammar.nt('terminal'), [
Grammar.grammar.nt('name')
]);
// rules: rule;
Grammar.grammar.rule(Grammar.grammar.nt('rules'), [
Grammar.grammar.nt('rule')
]);
// rules: rules rule;
Grammar.grammar.rule(Grammar.grammar.nt('rules'), [
Grammar.grammar.nt('rules'),
Grammar.grammar.nt('rule')
]);
// rule: Name ':' symbols ';';
Grammar.grammar.rule(Grammar.grammar.nt('rule'), [
Grammar.grammar.token('Name'),
Grammar.grammar.lit("':'"),
Grammar.grammar.nt('symbols'),
Grammar.grammar.lit("';'")
]);
// rule: Name ':' symbols '%prec' terminal ';';
Grammar.grammar.rule(Grammar.grammar.nt('rule'), [
Grammar.grammar.token('Name'),
Grammar.grammar.lit("':'"),
Grammar.grammar.nt('symbols'),
Grammar.grammar.lit("'%prec'"),
Grammar.grammar.nt('terminal'),
Grammar.grammar.lit("';'")
]);
// symbols: ;
Grammar.grammar.rule(Grammar.grammar.nt('symbols'), [
]);
// symbols: symbols name;
Grammar.grammar.rule(Grammar.grammar.nt('symbols'), [
Grammar.grammar.nt('symbols'),
Grammar.grammar.nt('name')
]);
// symbols: symbols lit;
Grammar.grammar.rule(Grammar.grammar.nt('symbols'), [
Grammar.grammar.nt('symbols'),
Grammar.grammar.nt('lit')
]);
// name: Name;
Grammar.grammar.rule(Grammar.grammar.nt('name'), [
Grammar.grammar.token('Name')
]);
// lit: Lit;
Grammar.grammar.rule(Grammar.grammar.nt('lit'), [
Grammar.grammar.token('Lit')
]);
// all but $error are used
Grammar.grammar.lits.forEach(lit => lit.used = true);
Grammar.grammar.tokens.forEach(token => { if (token.name.length) token.used = true; });
Grammar.grammar.check(Grammar.grammar.nt('grammar'));
}
/**
* Factory method to represent an {@link module:EBNF~Grammar EBNF grammar}
* as a {@link module:BNF~Grammar BNF grammar} and check it.
* @param {module:EBNF~Grammar} ebnf - grammar to represent.
* @param {Object.<string, Object>} config - overwrites configurable values' defaults.
* @returns the {@link module:BNF~Grammar BNF grammar}, ready for parsing.
* @throws `'unexpected term in EBNF sequence'` *node*
* @example <caption> Translating <code>[ s | t | ... ]</code> </caption>
* $-#: ;
* $-#: s;
* $-#: t;
* ...
* @example <caption> Translating <code>{ s | t | ... }</code> </caption>
* $-##: $-#;
* $-##: $-## $-#;
* $-##: $error;
* $-##: $-## $error;
* $-#: s;
* $-#: t;
* ...
* @constant {function(String,Object)}
*/
Grammar.fromEBNF = (ebnf, config) => {
const g = new Grammar(null, null, config);
g.ebnf = ebnf;
// [ s | t | ... ] -> $-#: | s | t | ... ;
const opt = opt => {
const nt = g.nt({}); // unique
// $-#: ;
g.rule(nt);
// $-#: alt; ...
opt.seqs.forEach(s => g.rule(nt, seq(s)), g);
return nt;
};
// { s | t | ... } -> $-#: s | t | ...;
// $-##: $-# | $-## $-# | $error | $-## $error;
const some = some => {
const nt = g.nt({}), nt1 = g.nt({}); // unique
// $-#: alt; ...
some.seqs.forEach(s => g.rule(nt1, seq(s)), g);
// $-##: $-#;
g.rule(nt, [ nt1 ]);
// $-##: $-## $-#;
g.rule(nt, [ nt, nt1 ]);
if (g.config.error) {
const e = g.token();
// $-##: $error;
g.rule(nt, [ e ]);
// $-##: $-## $error;
g.rule(nt, [ nt, e ]);
}
return nt;
};
// a b ... -> [ a, b, ... ]
const seq = (seq) => seq.nodes.reduce((list, node) => {
switch (node.constructor.name) {
case 'Lit': list.push(g.lit(node.name)); return list;
case 'Token': list.push(g.token(node.name, node.pat)); return list;
case 'NT': list.push(g.nt(node.name)); return list;
case 'Opt': list.push(opt(node)); return list;
case 'Some': list.push(some(node)); return list;
}
throw Error('unexpected term in EBNF sequence ' + node.toString());
}, []);
// (re)create (used) literals
ebnf.lits.filter(l => l.used).forEach(l => g.lit(l.name, true));
// (re)create (used) tokens
ebnf.tokens.filter(t => t.used).forEach(t => g.token(t.name, t.pat, true));
// (re)create precedences if any
ebnf.levels.forEach(level =>
g.precedence(level.assoc,
level.terminals.map(t => {
const l = g.litsByName[t.name]; if (l) return l;
return g.tokensByName[t.name];
})
)
);
// create start non-terminal
const start = g.nt(ebnf.rules[0].nt.name);
// create non-terminals and rules
ebnf.rules.forEach(rule => {
// record non-terminal
const nt = g.nt(rule.nt.name);
// create rules
rule.seqs.forEach(s => {
if (s.terminal) {
let prec = g.litsByName[s.terminal.name];
if (! prec) prec = g.tokensByName[s.terminal.name];
g.rule(nt, seq(s), prec);
} else
g.rule(nt, seq(s));
});
});
// wrap up
g.check(start);
return g;
};
/** The BNF grammar parser's actions,
used internally in {@linkcode module:BNF~Grammar new Grammar()}.
The methods intentionally defeat the argument count checks.
@property {module:BNF~Grammar} g - the grammar to add precedences and rules to.
@private
*/
class Actions {
#g;
get g () { return this.#g; }
/** Creates the singleton with the {@link module:Base~Action action methods}.
@param {module:BNF~Grammar} g - to hold the rule representations.
*/
constructor (g) { this.#g = g; }
/** `grammar: precedences rules;`
@returns {module:BNF~Grammar} g - represents and checks the grammar.
*/
grammar (p, r) { this.g.check(this.g.rules[1].nt); return this.g; }
/** `precedences: ;`
`precedences: precedences '%left' terminals ';';`
`precedences: precedences '%right' terminals ';';`
`precedences: precedences '%nonassoc' terminals ';';`
*/
precedences (... arg) {
return ((p, assoc, terminals) => {
if (assoc) this.g.precedence(assoc, terminals);
})(... arg);
}
/** `terminals: terminal;`
`terminals: terminals terminal;`
@returns {module:BNF~Lit|module:BNF~Token} represents a list of terminals.
*/
terminals (... t) {
if (t.length == 1) return [ t[0] ];
t[0].push(t[1]);
return t[0];
}
/** `terminal: lit;`
`terminal: name;`
@returns {module:BNF~Lit|module:BNF~Token} represents a terminal.
*/
terminal (item) {
if (!(item instanceof NT)) return item;
this.g.error(item.name + ': precedence requires a terminal');
return null;
}
/** `rule: Name ':' symbols ';';`
`rule: Name ':' symbols '%prec' terminal ';';`
@returns {module:BNF~Rule} represents a rule.
*/
rule (... arg) {
return ((name, _, symbols, p, terminal) =>
this.g.rule(this.g.nt(name), symbols, terminal)
)(... arg);
}
/** `symbols: ;`
`symbols: symbols name;`
`symbols: symbols lit;`
@returns {module:Base~Symbol[]} represents a list of symbols.
*/
symbols (... symbols) {
if (!symbols.length) return [ ];
symbols[0].push(symbols[1]);
return symbols[0];
}
/** `name: Name;`
@returns {module:BNF~Token|module:BNF~NT} represents a used token or a non-terminal.
*/
name (name) {
if (name == '$error') return this.g.token('', new RegExp(), true);
if (name in this.g.tokensByName) return this.g.token(name, undefined, true);
return this.g.nt(name);
}
/** `lit: Lit;`
@returns {module:BNF~Lit} represents a used literal.
*/
lit (literal) { return this.g.lit(literal, true); }
}
export {
Lit,
Token,
NT,
Rule,
Message,
Mark,
State,
Grammar,
Parser,
Actions
};