/** A module which contains classes and methods shared by both,
the {@link module:BNF BNF} and {@link module:EBNF EBNF} modules.
It implements messaging, creating precedence levels, and creating scanners.
| class | main properties | main methods |
| ----- | --------------- | ------------ |
| {@linkcode module:Base~Factory Factory} | `config`, `errors`,<br>`lits`[`ByName`], `tokens`[`ByName`], `nts`[`ByName`],<br>`levels`: `Array<`{@linkcode module:Base~Precedence Precedence}`>` | {@linkcode module:Base~Factory#add add(item)}, {@linkcode module:Base~Factory#dump dump(item)},<br>{@linkcode module:Base~Factory#assert assert(condition, ...)},<br>{@linkcode module:Base~Factory#error error(...)},<br>{@linkcode module:Base~Factory#message message(...)} |
| {@linkcode module:Base~Precedence Precedence} | `assoc`, `terminals` | |
| {@linkcode module:Base~Scanner Scanner} | `pattern` | {@linkcode module:Base~Scanner#scan scan(input)}: `Array<`{@linkcode module:Base~Tuple Tuple}`>` |
| {@linkcode module:Base~Tuple Tuple} | `lineno`, `t`, `value` | {@linkcode module:Base~Tuple#escape escape(s)} |
@module Base
@author © 2023 Axel T. Schreiner <axel@schreiner-family.net>
@version 2024-02-13
*/
/** Contains configurable values, inventories, and factory methods to create
  scanners, tokenized input tuples, and precedence levels.
  @property {Object.<string, Object>} config - maps names to configurable values.
  @property {function(string[])} config.log - function to print strings, by default `console.log`.
  @property {RegExp} config.lits - restricts literal representation, by default single-quoted;
    must be anchored.
  @property {RegExp} config.tokens - restricts token names, by default alphanumeric;
    must be anchored.
  @property {RegExp} config.nts - restricts non-terminal names, by default alphanumeric;
    must be anchored.
  @property {string} config.uniq - prefix for unique non-terminal names, by default `$-`.
  @property {boolean} config.noargs - if true, argument counts are not checked
    when {@linkcode module:Base~Parser#act act()} calls an action method; by default `false`.
  @property {boolean} [config.actions] - if set, {@linkcode module:Base~Parser#act act()}
    traces each action method call on `config.log`; not set by default.
  @property {Array<module:Base~Lit>} lits - list of unique literals, can be pushed.
  @property {Object.<string, module:Base~Lit>} litsByName - maps `'x'` to unique literal.
  @property {Array<module:Base~Token>} tokens - list of unique tokens, can be pushed.
  @property {Object.<string, module:Base~Token>} tokensByName - maps name to unique token.
  @property {Array<module:Base~Precedence>} levels - list of precedence levels, can be pushed.
  @property {Array<module:Base~NT>} nts - list of unique non-terminals, can be pushed.
  @property {Object.<string, module:Base~NT>} ntsByName - maps name to unique non-terminal.
  @property {number} errors - incremented by {@linkcode module:Base~Factory#error error()} method;
    can be reset, e.g., to count during recognition.
  @abstract
*/
class Factory {
  #config = {
    log: console.log,
    lits: /^'(?:[^'\\]|\\['\\])+'$/,
    tokens: /^[A-Za-z][A-Za-z0-9_]*$/,
    nts: /^[A-Za-z][A-Za-z0-9_]*$/,
    uniq: '$-',
    noargs: false
  };
  get config () { return this.#config; }

  #lits = [];
  get lits () { return this.#lits; }

  #litsByName = { };
  get litsByName () { return this.#litsByName; }

  #tokens = [ ];
  get tokens () { return this.#tokens; }

  #tokensByName = { };
  get tokensByName () { return this.#tokensByName; }

  #levels = [];
  get levels () { return this.#levels; }

  #nts = [ ];
  get nts () { return this.#nts; }

  #ntsByName = { };
  get ntsByName () { return this.#ntsByName; }

  #errors = 0;
  get errors () { return this.#errors; }
  set errors (errors) { this.#errors = errors; }

  /** Adds a new symbol to the proper inventory or creates and adds new tokens.
    Must be called with a new, unique symbol or with a map of token names to patterns.
    Validates item names against `.config`.
    Token patterns must not accept empty input, must not use `d`, `g`, or `y` flag,
    should not be anchored, and should use `(?: )` rather than `( )` for grouping.
    A map is handed entry by entry to `this.token(name, pattern)`, which is not
    defined in this class and has to be supplied by a subclass.
    @param {Symbol|Object.<string, RegExp>} item - to add to the proper inventory or create and add.
    @throws {string} assertion message if the item is invalid or already known.
  */
  add (item) {
    if (item instanceof Symbol) {
      this.assert(typeof item.name == 'string', 'add():', item, 'name not a string');

      if (item instanceof Lit) {
        this.assert(item.name == '' || this.config.lits.test(item.name), 'add():', item, 'invalid literal');
        this.assert(!(item.name in this.litsByName), 'add():', item, 'already in litsByName');
        this.lits.push(item);
        this.litsByName[item.name] = item;

      } else if (item instanceof Token) {
        this.assert(item.name == '' || this.config.tokens.test(item.name), 'add():', item, 'invalid token name');
        this.assert(item.pat instanceof RegExp, 'add():', item, 'not a regular expression pattern');
        if (item.name.length) {         // the reserved, unnamed token is exempt
          this.assert(!item.pat.test(''), 'add():', item, 'pattern accepts empty input');
          this.assert(!/[dgy]/.test(item.pat.flags), 'add():', item, 'pattern uses "d", "g", or "y" flag(s)');
        }
        this.assert(!(item.name in this.tokensByName), 'add():', item, 'already in tokensByName');
        this.tokens.push(item);
        this.tokensByName[item.name] = item;

      } else if (item instanceof NT) {
        this.assert(item.name == '' || item.name.startsWith(this.config.uniq) || this.config.nts.test(item.name),
          'add():', item, 'invalid non-terminal name');
        this.assert(!(item.name in this.ntsByName), 'add():', item, 'already in ntsByName');
        this.nts.push(item);
        this.ntsByName[item.name] = item;

      } else
        this.assert(false, 'add():', item, 'no suitable inventory');

    } else {
      this.assert(item instanceof Object, item, 'not a map of token definitions');
      Object.entries(item).forEach(kv => this.token(kv[0], kv[1]), this);
    }
  }

  /** Displays an object as a string; in particular, nested arrays.
    This is useful because `console.debug` only reaches 3 levels.
    Strings are single-quoted with `\` and `'` escaped, arrays are displayed
    recursively, other objects as constructor name and `toString()` result.
    @param {Object} [a] - the object to display;
      if omitted, returns an empty string.
    @returns {string}
  */
  dump (a) {
    if (!arguments.length) return '';   // dump() differs from dump(undefined)

    if (typeof a == 'string')
      return "'" + a.replace(/([\\'])/g, "\\$1") + "'"; // could do more...

    if (typeof a != 'object') return '' + a;
    if (a === null) return 'null';

    // objects created with Object.create(null) have neither constructor nor toString()
    const kind = a.constructor == null ? 'Object' : a.constructor.name;
    if (kind == 'Array')
      return '[ ' + a.map(elt => this.dump(elt)).join(' ') + ' ]';

    const text = typeof a.toString == 'function' ? a.toString() : Object.prototype.toString.call(a);
    return kind + ' { ' + text + ' }';
  }

  /** Factory method to represent a list of terminals
    with equal precedence level and equal associativity.
    Creates a new {@linkcode module:Base~Precedence Precedence} object,
    adds it to `.levels`,
    adds `.prec.level` and `.prec.assoc` to all terminals in the list,
    and checks for duplicates.
    @param {string} assoc - associativity: `'%left'`, `'%right'`, or `'%nonassoc'`.
    @param {Array.<?module:Base~T>} terminals - to add, `null` elements are ignored; no duplicates.
    @returns {?module:Base~Precedence} representing the set,
      or `null` if there are no terminals.
    @throws {string} assertion message if an argument is invalid
      or a terminal already has a precedence.
  */
  precedence (assoc, terminals) {
    this.assert(/^%(left|right|nonassoc)$/.test(assoc), 'precedence():', assoc, 'invalid associativity');
    this.assert(terminals instanceof Array && terminals.every(t => t === null || t instanceof T),
      'precedence():', terminals, 'invalid list of terminals');

    const list = terminals.filter(t => !!t);
    if (!list.length) return null;

    list.forEach(t => {
      this.assert(!t.prec.assoc, 'precedence():', t, 'is a duplicate precedence definition');
      t.prec.level = this.levels.length;  // index the new level is about to get
      t.prec.assoc = assoc;
    });

    const result = new Precedence(assoc, list);
    this.levels.push(result);
    return result;
  }

  /** Factory method to create a scanner.
    @param {RegExp} [skip] - a pattern to define ignorable character sequences,
      by default white space,
      must not accept empty input, must not use `d`, `g`, or `y` flag,
      should not be anchored, should use `(?: )` rather than `( )` for grouping.
    @param {Array<T>} [terminals] - ordered list to create the lexical analysis pattern.
    @returns {?module:Base~Scanner} the scanner.
    @throws {string} assertion message if an argument is invalid.
  */
  scanner (skip = new RegExp('\\s+'), terminals) {   // /\s+/ crashes jsdoc
    this.assert(skip instanceof RegExp, 'scanner():', skip, 'not a regular expression');
    this.assert(!skip.test(''), 'scanner():', skip, 'skip accepts empty input');
    this.assert(!/[dgy]/.test(skip.flags), 'scanner():', skip, 'skip uses "d", "g", or "y" flag(s)');
    // can't check anchor and grouping.
    this.assert(!terminals || (terminals instanceof Array && terminals.every(t => t instanceof T)),
      'scanner():', terminals + ':', 'not a list of terminals');

    return new Scanner(this, skip, terminals);
  }

  /** Factory method to create an element of a tokenized input stream.
    @param {number} lineno - input position.
    @param {?module:Base~T} t - terminal, i.e., literal or token object;
      {@linkcode module:Base~Scanner#scan scan()} uses `null` for an illegal character.
    @param {?string} [value] - terminal's representation in the input;
      `null` is stored as an empty string.
    @returns {module:Base~Tuple} an element of a tokenized input stream.
    @throws {string} assertion message if an argument is invalid.
  */
  tuple (lineno, t, value = null) {
    this.assert(typeof lineno == 'number' && lineno >= 0, 'tuple():', lineno, 'invalid line number');
    this.assert(t === null || t instanceof T, 'tuple():', t, 'invalid terminal');
    this.assert(value === null || typeof value == 'string', 'tuple():', value, 'invalid value');

    return new Tuple(lineno, t, value === null ? '' : value);
  }

  /** Displays a message and throws an error if a condition is not met;
    primarily used for stronger argument typing.
    @param {boolean} condition - should be true.
    @param {Array<?object>} s - message, to be displayed; joined by blanks.
    @throws {string} message if condition is not met.
  */
  assert (condition, ...s) { if (!condition) throw this.message('assertion error:', ... s); }

  /** Displays a message and counts it as an error.
    @param {Array<?object>} s - message, to be displayed; joined by blanks.
    @return {string} the message.
  */
  error (...s) { ++ this.errors; return this.message('error:', ... s); }

  /** Displays a message on the configured `.log`.
    @param {Array<?object>} s - message, to be displayed; joined by blanks;
      `null` and `undefined` are displayed by name.
    @return {string} the message.
  */
  message (...s) {
    // undefined has no toString() either; it typically shows up in a failed assertion
    const message = s.map(part => part == null ? String(part) : part.toString()).join(' ');
    this.config.log(message);
    return message;
  }
}
/** Common base of everything a grammar alphabet contains.
  Instances are meant to be created through the grammar's factory methods,
  which take care of uniqueness.
  @property {string} name - read-only; name of a token or non-terminal,
    representation of a literal.
    Each subclass reserves the empty string for one special symbol.
  @abstract
*/
class Symbol {
  #name;

  /** Stores the name; intended for subclass constructors only.
    @param {string} name - symbol name.
  */
  constructor (name) {
    this.#name = name;
  }

  get name () {
    return this.#name;
  }
}
/** Common base of literals and tokens, i.e., of terminal symbols.
  @property {Object} prec - precedence; starts out empty.
  @property {string} [prec.assoc] - associativity, `'%left'`, `'%right'`, or `'%nonassoc'`, if any.
  @property {number} [prec.level] - precedence level, from 0, if any.
  @property {boolean} used - true if used in a grammar;
    any assignment sets it to true, it cannot be cleared.
  @extends module:Base~Symbol
  @property {string} name - name for a token, representation for a literal.
  @abstract
*/
class T extends Symbol {
  #prec = { };
  #used = false;

  /** Creates a terminal; intended for subclass constructors only.
    @param {string} name - name for a token, representation for a literal.
  */
  constructor (name) {
    super(name);
  }

  get prec () {
    return this.#prec;
  }

  get used () {
    return this.#used;
  }

  set used (_) {                        // the assigned value is ignored
    this.#used = true;
  }

  /** Displays the description followed by associativity and level, if any.
    @returns {string}
  */
  dump () {
    const text = this.toString();
    const { assoc, level } = this.prec;
    return assoc ? text + ' ' + assoc + ' ' + level : text;
  }
}
/** Represents a literal symbol.
  @property {string} value - (unquoted) value for the literal; empty string for `$eof`, too.
  @property {boolean} [screened] - set true only during scanner construction
    if literal value matches a token pattern.
  @extends module:Base~T
  @property {Object} prec - precedence.
  @property {string} [prec.assoc] - associativity, `'%left'`, `'%right'`, or `'%nonassoc'`, if any.
  @property {number} [prec.level] - precedence level, from 0, if any.
  @property {boolean} used - true if used in a grammar.
  @property {string} name - name for a token, representation for a literal.
  @abstract
*/
class Lit extends T {
  // single characters which have a special meaning after a backslash
  static #simple = new Map([
    [ 'b', '\b' ], [ 'f', '\f' ], [ 'n', '\n' ], [ 'r', '\r' ], [ 't', '\t' ], [ 'v', '\v' ],
    [ '\\', '\\' ], [ '\'', '\'' ]
  ]);

  #value;
  get value () { return this.#value; }

  /** Creates a literal symbol; should only be used by factory method.
    Extracts value from representation.
    @param {string} name - (quoted) representation for the literal.
      Empty string is reserved for `$eof`, the end of input.
  */
  constructor (name) {
    super(name);
    this.#value = name.length ? this.unescape(name) : '';
  }

  /** Displays representation of a literal or `$eof`.
    @returns {string}
  */
  toString () { return this.name.length ? this.name : '$eof'; }

  /** Removes leading and trailing delimiter character
    and elaborates backslash escapes:
    `\b` `\f` `\n` `\r` `\t` `\v` `\\` `\'`, `\x` followed by two
    and `\u` followed by four hexadecimal digits (upper or lower case).
    A backslash immediately before the trailing delimiter is kept;
    for any other escape only the backslash is dropped.
    @param {string} s - string to unescape.
    @returns {string} unquoted, unescaped string.
  */
  unescape (s) {
    const end = s.length - 1;           // index of trailing delimiter
    let result = '';

    for (let i = 1; i < end; ) {
      const c = s.charAt(i ++);
      if (c != '\\') {
        result += c;
        continue;
      }
      if (i >= end) {                   // trailing backslash in literal
        result += '\\';
        continue;
      }

      const escaped = s.charAt(i ++);
      if (Lit.#simple.has(escaped)) {
        result += Lit.#simple.get(escaped);
        continue;
      }

      // \x## or \u####: all digits must be present inside the delimiters
      const digits = escaped == 'x' ? 2 : escaped == 'u' ? 4 : 0;
      const code = s.slice(i, i + digits);
      if (digits && i + digits <= end && /^[0-9a-fA-F]+$/.test(code)) {
        result += String.fromCharCode(parseInt(code, 16));
        i += digits;
      } else
        result += escaped;              // bad \x, bad \u, or unknown escape
    }
    return result;
  }
}
/** A terminal which is described by a pattern rather than by its own text.
  @property {RegExp} pat - read-only; pattern for token; empty `RegExp` for `$error`.
  @property {Array<Lit>} [screen] - contains literals with values matching the pattern, if any.
  @extends module:Base~T
  @property {Object} prec - precedence.
  @property {string} [prec.assoc] - associativity, `'%left'`, `'%right'`, or `'%nonassoc'`, if any.
  @property {number} [prec.level] - precedence level, from 0, if any.
  @property {boolean} used - true if used in a grammar.
  @property {string} name - name for a token, representation for a literal.
    Empty string is reserved for `$error`, can be something unexpected.
  @abstract
*/
class Token extends T {
  #pat;

  /** Creates a token symbol; should only be used by factory method.
    @param {string} name - token name.
      Empty string is reserved for `$error`, something unexpected.
    @param {RegExp} pat - pattern for token; empty `RegExp` for `$error`.
  */
  constructor (name, pat) {
    super(name);
    this.#pat = pat;
  }

  get pat () {
    return this.#pat;
  }

  /** Displays the token name; the unnamed token is shown as `$error`.
    @returns {string}
  */
  toString () {
    if (this.name.length) return this.name;
    return '$error';
  }
}
/** One precedence level: terminal symbols sharing precedence and associativity.
  @property {string} assoc - read-only; associativity, `'%left'`, `'%right'`, or `'%nonassoc'`.
  @property {Array<module:Base~T>} terminals - read-only; list of terminal symbols.
*/
class Precedence {
  #assoc;
  #terminals;

  /** Creates a new precedence level;
    see factory method {@linkcode module:Base~Factory grammar.precedence()}.
    @param {string} assoc - associativity, `'%left'`, `'%right'`, or `'%nonassoc'`.
    @param {Array<module:Base~T>} terminals - list of terminals.
  */
  constructor (assoc, terminals) {
    this.#assoc = assoc;
    this.#terminals = terminals;
  }

  get assoc () {
    return this.#assoc;
  }

  get terminals () {
    return this.#terminals;
  }

  /** Displays associativity followed by the blank-separated terminals.
    @returns {string}.
  */
  toString () {
    return `${this.assoc} ${this.terminals.join(' ')}`;
  }

  /** Same display as {@linkcode module:Base~Precedence#toString toString()}.
    @returns {string}.
  */
  dump () {
    return this.toString();
  }
}
/** A non-terminal symbol of the grammar.
  @extends module:Base~Symbol
  @property {string} name - name for the non-terminal.
    Empty string is reserved for `$accept`, can be left-hand side of a start rule.
  @abstract
*/
class NT extends Symbol {
  /** Creates a non-terminal symbol; should only be used by factory method.
    @param {string} name - non-terminal's name.
  */
  constructor (name) {
    super(name);
  }

  /** Displays the name; the unnamed non-terminal is shown as `$accept`.
    @returns {string}
  */
  toString () {
    if (this.name.length) return this.name;
    return '$accept';
  }

  /** Same display as {@linkcode module:Base~NT#toString toString()}.
    @returns {string}
  */
  dump () {
    return this.toString();
  }
}
/** Represents an element of a tokenized input stream.
  @property {number} lineno - input position.
  @property {?module:Base~T} t - terminal, i.e., literal or token object.
    `null` is reserved for unrecognizable input.
  @property {string} value - `t`'s representation in the input.
*/
class Tuple {
  #lineno;
  get lineno () { return this.#lineno; }

  #t;
  get t () { return this.#t; }

  #value;
  get value () { return this.#value; }

  /** Creates an element of a tokenized input stream;
    see factory method {@linkcode module:Base~Factory#tuple grammar.tuple()}.
    @param {number} lineno - input position.
    @param {?module:Base~T} t - terminal, i.e., literal or token object.
      `null` is reserved for unrecognizable input.
    @param {string} value - `t`'s representation in the input.
  */
  constructor (lineno, t, value) {
    this.#lineno = lineno;
    this.#t = t;
    this.#value = value;
  }

  /** Displays position, terminal, and associated value.
    The position is shown as `(lineno)`, or as `eof` if `lineno` is not positive.
    @returns {string}.
  */
  toString () {
    const where = this.lineno > 0 ? '(' + this.lineno + ') ' : 'eof ';

    if (!this.t)                        // unrecognizable input
      return where + this.escape(this.value);

    if (this.t instanceof Lit)          // literal representation or `$eof`
      return where + this.t.toString();

    const name = this.t.toString();     // must be called, not compared as a function
    if (name == '$error')
      return where + '$error';

    return where + this.escape(this.value) + ' ' + name;  // token
  }

  /** Escapes non-ASCII and invisible characters using backslash.
    Similar to {@linkcode module:Base~Scanner#escape Scanner.escape()}.
    @param {string} s - string to escape.
    @returns {string} double-quoted, escaped string.
    @example
    escape(null) // returns empty string
    escape('x') // returns string containing "x"
    escape('\b') // returns string containing "\b"
    escape('y') // returns string containing "\x##" or "\u####"
  */
  escape (s) {
    if (s == null) return '';

    let result = '"';
    for (let i = 0; i < s.length; ++ i) {
      const c = s.charAt(i);
      const special = '\b\f\n\r\t\v\\"'.indexOf(c);

      if (special >= 0)
        result += '\\' + 'bfnrtv\\"'.charAt(special);
      else if (c >= ' ' && c <= '~')    // printable ASCII
        result += c;
      else {
        const code = s.charCodeAt(i);
        result += code < 256 ? '\\x' + code.toString(16).padStart(2, '0')
                             : '\\u' + code.toString(16).padStart(4, '0');
      }
    }
    return result + '"';
  }
}
/** Wraps a function which tokenizes a string.
  Token patterns should not partially overlap literals, e.g., `/[a-z]+/` would conceal `'formula1'`.
  @property {function(string[])} assert - bound to {@linkcode module:Base~Factory#assert factory.assert()}.
  @property {function(string[])} tuple - bound to {@linkcode module:Base~Factory#tuple factory.tuple()}.
  @property {Array.<module:Base~T>} terminals - ordered for pattern;
    first tokens ordered by ascending name, then literals in descending order
    of their values, i.e., a longer literal precedes each of its prefixes.
  @property {RegExp} skip - a pattern to define ignorable character sequences,
    should not accept empty input, should not use flags, should not be anchored,
    should use `(?: )` rather than `( )` for grouping.
  @property {RegExp} pattern - read-only, concatenates capture groups
    with `skip` and `terminals`, used to dissect input.
*/
class Scanner {
  #assert;
  get assert () { return this.#assert; }

  #tuple;
  get tuple () { return this.#tuple; }

  #terminals;
  get terminals () { return this.#terminals; }

  #skip;
  get skip () { return this.#skip; }

  #pattern;
  get pattern () { return this.#pattern; }

  /** Creates the pattern used to tokenize a string;
    see factory method {@linkcode module:Base~Factory#scanner grammar.scanner()}.
    Only the source of `skip` and of each token pattern is copied into the
    pattern, their flags are not; the pattern itself uses the flags `m` and `g`.
    @param {module:Base~Factory} factory - supplies literals and tokens;
      unused terminals and `$eof` and `$error`, if any, are ignored.
    @param {RegExp} skip - a pattern to define ignorable character sequences,
      must not accept empty input, must not use `d`, `g`, or `y` flag,
      should not be anchored, should use `(?: )` rather than `( )` for grouping.
    @param {Array<T>} [terminals] - ordered list corresponding to `.pattern`;
      if omitted, tokens by ascending name and then literals by descending value.
  */
  constructor (factory, skip, terminals) {
    // "inherit" assert() and tuple()
    this.#assert = factory.constructor.prototype.assert.bind(factory);
    this.#tuple = factory.constructor.prototype.tuple.bind(factory);

    this.#terminals = terminals ? terminals   // and we hope for the best
      : this.#orderTerminals(factory);
    this.#skip = skip;

    // pattern = ( skip ) | ( token ) |.. | ( literal ) |..   flags: mg
    // .source rather than toString(): the latter would leave '/' in front of flags
    const groups = [ skip.source ].concat(this.terminals.map(
      t => t instanceof Lit ? this.escape(t.value) : t.pat.source));
    this.#pattern = new RegExp(groups.map(group => '(' + group + ')').join('|'), 'mg');
  }

  // Selects and orders the factory's terminals and sets up screening.
  #orderTerminals (factory) {
    // import non-empty, used literals, sorted by descending value;
    // this places a longer literal before each literal which is its prefix
    const lits = factory.lits.filter(lit => lit.used && lit.name.length).sort(
      (a, b) => a === b ? 0 : a.value < b.value ? 1 : -1);

    // import non-empty, used tokens, sorted by ascending name
    const tokens = factory.tokens.filter(token => token.used && token.name.length).sort(
      (a, b) => a === b ? 0 : a.name < b.name ? -1 : 1);

    // token.screen = non-empty map of covered literals, if any
    // lit.screened = true if literal is covered by one token, two or more is bad
    tokens.forEach(token => {
      const screen = { };
      let keep = false;

      lits.forEach(lit => {
        const match = token.pat.exec(lit.value);  // match?
        if (match && match[0] == lit.value) {     // exact
          this.assert(!lit.screened, lit + ': must not be recognized by more than one token pattern');
          lit.screened = true;
          keep = true;
          screen[lit.value] = lit;                // to be screened for
        }
      });

      if (keep) token.screen = screen;
      else delete token.screen;                   // nothing to screen
    });

    // tokens followed by non-screened literals
    const terminals = tokens.concat(lits.filter(lit => !lit.screened));

    // remove screened flags if any
    lits.forEach(lit => { delete lit.screened; });
    return terminals;
  }

  /** Tokenizes a string.
    @param {string} input - to be divided into literals and tokens.
    @returns {Array.<module:Base~Tuple>} a list of literals and tokens.
      The list contains one `Tuple` with a `null` terminal for each
      character sequence which is neither ignorable nor a literal or a token.
  */
  scan (input) {
    this.assert(typeof input == 'string', 'scan():', input, 'not a string');

    // returns number of \n in s
    const nl = s => s.split('\n').length - 1;

    const result = [];
    let lineno = 1;
    let begin = this.pattern.lastIndex = 0;

    while (this.pattern.lastIndex < input.length) {  // loop over input
      const m = this.pattern.exec(input);
      if (!m) {                                   // nothing left to find
        result.push(this.tuple(lineno, null, input.slice(begin)));
        break;
      }

      if (m.index > begin) {                      // illegal characters skipped by exec()?
        const illegal = input.slice(begin, m.index);
        result.push(this.tuple(lineno, null, illegal));
        lineno += nl(illegal);                    // count \n
      }

      // m[1] is skip, m[2].. correspond to terminals[0]..
      const group = m.findIndex((text, n) => n >= 2 && text);
      if (group >= 2) {
        const text = m[group];
        let t = this.terminals[group - 2];        // corresponding terminal
        // own properties only: text such as 'constructor' must not find Object.prototype
        if (t instanceof Token && t.screen && Object.prototype.hasOwnProperty.call(t.screen, text))
          t = t.screen[text];                     // result of screening
        result.push(this.tuple(lineno, t, text)); // new tuple
      }

      lineno += nl(m[0]);                         // count \n
      begin = this.pattern.lastIndex;             // next scan starts here
    }
    return result;
  }

  /** Escapes most characters by `\.` or `\x..` or `\u....`
    so that the result can be embedded into a regular expression.
    Similar to {@linkcode module:Base~Tuple#escape Tuple.escape()}.
    Backspace is represented as `\x08` because `\b` would be a word boundary.
    @param {string} s - string to escape.
    @returns {string} escaped string.
    @example
    escape(null) // fails an assertion
    escape('a') // [alphanumerics] returns string containing a
    escape('\n') // [controls] returns string containing \n
    escape('s') // [specials] returns string containing \s
    escape('x') // [other] returns string containing \x## or \u####
  */
  escape (s) {
    this.assert(typeof s == 'string', 'escape():', s, 'not a string');

    let result = '';
    for (let i = 0; i < s.length; ++ i) {
      const c = s.charAt(i);
      let cc;

      if (/[a-zA-Z0-9_]/.test(c))
        result += c;
      else if ((cc = '"\f\n\r\t\v\\\''.indexOf(c)) >= 0)
        result += '\\' + '"fnrtv\\\''.charAt(cc);
      else if (/[\x20-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e]/.test(c))
        result += '\\' + c;
      else {
        const code = s.charCodeAt(i);
        result += code < 256 ? '\\x' + code.toString(16).padStart(2, '0')
                             : '\\u' + code.toString(16).padStart(4, '0');
      }
    }
    return result;
  }
}
/** Method to process values collected by a rule.
@callback Action
@param {...Object} value - one value collected per item on the right-hand side.
@returns {Object} the value to be collected in the parent rule or returned by recognition.
@throws {string|Error} a string with an error message to continue recognition
or an `Error` with an error message to abort recognition.
*/
/** Abstract base class for recognition based on a grammar.
  A subclass should provide a `parse()` method which recognizes input, builds a tree
  of nested lists, and creates and calls on an object with
  {@link module:Base~Action action methods}, if any.
  @property {module:Base~Factory} grammar - represents the grammar, counts errors;
    concurrent recognition will trash error counting.
  @property {?Object} actions - maps rule names to action methods during recognition;
    `null` until {@linkcode module:Base~Parser#parse parse()} has been given actions.
  @abstract
*/
class Parser {
  #grammar;
  #actions = null;

  /** Creates a parser; only used by subclass to set `.grammar`.
    @param {module:Base~Factory} grammar - represents grammar.
  */
  constructor (grammar) {
    this.#grammar = grammar;
  }

  get grammar () {
    return this.#grammar;
  }

  get actions () {
    return this.#actions;
  }

  /** Only used by subclass to set `.actions`; resets `.errors` for the grammar.
    Should recognize an input sentence.
    `.actions` is left unchanged if `actions` is neither a function nor an object.
    @param {Function|Object} [actions] - a function is assumed to be a class
      and a singleton is created with `this` as constructor argument.
      The object maps rule names to action methods.
    @param {Object} arg - used as further constructor arguments.
    @throws {Error} if the singleton cannot be created; this is reported
      with the grammar's `error()` and `.errors` is not reset.
  */
  parse (actions, ...arg) {
    // action methods?
    try {
      if (actions instanceof Function)
        this.#actions = new actions(this, ...arg);
      else if (actions instanceof Object)
        this.#actions = actions;
    } catch (e) {
      const reason = e instanceof Error ? e.message : e;
      throw new Error(this.grammar.error('parse cannot create actions:', reason));
    }

    // reset error count
    this.grammar.errors = 0;
  }

  /** Calls an {@link module:Base~Action action method}.
    The method is looked up by name in the prototype of the constructor of `.actions`.
    Checks argument count unless `grammar.config.noargs` is set
    or the method expects no arguments, i.e., has a rest parameter.
    Traces arguments and result on `grammar.config.log` if `grammar.config.actions` is set.
    @param {string} name - rule name to match.
    @param {Array} result - list of arguments.
    @returns action method result or unchanged `result`.
  */
  act (name, result) {
    if (!this.actions) return result;

    const method = this.actions.constructor.prototype[name];
    if (typeof method != 'function') return result;

    if (this.grammar.config.actions) {  // trace before
      const args = result.map(arg => this.grammar.dump(arg)).join(', ');
      this.grammar.config.log(name + '(' + args + ')', 'returns');
    }

    // call action method
    const value = this.call(this.actions, method, ...result);

    if (this.grammar.config.actions)    // trace after
      this.grammar.config.log(this.grammar.dump(value));

    return value;
  }

  /** Checks if argument and parameter count of a method match
    unless the method expects no parameters, or has a rest parameter
    or `grammar.config.noargs` is true.
    A mismatch is reported with the grammar's `error()`; the method is called anyway.
    @param {object} target - to apply method to.
    @param {function} method - to check.
    @param {Object} args - arguments to pass.
    @returns {Object} method result.
    @example <caption><tt> super.method(arg1, .. argn) </tt></caption>
    this.parser.call(this, super.method, arg1, .. argn)
  */
  call (target, method, ...args) {
    const expected = method.length;
    const received = args.length;

    if (expected && !this.grammar.config.noargs && expected != received)
      this.grammar.error(`${method.name} arguments: expected ${expected}, ` +
        `received ${received}`);

    return method.apply(target, args);
  }
}
export {
Factory,
Lit,
NT,
Parser,
Precedence,
Scanner,
Symbol,
T,
Token,
Tuple
};