/** 实现add(2,2) subtract(4,2) */
// 第一阶段: 解析 (将原始代码转化为更抽象的代码)
// 1.词法分析(tokens)
// 目的: (add(2,subtract(4,2)) =>
// const fake_tokens = [
// { type: 'paren', value: '(' },
// { type: 'name', value: 'add' },
// { type: 'number', value: '2' },
// { type: 'paren', value: '(' },
// { type: 'name', value: 'subtract' },
// { type: 'paren', value: '(' },
// { type: 'number', value: '4' },
// { type: 'number', value: '2' },
// { type: 'paren', value: ')' },
// { type: 'paren', value: ')' },
// ]
function tokenizer(input) {
let current = 0 // 初始化指针指向第几个
let tokens = [] // 初始化最终输出
// 遍历
while (current < input.length) {
let char = input[current];
// 左括号
if (char === '(') {
tokens.push({
type: 'paren',
value: '('
})
current++;
continue
}
// 右括号
if (char === ')') {
tokens.push({
type: 'paren',
value: ')'
})
current++;
continue
}
// 空格换行符
let whitespace = /\s/;
if (whitespace.test(char)) {
current++;
continue
}
// 数字匹配
let numbers = /[0-9]/;
if (numbers.test(char)) {
let value = ''
// 数字大于1位数 123
while (numbers.test(char)) {
value = value + char
char = input[++current]
}
tokens.push({
type: 'number',
value
})
continue
}
// ""匹配字符串""
if (char === '"') {
let value = ''
char = input[++current] // "后面一位
while (char !== '"') {
value = value + char
char = input[++current]
}
char = input[++current] // "后面一位
tokens.push({
type: 'string',
value
})
continue
}
// 匹配name
let letters = /[a-z]/i;
if (letters.test(char)) {
let value = ''
while (letters.test(char)) {
value += char
char = input[++current]
}
tokens.push({
type: 'name',
value
})
continue
}
throw new TypeError('没有匹配上的文案')
}
return tokens
}
// 2.语法分析(AST)
// 目的 把tokens =>
// const fake_ast = {
// type: 'Program',
// body: [{
// type: 'CallExpression',
// name: 'add',
// params: [{
// type: 'NumberLiteral',
// value: 2
// }, {
// type: 'CallExpression',
// name: 'subtract',
// params: [{
// type: 'NumberLiteral',
// value: 4
// }, {
// type: 'NumberLiteral',
// value: 2
// }]
// }]
// }]
// }
function parser(tokens) {
let current = 0
let ast = {
type: 'Program',
body: []
}
function walk() {
let token = tokens[current]
// number
if (token.type === 'number') {
current++
return {
type: 'NumberLiteral',
value: token.value
}
}
// string
if (token.type === 'string') {
current++
return {
type: 'StringLiteral',
value: token.value
}
}
// (
if (token.type === 'paren' && token.value === '(') {
token = tokens[++current]
let node = {
type: 'CallExpression',
name: token.value,
params: []
}
token = tokens[++current]
while (token.type !== 'paren' || (token.type === 'paren' && token.value !== ')')) {
node.params.push(walk())
token = tokens[current]
}
current++
return node
}
throw new TypeError(token.type)
}
while (current < tokens.length) {
ast.body.push(walk())
}
return ast
}
// 第二阶段: 转换 (采用抽象表示并进行操作)
// 旧AST转化为新AST树
// const fake_new_ast = {
// type: 'Program',
// body: [{
// type: 'ExpressionStatement',
// expression: {
// type: 'CallExpression',
// callee: {
// type: 'Identifier',
// name: 'add'
// },
// arguments: [{
// type: 'NumberLiteral',
// value: '2'
// }, {
// type: 'CallExpression',
// callee: {
// type: 'Identifier',
// name: 'subtract'
// },
// arguments: [{
// type: 'NumberLiteral',
// value: '4'
// }, {
// type: 'NumberLiteral',
// value: '2'
// }]
// }]
// }
// }]
// }
function traverser(ast, visitor) {
function traverseArray(array, parent) {
array.forEach(child => {
traverseNode(child, parent)
})
}
function traverseNode(node, parent) {
let methods = visitor[node.type]
if (methods && methods.enter) {
methods.enter(node, parent)
}
switch (node.type) {
case 'Program':
traverseArray(node.body, node)
break
case 'CallExpression':
traverseArray(node.params, node)
break
case 'NumberLiteral':
case 'StringLiteral':
break
default:
throw new TypeError(node.type)
}
if (methods && methods.exit) {
methods.exit(node, parent)
}
}
traverseNode(ast, null)
}
function transformer(ast) {
let newAst = {
type: 'Program',
body: []
}
ast._context = newAst.body
traverser(ast, {
NumberLiteral: {
enter(node, parent) {
parent._context.push({
type: 'NumberLiteral',
value: node.value
})
}
},
StringLiteral: {
enter(node, parent) {
parent._context.push({
type: 'StringLiteral',
value: node.value
})
}
},
CallExpression: {
enter(node, parent) {
let expression = {
type: 'CallExpression',
callee: {
type: 'Identifier',
name: node.name
},
arguments: []
}
node._context = expression.arguments
if (parent.type !== 'CallExpression') {
expression = {
type: 'ExpressionStatement',
expression,
}
}
parent._context.push(expression)
}
}
})
return newAst
}
// 第三阶段: 代码生成 (采用转换后的代码表表示)
function codeGenerator(node) {
switch (node.type) {
case 'Program':
return node.body.map(codeGenerator).join('\n')
case 'ExpressionStatement':
return (
codeGenerator(node.expression) + ';'
)
case 'CallExpression':
return codeGenerator(node.callee) + '(' + node.arguments.map(codeGenerator).join(',') + ')';
case 'Identifier':
return node.name
case 'NumberLiteral':
return node.value
case 'StringLiteral':
return '"' + node.value + '"'
default:
throw new TypeError(node.type)
}
}
// 完整流程
/**
* 1.input => tokenizer => tokens
* 2.tokens => parser => ast
* 3.ast => transformer => newAst
* 4.newAst => generator => output
*/
function compiler(input) {
let tokens = tokenizer(input)
let ast = parser(tokens)
let new_ast = transformer(ast)
let output = codeGenerator(new_ast)
return output
}
const input = `(add 2(subtract 4 2))`
// output: add(2,subtract(4,2))
console.log(compiler(input))
console