/**
 * Parses a small subset of HTML into a tree of plain node objects.
 *
 * Supported syntax: start tags with optional double-quoted or valueless
 * attributes, self-closing tags (`<br />`), end tags, and text. Comments,
 * doctype declarations and single-quoted/unquoted attribute values are not
 * supported.
 *
 * Element nodes have the shape
 * `{ nodeType: 1, tag, attrs: [{ name, value }], children: [] }`, where
 * `value` is `undefined` for attributes written without `="..."`.
 * Text nodes have the shape `{ nodeType: 3, textContent }`.
 *
 * A `<` that does not begin a recognizable start or end tag is kept as
 * literal text instead of stalling the parser.
 *
 * @param {string} [html=''] - Markup to parse.
 * @returns {Array<object>} The top-level nodes, in document order.
 * @throws {Error} If an end tag has no open element, if an end tag does not
 *   match the innermost open element, or if a start tag is never closed
 *   with `>` or `/>`.
 */
function parseHtml(html = '') {
  const startTagOpen = /^<([a-zA-Z\d]+)/;
  const startTagClose = /^\s*(\/?)>/;
  const attribute = /^\s*([\w-]+)(?:="([^"]*)")?\s*/;
  const endTag = /^<\/([a-zA-Z\d]+)>/;
  const stack = []; // currently open elements, innermost last
  const nodes = []; // top-level result
  let rest = html; // unparsed remainder; the parameter itself is not reassigned

  // Drops the first n characters of the unparsed remainder.
  const advance = (n) => {
    rest = rest.substring(n);
  };

  // Attaches a node to the innermost open element, or to the root list.
  const append = (node) => {
    const parent = stack[stack.length - 1];
    if (parent) {
      parent.children.push(node);
    } else {
      nodes.push(node);
    }
  };

  // True when text begins with something the tag branches below will consume.
  const startsTag = (text) => endTag.test(text) || startTagOpen.test(text);

  while (rest) {
    const index = rest.indexOf('<');

    if (index === 0) {
      const endTagMatch = rest.match(endTag);
      if (endTagMatch) {
        const open = stack[stack.length - 1];
        if (!open) {
          throw new Error(`${endTagMatch[0]}没有起始标签`);
        }
        if (open.tag !== endTagMatch[1]) {
          throw new Error(`起始标签和结束标签不匹配: 起始标签(${open.tag}),结束标签(${endTagMatch[0]})`);
        }
        stack.pop();
        advance(endTagMatch[0].length);
        continue;
      }

      const startTagOpenMatch = rest.match(startTagOpen);
      if (startTagOpenMatch) {
        const node = {
          nodeType: 1,
          tag: startTagOpenMatch[1],
          attrs: [],
          children: [],
        };
        advance(startTagOpenMatch[0].length);

        let end = rest.match(startTagClose);
        let attr = end ? null : rest.match(attribute);
        while (!end && attr) {
          advance(attr[0].length);
          node.attrs.push({
            name: attr[1],
            value: attr[2],
          });
          end = rest.match(startTagClose);
          attr = end ? null : rest.match(attribute);
        }

        if (!end) {
          // Neither a closing `>` nor another attribute follows: fail loudly
          // rather than silently dropping the element or looping forever.
          throw new Error(`起始标签<${node.tag}>没有正确闭合`);
        }

        append(node);
        if (end[1] !== '/') {
          // Not self-closing: subsequent nodes become its children.
          stack.push(node);
        }
        advance(end[0].length);
        continue;
      }
      // Otherwise this '<' is not a tag; fall through and treat it as text.
    }

    // Text runs up to the next '<' that really starts a tag, or to the end of
    // the input when there is none. Because rest is non-empty and a '<' at
    // position 0 is known not to be a tag here, textEnd is always >= 1.
    let textEnd = index;
    while (textEnd !== -1 && !startsTag(rest.slice(textEnd))) {
      textEnd = rest.indexOf('<', textEnd + 1);
    }
    if (textEnd === -1) {
      textEnd = rest.length;
    }
    append({
      nodeType: 3,
      textContent: rest.slice(0, textEnd),
    });
    advance(textEnd);
  }

  return nodes;
}
// Smoke checks: well-formed inputs that are expected to parse without throwing.
parseHtml('<div id="test" class="a b"></div>');
parseHtml('<div id="test" class="a b">Hello World</div>');
parseHtml('开始<div id="test" class="a b">Hello World</div>');
parseHtml('<div id="test" class="a b"><br class="br" />Hello World</div>');

// Malformed inputs: parseHtml throws for an end tag without a start tag and
// for mismatched tags. Catch the error so the rest of the script still runs
// instead of aborting on the first failure.
for (const malformed of ['</div>', '<div></p>']) {
  try {
    parseHtml(malformed);
  } catch (err) {
    console.error(err.message);
  }
}

console.log(parseHtml('<div id="test" class="a b"></div>'));
console