function entropy(data) {
const counts = {};
for (const row of data) {
const label = row[row.length - 1];
counts[label] = (counts[label] || 0) + 1;
}
let entropy = 0;
for (const count in counts) {
const p = counts[count] / data.length;
entropy -= p * Math.log2(p);
}
return entropy;
}
function informationGain(data, attributeIndex) {
const totalEntropy = entropy(data);
const attributeValues = {};
for (const row of data) {
const value = row[attributeIndex];
if (!attributeValues[value]) {
attributeValues[value] = [];
}
attributeValues[value].push(row);
}
let weightedEntropy = 0;
for (const subset of Object.values(attributeValues)) {
weightedEntropy += (subset.length / data.length) * entropy(subset);
}
return totalEntropy - weightedEntropy;
}
function chooseBestAttribute(data) {
let bestGain = -1;
let bestAttribute = -1;
for (let i = 0; i < data[0].length - 1; i++) {
const gain = informationGain(data, i);
if (gain > bestGain) {
bestGain = gain;
bestAttribute = i;
}
}
return bestAttribute;
}
function majorityClass(data) {
const counts = {};
for (const row of data) {
const label = row[row.length - 1];
counts[label] = (counts[label] || 0) + 1;
}
return Object.keys(counts).reduce((a, b) => (counts[a] > counts[b] ? a : b));
}
function buildDecisionTree(data) {
debugger;
if (data.length === 0) return null;
const labels = data.map((row) => row[row.length - 1]);
if (new Set(labels).size === 1) return labels[0];
if (data[0].length === 1) return majorityClass(data);
const bestAttribute = chooseBestAttribute(data);
const tree = { attribute: bestAttribute, branches: {} };
const attributeValues = new Set(data.map((row) => row[bestAttribute]));
for (const value of attributeValues) {
const subset = data.filter((row) => row[bestAttribute] === value);
const subtree = buildDecisionTree(
subset.map((row) => [
...row.slice(0, bestAttribute),
...row.slice(bestAttribute + 1),
]),
);
tree.branches[value] = subtree;
}
return tree;
}
const data = [
['有眼镜', '短发', '胖', '男'],
['有眼镜', '长发', '瘦', '女'],
['有眼镜', '短发', '胖', '女'],
['没眼镜', '长发', '胖', '男'],
['没眼镜', '短发', '瘦', '男'],
];
const tree = buildDecisionTree(data);
console.log(tree);