编辑代码

// 计算数据集的熵

function entropy(data) {

  // 统计每个标签的出现次数

  const counts = {};

  for (const row of data) {

    const label = row[row.length - 1];

    counts[label] = (counts[label] || 0) + 1;

  }

  // 计算熵

  let entropy = 0;

  for (const count in counts) {

    const p = counts[count] / data.length;

    entropy -= p * Math.log2(p);

  }

  return entropy;

}

// 计算信息增益

function informationGain(data, attributeIndex) {

  // 计算整个数据集的熵

  const totalEntropy = entropy(data);

  // 统计每个属性值对应的数据集

  const attributeValues = {};

  for (const row of data) {

    const value = row[attributeIndex];

    if (!attributeValues[value]) {

      attributeValues[value] = [];

    }

    attributeValues[value].push(row);

  }

  // 计算加权熵

  let weightedEntropy = 0;

  for (const subset of Object.values(attributeValues)) {

    weightedEntropy += (subset.length / data.length) * entropy(subset);

  }

  // 返回信息增益

  return totalEntropy - weightedEntropy;

}

// 选择最佳属性

function chooseBestAttribute(data) {

  let bestGain = -1;

  let bestAttribute = -1;

  // 遍历每个属性, 计算信息增益

  for (let i = 0; i < data[0].length - 1; i++) {

    const gain = informationGain(data, i);

    if (gain > bestGain) {

      bestGain = gain;

      bestAttribute = i;

    }

  }

  return bestAttribute;

}

// 获取数据集中出现次数最多的标签

function majorityClass(data) {

  const counts = {};

  for (const row of data) {

    const label = row[row.length - 1];

    counts[label] = (counts[label] || 0) + 1;

  }

  // 返回出现次数最多的标签

  return Object.keys(counts).reduce((a, b) => (counts[a] > counts[b] ? a : b));

}

// 递归构建决策树

function buildDecisionTree(data) {
    debugger;

  if (data.length === 0) return null;

  const labels = data.map((row) => row[row.length - 1]);

  // 如果所有数据属于同一类,返回该类

  if (new Set(labels).size === 1) return labels[0];

  // 如果没有更多特征可选,返回出现次数最多的标签、

  if (data[0].length === 1) return majorityClass(data);

  // 选择最佳属性

  const bestAttribute = chooseBestAttribute(data);

  // 创建数节点

  const tree = { attribute: bestAttribute, branches: {} };

  // 获取最佳属性的所有可能值

  const attributeValues = new Set(data.map((row) => row[bestAttribute]));

  for (const value of attributeValues) {

    // 获取对应值的数据子集

    const subset = data.filter((row) => row[bestAttribute] === value);

    // 递归构建子树

    const subtree = buildDecisionTree(

      subset.map((row) => [

        ...row.slice(0, bestAttribute),

        ...row.slice(bestAttribute + 1),

      ]),

    );

    tree.branches[value] = subtree;

  }

  return tree;

}

// 样本数据

const data = [

  ['有眼镜', '短发', '胖', '男'],

  ['有眼镜', '长发', '瘦', '女'],

  ['有眼镜', '短发', '胖', '女'],

  ['没眼镜', '长发', '胖', '男'],

  ['没眼镜', '短发', '瘦', '男'],

];

// 构建决策树

const tree = buildDecisionTree(data);

console.log(tree);