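# Synthetic international-crisis dataset generator (the stray "编辑代码" paste label is kept here as a comment).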

import pandas as pd
import numpy as np
from faker import Faker
from datetime import datetime, timedelta

# Faker instance used for the free-text fields (event name, description).
fake = Faker("en_US")
# Fix both random sources so repeated runs produce the same dataset: Faker
# drives the text fields, while NumPy's global generator drives every
# np.random.* draw (dates, categories, losses, casualties).
Faker.seed(42)
np.random.seed(42)

# Preset value pools that the record generator samples from.
crisis_types = [
    "军事冲突",
    "自然灾害",
    "金融危机",
    "能源安全",
    "粮食安全",
    "网络安全",
    "难民危机",
    "贸易冲突",
    "公共卫生",
    "生态危机",
    "科技竞争",
    "恐怖主义",
]
regions = [
    "中东",
    "东欧",
    "东南亚",
    "非洲萨赫勒",
    "南美",
    "北美",
    "西欧",
    "中亚",
    "北极",
    "澳新",
    "南亚",
    "国际水域",
]
impact_levels = [
    "高",
    "中",
    "低",
]
statuses = [
    "持续中",
    "已控制",
    "谈判中",
    "灾后重建",
    "治理中",
    "已解决",
    "已缓解",
]
# Maps a primary crisis type to the follow-on crises it may trigger
# (e.g. a military conflict may lead to a refugee crisis). Only the four
# types listed here have derivatives; every other type has none.
derivative_crises = {
    "军事冲突": [
        "难民危机",
        "公共卫生",
        "粮食安全",
    ],
    "自然灾害": [
        "生态危机",
        "公共卫生",
        "粮食安全",
    ],
    "金融危机": [
        "贸易冲突",
        "难民危机",
        "科技竞争",
    ],
    "能源安全": [
        "贸易冲突",
        "军事冲突",
        "生态危机",
    ],
}

# Build 200 synthetic crisis records. Each row is a list in the order:
# event id, name, start date, crisis type, region(s), description,
# impact level, economic loss, casualties, status, derivative crisis.
data = []
for i in range(1, 201):
    event_id = f"CRISIS-{i:03d}"
    # Short Faker sentence with the period removed, used as the event name.
    # NOTE(review): nb_words is presumably only approximate under Faker's
    # default settings - confirm if exactly three words are required.
    name = fake.sentence(nb_words=3).replace(".", "")
    # Random start date up to 1094 days after 2023-01-01. np.random.randint
    # returns a NumPy integer scalar, which timedelta does not accept, so it
    # is converted to a plain int first.
    day_offset = int(np.random.randint(0, 1095))
    start_date = datetime(2023, 1, 1) + timedelta(days=day_offset)
    c_type = np.random.choice(crisis_types)
    main_region = np.random.choice(regions)
    # 50% chance that the event spans two regions.
    if np.random.rand() > 0.5:
        if main_region == "国际水域":
            # International waters: list two distinct regions instead.
            related_regions = np.random.choice(regions, size=2, replace=False).tolist()
        else:
            # Draw the second region from the remaining ones so the same
            # region is never listed twice.
            other_regions = [r for r in regions if r != main_region]
            related_regions = [main_region, np.random.choice(other_regions)]
        region = "、".join(related_regions)
    else:
        region = main_region
    description = fake.paragraph(nb_sentences=2)
    # Impact level is weighted towards "中" (medium).
    impact = np.random.choice(impact_levels, p=[0.25, 0.6, 0.15])
    # Economic loss is drawn from a normal distribution whose mean and
    # spread depend on the impact level.
    if impact == "高":
        econ_loss = round(np.random.normal(300, 200), 1)
    elif impact == "中":
        econ_loss = round(np.random.normal(100, 80), 1)
    else:
        econ_loss = round(np.random.normal(20, 15), 1)
    # The normal draw can be negative; clamp to zero.
    econ_loss = max(econ_loss, 0.0)
    # Economic / technology crises are recorded with no casualties.
    if c_type in ["金融危机", "贸易冲突", "科技竞争"]:
        casualties = 0
    else:
        casualties = np.random.randint(0, 100000) if impact == "高" else np.random.randint(0, 5000)
    # Status pool depends on the crisis type.
    if c_type == "自然灾害":
        status = np.random.choice(["灾后重建", "已控制", "已缓解"])
    elif c_type == "军事冲突":
        status = np.random.choice(["持续中", "谈判中", "已控制"])
    else:
        status = np.random.choice(statuses)
    # 20% chance of a derivative crisis, chosen uniformly from the options
    # mapped to this crisis type; types without a mapping always get "无".
    # (The previous single-expression form built a probability vector whose
    # length did not match the candidate list, which np.random.choice rejects.)
    derived_options = derivative_crises.get(c_type, [])
    if derived_options and np.random.rand() < 0.2:
        derivative = np.random.choice(derived_options)
    else:
        derivative = "无"

    data.append([
        event_id, name, start_date.strftime("%Y-%m-%d"), c_type, region,
        description, impact, econ_loss, casualties, status, derivative
    ])

# Assemble the generated rows into a DataFrame and export it as an Excel
# workbook (without the index column), then report completion.
columns = [
    "事件编号",
    "事件名称",
    "发生时间",
    "危机类型",
    "涉及地区",
    "简要描述",
    "影响等级",
    "经济损失(亿美元)",
    "人员伤亡(人)",
    "危机状态",
    "衍生危机",
]
df = pd.DataFrame(data=data, columns=columns)
df.to_excel(
    "international_crisis_200.xlsx",
    index=False,
)
print("已生成 international_crisis_200.xlsx(200条数据),包含衍生危机字段")