import pandas as pd
import numpy as np
from faker import Faker
from datetime import datetime, timedelta
# Faker instance used for the free-text fields (event name, description).
fake = Faker("en_US")
# Seed both random sources. Faker only drives the text fields; every other
# random draw in this script goes through np.random, so seeding Faker alone
# would still produce a different dataset on each run.
Faker.seed(42)
np.random.seed(42)
# Categories an event can be assigned to.
crisis_types = [
    "军事冲突",
    "自然灾害",
    "金融危机",
    "能源安全",
    "粮食安全",
    "网络安全",
    "难民危机",
    "贸易冲突",
    "公共卫生",
    "生态危机",
    "科技竞争",
    "恐怖主义",
]

# Geographic areas an event can involve.
regions = [
    "中东",
    "东欧",
    "东南亚",
    "非洲萨赫勒",
    "南美",
    "北美",
    "西欧",
    "中亚",
    "北极",
    "澳新",
    "南亚",
    "国际水域",
]

# Severity labels, listed from highest to lowest.
impact_levels = ["高", "中", "低"]

# Every status an event may carry.
statuses = [
    "持续中",
    "已控制",
    "谈判中",
    "灾后重建",
    "治理中",
    "已解决",
    "已缓解",
]

# Follow-on crises keyed by the originating crisis type. Types that do not
# appear as a key here have no follow-on crises listed.
derivative_crises = {
    "军事冲突": ["难民危机", "公共卫生", "粮食安全"],
    "自然灾害": ["生态危机", "公共卫生", "粮食安全"],
    "金融危机": ["贸易冲突", "难民危机", "科技竞争"],
    "能源安全": ["贸易冲突", "军事冲突", "生态危机"],
}
# Build 200 synthetic crisis records. Each record is a list of field values;
# the field order is fixed by the data.append([...]) call at the end of the loop.
data = []
for i in range(1, 201):
    event_id = f"CRISIS-{i:03d}"
    # Remove periods from the generated sentence so it reads as a title.
    name = fake.sentence(nb_words=3).replace(".", "")
    # Start date: 2023-01-01 plus a random offset of 0..1094 days.
    start_date = datetime(2023, 1, 1) + timedelta(days=np.random.randint(0, 1095))
    c_type = np.random.choice(crisis_types)

    # Region: about half of the events involve two regions joined by "、".
    main_region = np.random.choice(regions)
    if np.random.rand() > 0.5:
        if main_region == "国际水域":
            # Replace the main region with two distinct regions drawn from
            # the full list.
            related_regions = np.random.choice(regions, size=2, replace=False).tolist()
        else:
            # Draw the second region from the remaining ones so the same
            # region is never listed twice (e.g. "中东、中东").
            other_regions = [r for r in regions if r != main_region]
            related_regions = [main_region, np.random.choice(other_regions)]
        region = "、".join(related_regions)
    else:
        region = main_region

    description = fake.paragraph(nb_sentences=2)

    # Impact level is drawn with probabilities 0.25 / 0.6 / 0.15 for
    # "高" / "中" / "低"; it selects the mean and spread of the loss draw.
    impact = np.random.choice(impact_levels, p=[0.25, 0.6, 0.15])
    if impact == "高":
        econ_loss = round(np.random.normal(300, 200), 1)
    elif impact == "中":
        econ_loss = round(np.random.normal(100, 80), 1)
    else:
        econ_loss = round(np.random.normal(20, 15), 1)
    # A normal draw can be negative; a loss cannot, so clamp at zero.
    econ_loss = max(econ_loss, 0)

    # These three crisis types are always recorded with zero casualties.
    if c_type in ["金融危机", "贸易冲突", "科技竞争"]:
        casualties = 0
    elif impact == "高":
        casualties = np.random.randint(0, 100000)
    else:
        casualties = np.random.randint(0, 5000)

    # Natural disasters and military conflicts draw from a restricted set
    # of statuses; every other type may take any status.
    if c_type == "自然灾害":
        status = np.random.choice(["灾后重建", "已控制", "已缓解"])
    elif c_type == "军事冲突":
        status = np.random.choice(["持续中", "谈判中", "已控制"])
    else:
        status = np.random.choice(statuses)

    # Derivative crisis: 80% of events have none ("无"); the remaining 20%
    # pick uniformly among the follow-on crises listed for this type. Types
    # with no listed follow-ons always get "无". Doing this in two steps
    # avoids passing np.random.choice a probability vector whose length
    # must match the candidate list, which is where the previous one-liner
    # raised ValueError.
    derivative_options = derivative_crises.get(c_type, [])
    if derivative_options and np.random.rand() < 0.2:
        derivative = str(np.random.choice(derivative_options))
    else:
        derivative = "无"

    data.append([
        event_id, name, start_date.strftime("%Y-%m-%d"), c_type, region,
        description, impact, econ_loss, casualties, status, derivative,
    ])
# Column headers, in the same order as the values appended to each row of `data`.
columns = [
    "事件编号",
    "事件名称",
    "发生时间",
    "危机类型",
    "涉及地区",
    "简要描述",
    "影响等级",
    "经济损失(亿美元)",
    "人员伤亡(人)",
    "危机状态",
    "衍生危机",
]

# Assemble the rows into a table and write it to an Excel workbook.
df = pd.DataFrame(data=data, columns=columns)
# index=False leaves the DataFrame's row index out of the spreadsheet.
df.to_excel(excel_writer="international_crisis_200.xlsx", index=False)
print("已生成 international_crisis_200.xlsx(200条数据),包含衍生危机字段")