编辑代码

# (b) 模拟数据
set.seed(123)  # 设置随机种子以保持结果的可复现性
n <- 500  # 样本大小
mu <- 0.25
phi1 <- 1.5
phi2 <- -0.75
yt <- numeric(n)  # 存储模拟数据
yt[1] <- 1  # 初始值

for (t in 2:n) {
  yt[t] <- mu + phi1 * yt[t-1] + phi2 * yt[t-2] + rnorm(1)  # 使用递归关系生成数据,加入正态随机噪声
}

# 使用前400个数据点估计y、i和d2
yt_est <- yt[1:400]
y_est <- mean(yt_est)
i_est <- sum(diff(yt_est)^2) / (n-1)
d2_est <- sum((yt_est - y_est)^2) / n

# (c) 预测
yt_pred <- yt[401:500]  # 剩余100个数据点作为预测数据

# 迭代预测
yt_iterative <- numeric(5)  # 存储迭代预测结果
yt_iterative[1] <- yt[400]  # 使用最后一个已知值作为初始值

for (i in 2:5) {
  yt_iterative[i] <- mu + phi1 * yt_iterative[i-1] + phi2 * yt[400 + i-2]
}

# 直接预测
yt_direct <- mu + phi1 * yt[400:404] + phi2 * yt[399:403]

# 计算预测误差
mse_iterative <- mean((yt_pred - yt_iterative)^2)
mse_direct <- mean((yt_pred - yt_direct)^2)
mae_iterative <- mean(abs(yt_pred - yt_iterative))
mae_direct <- mean(abs(yt_pred - yt_direct))

# (d) 绘制预测界限和真实值
library(ggplot2)

df <- data.frame(yt = yt_pred, lower = yt_direct - 1.96 * sqrt(mse_direct), upper = yt_direct + 1.96 * sqrt(mse_direct))
df <- mutate(df, index = 401:500)

ggplot(df, aes(x = index, y = yt)) +
  geom_line(color = "blue", size = 1) +
  geom_line(aes(y = lower), color = "red", linetype = "dashed") +
  geom_line(aes(y = upper), color = "red", linetype = "dashed") +
  labs(x = "Index", y = "yt", title = "Forecast Limits") +
  theme_minimal()

# (e) 判断哪种预测方法更好
if (mse_iterative < mse_direct) {
  approach <- "迭代预测"
} else if (mse_iterative > mse_direct) {
  approach <- "直接预测"
} else {
  approach <- "两种方法效果相同"
}

approach