dlm_code
##
## dlmによる状態空間モデル
##
library(dlm)
library(ggplot2)
library(gridExtra)
font <- "IPAexGothic" # フォントは適宜指定してください
options(digits = 3)
## (1) dlmによるローカルレベルモデル
# 松浦(2016)のデータを使用
# カレントディレクトリにデータがなければ、GitHubから取得
data.file <- "data-season.txt"
if (file.exists(data.file)) {
data <- read.csv(data.file)
} else {
data.url <- "https://raw.githubusercontent.com/iwanami-datascience/vol1/master/matsuura/example2/input/data-season.txt"
data <- read.csv(url(data.url))
}
Y <- data$Y
# グラフ描画
df.data <- data.frame(Time = seq_along(Y),
Value = Y)
p0 <- ggplot(df.data) +
geom_line(aes(x = Time, y = Value)) +
labs(x = "時間(四半期)", y = "販売個数(千個)") +
theme_classic(base_family = font)
print(p0)
# ローカルレベルモデル
# 観測ノイズとシステムノイズの分散をともに1とした場合
dlm.mod1<- dlmModPoly(order = 1, dV = 1, dW = 1,
m0 = 0, C0 = 1e+07)
dlm.flt1 <- dlmFilter(Y, dlm.mod1)
# フィルタリングされた値の最初の部分を表示
head(dlm.flt1$m)
# 最初の要素を除く
head(dropFirst(dlm.flt1$m))
# 観測ノイズの分散を1、システムノイズの分散を5とした場合
dlm.mod2<- dlmModPoly(order = 1, dV = 1, dW = 5,
m0 = 0, C0 = 1e+07)
dlm.flt2 <- dlmFilter(Y, dlm.mod2)
dlm.flt2$sd <- sqrt(unlist(dlmSvd2var(dlm.flt2$U.C, dlm.flt2$D.C)))
# 最尤推定
dlm.build3 <- function(par) {
dlmModPoly(order = 1,
dV = exp(par[1]), dW = exp(par[2]))
}
dlm.mle3 <- dlmMLE(Y, parm = c(0, 0),
build = dlm.build3)
dlm.mod3 <- dlm.build3(dlm.mle3$par)
# ノイズの誤差分散の値
V(dlm.mod3)
W(dlm.mod3)
# フィルタリング
dlm.flt3 <- dlmFilter(Y, dlm.mod3)
head(dropFirst(dlm.flt3$m))
# 平滑化
dlm.smt3 <- dlmSmooth(Y, dlm.mod3)
head(dropFirst(dlm.smt3$s))
# 各モデルの対数尤度(の符号を反転させた値)
dlmLL(Y, dlm.mod1)
dlmLL(Y, dlm.mod2)
dlmLL(Y, dlm.mod3)
# 誤差標準偏差
dlm.flt1$sd <- sqrt(unlist(dlmSvd2var(dlm.flt1$U.C, dlm.flt1$D.C)))
dlm.flt3$sd <- sqrt(unlist(dlmSvd2var(dlm.flt3$U.C, dlm.flt3$D.C)))
dlm.smt3$sd <- sqrt(unlist(dlmSvd2var(dlm.smt3$U.S, dlm.smt3$D.S)))
# グラフ
df1 <- data.frame(Time = rep(seq_along(Y), 6),
Value = c(Y, dropFirst(dlm.flt1$m),
Y, dropFirst(dlm.flt3$m),
Y, dropFirst(dlm.smt3$s)),
Type = factor(c(rep("観測値", length(Y)),
rep("フィルタリングした値", length(Y)),
rep("観測値", length(Y)),
rep("フィルタリングした値", length(Y)),
rep("観測値", length(Y)),
rep("平滑化した値", length(Y))),
levels = c("観測値", "フィルタリングした値",
"平滑化した値")),
Facet = factor(c(rep("A", length(Y) * 2),
rep("B", length(Y) * 2),
rep("C", length(Y) * 2))))
df1.ci <- data.frame(Time = rep(seq_along(Y), 6),
Lower = c(dropFirst(qnorm(0.025, dlm.flt1$m, dlm.flt1$sd)),
dropFirst(qnorm(0.025, dlm.flt3$m, dlm.flt3$sd)),
dropFirst(qnorm(0.025, dlm.smt3$s, dlm.smt3$sd))),
Upper = c(dropFirst(qnorm(0.975, dlm.flt1$m, dlm.flt1$sd)),
dropFirst(qnorm(0.975, dlm.flt3$m, dlm.flt3$sd)),
dropFirst(qnorm(0.975, dlm.smt3$s, dlm.smt3$sd))),
Facet = factor(rep(c(rep("A", length(Y)),
rep("B", length(Y)),
rep("C", length(Y))), 2)))
p1 <- ggplot(df1) +
geom_line(aes(x = Time, y = Value, color = Type,
size = Type, alpha = Type)) +
scale_color_manual(values = c("black", "gray10", "gray60")) +
scale_size_manual(values = c(0.5, 1.5, 1.5)) +
scale_alpha_manual(values = c(1, 0.6, 0.6)) +
labs(x = "時間(四半期)", y = "販売個数(千個)") +
geom_ribbon(data = df1.ci,
aes(x = Time, ymin = Lower, ymax = Upper),
alpha = 0.25) +
theme_classic(base_family = font) +
facet_wrap(~ Facet, ncol = 3, scales = "free_x") +
theme(strip.background = element_blank(),
strip.text = element_text(size = 9, hjust = 0.1),
legend.title = element_blank(), legend.position = "bottom")
print(p1)
# 2階差分のモデル
dlm.build3.2 <- function(par) {
mod <- dlmModPoly(order = 2, dV = exp(par[1]),
dW = c(0, exp(par[2])))
}
dlm.mle3.2 <- dlmMLE(Y, parm = c(0, 0),
build = dlm.build3.2)
# 尤度
dlmLL(Y, dlm.build3.2(dlm.mle3.2$par))
## (2) dlmによる季節調整モデル
dlm.build4 <- function(par) {
mod <- dlmModPoly(order = 1) +
dlmModSeas(frequency = 4)
V(mod) <- exp(par[1])
diag(W(mod))[1:2] <- exp(par[2:3])
m0(mod) <- c(20, 0, 0, 0)
return(mod)
}
# 最尤推定
dlm.mle4 <- dlmMLE(Y, parm = rep(0, 3),
build = dlm.build4)
dlm.mod4 <- dlm.build4(dlm.mle4$par)
# 係数行列
FF(dlm.mod4)
GG(dlm.mod4)
# ノイズの誤差分散
V(dlm.mod4)
W(dlm.mod4)
# 対数尤度(の符号を反転させた値)
dlmLL(Y, dlm.mod4)
# 予測
dlm.fil4 <- dlmFilter(Y, dlm.mod4)
dlm.for4 <- dlmForecast(dlm.fil4, nAhead = 8)
# 予測値の最初の部分
head(dlm.for4$f, 3)
head(dlm.for4$a, 3)
head(unlist(dlm.for4$Q))
# 平滑化
dlm.smo4 <- dlmSmooth(dlm.fil4)
head(dropFirst(dlm.smo4$s), 3)
# 誤差の標準偏差
var.s <- dlmSvd2var(dlm.smo4$U.S, dlm.smo4$D.S)
sd.s <- sapply(1:length(var.s), function(i) sqrt(diag(var.s[[i]])[1:2]))
# グラフ描画
obs.exp <- dlm.for4$f
lev.exp <- dlm.for4$a[, 1]
ses.exp <- dlm.for4$a[, 2]
stdev <- sqrt(unlist(dlm.for4$Q))
df2 <- data.frame(Time = c(rep(seq_along(Y), 2),
length(Y) + 0:(nrow(obs.exp))),
Value = c(Y, dropFirst(dlm.smo4$s[, 1]), Y[length(Y)], obs.exp),
Type = factor(c(rep("観測値", length(Y)),
rep("水準成分", length(Y)),
rep("予測値", nrow(obs.exp) + 1)),
levels = c("観測値", "水準成分",
"予測値")))
df2.ci1 <- data.frame(Time = seq_along(Y),
Lower = dropFirst(qnorm(0.025, dlm.smo4$s[, 1], sd.s[1, ])),
Upper = dropFirst(qnorm(0.975, dlm.smo4$s[, 1], sd.s[1, ])))
df2.ci2 <- data.frame(Time = length(Y) + 1:nrow(obs.exp),
Lower = qnorm(0.025, obs.exp, stdev),
Upper = qnorm(0.975, obs.exp, stdev))
p2 <- ggplot(df2) +
geom_line(aes(x = Time, y = Value, color = Type,
size = Type, alpha = Type)) +
scale_color_manual(values = c("black", "gray10",
"gray40")) +
scale_size_manual(values = c(0.33, 1.0, 1.0)) +
scale_alpha_manual(values = c(1, 0.6, 0.6)) +
geom_ribbon(data = df2.ci1, aes(x = Time, ymin = Lower, ymax = Upper),
fill = "gray50", alpha = 0.5) +
geom_ribbon(data = df2.ci2, aes(x = Time, ymin = Lower, ymax = Upper),
fill = "gray50", alpha = 0.5) +
labs(title = "A", x = "時間(四半期)", y = "販売個数(千個)") +
theme_classic(base_family = font) +
theme(legend.title = element_blank(),
legend.justification = c(0, 1),
legend.position = c(0.1, 1))
df3 <- data.frame(Time = seq_along(Y),
Value = dropFirst(dlm.smo4$s[, 2]))
df3.ci <- data.frame(Time = seq_along(Y),
Lower = dropFirst(qnorm(0.025, dlm.smo4$s[, 2], sd.s[2, ])),
Upper = dropFirst(qnorm(0.975, dlm.smo4$s[, 2], sd.s[2, ])))
p3 <- ggplot(df3) +
geom_line(aes(x = Time, y = Value), color = "gray25", size = 1) +
geom_ribbon(data = df3.ci,
aes(x = Time, ymin = Lower, ymax = Upper),
fill = "gray50", alpha = 0.5) +
labs(title = "B", x = "時間(四半期)", y = "販売個数(千個)") +
theme_classic(base_family = font)
grid.arrange(p2, p3, ncol = 2)
# ベイズ推定
# 季節調整モデル
set.seed(1)
dlm.fit5 <- dlmGibbsDIG(Y, dlmModPoly(order = 1) +
dlmModSeas(frequency = 4),
ind = 1:2,
shape.y = 0.01, rate.y = 0.01,
shape.theta = 0.01,
rate.theta = 0.01,
n.sample = 1010, thin = 29)
# 事後平均
mcmcMean(dlm.fit5$dV[-(1:10)])
mcmcMean(dlm.fit5$dW[-(1:10), ])
# 軌跡
p5 <- ggplot(data.frame(Iter = rep(1:1010, 3),
Value = c(dlm.fit5$dV, dlm.fit5$dW[, 1], dlm.fit5$dW[, 2]),
Var = rep(c("dV", "dW1", "dW2"), each = 1010))) +
geom_line(aes(x = Iter, y = Value)) +
facet_wrap(~ Var, ncol = 1, scales = "free_y") +
theme_classic(base_family = font)
print(p5)