dlm_code

##

## dlmによる状態空間モデル

##

library(dlm)

library(ggplot2)

library(gridExtra)

font <- "IPAexGothic" # フォントは適宜指定してください

options(digits = 3)

## (1) dlmによるローカルレベルモデル

# 松浦(2016)のデータを使用

# カレントディレクトリにデータがなければ、GitHubから取得

data.file <- "data-season.txt"

if (file.exists(data.file)) {

data <- read.csv(data.file)

} else {

data.url <- "https://raw.githubusercontent.com/iwanami-datascience/vol1/master/matsuura/example2/input/data-season.txt"

data <- read.csv(url(data.url))

}

Y <- data$Y

# グラフ描画

df.data <- data.frame(Time = seq_along(Y),

Value = Y)

p0 <- ggplot(df.data) +

geom_line(aes(x = Time, y = Value)) +

labs(x = "時間(四半期)", y = "販売個数(千個)") +

theme_classic(base_family = font)

print(p0)

# ローカルレベルモデル

# 観測ノイズとシステムノイズの分散をともに1とした場合

dlm.mod1<- dlmModPoly(order = 1, dV = 1, dW = 1,

m0 = 0, C0 = 1e+07)

dlm.flt1 <- dlmFilter(Y, dlm.mod1)

# フィルタリングされた値の最初の部分を表示

head(dlm.flt1$m)

# 最初の要素を除く

head(dropFirst(dlm.flt1$m))

# 観測ノイズの分散を1、システムノイズの分散を5とした場合

dlm.mod2<- dlmModPoly(order = 1, dV = 1, dW = 5,

m0 = 0, C0 = 1e+07)

dlm.flt2 <- dlmFilter(Y, dlm.mod2)

dlm.flt2$sd <- sqrt(unlist(dlmSvd2var(dlm.flt2$U.C, dlm.flt2$D.C)))

# 最尤推定

dlm.build3 <- function(par) {

dlmModPoly(order = 1,

dV = exp(par[1]), dW = exp(par[2]))

}

dlm.mle3 <- dlmMLE(Y, parm = c(0, 0),

build = dlm.build3)

dlm.mod3 <- dlm.build3(dlm.mle3$par)

# ノイズの誤差分散の値

V(dlm.mod3)

W(dlm.mod3)

# フィルタリング

dlm.flt3 <- dlmFilter(Y, dlm.mod3)

head(dropFirst(dlm.flt3$m))

# 平滑化

dlm.smt3 <- dlmSmooth(Y, dlm.mod3)

head(dropFirst(dlm.smt3$s))

# 各モデルの対数尤度(の符号を反転させた値)

dlmLL(Y, dlm.mod1)

dlmLL(Y, dlm.mod2)

dlmLL(Y, dlm.mod3)

# 誤差標準偏差

dlm.flt1$sd <- sqrt(unlist(dlmSvd2var(dlm.flt1$U.C, dlm.flt1$D.C)))

dlm.flt3$sd <- sqrt(unlist(dlmSvd2var(dlm.flt3$U.C, dlm.flt3$D.C)))

dlm.smt3$sd <- sqrt(unlist(dlmSvd2var(dlm.smt3$U.S, dlm.smt3$D.S)))

# グラフ

df1 <- data.frame(Time = rep(seq_along(Y), 6),

Value = c(Y, dropFirst(dlm.flt1$m),

Y, dropFirst(dlm.flt3$m),

Y, dropFirst(dlm.smt3$s)),

Type = factor(c(rep("観測値", length(Y)),

rep("フィルタリングした値", length(Y)),

rep("観測値", length(Y)),

rep("フィルタリングした値", length(Y)),

rep("観測値", length(Y)),

rep("平滑化した値", length(Y))),

levels = c("観測値", "フィルタリングした値",

"平滑化した値")),

Facet = factor(c(rep("A", length(Y) * 2),

rep("B", length(Y) * 2),

rep("C", length(Y) * 2))))

df1.ci <- data.frame(Time = rep(seq_along(Y), 6),

Lower = c(dropFirst(qnorm(0.025, dlm.flt1$m, dlm.flt1$sd)),

dropFirst(qnorm(0.025, dlm.flt3$m, dlm.flt3$sd)),

dropFirst(qnorm(0.025, dlm.smt3$s, dlm.smt3$sd))),

Upper = c(dropFirst(qnorm(0.975, dlm.flt1$m, dlm.flt1$sd)),

dropFirst(qnorm(0.975, dlm.flt3$m, dlm.flt3$sd)),

dropFirst(qnorm(0.975, dlm.smt3$s, dlm.smt3$sd))),

Facet = factor(rep(c(rep("A", length(Y)),

rep("B", length(Y)),

rep("C", length(Y))), 2)))

p1 <- ggplot(df1) +

geom_line(aes(x = Time, y = Value, color = Type,

size = Type, alpha = Type)) +

scale_color_manual(values = c("black", "gray10", "gray60")) +

scale_size_manual(values = c(0.5, 1.5, 1.5)) +

scale_alpha_manual(values = c(1, 0.6, 0.6)) +

labs(x = "時間(四半期)", y = "販売個数(千個)") +

geom_ribbon(data = df1.ci,

aes(x = Time, ymin = Lower, ymax = Upper),

alpha = 0.25) +

theme_classic(base_family = font) +

facet_wrap(~ Facet, ncol = 3, scales = "free_x") +

theme(strip.background = element_blank(),

strip.text = element_text(size = 9, hjust = 0.1),

legend.title = element_blank(), legend.position = "bottom")

print(p1)

# 2階差分のモデル

dlm.build3.2 <- function(par) {

mod <- dlmModPoly(order = 2, dV = exp(par[1]),

dW = c(0, exp(par[2])))

}

dlm.mle3.2 <- dlmMLE(Y, parm = c(0, 0),

build = dlm.build3.2)

# 尤度

dlmLL(Y, dlm.build3.2(dlm.mle3.2$par))

## (2) dlmによる季節調整モデル

dlm.build4 <- function(par) {

mod <- dlmModPoly(order = 1) +

dlmModSeas(frequency = 4)

V(mod) <- exp(par[1])

diag(W(mod))[1:2] <- exp(par[2:3])

m0(mod) <- c(20, 0, 0, 0)

return(mod)

}

# 最尤推定

dlm.mle4 <- dlmMLE(Y, parm = rep(0, 3),

build = dlm.build4)

dlm.mod4 <- dlm.build4(dlm.mle4$par)

# 係数行列

FF(dlm.mod4)

GG(dlm.mod4)

# ノイズの誤差分散

V(dlm.mod4)

W(dlm.mod4)

# 対数尤度(の符号を反転させた値)

dlmLL(Y, dlm.mod4)

# 予測

dlm.fil4 <- dlmFilter(Y, dlm.mod4)

dlm.for4 <- dlmForecast(dlm.fil4, nAhead = 8)

# 予測値の最初の部分

head(dlm.for4$f, 3)

head(dlm.for4$a, 3)

head(unlist(dlm.for4$Q))

# 平滑化

dlm.smo4 <- dlmSmooth(dlm.fil4)

head(dropFirst(dlm.smo4$s), 3)

# 誤差の標準偏差

var.s <- dlmSvd2var(dlm.smo4$U.S, dlm.smo4$D.S)

sd.s <- sapply(1:length(var.s), function(i) sqrt(diag(var.s[[i]])[1:2]))

# グラフ描画

obs.exp <- dlm.for4$f

lev.exp <- dlm.for4$a[, 1]

ses.exp <- dlm.for4$a[, 2]

stdev <- sqrt(unlist(dlm.for4$Q))

df2 <- data.frame(Time = c(rep(seq_along(Y), 2),

length(Y) + 0:(nrow(obs.exp))),

Value = c(Y, dropFirst(dlm.smo4$s[, 1]), Y[length(Y)], obs.exp),

Type = factor(c(rep("観測値", length(Y)),

rep("水準成分", length(Y)),

rep("予測値", nrow(obs.exp) + 1)),

levels = c("観測値", "水準成分",

"予測値")))

df2.ci1 <- data.frame(Time = seq_along(Y),

Lower = dropFirst(qnorm(0.025, dlm.smo4$s[, 1], sd.s[1, ])),

Upper = dropFirst(qnorm(0.975, dlm.smo4$s[, 1], sd.s[1, ])))

df2.ci2 <- data.frame(Time = length(Y) + 1:nrow(obs.exp),

Lower = qnorm(0.025, obs.exp, stdev),

Upper = qnorm(0.975, obs.exp, stdev))

p2 <- ggplot(df2) +

geom_line(aes(x = Time, y = Value, color = Type,

size = Type, alpha = Type)) +

scale_color_manual(values = c("black", "gray10",

"gray40")) +

scale_size_manual(values = c(0.33, 1.0, 1.0)) +

scale_alpha_manual(values = c(1, 0.6, 0.6)) +

geom_ribbon(data = df2.ci1, aes(x = Time, ymin = Lower, ymax = Upper),

fill = "gray50", alpha = 0.5) +

geom_ribbon(data = df2.ci2, aes(x = Time, ymin = Lower, ymax = Upper),

fill = "gray50", alpha = 0.5) +

labs(title = "A", x = "時間(四半期)", y = "販売個数(千個)") +

theme_classic(base_family = font) +

theme(legend.title = element_blank(),

legend.justification = c(0, 1),

legend.position = c(0.1, 1))

df3 <- data.frame(Time = seq_along(Y),

Value = dropFirst(dlm.smo4$s[, 2]))

df3.ci <- data.frame(Time = seq_along(Y),

Lower = dropFirst(qnorm(0.025, dlm.smo4$s[, 2], sd.s[2, ])),

Upper = dropFirst(qnorm(0.975, dlm.smo4$s[, 2], sd.s[2, ])))

p3 <- ggplot(df3) +

geom_line(aes(x = Time, y = Value), color = "gray25", size = 1) +

geom_ribbon(data = df3.ci,

aes(x = Time, ymin = Lower, ymax = Upper),

fill = "gray50", alpha = 0.5) +

labs(title = "B", x = "時間(四半期)", y = "販売個数(千個)") +

theme_classic(base_family = font)

grid.arrange(p2, p3, ncol = 2)

# ベイズ推定

# 季節調整モデル

set.seed(1)

dlm.fit5 <- dlmGibbsDIG(Y, dlmModPoly(order = 1) +

dlmModSeas(frequency = 4),

ind = 1:2,

shape.y = 0.01, rate.y = 0.01,

shape.theta = 0.01,

rate.theta = 0.01,

n.sample = 1010, thin = 29)

# 事後平均

mcmcMean(dlm.fit5$dV[-(1:10)])

mcmcMean(dlm.fit5$dW[-(1:10), ])

# 軌跡

p5 <- ggplot(data.frame(Iter = rep(1:1010, 3),

Value = c(dlm.fit5$dV, dlm.fit5$dW[, 1], dlm.fit5$dW[, 2]),

Var = rep(c("dV", "dW1", "dW2"), each = 1010))) +

geom_line(aes(x = Iter, y = Value)) +

facet_wrap(~ Var, ncol = 1, scales = "free_y") +

theme_classic(base_family = font)

print(p5)