
  # for each pass through the corpus
  for (i in 1:chain_length)
  {
    # for each document
    for (d in 1:M)
    {
      # for each word (token) in document d
      for (w in 1:length(docs[[d]]))
      {
        t0  = ta[[d]][w]    # current topic assignment of token w
        wid = docs[[d]][w]  # vocabulary index of token w

        # remove token w from the count matrices ("leave-one-out" counts)
        dt[d, t0]   = dt[d, t0] - 1
        wt[t0, wid] = wt[t0, wid] - 1

        # word-topic term: how compatible word wid is with each topic
        post1 = (wt[, wid] + beta) / (rowSums(wt) + V * beta)
        # document-topic term: how prevalent each topic is in document d
        post2 = (dt[d, ] + alpha) / (sum(dt[d, ]) + K * alpha)

        # sample a new topic with probability proportional to the product of
        # the two terms (the collapsed Gibbs full conditional; see the note
        # after the function)
        t1 = sample(1:K, 1, prob = post1 * post2)

        # update the topic assignment for token w and re-increment the
        # word-topic and document-topic count matrices with the sampled topic
        ta[[d]][w]  = t1
        dt[d, t1]   = dt[d, t1] + 1
        wt[t1, wid] = wt[t1, wid] + 1
      }
    }
  }
  # estimate the latent distributions from the final counts:
  # phi   - topic-word distributions (one row per topic, rows sum to 1)
  # theta - document-topic distributions (one row per document, rows sum to 1)
  phi   = (wt + beta)  / (rowSums(wt) + V * beta)
  theta = (dt + alpha) / (rowSums(dt) + K * alpha)
  return(
    list(
      wt    = wt,
      dt    = dt,
      phi   = phi,
      theta = theta
    )
  )
}
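
For reference, the two terms post1 and post2 computed in the inner loop are exactly the two factors of the collapsed Gibbs full conditional for LDA (with token w's own counts removed, which is what the decrements above accomplish):

$$p(z_{d,w} = k \mid \mathbf{z}_{-dw}, \mathbf{w}) \;\propto\; \frac{wt[k, wid] + \beta}{\sum_{v=1}^{V} wt[k, v] + V\beta} \;\cdot\; \frac{dt[d, k] + \alpha}{\sum_{k'=1}^{K} dt[d, k'] + K\alpha}$$

The second denominator does not depend on k, so it has no effect on the sampled topic, but keeping it mirrors the code above.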
lda_res = lda_gibbs(docs, vocab, K=2, alpha=1, beta=.001, chain_length=1000)
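
As a quick sanity check on the output (a sketch only; it assumes vocab is the character vector of word types that the integer ids in docs index into), the highest-probability entries of each row of phi give the top words per topic, and theta gives the per-document topic mixtures:

# top 5 words per topic: order each row of phi and map indices back to vocab
top_words = apply(lda_res$phi, 1, function(p) vocab[order(p, decreasing = TRUE)[1:5]])
print(top_words)

# topic proportions for the first few documents
print(round(head(lda_res$theta), 3))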