1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
#!/usr/bin/ruby
require("gsl")
def nmf(v, col, thresh)
r, c = v.shape
# w * h = v
w = GSL::Matrix.alloc(r, col)
h = GSL::Matrix.alloc(col, c)
initrand(w, w.max)
initrand(h, h.max)
dist = thresh
iter = 0
while(dist >= thresh && iter < 1000)
# multiplicative update rule
dist, w, h = update(v, w, h)
puts "#{iter}: #{dist}" #if (iter%10 == 0)
iter += 1
end
puts "Ended in #{iter} iteration(s) with dist=#{dist}"
return w, h
end
def initrand(m, max)
r, c = m.shape
0.upto(r-1) { |i|
0.upto(c-1) { |j|
m[i,j] = rand(max)
}
}
end
def update(v, w, h)
# choose an update rule
# v, w, h are GSL::Matrix objects
dist, w, h = update_eu(v, w, h)
return dist, w, h
end
def update_eu(v, w, h)
# multiplicative update rule
# for minimizing euclidean distance
# v, w, h are GSL::Matrix objects
dist = dist_eu(v, w*h)
wt = w.transpose
ht = h.transpose
h = h.mul_elements(wt * v).div_elements(wt * w * h)
w = w.mul_elements(v * ht).div_elements(w * h * ht)
return dist, w, h
end
def update_kl(v, w, h)
# multiplicative update rule
# for minimizing divergence
# v, w, h are GSL::Matrix objects
dist = dist_kl(v, w*h)
wt = w.transpose
ht = h.transpose
num = v.div_elements(w * h)
rh, ch = h.shape
rw, cw = w.shape
0.upto(rh-1) { |i|
0.upto(ch-1) { |j|
h[i, j] *= w.col(i).mul(num.col(j)).sum / w.col(i).sum
}
}
0.upto(rw-1) { |i|
0.upto(cw-1) { |j|
w[i, j] *= h.row(j).mul(num.row(i)).sum / h.row(j).sum
}
}
return dist, w, h
end
# Euclidean distance
def dist_eu(a, b)
# a,b are GSL::Matrix objects
dist = 0
(a-b).collect { |d| dist += d * d}
dist
end
# Kullback-Leibler divergence
def dist_kl(a, b)
# a, b are GSL::Matrix objects
dist = 0
r, c = a.shape
0.upto(r-1) { |i|
0.upto(c-1) { |j|
if (b[i,j] == 0)
puts "error"
dist = 2**32
break
end
dist += a[i,j] * Math::log(a[i,j]/b[i,j]) - a[i,j] + b[i,j]
}
}
dist
end
|