Non-negative Matrix Factorization

Posted by marco
Thu, 03 Jul 2008 00:13:00 GMT

Here it is a small ruby script that calculate the non-negative matrix factorization of a given matrix. I tried to mantain the notation consistent with the one used in the paper of Lee and Seung

The script requires the GSL library.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112

#!/usr/bin/ruby


require("gsl")



def nmf(v, col, thresh)
   r, c = v.shape

   # w * h = v
   w = GSL::Matrix.alloc(r, col)
   h = GSL::Matrix.alloc(col, c)
   initrand(w, w.max)
   initrand(h, h.max)
   dist = thresh
   iter = 0
   while(dist >= thresh && iter < 1000)
      # multiplicative update rule
      dist, w, h = update(v, w, h)
      puts "#{iter}: #{dist}" #if (iter%10 == 0)
      iter += 1
   end   
   puts "Ended in #{iter} iteration(s) with dist=#{dist}"
   return w, h
end


def initrand(m, max)
   r, c = m.shape
   0.upto(r-1) { |i|
       0.upto(c-1) { |j|
          m[i,j] = rand(max)
       }
   }
end

def update(v, w, h)
   # choose an update rule
   # v, w, h are GSL::Matrix objects
   dist, w, h = update_eu(v, w, h)
   return dist, w, h
end

def update_eu(v, w, h)
   # multiplicative update rule
   # for minimizing euclidean distance
   # v, w, h are GSL::Matrix objects
   dist = dist_eu(v, w*h)
   wt = w.transpose
   ht =  h.transpose
   h = h.mul_elements(wt * v).div_elements(wt * w * h)
   w = w.mul_elements(v * ht).div_elements(w * h * ht)
   return dist, w, h
end


def update_kl(v, w, h)
   # multiplicative update rule
   # for minimizing divergence
   # v, w, h are GSL::Matrix objects
   dist = dist_kl(v, w*h)
   wt = w.transpose
   ht =  h.transpose
   num = v.div_elements(w * h)
   rh, ch = h.shape
   rw, cw = w.shape
   0.upto(rh-1) { |i|
       0.upto(ch-1) { |j|
          h[i, j] *= w.col(i).mul(num.col(j)).sum / w.col(i).sum
       }
   }
   0.upto(rw-1) { |i|
       0.upto(cw-1) { |j|
          w[i, j] *= h.row(j).mul(num.row(i)).sum / h.row(j).sum
       }
   }
   return dist, w, h
end



# Euclidean distance
def dist_eu(a, b)
   # a,b are GSL::Matrix objects
   dist = 0
   (a-b).collect { |d| dist += d * d}
   dist
end

# Kullback-Leibler divergence
def dist_kl(a, b)
   # a, b are GSL::Matrix objects
   dist = 0
   r, c = a.shape
   0.upto(r-1) { |i|
       0.upto(c-1) { |j|
          if (b[i,j] == 0) 
             puts "error"
             dist = 2**32
             break
          end
          dist += a[i,j] * Math::log(a[i,j]/b[i,j]) - a[i,j] + b[i,j]
       }
   }
   dist
end




Tanimoto Coefficient

Posted by marco
Wed, 02 Jul 2008 00:10:00 GMT

A small Ruby snippet to calculate the Tanimoto coefficient (aka extended Jaccard coefficient) of two real vectors.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

#!/usr/bin/ruby

class Array 
   def sum
      inject( 0 ) { |sum,x| sum+x }
   end
   def sum_square
      inject( 0 ) { |sum,x| sum+x*x }
   end
   def *(other) # dot_product
      ret = []
      return nil if !other.is_a? Array || size != other.size
      self.each_with_index {|x, i| ret << x * other[i]}
      ret.sum
   end
end

def tanimoto(a, b)
   dot = (a * b)
   den = a.sum_square + b.sum_square - dot
   dot.to_f/den.to_f
end

# puts tanimoto([1,2,2],[3,3,1])



Pearson Correlation Coefficient

Posted by marco
Tue, 01 Jul 2008 00:13:00 GMT

A small Ruby snippet to calculate the Pearson produt-moment correlation coefficient:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

#!/usr/bin/ruby

class Array 
   def sum
      inject( 0 ) { |sum,x| sum+x }
   end
   def sum_square
      inject( 0 ) { |sum,x| sum+x*x }
   end
   def mean
      sum / size
   end
   def *(other) # dot product
      ret = []
      return nil if !other.is_a? Array || size != other.size
      self.each_with_index {|x, i| ret << x * other[i]}
      ret.sum
   end
end


x = Array.new(100) {|i| i }
y = Array.new(100) {|i| -i*2 }


sumx = x.sum
sumy  = y.sum
num = (x.size * (x*y)) - (sumx * sumy)
den = Math.sqrt((x.size * x.sum_square) - sumx * sumx) * Math.sqrt((y.size * y.sum_square) - sumy * sumy)
r = num /den

puts "X= [#{x.join(', ')}]"
puts "Y= [#{y.join(', ')}]"
puts "r= #{r}"


Ruby Fiber Ring Benchmark

Posted by marco
Thu, 22 May 2008 17:51:00 GMT

I stole an exercise from the Erlang world: the ring benchmark

I used the exercise as a way to get acquainted with a new class in Ruby, Fiber (starting from the version 1.9). Fibers are a way to implement asymmetric coroutines in Ruby,

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/ruby1.9

require 'fiber'
require 'benchmark'

class Ring
   attr_reader :id
   attr_accessor :attach 

   def initialize(id)
      @id = id
      @fiber = Fiber.new do
         pass_message
      end
   end

   def |(other)
      other.attach = self if !other.nil?
      other
   end

   def resume
      @fiber.resume
    end

   def pass_message
      while message = message_in
         message_out(message)      
      end
   end

   def message_in
      @attach.resume if !@attach.nil?
   end

   def message_out(message)
      Fiber.yield(message)
   end

end

class RingStart < Ring
   attr_accessor :message
   def initialize(n, m, message)
      @m = m
      @message = message
      super(n)
   end
   
   def pass_message 
      loop { message_out(@message) }
   end

end


def create_chain_r(i, chain)
   # recursive version
   return chain if i<=0
   r = chain.nil? ? Ring.new(i) :  chain | Ring.new(i)
   create_chain(i-1, r)
end

def create_chain(n, chain)
   # loop version
   # needed to avoid stack overflow for high n
   n.downto(0) {
      chain = chain | Ring.new(n)
   }
   chain
end



n=ARGV[0].to_i
m=ARGV[1].to_i
mess=:hello
tm  = Benchmark.measure {
   ringu = RingStart.new(0, m, mess)
   chain = create_chain(n, ringu)
   m.times { ringu.message = chain.resume }
}.format("%10.6r\n").gsub!(/\(|\)/, "")

puts "#{n}, #{m}, #{tm}"
The comparison with the equivalent Erlang program is of course unfair, for one of the Erlang specialty is the spawning of cooperative processes. Anyway, here are the results:    
NMExecution Time
Ruby [ms]Erlang [ms]
10 10 1 0
10 20 2 0
10 50 4 10
10 100 10 0
10 200 18 0
10 500 42 0
10 1000 82 10
100 10 15 0
100 20 23 10
100 50 60 0
100 100 99 10
100 200 186 20
100 500 433 50
100 1000 805 90
1000 10 320 10
1000 20 423 20
1000 50 809 60
1000 100 1479 130
1000 200 2803 250
1000 500 6914 600
1000 1000 15410 1190
10000 10 16168 190
10000 20 17491 320
10000 50 21348 710
10000 100 28314 1480
10000 200 41547 2860
10000 500 81166 7200
10000 1000 147570 14190

Postgresql Graph

Posted by marco
Thu, 13 Mar 2008 12:19:00 GMT

I wrote a small tool, postgresqlgraph, to monitor and plot the statistics of a postgresql database, using rrdtool.

The tool is able to monitor every database created inside a postgresql installation.

The parameters plotted for each database in the graph are:

  • numbackends: number of backends:
  • xact_commit: transactions committed;
  • xact_rollback: transactions rolled back;
  • blks_read: blocks read from the disk;:
  • blks_hit: blocks hit in the cache;
  • tup_returned: tuples returned (only in postgresql 8.3)
  • tup_fetched: tuples fetched (only in postgresql 8.3)
  • tup_inserted: tuples inserted (only in postgresql 8.3)
  • tup_updated: tuples updated (only in postgresql 8.3)
  • tup_deleted: deleted (only in postgresql 8.3)

For each parameter the graph shows the maximum and average values, and plots the derivative (rate change).

The software is published under GPLv3 .

A demo is available here.

Please feel free to submit me bugs, critics, improvements, or tell me if you find the tool of any worthwhileness.

A Ruby Solution to Lars' Problem

Posted by marco
Wed, 06 Sep 2006 20:19:00 GMT

I’m trying to get acquainted with Ruby as much as I’m am with Perl, so in the quest of problems apt to be solved in Ruby I found this one. The original problem is stated here.

Here it is my solution:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#!/usr/bin/ruby

top = ARGV[0].to_i

word = Hash.new(0)
count = Array.new()
while line = $stdin.gets
   line.downcase.scan(/\w+/) { |w| word[w] +=1 }
end
word.each do |key, value|
   count << [value, key]
end
count.sort_by { |a| [-a[0], a[1]] }[0...top].each do |key|
   puts "#{key[0]} #{key[1]}" 
end

Note that using

sort_by
is twice as fast as the more usual fashion
count.sort { |a,b| a[0] == b[0] ? a[1] <=> b[1] : b[0] <=> a[0] }
yet is slower than the python version…

Solving Google Code Jam "countPaths" in ruby

Posted by marco
Sun, 20 Aug 2006 19:56:00 GMT
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/ruby -w

class WordPath
    def countPaths(grid, find)
        @grid = grid
        match = 0
        @row = @grid.length
        @col = @grid[0].length
        @interval = {}
        @matches = {}
        (0...@row).each do |x|
             (0...@col).each do |y|
                 match += NeighSearch(x, y, find)
             end
        end
        match
    end
    def Around(x,y)
        return @interval[[x, y]] if  @interval[[x, y]]
        ar = Array.new
         (-1..1).each do |ox|
             (-1..1).each do |oy|
                 next if((ox  0) && (oy  0))
                 ar.push([x+ox, y+oy]) if ( (x+ox>=0) &&
                                                           (y+oy>=0) &&
                                                           (x+ox<=@row-1) &&
                                                           (y+oy<=@col-1))
            end
        end
        @interval[[x, y]] = ar
        ar
    end
    def NeighSearch(x, y, find)
        return 1 if((@gridx  find[0]) && (find.length  1))
        return 0 if (@gridx != find0)
        count = 0
        Around(x,y).each do |x, y|
            if ! @matches[[x, y, find]]
                @matches[[x, y, find]] = NeighSearch(x, y, find[1find.length])
            end
            count += @matches[[x, y, find]]
        end
        count
    end
end
wp = WordPath.new
puts wp.countPaths([ABC”, FED”, GHI”], ABCDEFGHI”)
puts wp.countPaths([ABC”, FED”, GAI”], ABCDEA”)
puts wp.countPaths([ABC”, DEF”, GHI”], ABCD”)
puts wp.countPaths([AA”, AA”], AAAA”)
puts wp.countPaths([ABABA”, BABAB”, ABABA”, BABAB”, ABABA”], ABABABBA”)
puts wp.countPaths([AAAAA”, AAAAA”, AAAAA”, AAAAA”, AAAAA”], AAAAAAAAAAA”)
puts wp.countPaths([AB”, CD”], AA”)