# Magnitude below which a number is treated as zero. jacobi compares the
# largest off-diagonal entry and the difference of two diagonal entries
# against this value.
LOGICAL_0 = 1e-10
# Cumulative contribution level that sum_contribution compares each running
# total against when picking its :threshold index.
THRESHOLD = 0.85
# Matrix helpers added to the core Array class. A "matrix" here is an Array
# of row Arrays.
class Array
  # Keep the built-in Array#* (repetition / join) reachable, because the
  # matrix product below replaces it. The guard stops a second load of this
  # file from aliasing the already-patched method.
  unless private_method_defined?(:builtin_multiply)
    alias_method :builtin_multiply, :*
    private :builtin_multiply
  end

  # Matrix product when +array+ is an Array; otherwise falls back to the
  # built-in Array#* (so <tt>[1, 2] * 2</tt> and <tt>[1, 2] * ","</tt> keep
  # working).
  #
  # @param array [Array<Array<Numeric>>, Integer, String]
  # @return [Array<Array<Numeric>>, nil] the product (self.size rows,
  #   array[0].size columns), or nil when either operand is empty or the
  #   width of the first row of self differs from the number of rows of
  #   +array+
  def *(array)
    return builtin_multiply(array) unless array.is_a?(Array)
    return nil if empty? || array.empty? || first.size != array.size

    cols = array.first.size
    map do |row|
      Array.new(cols) do |j|
        # Plain left-to-right accumulation starting from Integer 0.
        row.each_with_index.inject(0) { |acc, (a, k)| acc + a * array[k][j] }
      end
    end
  end

  # Transposed copy of the matrix; the column count is taken from the first
  # row.
  #
  # @return [Array<Array>] [] for an empty receiver
  def trans
    return [] if empty?

    res = Array.new(first.size) { Array.new(size) }
    each_with_index do |row, i|
      row.each_with_index { |num, j| res[j][i] = num }
    end
    res
  end

  # Stable insertion sort of a copy of self that applies the same moves to a
  # copy of +array+, keeping the two in step. Neither input is modified.
  #
  # @param array [Array] companion array, reordered like self
  # @yield [a, b] must return true when +a+ has to come before +b+
  # @return [Hash] +:self+ => sorted copy of self, +:other+ => reordered copy
  #   of +array+
  def sort_zip(array)
    # dup rather than clone: clone keeps the frozen state and the element
    # assignments below would then raise on frozen inputs.
    a1 = dup
    a2 = array.dup
    (1...a1.size).each do |i|
      key = a1[i]
      ano = a2[i]
      j = i - 1
      while j >= 0 && yield(key, a1[j])
        a1[j + 1] = a1[j]
        a2[j + 1] = a2[j]
        j -= 1
      end
      a1[j + 1] = key
      a2[j + 1] = ano
    end
    { :self => a1, :other => a2 }
  end

  # Text rendering of the matrix: one line per row, wrapped in "|", every
  # entry rounded to 3 decimals and followed by a tab.
  #
  # @return [String]
  def formal
    each_with_object(+"") do |row, out|
      out << "|"
      row.each { |num| out << num.round(3).to_s << "\t" }
      out << "|\n"
    end
  end

  # Largest absolute value among the off-diagonal entries.
  #
  # @return [Hash] +:value+ => the magnitude, +:i+ / +:j+ => its row and
  #   column. When no off-diagonal entry has a magnitude above zero (diagonal
  #   or 1x1 matrix), +:value+ is 0 and +:i+ / +:j+ are nil.
  def find_max
    max_num = 0
    pos = nil
    each_with_index do |line, i|
      line.each_with_index do |num, j|
        next if i == j

        abs = num.abs
        if abs > max_num
          pos = [i, j]
          max_num = abs
        end
      end
    end
    # pos stays nil when nothing qualified; destructuring nil yields nil, nil.
    row, col = pos
    { value: max_num, i: row, j: col }
  end
end
# Reads a tab-separated table of numbers from the file at +address+.
# Blank lines are skipped; every field is converted with String#to_f.
#
# @param address [String] path of the input file
# @return [Array<Array<Float>>] one inner array per non-blank line
def get_ori_data(address)
  rows = []
  File.foreach(address) do |line|
    next if line.strip.empty?

    rows << line.chomp.split("\t").map(&:to_f)
  end
  rows
end

# Standardizes one series to zero mean and unit sample standard deviation
# (divisor n - 1).
#
# The variance is computed in two passes (sum of squared deviations from the
# mean), so it can never come out negative from rounding and Math.sqrt cannot
# raise Math::DomainError on a near-constant series.
#
# @param array [Array<Numeric>]
# @return [Array] standardized values; all entries are Integer 0 when the
#   series has fewer than two values or its standard deviation is not above
#   1e-10
def standarlize_a_line(array)
  count = array.size
  return array.map { 0 } if count < 2

  mean = array.sum(0.0) / count
  variance = array.sum(0.0) { |num| (num - mean)**2 } / (count - 1)
  s = Math.sqrt(variance)
  # `unless >` (not `if <=`) so a NaN deviation also ends up in this branch.
  return array.map { 0 } unless s > 1e-10

  array.map { |num| (num - mean) / s }
end

# Loads the table at +address+, turns its columns into rows and standardizes
# each of them.
#
# @param address [String] path of the tab-separated input file
# @return [Array<Array>] one standardized series per input column
# @raise [IndexError] from Array#transpose when the lines do not all have the
#   same number of fields
def standarlize(address)
  get_ori_data(address).transpose.map { |column| standarlize_a_line(column) }
end
# Builds the square matrix of pairwise products between the rows of +data+.
#
# Entry (i, j) for i < j is the sum of the element-wise products of rows i
# and j divided by (length of the first row - 1); entries below the diagonal
# mirror the ones above it and the diagonal is fixed at 1.
# NOTE(review): this is a correlation matrix only if every row has already
# been standardized - confirm against the caller.
#
# @param data [Array<Array<Numeric>>] one series per row
# @return [Array<Array<Numeric>>] data.size x data.size matrix
def r_array(data)
  count = data.size
  degrees = data[0].size - 1
  matrix = Array.new(count) { Array.new(count) }
  count.times do |row|
    count.times do |col|
      matrix[row][col] =
        if row == col
          1
        elsif row > col
          # Row `col` was filled on an earlier pass, so the mirror entry exists.
          matrix[col][row]
        else
          dot = data[row].zip(data[col]).inject(0) { |acc, (a, b)| acc + a * b }
          dot / degrees
        end
    end
  end
  matrix
end
# Diagonalizes the square matrix +r+ by repeated plane rotations.
#
# Each pass picks the off-diagonal entry with the largest magnitude (via
# Array#find_max), builds a rotation in that (i, j) plane, applies it to the
# matrix and accumulates it. The loop ends once that magnitude is no longer
# above LOGICAL_0.
# NOTE(review): the rotation angle formula assumes +r+ is symmetric - confirm
# against callers.
#
# @param r [Array<Array<Numeric>>] square matrix; the caller's array is not
#   modified
# @return [Hash] +:self+ => diagonal entries of the final matrix in descending
#   order, +:other+ => rows of the transposed accumulated rotation matrix in
#   the matching order
def jacobi(r)
  # n x n identity matrix. Defined inside jacobi, so it is (re)defined every
  # time jacobi runs.
  def get_i(n)
    Array.new(n) { |row| Array.new(n) { |col| row == col ? 1 : 0 } }
  end

  order = r.size
  rotations = get_i(order)
  loop do
    pivot = r.find_max
    unless pivot[:value] > LOGICAL_0
      # Converged: read the diagonal and sort it together with the vectors.
      diagonal = Array.new(order) { |k| r[k][k] }
      return diagonal.sort_zip(rotations.trans) { |a, b| a > b }
    end

    p = pivot[:i]
    q = pivot[:j]
    gap = r[p][p] - r[q][q]
    angle =
      if gap.abs < LOGICAL_0
        # Diagonal entries (nearly) equal: use a fixed quarter-pi rotation
        # instead of dividing by the gap.
        -Math::PI / 4
      else
        Math.atan(2 * r[p][q] / gap) / 2
      end
    c = Math.cos(angle)
    s = Math.sin(angle)

    step = get_i(order)
    step[p][p] = c
    step[q][q] = c
    step[p][q] = -s
    step[q][p] = s

    r = step.trans * r * step
    # The rotation was chosen to cancel this entry; clear the rounding residue.
    r[p][q] = 0
    rotations = rotations * step
  end
end
# Share of each value in the total of +values+.
#
# The total is accumulated as a Float so that Integer inputs are not
# truncated by integer division (e.g. [1, 1, 2] gives [0.25, 0.25, 0.5]).
#
# @param values [Array<Numeric>]
# @return [Array<Float>] values[i] / total for every i; entries are NaN or
#   infinite when the total is zero
def contribution(values)
  total = values.sum(0.0)
  values.map { |value| value / total }
end
# Running totals of +con+ plus the index of the first total that exceeds
# +threshold+.
#
# The index is tracked with a nil sentinel: using 0 as "not found yet" let a
# later element overwrite the answer when the very first total was already
# above the threshold.
#
# @param con [Array<Numeric>] contribution of each component
# @param threshold [Numeric] level the running total has to exceed
#   (defaults to THRESHOLD)
# @return [Hash] +:threshold+ => index of the first running total above
#   +threshold+ (0 when none is), +:sum_con+ => the running totals
def sum_contribution(con, threshold = THRESHOLD)
  first_over = nil
  running = 0
  sum_con = con.each_with_index.map do |rate, index|
    running += rate
    # Index 0 is truthy in Ruby, so ||= keeps the first hit even at index 0.
    first_over ||= index if running > threshold
    running
  end
  { threshold: first_over || 0, sum_con: sum_con }
end
# Scales every row of +vectors+ by the square root of the value at the same
# index in +values+ and returns the result with rows and columns swapped.
#
# @param vectors [Array<Array<Numeric>>] one vector per row
# @param values [Array<Numeric>] values[j] belongs to vectors[j]
# @return [Array<Array<Float>>] entry [i][j] is sqrt(values[j]) * vectors[j][i]
def load_array(vectors, values)
  vector_count = vectors.size
  width = vectors[0].size
  Array.new(width) do |i|
    Array.new(vector_count) do |j|
      Math.sqrt(values[j]) * vectors[j][i]
    end
  end
end
# Multiplies +vectors+ by +data+ (using the file's Array#* matrix product)
# and returns the transposed result.
#
# @param vectors [Array<Array<Numeric>>] left-hand matrix
# @param data [Array<Array<Numeric>>] right-hand matrix
# @return [Array<Array<Numeric>>] transposed product
def scores(vectors, data)
  projected = vectors * data
  projected.trans
end
# Ranks the rows of +scores_array+ by one column, in descending order, and
# returns the 1-based row numbers at one end of that ranking.
#
# @param scores_array [Array<Array<Numeric>>] one row per item
# @param num_of_component [Integer] 1-based column to rank by
# @param amount [Integer] how many row numbers to return
# @param highest [Boolean] true: take from the top of the ranking in
#   descending order; false: take from the bottom, lowest first
# @return [Array<Integer, nil>] +amount+ row numbers; positions beyond the
#   number of rows are nil
def analysis(scores_array, num_of_component = 1, amount = 4, highest = true)
  column = num_of_component - 1
  labels = (1..scores_array.size).to_a
  ranked = scores_array.sort_zip(labels) { |a, b| a[column] > b[column] }[:other]
  Array.new(amount) { |k| highest ? ranked[k] : ranked[-k - 1] }
end
# Prints one ranking: a line with the 1-based row numbers, then a line with
# the matching first-column scores rounded to 3 decimals. Every entry is
# followed by a tab and both lines end with a newline.
#
# @param score_rows [Array<Array<Numeric>>] score matrix, one row per item
# @param ranking [Array<Integer>] 1-based row numbers to show
def print_ranking(score_rows, ranking)
  puts ranking.map { |idx| "#{idx}\t" }.join
  puts ranking.map { |idx| "#{score_rows[idx - 1][0].round(3)}\t" }.join
end

# Runs the whole analysis on a tab-separated input file and prints the
# report: the matrix from r_array, the eigenvalues with their individual and
# cumulative shares, the loading matrix of the retained components, and the
# items with the highest and lowest scores in the first column.
#
# @param path [String] input file (defaults to "input.txt")
def main(path = "input.txt")
  data = standarlize(path)
  cca = r_array(data)
  puts "相关系数矩阵" # "correlation coefficient matrix"
  puts cca.formal
  jac = jacobi(cca)
  eigenvalues = jac[:self]
  con = contribution(eigenvalues)
  sc = sum_contribution(con)
  puts "特征值及其百分率" # "eigenvalues and their percentages"
  # Header: component / eigenvalue / percentage / cumulative percentage.
  puts "主成分\t特征值\t百分数\t累计百分数"
  eigenvalues.each_with_index do |v, i|
    puts "#{i + 1}\t#{v.round(3)}\t#{con[i].round(3)}\t#{sc[:sum_con][i].round(4)}"
  end
  # Keep the components up to and including the index sum_contribution chose.
  vectors = jac[:other][0..sc[:threshold]]
  puts "主成分载荷矩阵" # "principal component loading matrix"
  puts load_array(vectors, eigenvalues).formal
  s = scores(jac[:other], data)
  # Never ask for more entries than there are rows: analysis pads with nil,
  # which used to crash the score lookup on inputs with fewer than 4 rows.
  amount = [4, s.size].min
  puts "第一主成分得分最高的流域" # "basins with the highest first-component score"
  print_ranking(s, analysis(s, 1, amount))
  puts "第一主成分得分最低的流域" # "basins with the lowest first-component score"
  print_ranking(s, analysis(s, 1, amount, false))
end
# Run the analysis as soon as this file is executed or loaded.
main