> index <- sample(nrow(data), nrow(data)*0.2) #sample data
> index
[1] 83 47 146 59 111 94 121 14 15 122 33 22 87 78 66 133 18 79 19
[20] 9 88 32 116 60 7 104 67 20 86 1
> data[index,]
Sepal.Length Sepal.Width Petal.Length Petal.Width Species Species2
83 5.8 2.7 3.9 1.2 versicolor vervir
47 5.1 3.8 1.6 0.2 setosa setosa
146 6.7 3.0 5.2 2.3 virginica vervir
59 6.6 2.9 4.6 1.3 versicolor vervir
111 6.5 3.2 5.1 2.0 virginica vervir
94 5.0 2.3 3.3 1.0 versicolor vervir
121 6.9 3.2 5.7 2.3 virginica vervir
14 4.3 3.0 1.1 0.1 setosa setosa
15 5.8 4.0 1.2 0.2 setosa setosa
122 5.6 2.8 4.9 2.0 virginica vervir
33 5.2 4.1 1.5 0.1 setosa setosa
22 5.1 3.7 1.5 0.4 setosa setosa
87 6.7 3.1 4.7 1.5 versicolor vervir
78 6.7 3.0 5.0 1.7 versicolor vervir
66 6.7 3.1 4.4 1.4 versicolor vervir
133 6.4 2.8 5.6 2.2 virginica vervir
18 5.1 3.5 1.4 0.3 setosa setosa
79 6.0 2.9 4.5 1.5 versicolor vervir
19 5.7 3.8 1.7 0.3 setosa setosa
9 4.4 2.9 1.4 0.2 setosa setosa
88 6.3 2.3 4.4 1.3 versicolor vervir
32 5.4 3.4 1.5 0.4 setosa setosa
116 6.4 3.2 5.3 2.3 virginica vervir
60 5.2 2.7 3.9 1.4 versicolor vervir
7 4.6 3.4 1.4 0.3 setosa setosa
104 6.3 2.9 5.6 1.8 virginica vervir
67 5.6 3.0 4.5 1.5 versicolor vervir
20 5.1 3.8 1.5 0.3 setosa setosa
86 6.0 3.4 4.5 1.6 versicolor vervir
1 5.1 3.5 1.4 0.2 setosa setosa
> res <- glm(Species~.,iris[index,], family=binomial) #logistic regression (binomial family)
警告メッセージ:
glm.fit: 数値的に 0 か 1 である確率が生じました
> res$coefficient
(Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
6.587948 -7.617771 -13.925058 36.005598 -31.809827
> logLik(res) #log-likelihood
'log Lik.' -1.785923e-10 (df=5)
> predict(res,iris,type="response") # predictions from fitting functions
1 2 3 4 5 6
2.220446e-16 4.328295e-13 2.220446e-16 3.870520e-11 2.220446e-16 2.220446e-16
7 8 9 10 11 12
2.220446e-16 2.220446e-16 7.856922e-11 9.478634e-11 2.220446e-16 4.737275e-12
13 14 15 16 17 18
2.231699e-11 2.220446e-16 2.220446e-16 2.220446e-16 2.220446e-16 2.220446e-16
19 20 21 22 23 24
2.220446e-16 2.220446e-16 1.795636e-12 2.220446e-16 2.220446e-16 2.220446e-16
25 26 27 28 29 30
2.326153e-07 2.709491e-10 2.220446e-16 2.220446e-16 2.220446e-16 1.643915e-10
31 32 33 34 35 36
3.088882e-10 2.220446e-16 2.220446e-16 2.220446e-16 3.937881e-12 2.220446e-16
37 38 39 40 41 42
2.220446e-16 2.220446e-16 5.330791e-13 2.220446e-16 2.220446e-16 1.769196e-10
43 44 45 46 47 48
2.220446e-16 2.220446e-16 1.556449e-13 2.220446e-16 2.220446e-16 2.626083e-13
49 50 51 52 53 54
2.220446e-16 2.220446e-16 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
55 56 57 58 59 60
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
61 62 63 64 65 66
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 9.999926e-01 1.000000e+00
67 68 69 70 71 72
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
73 74 75 76 77 78
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
79 80 81 82 83 84
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
85 86 87 88 89 90
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
91 92 93 94 95 96
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
97 98 99 100 101 102
1.000000e+00 1.000000e+00 9.974026e-01 1.000000e+00 1.000000e+00 1.000000e+00
103 104 105 106 107 108
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
109 110 111 112 113 114
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
115 116 117 118 119 120
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
121 122 123 124 125 126
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
127 128 129 130 131 132
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
133 134 135 136 137 138
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
139 140 141 142 143 144
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
145 146 147 148 149 150
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
> levels(data$Species2)
[1] "setosa" "vervir"
> yhat <- levels(data$Species2)[(predict(res,iris,type="response")>0.5)+1]
> yhat
[1] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[9] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[17] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[25] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[33] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[41] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[49] "setosa" "setosa" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[57] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[65] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[73] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[81] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[89] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[97] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[105] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[113] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[121] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[129] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[137] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[145] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
> mean(yhat[index] != data$Species2[index]) #training error
[1] 0
> mean(yhat[-index] != data$Species2[-index]) #test (generalization) error
[1] 0
> table(true=data$Species2,prediction=yhat) #show result
prediction
true setosa vervir
setosa 50 0
vervir 0 100
Showing posts with label machine learning. Show all posts
Showing posts with label machine learning. Show all posts
2012/08/21
R tips: first contact with linear pattern recognition in R.
ラベル:
machine learning,
R,
tips
This is just test code for linear pattern recognition. Below code is introduced in
http://www.slideshare.net/sleipnir002/05-12739580.
2011/08/16
python tips: draw beta distribution with matplotlib
ラベル:
machine learning,
python
# Third-party imports (a duplicated "import numpy as np" line was removed).
import numpy as np
import pylab as pl
import scipy.special as ss
def beta(a, b, mew):
    """Beta(a, b) probability density evaluated at mew.

    Uses the definition B(mew; a, b) =
    Gamma(a+b) / (Gamma(a) * Gamma(b)) * mew^(a-1) * (1-mew)^(b-1).
    """
    norm = ss.gamma(a + b) / (ss.gamma(a) * ss.gamma(b))
    shape = mew ** (a - 1) * (1 - mew) ** (b - 1)
    return norm * shape
def plot_beta(a, b):
    """Plot the Beta(a, b) density on a 100-point grid over [0, 1]."""
    grid = np.mgrid[0:1:100j]
    xs = [m for m in grid]
    ys = [beta(a, b, m) for m in grid]
    pl.plot(xs, ys, label="a=%f, b=%f" % (a, b))
def main():
    """Overlay several Beta densities on one figure and display it."""
    for a, b in ((0.1, 0.1), (1, 1), (2, 3), (8, 4)):
        plot_beta(a, b)
    pl.xlim(0.0, 1.0)
    pl.ylim(0.0, 3.0)
    pl.legend()
    pl.show()


if __name__ == "__main__":
    main()
2011/08/11
python tips: draw poisson pdf graph with matplotlib
ラベル:
machine learning,
python
This is code for drawing a graph of the Poisson pmf with matplotlib.
#!/usr/bin/env python
#coding: utf-8
import pylab as pl
import numpy as np
def get_factorial(n):
    """Return n! for a non-negative integer n.

    Iterative rather than recursive: the original recursed once per unit
    of n, which hits the interpreter recursion limit for large n and
    never terminates for negative input.
    """
    if n < 0:
        raise ValueError("factorial is undefined for negative n")
    result = 1
    for i in range(2, n + 1):
        result *= i
    return result
def get_probability(p, n, r):
    """Poisson probability P(X = r) with rate mew = n * p.

    This is the Poisson approximation to Binomial(n, p); r must be a
    non-negative integer.  The original named the numerator `denom` and
    the denominator `molec` (swapped); it also depended on a sibling
    recursive factorial — stdlib math is used instead so the function is
    self-contained.
    """
    import math  # local import keeps the function usable on its own
    mew = float(n) * p
    numerator = (mew ** r) * math.exp(-mew)  # mew^r * e^(-mew)
    denominator = math.factorial(r)          # r!
    return numerator / float(denominator)
def plot_poisson_distrib(p, n, lt, lab):
    """Plot Poisson probabilities for r = 0..19 with line style lt and label lab."""
    rs = list(range(0, 20, 1))
    probs = [get_probability(p, n, r) for r in rs]
    pl.plot(rs, probs, lt, label=lab)
def main():
    """Overlay Poisson pmfs for mew = 1.0, 2.5, 5.0 (p fixed at 0.02) and show them."""
    p = 0.02
    for mew, style in ((1.0, 'b-'), (2.5, 'r-'), (5.0, 'g-')):
        plot_poisson_distrib(p, (mew / p), style, 'P(%f,%f)' % (mew, p))
    pl.ylabel('p(x)')
    pl.xlabel('x')
    pl.legend()
    pl.show()


if __name__ == "__main__":
    main()
and output of above code is:
python tips: draw binary pdf graph with matplotlib
ラベル:
machine learning,
python
This is code for drawing a graph of the binomial probability distribution with matplotlib.
#!/usr/bin/env python
#coding: utf-8
import pylab as pl
def combination(n, r):
    """Binomial coefficient C(n, r) = n! / ((n-r)! * r!), returned as a float.

    The original built the three factorials with the builtin `reduce`,
    which no longer exists in Python 3 (it moved to functools), and
    relied on Python-2 integer division.  Exact integer loops are used
    instead; n!/(n-r)! is computed directly to avoid a huge intermediate
    n! value.
    """
    if (r == 0) or (n == r):
        return 1.0
    numerator = 1
    for i in range(n - r + 1, n + 1):  # n * (n-1) * ... * (n-r+1)
        numerator *= i
    denominator = 1
    for i in range(1, r + 1):          # r!
        denominator *= i
    # The division is exact: C(n, r) is always an integer.
    return float(numerator // denominator)
def get_probability(p, n, r):
    """P(X = r) for X ~ Binomial(n, p)."""
    failure = 1 - p
    success_part = p ** r
    failure_part = failure ** (n - r)
    return combination(n, r) * success_part * failure_part
def plot_bin_distrib(p, n, lt, lab):
    """Plot Bin(n, p) probabilities for r = 0..n with line style lt and label lab."""
    counts = list(range(0, n + 1, 1))
    probs = [get_probability(p, n, r) for r in counts]
    pl.plot(counts, probs, lt, label=lab)
def main():
    """Overlay binomial pmfs for n = 10, 20, 30, 40 at p = 1/3, then display them."""
    p = 1.0/3.0
    for n, style in ((10, 'b-'), (20, 'r-'), (30, 'g-'), (40, 'y-')):
        plot_bin_distrib(p, n, style, 'Bin(%s,%f)' % (n, p))
    pl.ylabel('p(x)')
    pl.xlabel('x')
    pl.legend()
    pl.show()


if __name__ == "__main__":
    main()
and here is output from above code.
2011/08/03
machine learning: curve-fitting
ラベル:
machine learning,
python
#coding: utf-8
import numpy as np
from pylab import *
import sys
# Degree of the fitted polynomial: the model has M+1 coefficients w0..wM.
M = 9
def y(x, wlist):
    """Evaluate the degree-M polynomial sum_i wlist[i] * x**i at the point x."""
    higher_terms = [wlist[i] * (x ** i) for i in range(1, M + 1)]
    return wlist[0] + sum(higher_terms)
def estimate(xlist, tlist, lam):
    """Ridge-regularized polynomial least squares via the normal equations.

    Builds A[i][j] = sum_n x_n^(i+j) (+ lam on the diagonal) and
    T[i] = sum_n x_n^i * t_n, then solves A w = T for the coefficient
    vector w of length M+1.
    """
    size = M + 1
    rows = []
    for i in range(size):
        row = []
        for j in range(size):
            entry = (xlist ** (i + j)).sum()
            if i == j:
                entry += lam  # regularization term
            row.append(entry)
        rows.append(row)
    A = np.array(rows)
    T = np.array([((xlist ** i) * tlist).sum() for i in range(size)])
    return np.linalg.solve(A, T)
def rms(xlist, tlist, wlist):
    """Root-mean-square error of the polynomial wlist on (xlist, tlist), as a string."""
    N = len(xlist)
    E_w = 0
    for n in range(0, N):
        # Evaluate the polynomial at xlist[n] (local renamed from `sum`,
        # which shadowed the builtin).
        pred = 0
        for j in range(0, M + 1):
            pred += wlist[j] * (xlist[n] ** j)
        E_w += ((pred - tlist[n]) ** 2) / 2
    return str(np.sqrt(2 * E_w / N))
def example1():
    """Fit a degree-M polynomial to noisy sin(2*pi*x) samples and plot the fit.

    Trains on 10 points, reports RMS error on training data and on a
    fresh 100-point test set, then plots data, the true curve, and the
    fitted model.
    """
    # Training data: N points of sin(2*pi*x) plus Gaussian noise.
    n_train = 10
    xlist = np.linspace(0, 1, n_train)
    tlist = np.sin(2 * np.pi * xlist) + np.random.normal(0, 0.2, xlist.size)
    # Estimate coefficients with regularization strength exp(-18).
    wlist = estimate(xlist, tlist, np.exp(-18.0))
    print("E_RMS for training data: %s" % rms(xlist, tlist, wlist))
    # Independent test data.
    n_test = 100
    xlist2 = np.linspace(0, 1, n_test)
    tlist2 = np.sin(2 * np.pi * xlist2) + np.random.normal(0, 0.2, xlist2.size)
    print("E_RMS for test data: %s" % rms(xlist2, tlist2, wlist))
    # Dense grid for the ideal curve and the fitted model.
    xs = np.linspace(0, 1, 1000)
    ideal = np.sin(2 * np.pi * xs)
    model = [y(x, wlist) for x in xs]
    plot(xlist, tlist, 'bo')
    plot(xlist2, tlist2, 'rd')
    plot(xs, ideal, 'g-')
    plot(xs, model, 'r-')
    xlim(0.0, 1.0)
    ylim(-1.5, 1.5)
    show()


if __name__ == "__main__":
    example1()
2011/05/01
Road to PRMN (2011.05.01)
ラベル:
machine learning
I started to study linear algebra and statistics to understand PRMN (Pattern Recognition and Machine Learning) by C.M. Bishop.
My plan to get through with PRMN is like this:
1. study fundamentals of linear algebra with these books:
[ゼロから学ぶ線形代数]
[プログラミングのための線形代数]
2. study fundamentals of calculus with this book:
[ゼロから学ぶ微分積分]
3. study fundamentals of probability statistics with this book:
[プログラミングのための確率統計]
4. PRMN?
| PRMN japanese edition I |
My plan to get through with PRMN is like this:
1. study fundamentals of linear algebra with these books:
[ゼロから学ぶ線形代数]
[プログラミングのための線形代数]
2. study fundamentals of calculus with this book:
[ゼロから学ぶ微分積分]
3. study fundamentals of probability statistics with this book:
[プログラミングのための確率統計]
4. PRMN?
Subscribe to:
Posts (Atom)


