2012/08/21

R tips: first contact with linear pattern recognition in R.

This is just test code for linear pattern recognition. The code below is taken from http://www.slideshare.net/sleipnir002/05-12739580.
> index <- sample(nrow(data), nrow(data)*0.2) #randomly sample 20% of the row indices as training data
> index
 [1]  83  47 146  59 111  94 121  14  15 122  33  22  87  78  66 133  18  79  19
[20]   9  88  32 116  60   7 104  67  20  86   1
> data[index,]
    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species Species2
83           5.8         2.7          3.9         1.2 versicolor   vervir
47           5.1         3.8          1.6         0.2     setosa   setosa
146          6.7         3.0          5.2         2.3  virginica   vervir
59           6.6         2.9          4.6         1.3 versicolor   vervir
111          6.5         3.2          5.1         2.0  virginica   vervir
94           5.0         2.3          3.3         1.0 versicolor   vervir
121          6.9         3.2          5.7         2.3  virginica   vervir
14           4.3         3.0          1.1         0.1     setosa   setosa
15           5.8         4.0          1.2         0.2     setosa   setosa
122          5.6         2.8          4.9         2.0  virginica   vervir
33           5.2         4.1          1.5         0.1     setosa   setosa
22           5.1         3.7          1.5         0.4     setosa   setosa
87           6.7         3.1          4.7         1.5 versicolor   vervir
78           6.7         3.0          5.0         1.7 versicolor   vervir
66           6.7         3.1          4.4         1.4 versicolor   vervir
133          6.4         2.8          5.6         2.2  virginica   vervir
18           5.1         3.5          1.4         0.3     setosa   setosa
79           6.0         2.9          4.5         1.5 versicolor   vervir
19           5.7         3.8          1.7         0.3     setosa   setosa
9            4.4         2.9          1.4         0.2     setosa   setosa
88           6.3         2.3          4.4         1.3 versicolor   vervir
32           5.4         3.4          1.5         0.4     setosa   setosa
116          6.4         3.2          5.3         2.3  virginica   vervir
60           5.2         2.7          3.9         1.4 versicolor   vervir
7            4.6         3.4          1.4         0.3     setosa   setosa
104          6.3         2.9          5.6         1.8  virginica   vervir
67           5.6         3.0          4.5         1.5 versicolor   vervir
20           5.1         3.8          1.5         0.3     setosa   setosa
86           6.0         3.4          4.5         1.6 versicolor   vervir
1            5.1         3.5          1.4         0.2     setosa   setosa
> res <- glm(Species~.,iris[index,], family=binomial) #logistic regression (binomial GLM)
 警告メッセージ: 
 glm.fit: 数値的に 0 か 1 である確率が生じました  
> res$coefficient
 (Intercept) Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
    6.587948    -7.617771   -13.925058    36.005598   -31.809827 
> logLik(res) #log-likelihood
'log Lik.' -1.785923e-10 (df=5)
> predict(res,iris,type="response") # predicted probabilities from the fitted model
           1            2            3            4            5            6 
2.220446e-16 4.328295e-13 2.220446e-16 3.870520e-11 2.220446e-16 2.220446e-16 
           7            8            9           10           11           12 
2.220446e-16 2.220446e-16 7.856922e-11 9.478634e-11 2.220446e-16 4.737275e-12 
          13           14           15           16           17           18 
2.231699e-11 2.220446e-16 2.220446e-16 2.220446e-16 2.220446e-16 2.220446e-16 
          19           20           21           22           23           24 
2.220446e-16 2.220446e-16 1.795636e-12 2.220446e-16 2.220446e-16 2.220446e-16 
          25           26           27           28           29           30 
2.326153e-07 2.709491e-10 2.220446e-16 2.220446e-16 2.220446e-16 1.643915e-10 
          31           32           33           34           35           36 
3.088882e-10 2.220446e-16 2.220446e-16 2.220446e-16 3.937881e-12 2.220446e-16 
          37           38           39           40           41           42 
2.220446e-16 2.220446e-16 5.330791e-13 2.220446e-16 2.220446e-16 1.769196e-10 
          43           44           45           46           47           48 
2.220446e-16 2.220446e-16 1.556449e-13 2.220446e-16 2.220446e-16 2.626083e-13 
          49           50           51           52           53           54 
2.220446e-16 2.220446e-16 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
          55           56           57           58           59           60 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
          61           62           63           64           65           66 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 9.999926e-01 1.000000e+00 
          67           68           69           70           71           72 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
          73           74           75           76           77           78 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
          79           80           81           82           83           84 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
          85           86           87           88           89           90 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
          91           92           93           94           95           96 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
          97           98           99          100          101          102 
1.000000e+00 1.000000e+00 9.974026e-01 1.000000e+00 1.000000e+00 1.000000e+00 
         103          104          105          106          107          108 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
         109          110          111          112          113          114 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
         115          116          117          118          119          120 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
         121          122          123          124          125          126 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
         127          128          129          130          131          132 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
         133          134          135          136          137          138 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
         139          140          141          142          143          144 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
         145          146          147          148          149          150 
1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 
> levels(data$Species2)
[1] "setosa" "vervir"
> yhat <- levels(data$Species2)[(predict(res,iris,type="response")>0.5)+1]
> yhat
  [1] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
  [9] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
 [17] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
 [25] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
 [33] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
 [41] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
 [49] "setosa" "setosa" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
 [57] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
 [65] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
 [73] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
 [81] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
 [89] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
 [97] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[105] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[113] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[121] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[129] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[137] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
[145] "vervir" "vervir" "vervir" "vervir" "vervir" "vervir"
> mean(yhat[index] != data$Species2[index]) #training error
[1] 0
> mean(yhat[-index] != data$Species2[-index]) #test error (on held-out data)
[1] 0
> table(true=data$Species2,prediction=yhat) #show result
        prediction
true     setosa vervir
  setosa     50      0
  vervir      0    100

No comments:

Post a Comment

100