# Load the stacked autoencoder package (library() errors loudly if missing,
# unlike require() which only returns FALSE).
library(SAENET)

# Abalone data set from the UCI Machine Learning Repository.
aburl <- "http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"
col_names <- c("sex", "length", "diameter", "height", "whole.weight",
               "shucked.weight", "viscera.weight", "shell.weight", "rings")
data <- read.table(aburl, header = FALSE, sep = ",", col.names = col_names)

# Drop the categorical sex column; a height of zero is physically impossible,
# so treat it as missing and remove those rows.
data$sex <- NULL
data$height[data$height == 0] <- NA
data <- na.omit(data)

# SAENET.train() expects a numeric matrix.
data1 <- as.matrix(data)

# Reproducible sample of 10 observations to train on.
set.seed(2016)
n <- nrow(data)
train <- sample(1:n, 10, FALSE)
# Train a stacked sparse autoencoder with 3 hidden layers of sizes c(5, 4, 2).
fit <- SAENET.train(X.train = data1[train, ],
                    n.nodes = c(5, 4, 2),
                    unit.type = "logistic",     # logistic activation units
                    lambda = 1e-5,              # weight-decay penalty
                    beta = 1e-5,                # sparsity penalty weight
                    rho = 0.07,                 # target sparsity (mean activation)
                    epsilon = 0.1,              # initial-weight scale
                    max.iterations = 100,
                    optim.method = c("BFGS"),
                    rel.tol = 0.01,
                    rescale.flag = TRUE,
                    rescaling.offset = 0.001)
# Console output (for reference): each layer reports its optimizer trace, e.g.
#   autoencoding... Optimizer counts: function 13, gradient 11
#   Optimizer: successful convergence (convergence = 0)
#   J.init = 0.8221106, J.final = 0.01901952, mean(rho.hat.final) = 0.5837676
#   ... (similar lines for layers 2 and 3)

# Two-dimensional encoding produced by the deepest (third) layer:
fit[[3]]$X.output
# Console output (for reference):
#           [,1]      [,2]
# 753  0.4837342 0.4885643
# 597  0.4837314 0.4885684
# 3514 0.4837309 0.4885684
# 558  0.4837333 0.4885653
# 1993 0.4837282 0.4885726
# 506  0.4837351 0.4885621
# 2572 0.4837315 0.4885684
# 3713 0.4837321 0.4885674
# 11   0.4837346 0.4885632
# 223  0.4837310 0.4885684
# Row names are the sampled abalone row indices: row 753 is observation 1,
# row 597 is observation 2, ..., row 223 is observation 10.

library(RcppDL)
# The ltm package supplies the Mobility survey data (binary item responses).
library(ltm)
data("Mobility")
data <- Mobility

# Reproducible sample of 1000 observations.
set.seed(17)
# NOTE: the original used sample(1:n, 1000, FALSE) where n was the stale row
# count left over from the abalone section — sample from Mobility's own rows.
# (Renamed from `sample` to avoid shadowing base::sample.)
idx <- sample(1:nrow(Mobility), 1000, FALSE)
data <- as.matrix(Mobility[idx, ])
n <- nrow(data)

# 800 observations for training; the remaining 200 are the test set.
train <- sample(1:n, 800, FALSE)
x_train <- matrix(as.numeric(unlist(data[train, ])), nrow = nrow(data[train, ]))
x_test <- matrix(as.numeric(unlist(data[-train, ])), nrow = nrow(data[-train, ]))
nrow(x_train)
nrow(x_test)

# Column 3 is the response item; drop it from the predictor matrices.
x_train <- x_train[, -3]
x_test <- x_test[, -3]
# Inspect the predictor matrices.
head(x_train)
# Console output (for reference):
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
# [1,]    1    0    1    0    0    0    0
# [2,]    1    0    1    0    0    0    0
# [3,]    1    1    0    0    0    0    0
# [4,]    1    0    0    0    0    0    0
# [5,]    1    1    0    0    1    0    0
# [6,]    1    0    0    0    0    0    0
head(x_test)
# Console output (for reference):
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
# [1,]    1    0    1    0    0    0    0
# [2,]    0    0    0    0    0    0    0
# [3,]    1    0    1    0    0    0    0
# [4,]    1    1    1    0    0    0    0
# [5,]    1    1    1    0    0    0    0
# [6,]    0    0    0    0    0    0    0

# Training response: column 3, one-hot encoded into two columns
# (first column = class 1, second column = class 0), as Rsda expects.
y_train <- data[train, 3]
temp <- ifelse(y_train == 0, 1, 0)
y_train <- cbind(y_train, temp)
head(y_train)
# Console output (for reference):
#      y_train temp
# 832        1    0
# 230        0    1
# 1212       1    0
# 823        1    0
# 774        1    0
# 3192       1    0
nrow(y_train)
# Test response: same one-hot encoding as y_train.
y_test <- data[-train, 3]
temp1 <- ifelse(y_test == 0, 1, 0)
y_test <- cbind(y_test, temp1)
head(y_test)
# Console output (for reference):
#      y_test temp1
# 4043      1     0
# 1809      0     1
# 3475      0     1
# 1089      1     0
# 3074      1     0
# 3335      1     0
nrow(y_test)
# [1] 200

library(RcppDL)
# Stacked denoising autoencoder with two hidden layers of 10 units each.
hidden <- c(10, 10)
fit <- Rsda(x_train, y_train, hidden)  # default corruption (noise) level is 30%
setCorruptionLevel(fit, x = 0.0)       # first run: no input corruption
summary(fit)
# Console output (for reference):
# $PretrainLearningRate
# [1] 0.1
# $CorruptionLevel
# [1] 0
# $PretrainingEpochs
# [1] 1000
# $FinetuneLearningRate
# [1] 0.1
# $FinetuneEpochs
# [1] 500

# Layer-wise pretraining, then supervised fine-tuning.
pretrain(fit)
finetune(fit)

# Class probabilities for the test set (column 1 = class 1, column 2 = class 0).
predProb <- predict(fit, x_test)
head(predProb, 6)
# Console output (for reference):
#           [,1]       [,2]
# [1,] 0.8412920 0.15870799
# [2,] 0.4979298 0.50207019
# [3,] 0.8412920 0.15870799
# [4,] 0.9201304 0.07986961
# [5,] 0.9201304 0.07986961
# [6,] 0.4979298 0.50207019
# Compare predicted classes against the observed ones.
head(y_test, 3)
# Console output (for reference):
#      y_test temp1
# 4043      1     0
# 1809      0     1
# 3475      0     1

# Threshold the class-1 probability at 0.5 and build the confusion matrix.
pred1 <- ifelse(predProb[, 1] >= 0.5, 1, 0)
table(pred1, y_test[, 1], dnn = c("Predicted", "Observed"))
# Console output (for reference):
#          Observed
# Predicted   0   1
#         0  20  12
#         1  31 137

# Refit, this time adding 25% input noise (corruption) during pretraining.
setCorruptionLevel(fit, x = 0.25)
pretrain(fit)
finetune(fit)
predProb <- predict(fit, x_test)
pred1 <- ifelse(predProb[, 1] >= 0.5, 1, 0)
table(pred1, y_test[, 1], dnn = c("Predicted", "Observed"))
# Console output (for reference): every observation is now predicted class 1.
#          Observed
# Predicted   0   1
#         1  51 149