# -*- coding: utf-8 -*-
from skimage import io, transform
import glob
import os
import tensorflow as tf
import numpy as np
import time

path = 'D:/code/python/Anaconda3/envs/faces'

# Resize all images to 128*128 with 3 channels
w = 128
h = 128
c = 3

# Read the images: one sub-folder per class
def read_img(path):
    cate = [path + '/' + x for x in os.listdir(path) if os.path.isdir(path + '/' + x)]
    imgs = []
    labels = []
    for idx, folder in enumerate(cate):
        for im in glob.glob(folder + '/*.png'):
            print('reading the images: %s' % im)
            img = io.imread(im)
            img = transform.resize(img, (w, h, c))
            imgs.append(img)
            labels.append(idx)
    return np.asarray(imgs, np.float32), np.asarray(labels, np.int32)

data, label = read_img(path)

# Shuffle the samples
num_example = data.shape[0]
arr = np.arange(num_example)
np.random.shuffle(arr)
data = data[arr]
label = label[arr]

# Split the data into a training set and a validation set
ratio = 0.8
s = int(num_example * ratio)
x_train = data[:s]
y_train = label[:s]
x_val = data[s:]
y_val = label[s:]

# ----------------- Build the network -----------------
# Placeholders
x = tf.placeholder(tf.float32, shape=[None, w, h, c], name='x')
y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')

def CNNlayer():
    # First convolutional layer (128 -> 64)
    conv1 = tf.layers.conv2d(
        inputs=x,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Second convolutional layer (64 -> 32)
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Third convolutional layer (32 -> 16)
    conv3 = tf.layers.conv2d(
        inputs=pool2,
        filters=128,
        kernel_size=[3, 3],
        padding="same",
        activation=tf.nn.relu,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)

    # Fourth convolutional layer (16 -> 8)
    conv4 = tf.layers.conv2d(
        inputs=pool3,
        filters=128,
        kernel_size=[3, 3],
        padding="same",
        activation=tf.nn.relu,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
    pool4 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2)

    # Flatten the 8x8x128 feature maps, then fully connected layers
    re1 = tf.reshape(pool4, [-1, 8 * 8 * 128])
    dense1 = tf.layers.dense(inputs=re1,
                             units=1024,
                             activation=tf.nn.relu,
                             kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                             kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
    dense2 = tf.layers.dense(inputs=dense1,
                             units=512,
                             activation=tf.nn.relu,
                             kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                             kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
    # Logits for 60 classes (labels come from the sub-folder index)
    logits = tf.layers.dense(inputs=dense2,
                             units=60,
                             activation=None,
                             kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                             kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
    return logits
# --------------------------- End of network ---------------------------

logits = CNNlayer()
loss = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=logits)
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Helper that yields the data batch by batch
def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batch_size]
        else:
            excerpt = slice(start_idx, start_idx + batch_size)
        yield inputs[excerpt], targets[excerpt]

# Training and validation; n_epoch can be set larger
saver = tf.train.Saver(max_to_keep=3)
max_acc = 0
os.makedirs('ckpt1', exist_ok=True)  # make sure the checkpoint directory exists
f = open('ckpt1/acc.txt', 'w')

n_epoch = 10
batch_size = 64
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

for epoch in range(n_epoch):
    start_time = time.time()

    # training
    train_loss, train_acc, n_batch = 0, 0, 0
    for x_train_a, y_train_a in minibatches(x_train, y_train, batch_size, shuffle=True):
        _, err, ac = sess.run([train_op, loss, acc], feed_dict={x: x_train_a, y_: y_train_a})
        train_loss += err; train_acc += ac; n_batch += 1
    print("train loss: %f" % (train_loss / n_batch))
    print("train acc: %f" % (train_acc / n_batch))

    # validation
    val_loss, val_acc, n_batch = 0, 0, 0
    for x_val_a, y_val_a in minibatches(x_val, y_val, batch_size, shuffle=False):
        err, ac = sess.run([loss, acc], feed_dict={x: x_val_a, y_: y_val_a})
        val_loss += err; val_acc += ac; n_batch += 1
    print("validation loss: %f" % (val_loss / n_batch))
    print("validation acc: %f" % (val_acc / n_batch))

    # val_acc is summed over the batches; only the relative comparison matters here
    f.write(str(epoch + 1) + ', val_acc: ' + str(val_acc) + '\n')
    if val_acc > max_acc:
        max_acc = val_acc
        saver.save(sess, 'ckpt1/faces.ckpt', global_step=epoch + 1)

f.close()
sess.close()
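Once training finishes, the checkpoints saved under ckpt1/ can be reloaded for prediction. The following is a minimal sketch of how a single image could be classified with the latest checkpoint; it assumes the graph-building code above has already been run in the same process (so x, logits, w, h and c are defined), and the helper name predict_image is purely illustrative.

pred_op = tf.argmax(logits, 1)

def predict_image(img_path):
    # Illustrative helper: load one image, resize it the same way as in training,
    # restore the most recent checkpoint and return the predicted class index.
    img = io.imread(img_path)
    img = transform.resize(img, (w, h, c)).astype(np.float32)
    with tf.Session() as pred_sess:
        tf.train.Saver().restore(pred_sess, tf.train.latest_checkpoint('ckpt1'))
        return pred_sess.run(pred_op, feed_dict={x: img[np.newaxis, ...]})[0]

The returned index corresponds to the position of the matching sub-folder in the (sorted order of) os.listdir, i.e. the same label assignment used by read_img above.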