2018-10-18 18:15:44 luoganttcc


Training with train_on_batch:
import numpy as np

# model, train_datagen (an ImageDataGenerator) and the custom generator
# GET_DATASET_SHUFFLE are assumed to be defined elsewhere.
n_epoch = 12
batch_size = 16
for e in range(n_epoch):
    print("epoch", e)
    batch_num = 0
    loss_sum = np.array([0.0, 0.0])
    for X_train, y_train in GET_DATASET_SHUFFLE(train_X, batch_size, True):  # one batch of raw images
        for X_batch, y_batch in train_datagen.flow(X_train, y_train, batch_size=batch_size):
            loss = model.train_on_batch(X_batch, y_batch)
            loss_sum += loss
            batch_num += 1
            break  # manual break: take a single augmented batch, then move on
        if batch_num % 200 == 0:
            print("epoch %s, batch %s: train_loss = %.4f, train_acc = %.4f" % (e, batch_num, loss_sum[0] / 200, loss_sum[1] / 200))
            loss_sum = np.array([0.0, 0.0])
    res = model.evaluate_generator(GET_DATASET_SHUFFLE(val_X, batch_size, False), int(len(val_X) / batch_size))
    print("val_loss = %.4f, val_acc = %.4f" % (res[0], res[1]))

    model.save("weight.h5")
2019-07-10 15:08:28 weixin_44109874

The full training code is as follows:

import os
import numpy as np
from PIL import Image
from keras.models import Sequential
from keras.layers import Convolution2D, Flatten, MaxPooling2D, Dense, Activation
from keras.optimizers import Adam
from keras.utils import np_utils

# Preprocess the images

class PreFile(object):
    def __init__(self, FilePath, DogType):
        self.FilePath = FilePath
        self.DogType = DogType

    def FileReName(self):
        # Prefix every file name with its class index, e.g. "0_xxx", "1_xxx"
        count = 0
        for type in self.DogType:
            subfolder = os.listdir(self.FilePath + type)
            for subclass in subfolder:
                print('count_classes:-->', count)
                print(subclass)
                print(self.FilePath + type + '/' + subclass)
                os.rename(self.FilePath + type + '/' + subclass,
                          self.FilePath + type + '/' + str(count) + '_' + subclass.split('.')[0])
            count += 1

    def FileResize(self, Width, Height, Output_folder):
        # Convert every image to RGB and resize it to Width x Height
        for type in self.DogType:
            print(type)
            files = os.listdir(self.FilePath + type)
            for i in files:
                img_open = Image.open(self.FilePath + type + '/' + i)
                conv_RGB = img_open.convert('RGB')
                new_img = conv_RGB.resize((Width, Height), Image.BILINEAR)
                new_img.save(os.path.join(Output_folder, os.path.basename(i)))

#main training program
class Training(object):
    def __init__(self, batch_size, number_batch, categories, train_folder):
        self.batch_size = batch_size
        self.number_batch = number_batch
        self.categories = categories
        self.train_folder = train_folder

    #Read image and return Numpy array
    def read_train_images(self, filename):
        img = Image.open(self.train_folder + filename)
        return np.array(img)

    def train(self):
        train_img_list = []     #x_train
        train_label_list = []   #y_train
        for file in os.listdir(self.train_folder):
            file_img_in_array = self.read_train_images(filename = file)
            train_img_list.append(file_img_in_array)
            train_label_list.append(int(file.split('_')[0]))
        train_img_list = np.array(train_img_list)
        train_label_list = np.array(train_label_list)

        train_label_list = np_utils.to_categorical(train_label_list,
                                                   self.categories) 

        train_img_list = train_img_list.astype('float32')
        train_img_list /= 255.0

        # Build the CNN
        model = Sequential()

        #CNN Layer - 1  #input shape (100,100,3)
        model.add(Convolution2D(
            input_shape=(100,100,3),
            filters=32, #next layer output (100,100,32)
            kernel_size=(5,5), #pixel filtered
            padding='same',  #'same' padding keeps the spatial size
        ))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(
            pool_size=(2,2),  # Output next layer (50,50,32)
            strides=(2,2),
            padding='same'
        ))

        #CNN Layer - 2
        model.add(Convolution2D(
            filters=64,  # next layer output (50,50,64)
            kernel_size=(2, 2),  # pixel filtered
            padding='same',  # 'same' padding keeps the spatial size
        ))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(
            pool_size=(2, 2),  # Output next layer (25,25,64)
            strides=(2, 2),
            padding='same'
        ))

        #Fully connected Layer - 1
        model.add(Flatten())  # flatten the feature maps to a 1-D vector
        model.add(Dense(1024))
        model.add(Activation('relu'))

        #Fully connected Layer - 2
        model.add(Dense(512))
        model.add(Activation('relu'))

        #Fully connected Layer - 3
        model.add(Dense(256))
        model.add(Activation('relu'))

        #Fully connected Layer - 4
        model.add(Dense(self.categories))
        model.add(Activation('softmax'))

        #Define Optimizer
        adam = Adam(lr=0.0001)
        model.compile(optimizer=adam,
                      loss='categorical_crossentropy',
                      metrics=['accuracy']
                      )

        # Fire up the network

        model.fit(
            x=train_img_list,
            y=train_label_list,
            epochs=self.number_batch,
            batch_size=self.batch_size,
            verbose=1
        )

        # Save your model
        model.save('./dogfinder.h5')

def main():
    DogType = ['哈士奇', '德国牧羊犬']  # folder names: Husky, German Shepherd
    Train = Training(batch_size=1024,
                     number_batch=30,
                     categories=2,
                     train_folder='train_img/')
    Train.train()

main()
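
Note that main() never invokes the PreFile class defined above. A minimal sketch of how the preprocessing step might be wired in before training (the raw_img/ source folder name is an assumption):

# Hypothetical one-off preprocessing run before training
pre = PreFile(FilePath='raw_img/', DogType=['哈士奇', '德国牧羊犬'])
pre.FileReName()                                    # prefix file names with class indices
pre.FileResize(Width=100, Height=100, Output_folder='train_img/')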

2019-03-27 21:10:36 Harrison509

1. Read the data from the folders

import os
import random
import numpy as np
from keras.preprocessing import image
from sklearn.model_selection import train_test_split

image_size = (100, 100)  # assumed; set this to your model's input size
train_idx = 0
path = './dataset_path/'
class_folders = os.listdir(path)
random.shuffle(class_folders)
images = []
labels = []
for f in class_folders:  # every class folder under the dataset path
    file_path = os.path.join(path, str(f))
    for root, dirs, files in os.walk(file_path):
        for file in files:
            if os.path.splitext(file)[1] == '.png':
                train_idx = train_idx + 1
                img_path = os.path.join(file_path, str(file))
                # print('img_path={}'.format(img_path))
                img = image.load_img(img_path, target_size=image_size)
                img_array = image.img_to_array(img)
                images.append(img_array)
                labels.append(f)  # label = folder name
images = np.array(images)   # (num, h, w, 3)
labels = np.array(labels)   # (num, )
images /= 255
# Split into training data/labels and validation data/labels
x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.2)
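
Note that labels holds the folder-name strings, while the softmax classifier below expects one-hot targets when trained with categorical cross-entropy. A sketch of the missing conversion, assuming the class folders are named with integer ids:

from keras.utils import np_utils

# Assumes folder names are integer class ids, e.g. '0' ... '24'
y_train = np_utils.to_categorical(y_train.astype(int), n_class)
y_test = np_utils.to_categorical(y_test.astype(int), n_class)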

2. Model construction and compilation

""" 共4层卷积网、二层全连接层"""

model = Sequential()

model.add(Conv2D(32, kernel_size=(5, 5), input_shape=(img_h, img_h, 3), activation='relu', padding='same'))
model.add(MaxPool2D())
model.add(Dropout(0.3))

model.add(Conv2D(64, kernel_size=(5, 5), activation='relu', padding='same'))
model.add(MaxPool2D())
model.add(Dropout(0.3))

model.add(Conv2D(128, kernel_size=(5, 5), activation='relu', padding='same'))
model.add(MaxPool2D())
model.add(Dropout(0.5))

model.add(Conv2D(256, kernel_size=(5, 5), activation='relu', padding='same'))
model.add(MaxPool2D())
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(n_class, activation='softmax')) # 25-way classification

model.summary()
model.compile(loss=loss_func, optimizer=Adam(lr=0.0003), metrics=['accuracy'])

3. Feeding the data

model.fit(x_train, y_train,
      batch_size=nbatch_size,
      epochs=nepochs,
      verbose=1,
      validation_data=(x_test, y_test))

4. Saving the model

yaml_string = model.to_yaml()
with open('./models/model_name.yaml', 'w') as outfile:
    outfile.write(yaml_string)
model.save_weights('./models/model_name.h5')
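
To restore this model later, the YAML architecture and the weights can be reloaded separately; a quick sketch:

from keras.models import model_from_yaml

with open('./models/model_name.yaml') as infile:
    model = model_from_yaml(infile.read())
model.load_weights('./models/model_name.h5')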
2019-02-08 23:20:33 weixin_39965184

1. Training the MNIST dataset with Keras

  1. Environment: Keras, Jupyter Notebook
  2. Load the Keras modules
from __future__ import print_function
import numpy as np
np.random.seed(1337)

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense,Dropout,Activation
from keras.optimizers import SGD,Adam,RMSprop
from keras.utils import np_utils
  3. To plot the model, also load plot_model
from keras.utils.vis_utils import plot_model
  4. Initialize the variables
batch_size=128
nb_classes=10
nb_epoch=20
  5. Define a function to load the dataset (the Keras built-in download fails behind the firewall)
def load_data(path='./mnist.npz'):
    """Loads the MNIST dataset.
    # Arguments
        path: path where the dataset is cached locally
    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`
    """
    f = np.load(path)
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']
    f.close()
    return (x_train, y_train), (x_test, y_test)
  6. Prepare the data
(X_train, y_train), (X_test, y_test) =load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
  7. Convert the class labels
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
  8. Build the model
# Use Sequential()
model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))
  9. Print the model summary
model.summary()
  10. Plot the model architecture and save it as an image
plot_model(model,to_file='model.png')
  11. Train and evaluate
# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

  12. Fit the model (iterative training)

history = model.fit(X_train, Y_train,
                    batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=1, validation_data=(X_test, Y_test))
  13. Evaluate the model
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
  14. Save the model
model.save('mnist-mpl.h5')
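
To reuse the saved model later, it can be reloaded with keras.models.load_model, which restores the architecture, weights, and optimizer state in one call:

from keras.models import load_model

model = load_model('mnist-mpl.h5')
score = model.evaluate(X_test, Y_test, verbose=0)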
2019-03-26 15:00:54 u013491950

The official Keras documentation doesn't say much about loading training data, and the material found online uses too many different styles. After running and testing code myself, I've summarized the following points for my own future use.

Broadly speaking, there are three main ways to feed data to a Keras model: .fit(), .fit_generator(), and .train_on_batch().

1. fit()

Here is the signature; the meaning of each parameter won't be explained here.

fit(x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None)

As the official documentation shows, fit() requires loading the entire dataset into memory first and then feeding it to the network. Because the MNIST dataset is small, this works; but for real development it doesn't, since it demands a great deal of memory and rules out on-the-fly data augmentation.

Example code that loads the entire dataset at once:

The task is cat-vs-dog binary classification. train_data contains two folders, cat and dog; the code reads the images and labels under both folders into NumPy arrays and returns the training data and training labels.

import os
import numpy as np
from PIL import Image
import keras

# root_path is assumed to be defined elsewhere
def load_data():
    train_imags = []
    labels = []
    seq_names = ['cat', 'dog']
    for seq_name in seq_names:
        frames = sorted(os.listdir(os.path.join(root_path, 'data', 'train_data', seq_name)))
        for frame in frames:
            img_path = os.path.join(root_path, 'data', 'train_data', seq_name, frame)
            img = np.array(Image.open(img_path))
            train_imags.append(img)
            if seq_name == 'cat':
                labels.append(0)
            else:
                labels.append(1)
    return np.array(train_imags), np.array(labels)
## Usage:
train_data, train_labs = load_data()
model.fit(train_data, keras.utils.to_categorical(train_labs), batch_size=32, epochs=50, verbose=1)

2. fit_generator()

fit_generator() requires the dataset and labels to be provided by a generator.

fit_generator(generator, steps_per_epoch=None, epochs=1, verbose=1, callbacks=None, validation_data=None, validation_steps=None, class_weight=None, max_queue_size=10, workers=1, use_multiprocessing=False, shuffle=True, initial_epoch=0)

1) A generator that reads image paths from a txt file, with no data augmentation

The following code cycles through the image paths listed in a txt file at the given path, reads one batch_size of images at a time, and returns them as NumPy arrays. When it reaches the end of the file, it moves the file pointer back to the first line.

def generate_arrays_from_txt(path, batch_size, num_class):
    with open(path) as f:
        while True:
            imgs = []
            labs = np.zeros(shape=(batch_size, num_class))
            i = 0
            while len(imgs) < batch_size:
                line = f.readline()
                if not line:
                    f.seek(0)  # end of file: wrap back to the first line
                    line = f.readline()
                img_path = line.split(' ')[0]
                lab = line.split(' ')[1]
                img = np.array(Image.open(os.path.join('./', img_path)))
                lab = keras.utils.to_categorical(int(lab) - 1, num_classes=num_class)
                imgs.append(img)
                labs[i] = lab
                i = i + 1
            yield (np.array(imgs), labs)
## Usage:
gen = generate_arrays_from_txt(txt_path, batch_size, num_class)
model.fit_generator(gen, steps_per_epoch=N, epochs=EPOCH)
## The generator yields data endlessly, so it cannot know how many images make one epoch;
## steps_per_epoch should therefore be the total dataset size divided by batch_size.

My txt file has one image path followed by its class label per line. Because the labels start from 1, the code subtracts 1 inside to_categorical.
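A sketch of the assumed layout (paths and labels are illustrative, not from the original post):

images/dog_001.jpg 1
images/dog_002.jpg 1
images/cat_101.jpg 2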

2) Using .flow_from_directory(directory)

This uses the ImageDataGenerator class, which offers two loading methods: .flow() and .flow_from_directory(directory). My impression is that the first leans towards loading all the data up front (every example I've seen does so), while the second returns data from an image directory through a generator.

2.1 For a classification network, returning images and labels
## Declare an ImageDataGenerator object and specify the augmentation options you need
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)
## Call .flow_from_directory(); the first argument is the dataset path. Generates the data and labels
train_generator = train_datagen.flow_from_directory(
        './data/train_data',
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical')
##
model.fit_generator(train_generator,steps_per_epoch=N, epochs=EPOCH)

My dataset directory has one subfolder per class under ./data/train_data; flow_from_directory infers the class labels from the subfolder names.
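A sketch of that layout (class names are illustrative):

data/train_data/
├── cat/
│   ├── cat_001.jpg
│   └── ...
└── dog/
    ├── dog_001.jpg
    └── ...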

2.2 For pix2pix-style tasks

For tasks such as image segmentation or super-resolution reconstruction, where pixels must map to pixels, the labels are themselves images (single- or multi-channel).
Example: loading images and masks for image segmentation, where the mask is a single-channel grayscale image with the target in white against a black background.

# Define two separate ImageDataGenerator objects
image_datagen = ImageDataGenerator(featurewise_center=True,
                                   featurewise_std_normalization=True,
                                   rescale=1./255)
mask_datagen = ImageDataGenerator(rescale=1./255)

seed = 1
# Path to the training images
image_generator = image_datagen.flow_from_directory(
    'data/data_seg/davis_train',
    class_mode=None,
    seed=seed)
# Path to the masks
mask_generator = mask_datagen.flow_from_directory(
    'data/data_seg/davis_label',
    class_mode=None,
    color_mode='grayscale',
    seed=seed)
# Combine the two generators into one
train_generator = zip(image_generator, mask_generator)

model.fit_generator(
    train_generator,
    steps_per_epoch=STEPS_NUM,
    epochs=EPOCHS)

For data whose labels are images, class_mode must be set to None when loading this way, so that no class labels are returned. Training images and their label images must keep the same order and correspond one-to-one, though the file names may differ. Set both generators' seed to the same number so that the image pairs they return stay aligned.

3) Using flow(x, y=None)

With .flow(), the training data must first be loaded into memory; the generator then feeds one batch_size of it into the network at a time.

train_data, train_labs = load_data()
dataGenerator = ImageDataGenerator(
        preprocessing_function=normalize)  # normalize: user-defined preprocessing function
gen = dataGenerator.flow(train_data, train_labs, batch_size=8)
model.fit_generator(gen, steps_per_epoch=len(train_data) // 8, epochs=EPOCH)

4) Using .flow_from_dataframe()

The dataframe holds the image file names and their labels.

import pandas as pd
df = pd.read_csv('./train.csv')
datagen = ImageDataGenerator(rescale=1./255)
train_generator = datagen.flow_from_dataframe(
        dataframe=df,
        directory='./train_imgs',
        x_col='id',
        y_col='label',
        class_mode='categorical',
        target_size=(32, 32),
        batch_size=32)
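
A sketch of what the assumed train.csv might contain (file names and labels are illustrative):

id,label
img_0001.jpg,dog
img_0002.jpg,cat
img_0003.jpg,dog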

3. train_on_batch()

This is similar to manually feeding data in TensorFlow: each call feeds one batch_size of data.

train_on_batch(x, y, sample_weight=None, class_weight=None)

Reusing the generator example from 2.2:

train_generator = zip(image_generator, mask_generator)
# zip() has no length, so derive the step count from the dataset size
# (num_samples: total number of training images, defined elsewhere)
steps = num_samples // batch_size * EPOCH
step = 0
for train_batch, label_batch in train_generator:
    if step == steps:
        break
    step += 1
    model.train_on_batch(train_batch, label_batch, sample_weight=None, class_weight=None)

4. Processing the data returned by a generator

When using a generator, if the images need some processing you can define a preprocessing function in ImageDataGenerator, but the returned shape must stay the same. To change the image shape, wrap the generator's output in another generator, as in the following example:

from skimage.color import rgb2lab

# Instantiate the ImageDataGenerator and specify the preprocessing function
datagen = ImageDataGenerator(
        preprocessing_function=normalize)

# Define a generator that pulls one batch at a time from datagen,
# then applies its own processing before yielding
def image_a_b_gen(data_path):
    for batch in datagen.flow_from_directory(data_path,
                                             target_size=(768, 1024),
                                             color_mode='rgb',
                                             class_mode=None,
                                             batch_size=batch_size,
                                             shuffle=True):
        lab_batch = rgb2lab(batch)              # convert the RGB batch to Lab
        X_batch = lab_batch[:, :, :, 0]         # L channel as network input
        Y_batch = lab_batch[:, :, :, 1:] / 128  # a/b channels as target
        yield (np.expand_dims(X_batch, axis=3), Y_batch)
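
A sketch of how the wrapped generator might be fed to training (the steps_per_epoch expression assumes num_samples is the dataset size):

gen = image_a_b_gen('data/train_data')
model.fit_generator(gen,
                    steps_per_epoch=num_samples // batch_size,
                    epochs=EPOCHS)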