精华内容
下载资源
问答
  • vgg16图像分类
    2022-08-08 17:17:47
    1.dataset代码
    import random
    
    from torchvision import datasets,transforms
    from torchvision.transforms import functional_pil as F_pil
    from torch.utils.data import DataLoader
    import torch.nn as nn
    from PIL import Image
    from torchvision.transforms import functional as F
    
    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')
    
    
    class MergeTwoImageTransformer(nn.Module):
        """Data-augmentation transform: blends the input PIL image with a
        randomly chosen image from ``image_paths`` (mixup-style, at PIL level).

        :param image_paths: list of file paths of candidate blend images.
        """

        def __init__(self, image_paths):
            super(MergeTwoImageTransformer, self).__init__()
            self.image_paths = image_paths

        def forward(self, img):
            # BUG FIX: use the public PIL type check instead of torchvision's
            # private functional_pil._is_pil_image (removed in newer releases).
            if not isinstance(img, Image.Image):
                raise ValueError('图像合并暂时仅支持pillow对象')
            # PIL's .size is (width, height); avoids the private
            # F._get_image_size helper, also dropped by newer torchvision.
            img_w, img_h = img.size
            # Randomly pick a second image to blend with.
            img_path = random.choice(self.image_paths)
            other_img = datasets.folder.pil_loader(img_path)
            # Random crop of the second image, resized back to the input's size.
            i, j, h, w = transforms.RandomResizedCrop.get_params(
                other_img, scale=(0.5, 1.0), ratio=(3. / 4., 4. / 3.))
            other_img = F.resized_crop(other_img, i, j, h, w, (img_h, img_w),
                                       transforms.InterpolationMode.BILINEAR)
            # Blend: 85% original, 15% other image.
            return Image.blend(img, other_img, 0.15)
    
    
    class Dataset:
        """Bundles an ``ImageFolder`` dataset with its ``DataLoader``.

        :param root_dir: class-per-subfolder image directory.
        :param batch_size: mini-batch size.
        :param num_works: DataLoader worker-process count.
        :param train: selects the augmenting train transform vs. the valid one.
        :param shuffle: defaults to ``train`` when not given explicitly.
        """

        def __init__(self, root_dir, batch_size=8, num_works=0, train=True, shuffle=None):
            if shuffle is None:
                shuffle = train  # shuffle by default only when training
            self.root_dir = root_dir
            _, class_to_idx = datasets.folder.find_classes(self.root_dir)
            samples = datasets.ImageFolder.make_dataset(
                self.root_dir,
                class_to_idx,
                IMG_EXTENSIONS
            )
            # Keep only the file paths; consumed by MergeTwoImageTransformer.
            self.image_paths = [path for path, _ in samples]
            transform = self.get_train_transform() if train else self.get_valid_transform()
            self.dataset = datasets.ImageFolder(
                root=self.root_dir,
                transform=transform
            )
            loader_kwargs = dict(
                dataset=self.dataset,
                shuffle=shuffle,
                num_workers=num_works,
                batch_size=batch_size,
            )
            # BUG FIX: prefetch_factor is only legal with worker processes;
            # recent PyTorch raises ValueError when it is passed alongside
            # num_workers == 0 (the original passed it unconditionally, and
            # used an accidental batch_size*num_works factor otherwise).
            if num_works > 0:
                loader_kwargs['prefetch_factor'] = 2
            self.loader = DataLoader(**loader_kwargs)

        def __len__(self):
            # Number of samples (not batches).
            return len(self.dataset.imgs)

        def __iter__(self):
            # Yield batches from the underlying DataLoader.
            for data in self.loader:
                yield data

        def get_train_transform(self):
            """Augmenting pipeline used for training."""
            return transforms.Compose([
                MergeTwoImageTransformer(self.image_paths),
                transforms.RandomHorizontalFlip(p=0.4),
                transforms.RandomResizedCrop(size=(224, 224), scale=(0.6, 1.0)),
                transforms.ColorJitter(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

        @staticmethod
        def get_valid_transform():
            """Deterministic pipeline used for validation/testing."""
            return transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
    

    2.测试准确率代码

    import torch
    import torch.nn as nn
    class AccuracyScore(nn.Module):
        """Classification accuracy metric.

        Accepts either class-index predictions (same rank as the targets) or
        raw logits with one extra dimension, which are argmax-reduced first.
        Returns the mean accuracy as a float32 scalar tensor.
        """

        def __init__(self):
            super(AccuracyScore, self).__init__()

        # noinspection PyMethodMayBeStatic
        def forward(self, y_pred, y_true):
            """Return mean accuracy of ``y_pred`` against ``y_true``."""
            extra_dims = y_pred.dim() - y_true.dim()
            if extra_dims == 1:
                # Logits -> predicted class indices.
                y_pred = torch.argmax(y_pred, dim=1)
            elif extra_dims != 0:
                raise ValueError("格式异常!")
            matches = y_pred.to(y_true.dtype) == y_true
            return matches.to(torch.float32).mean()

    3.模型训练测试代码

    import torch.nn as nn
    from torchvision import models
    import torch
    from image_classify_model import dataset
    from torch.utils.tensorboard import SummaryWriter
    import os
    from image_classify_model.metric import AccuracyScore
    import torch.optim as optim
    import numpy as np
    from . import network
    
    # os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
    # os.environ['CUDA_VISIBLE_DEVICES']='1'
    
    class Network(nn.Module):
        """Frozen VGG16-BN backbone with a small trainable classification head.

        :param num_classes: number of output classes of the final layer.
        """

        def __init__(self, num_classes=2, **kwargs):
            super(Network, self).__init__()
            backbone = models.vgg16_bn(pretrained=True)
            # Freeze all pretrained weights; only the replaced head trains.
            for param in backbone.parameters():
                param.requires_grad = False
            # Freshly constructed layers default to requires_grad=True.
            backbone.classifier[3] = nn.Linear(4096, 64)
            backbone.classifier[6] = nn.Linear(64, num_classes)
            self.vgg16 = nn.DataParallel(backbone)

        def forward(self, x):
            return self.vgg16(x)
    
    
    class ImageClassifiModel(object):
        """Orchestrates training/evaluation of :class:`Network` with
        TensorBoard logging and periodic checkpointing.

        :param num_classes: number of target classes.
        :param summary_dir: TensorBoard log directory (created if missing).
        :param model_dir: checkpoint directory (created if missing).
        :param lr/momentum/weight_decay/nesterov: SGD hyper-parameters.
        :param batch_size: mini-batch size for both train and test loaders.
        :param num_works: DataLoader worker count.
        :param use_gpu: run on CUDA when available.
        """

        def __init__(self, num_classes=2, summary_dir=None, model_dir=None, lr=0.005,
                     momentum=0.5, weight_decay=0.05, nesterov=True, batch_size=16,
                     num_works=0, use_gpu=True):
            super(ImageClassifiModel, self).__init__()
            self.device = torch.device('cuda' if use_gpu and torch.cuda.is_available() else 'cpu')
            self.net = Network(num_classes=num_classes).to(self.device)
            self.loss_fn = nn.CrossEntropyLoss()
            self.loss_fn.to(self.device)
            self.metrics = {
                'acc': AccuracyScore()
            }
            # Only the unfrozen (classification-head) parameters are optimized.
            self.train_optim = optim.SGD(
                params=[p for p in self.net.parameters() if p.requires_grad],
                lr=lr, momentum=momentum, nesterov=nesterov, weight_decay=weight_decay)
            self.batch_size = batch_size
            self.num_works = num_works
            os.makedirs(model_dir, exist_ok=True)
            self.model_dir = model_dir
            os.makedirs(summary_dir, exist_ok=True)
            writer = SummaryWriter(log_dir=summary_dir)
            # BUG FIX: the dummy input must live on the same device as the
            # network, otherwise add_graph fails when running on GPU.
            writer.add_graph(self.net, torch.empty(self.batch_size, 3, 224, 224, device=self.device))
            writer.close()
            self.summary_dir = summary_dir

        def training(self, train_data_dir, test_data_dir, total_epoch,
                     summary_step_interval=200, eval_epoch_interval=1, save_epoch_interval=10):
            """Train for ``total_epoch`` epochs, evaluating every
            ``eval_epoch_interval`` epochs and saving a checkpoint every
            ``save_epoch_interval`` epochs (plus once at the end)."""
            trainset = dataset.Dataset(root_dir=train_data_dir, batch_size=self.batch_size,
                                       num_works=self.num_works, train=True, shuffle=True)
            testset = dataset.Dataset(root_dir=test_data_dir, batch_size=self.batch_size,
                                      num_works=self.num_works, train=False, shuffle=False)
            writer = SummaryWriter(log_dir=os.path.join(self.summary_dir, 'training'))
            train_step = 0
            test_step = 0
            for epoch in range(total_epoch):
                # ---- training phase ----
                self.net.train(True)
                train_loss = []
                for inputs, labels in trainset:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)

                    # Forward pass.
                    outputs = self.net(inputs)
                    _loss = self.loss_fn(outputs, labels)
                    _metrics = {key: metric(outputs, labels).detach().cpu().numpy()
                                for key, metric in self.metrics.items()}

                    # Backward pass + parameter update.
                    self.train_optim.zero_grad()
                    _loss.backward()
                    self.train_optim.step()

                    train_loss.append(_loss.item())
                    if train_step % summary_step_interval == 0:
                        writer.add_scalar('train_loss', _loss, train_step)
                        writer.add_scalars('train_metrics', _metrics, train_step)
                        print(f"Train {epoch + 1}/{total_epoch} {train_step} "
                              f"loss:{_loss.item():.3f} accuracy:{_metrics.get('acc',-0.0):.3f}")
                    train_step += 1

                # ---- evaluation phase ----
                if epoch % eval_epoch_interval == 0:
                    self.net.eval()
                    test_loss = []
                    # BUG FIX: evaluation must not build autograd graphs.
                    with torch.no_grad():
                        for inputs, labels in testset:
                            inputs, labels = inputs.to(self.device), labels.to(self.device)

                            outputs = self.net(inputs)
                            _loss = self.loss_fn(outputs, labels)
                            _metrics = {key: metric(outputs, labels).cpu().numpy()
                                        for key, metric in self.metrics.items()}

                            test_loss.append(_loss.item())
                            if test_step % summary_step_interval == 0:
                                writer.add_scalar('test_loss', _loss, test_step)
                                writer.add_scalars('test_metrics', _metrics, test_step)
                                print(f"Test {epoch + 1}/{total_epoch} {test_step} "
                                      f"loss:{_loss.item():.3f} accuracy:{_metrics.get('acc',-0.0):.3f}")
                            test_step += 1
                    # Per-epoch loss summary (train + test on eval epochs).
                    writer.add_scalars('epoch_loss', {'train': np.mean(train_loss), 'test': np.mean(test_loss)}, epoch)
                else:
                    writer.add_scalars('epoch_loss', {'train': np.mean(train_loss)}, epoch)
                if epoch % save_epoch_interval == 0:
                    torch.save(self.net, os.path.join(self.model_dir, f'model{epoch}.pt'))
            torch.save(self.net, os.path.join(self.model_dir, f'model{total_epoch}.pt'))
            writer.close()

        def eval(self):
            # Placeholder for a standalone evaluation entry point.
            pass
    

    4.main函数

    # Entry-point script: wires the output paths and the dog/cat dataset
    # directories, then runs training.
    # NOTE(review): assumes `os` and the `model` module (ImageClassifiModel)
    # are imported earlier in the file — confirm when assembling the script.
    root_dir=r'D:\pythonProject\0807'

    m=model.ImageClassifiModel(model_dir=os.path.join(root_dir,'output01','model'),
                               summary_dir=os.path.join(root_dir,'output01','summary'),
                               batch_size=16,
                               use_gpu=True)
    # Data lives under a different root than the outputs above.
    root_dir=r'D:\pythonProject\dogcat'
    m.training(save_epoch_interval=2,
               total_epoch=10,
               eval_epoch_interval=2,
               summary_step_interval=100,
               train_data_dir=os.path.join(root_dir,'train'),
               test_data_dir=os.path.join(root_dir,'test')
               )

    更多相关内容
  • 利用vgg16提取图像的深度特征再进行分类
  • VGG16 图像分类

    深度学习——(4)VGG16 图像分类


    上次使用ResNet对图像进行分类,因为现在数据是同一类图像,想要对他进行更加细致的分类,还在学习其他的分类model。最近用VGG16,是站在巨人的肩膀上的一篇博客,是对以前模型的微调,但是真的有学习到,就算是自己的学习记录,后面会学习并尝试其他的model。

    1. model

    各个VGG在ImageNet上的训练参数下载

    # ImageNet-pretrained checkpoint URLs for each VGG variant (torchvision hosts).
    model_urls = {
        'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
        'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
        'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
        'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
    }
    
    # -*- coding: utf-8 -*-
    import torch.nn as nn
    import torch
    import torchvision.models as models
    # official pretrain weights
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    class VGG(nn.Module):
        """VGG feature backbone plus a configurable fully-connected head.

        ``init_weights=True`` keeps the stock torchvision classifier layout
        (4096-4096-num_classes) so ImageNet checkpoints can be loaded;
        otherwise a deeper, tapering custom head is built.
        """

        def __init__(self, features, num_classes=1000, init_weights=False):
            super(VGG, self).__init__()
            self.features = features
            if init_weights:
                # Stock ImageNet head, kept compatible with pretrained weights.
                self.classifier = nn.Sequential(
                    nn.Linear(512 * 7 * 7, 4096),
                    nn.ReLU(True),
                    nn.Dropout(p=0.1),
                    nn.Linear(4096, 4096),
                    nn.ReLU(True),
                    nn.Dropout(p=0.1),
                    nn.Linear(4096, num_classes),
                )
            else:
                # Custom head tapering 4096 -> 1024 -> 256 -> 64 -> 16.
                self.classifier = nn.Sequential(
                    nn.Linear(512 * 7 * 7, 4096),
                    nn.ReLU(True),
                    nn.Dropout(p=0.1),
                    nn.Linear(4096, 1024),
                    nn.ReLU(True),
                    nn.Dropout(p=0.1),
                    nn.Linear(1024, 256),
                    nn.ReLU(True),
                    nn.Dropout(p=0.1),
                    nn.Linear(256, 64),
                    nn.ReLU(True),
                    nn.Linear(64, 16),
                    nn.ReLU(True),
                    nn.Linear(16, num_classes),
                )

        def forward(self, x):
            """N x 3 x 224 x 224 -> N x num_classes."""
            feats = self.features(x)                   # N x 512 x 7 x 7
            flat = torch.flatten(feats, start_dim=1)   # N x 25088
            return self.classifier(flat)

        def _initialize_weights(self):
            # NOTE(review): never invoked by __init__ in the original code;
            # kept for manual use only.
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.xavier_uniform_(m.weight)
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.Linear):
                    nn.init.xavier_uniform_(m.weight)
                    nn.init.constant_(m.bias, 0)
    
    
    def make_features(cfg: list):
        """Build the VGG convolutional trunk from a layer config.

        Integers become 3x3 Conv2d (+ReLU) layers with that many output
        channels; the string ``"M"`` inserts a 2x2 max-pool.
        """
        blocks = []
        prev_channels = 3
        for spec in cfg:
            if spec == "M":
                blocks.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                blocks.append(nn.Conv2d(prev_channels, spec, kernel_size=3, padding=1))
                blocks.append(nn.ReLU(True))
                prev_channels = spec
        return nn.Sequential(*blocks)
    
    
    # Per-variant layer configs: ints are conv output channels, 'M' = max-pool.
    cfgs = {
        'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
        'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
        'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
        'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
    }
    
    
    def vgg(model_name="vgg16", **kwargs):
        """Instantiate a VGG variant by config name; kwargs go to :class:`VGG`."""
        assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
        return VGG(make_features(cfgs[model_name]), **kwargs)
    

    2. train

    # -*- coding: utf-8 -*-
    import os
    import sys
    import json
    
    import torch
    import torch.nn as nn
    from torchvision import transforms, datasets
    import torch.optim as optim
    from tqdm import tqdm
    
    from model import vgg
    from pandas.core.frame import DataFrame
    
    global log # per-epoch metric rows; read by the __main__ block after main() returns
    log=[]
    def main():
        """Fine-tune VGG16 on a 5-class image dataset, validating every epoch
        and keeping the checkpoint with the best validation accuracy."""
        # ---- configuration ----
        image_path = 'D:/Python/VGG16-CAM/data/'  # root dir; one subfolder per class
        model_name = "vgg16"

        # ImageNet weights used for initialisation when no fine-tuned checkpoint exists
        pre_path = 'D:/Python/VAE/weights/vgg16-397923af.pth'
        save_path = 'D:/Python/VGG16-CAM/{}Net.pth'.format(model_name)  # checkpoint path

        num_classes = 5
        batch_size = 64
        epochs = 1000

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print("using {} device.".format(device))

        # Normalisation + augmentation pipelines (ImageNet statistics).
        data_transform = {
            "train": transforms.Compose([transforms.Resize([224, 224]),
                                         transforms.RandomHorizontalFlip(),
                                         transforms.ToTensor(),
                                         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
            "val": transforms.Compose([transforms.Resize([224, 224]),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

        # ---- data loading ----
        assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
        train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                             transform=data_transform["train"])
        train_num = len(train_dataset)

        # Persist the index -> class-name mapping for the predict script,
        # e.g. {"0": "grade0", "1": "grade1", ...}
        grade_list = train_dataset.class_to_idx
        cla_dict = dict((val, key) for key, val in grade_list.items())
        json_str = json.dumps(cla_dict, indent=4)
        with open('class_indices.json', 'w') as json_file:
            json_file.write(json_str)

        nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
        print('Using {} dataloader workers every process'.format(nw))
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size, shuffle=True,
                                                   num_workers=nw)
        validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                                transform=data_transform["val"])
        val_num = len(validate_dataset)
        validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                      batch_size=batch_size, shuffle=False,
                                                      num_workers=nw)
        print("using {} images for training, {} images for validation.".format(train_num, val_num))

        # ---- model setup ----
        if os.path.exists(save_path):
            # Resume from our own fine-tuned checkpoint.
            net = vgg(model_name=model_name, num_classes=num_classes).to(device)
            net.load_state_dict(torch.load(save_path, map_location=device))
            # BUG FIX: the attribute is `requires_grad` — the original set a
            # meaningless `required_grad`, so nothing was actually frozen.
            # Freeze only the convolutional backbone; the loaded classifier
            # keeps training (the original rebuilt the classifier here,
            # discarding the fine-tuned head it had just loaded).
            for parameter in net.features.parameters():
                parameter.requires_grad = False
        else:
            # No checkpoint yet: initialise from the ImageNet weights
            # (stock 1000-way classifier layout, hence init_weights=True).
            net = vgg(model_name=model_name, num_classes=1000, init_weights=True)
            net.load_state_dict(torch.load(pre_path, map_location=device))
            for parameter in net.parameters():
                parameter.requires_grad = False  # BUG FIX: was `required_grad`
            # Replace the 1000-way ImageNet head with a trainable custom head;
            # new layers default to requires_grad=True.
            net.classifier = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(p=0.1),
                nn.Linear(4096, 1024),
                nn.ReLU(True),
                nn.Dropout(p=0.1),
                nn.Linear(1024, 256),
                nn.ReLU(True),
                nn.Dropout(p=0.1),
                nn.Linear(256, 64),
                nn.ReLU(True),
                nn.Linear(64, 16),
                nn.ReLU(True),
                nn.Linear(16, num_classes))

        net.to(device)
        # Per-class weights to counter class imbalance.
        # BUG FIX: the weight tensor must live on the same device as the
        # logits, otherwise CrossEntropyLoss fails on GPU runs.
        class_weight = torch.tensor([0.06756, 0.1426, 0.106, 0.225, 0.458]).to(device)
        loss_function = nn.CrossEntropyLoss(class_weight)
        # Only parameters with requires_grad=True receive updates.
        optimizer = optim.Adam([p for p in net.parameters() if p.requires_grad], lr=0.0001)

        best_acc = 0.5  # only save checkpoints beating this validation accuracy
        train_steps = len(train_loader)
        val_steps = len(validate_loader)
        for epoch in range(epochs):
            # ---- train ----
            net.train()
            train_acc = 0.0   # BUG FIX: correct-prediction counter must start at 0, not 0.5
            train_loss = 0.0  # accumulated loss over the epoch
            train_bar = tqdm(train_loader, file=sys.stdout)
            for step, data in enumerate(train_bar):
                images, labels = data
                optimizer.zero_grad()
                outputs = net(images.to(device))
                train_y = torch.max(outputs, dim=1)[1]

                loss = loss_function(outputs, labels.to(device))
                train_acc += torch.eq(train_y, labels.to(device)).sum().item()
                loss.backward()
                optimizer.step()
                # Live progress display of the current batch loss.
                train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                         epochs,
                                                                         loss)
                train_loss += loss.item()

            # ---- validate ----
            net.eval()
            val_loss = 0.0
            val_acc = 0.0  # BUG FIX: was initialised to 0.5, inflating accuracy
            with torch.no_grad():
                val_bar = tqdm(validate_loader, file=sys.stdout)
                for val_data in val_bar:
                    val_images, val_labels = val_data
                    outputs = net(val_images.to(device))
                    predict_y = torch.max(outputs, dim=1)[1]

                    loss_val = loss_function(outputs, val_labels.to(device))
                    val_acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
                    val_loss += loss_val.item()

            val_accurate = val_acc / val_num
            train_accurate = train_acc / train_num
            print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
                  (epoch + 1, train_loss / train_steps, val_accurate))
            # Keep only the best-performing weights.
            if val_accurate > best_acc:
                best_acc = val_accurate
                torch.save(net.state_dict(), save_path)
            # Record per-epoch metrics for later CSV export.
            log.append([epoch, train_loss / train_steps, train_accurate, val_loss / val_steps, val_accurate])
        print('Finished Training')
        
    if __name__ == '__main__':
        csv_path = "D:/Python/VGG16-CAM/VGG_predict.csv"
        main()
        # Export the per-epoch log collected by main() to CSV.
        # BUG FIX: the original lines were indented with stray tabs mixed
        # with spaces, which is a TabError in Python 3 — normalised here.
        data = DataFrame(data=log, columns=['epoch', 'train_loss', 'train_acc', 'val_loss', 'val_acc'])
        data.to_csv(csv_path, index_label="index_label")
    

    上面定义的log,并且将log变量定义为全局变量,是为了在程序中断后还可以记录以前运行得到的loss等指标,其实初始的意愿是想要将1000个epoch中得到的acc和loss都进行记录,但是总是在一半因为各种原因程序被kill。准备尝试定义early-stop,当loss的变化在一定范围的时候提前终止训练。后期更新early-stop。

    3. predict

    # -*- coding: utf-8 -*-
    import os
    import json
    
    import torch
    from PIL import Image
    from torchvision import transforms
    import matplotlib.pyplot as plt
    from model import vgg
    
    def main():
        """Classify a single image with the fine-tuned VGG16 and plot the result."""
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        # BUG FIX: use the same ImageNet normalisation statistics as the
        # training pipeline (the original normalised with 0.5s here while
        # training used the ImageNet mean/std — a silent accuracy killer).
        data_transform = transforms.Compose(
            [transforms.Resize([224, 224]),
             transforms.ToTensor(),
             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

        # load image
        img_path = "D:/Python/VGG16-CAM/data/val/grade1/test_15524.jpg"
        assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
        # convert('RGB') matches ImageFolder's pil_loader used at training
        # time and guards against grayscale/RGBA inputs.
        img = Image.open(img_path).convert('RGB')
        plt.imshow(img)
        # [N, C, H, W]
        img = data_transform(img)
        img = torch.unsqueeze(img, dim=0)  # add the batch dimension

        # read the class-index -> name mapping written by the train script
        json_path = './class_indices.json'
        assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

        with open(json_path, "r") as f:
            class_indict = json.load(f)

        # create model and load the fine-tuned weights
        model = vgg(model_name="vgg16", num_classes=5).to(device)
        weights_path = "D:/Python/VGG16-CAM/vgg16Net.pth"
        print(model)
        assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
        model.load_state_dict(torch.load(weights_path, map_location=device))

        model.eval()
        with torch.no_grad():
            # predict class probabilities for the single image
            output = torch.squeeze(model(img.to(device))).cpu()
            predict = torch.softmax(output, dim=0)
            predict_cla = torch.argmax(predict).numpy()

        print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                     predict[predict_cla].numpy())
        plt.title(print_res)
        for i in range(len(predict)):
            print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                      predict[i].numpy()))
        plt.show()


    if __name__ == '__main__':
        main()
    

    就这样,希望对大家有帮助~

    展开全文
  • ),并使用VGG模型完成下面图像分类的实验(建议使用Python语言,Pytorch 框架)。图像分类数据集:CIFAR-10,由10个类的60000个32x32彩色图像组成,每个类有6000个图像;有50000个训练样本(训练集)和10000个测试样本...
  • PyTorch迁移学习入门——VGG16 图像分类

    千次阅读 多人点赞 2020-03-31 06:00:48
    数据集介绍 考虑到VGG16要求图像的形状为(224,224,3),即像素为224x224的彩色图像,因为我准备用这个数据集进行实验。所谓的应急车辆包括:警车、消防车和救护车。在数据集中有一个emergency_train.csv,用来存放...

    前言

    迁移学习指的是保存已有问题的解决模型,并将其利用在其他不同但相关问题上。 比如说,训练用来辨识汽车的模型也可以被用来提升识别卡车的能力。很多情况下迁移学习能够简化或降低模型构建的难度,甚至还能取得不错的准确度。
    本文将针对一个小的图片数据集,使用PyTorch进行迁移学习演示,包括如何使用预训练模型,并将结果自己搭建的卷积神经网络模型进行性能比较。

    数据集介绍

    考虑到VGG16要求图像的形状为(224,224,3),即像素为224x224的彩色图像,因此我准备用这个数据集进行实验。所谓的应急车辆包括:警车、消防车和救护车。在数据集中有一个emergency_train.csv,用来存放训练样本的标签。
    数据集下载:【提取码:pyne】

    选取预训练模型

    预训练模型是由某个人或团队为解决特定问题而已经设计和训练好的模型。预训练模型在深度学习项目中非常有用,因为并非所有人都拥有足够多的算力。我们需要使用本地机器,因此预训练的模型就可以节约很多时间。预训练的模型通过将其权重和偏差矩阵传递给新模型来得以共享他们训练好的参数。因此,在进行迁移学习之前,我要首先选择一个合适的预训练模型,然后将其权重和偏差矩阵传递给新模型。针对不同的深度学习任务可能有很多预训练模型可用,现在针对我要做的这个任务确定哪种模型最适合,根据我们的数据集介绍,我会选择VGG16在ImageNet上的预训练模型,而不是在MNIST上的预训练模型,因为我们的数据集中包含车辆图像,ImageNet中具有丰富的车辆图像,因此前者应该更为合理。总之,选择预训练模型时不是考虑参数量和性能表现,而是考虑任务间的相关性以及数据集的相似程度。

    数据处理

    # PyTorch libraries and modules
    import torch
    from torch.autograd import Variable
    from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
    from torch.optim import Adam, SGD
    import pandas as pd
    import numpy as np
    from tqdm import tqdm
    
    # torchvision for pre-trained models
    from torchvision import models
    
    # 导入读取和展示图片工具
    from skimage.io import imread
    from skimage.transform import resize
    import matplotlib.pyplot as plt
    
    # 数据切分,制作验证集
    from sklearn.model_selection import train_test_split
    
    # 模型评价
    from sklearn.metrics import accuracy_score
    

    接下来,读取包含图像名称和相应标签的.csv文件,并查看内容:

    # Load the image-name / label CSV and preview the first rows.
    train = pd.read_csv('emergency_train.csv')
    print(train.shape)
    train.head(10)
    

    在这里插入图片描述
    该csv文件中包含两列:

    • image_names: 代表数据集中所有图像的名称
    • Emergency_or_no: 指定特定图像属于紧急类别还是非紧急类别。0表示图像是非紧急车辆,1表示紧急车辆

    接下来,我们将加载所有图像并将其以数组格式存储:

    # Load every training image, normalise pixels to [0, 1], resize to the
    # 224x224x3 shape VGG16 expects, and stack into one float32 array.
    train_img = []
    for img_name in tqdm(train['image_names']):
        # defining the image path
        image_path = 'images/' + img_name
        # reading the image
        img = imread(image_path)
        # normalizing the pixel values
        img = img/255
        # resizing the image to (224,224,3)
        img = resize(img, output_shape=(224,224,3), mode='constant', anti_aliasing=True)
        # converting the type of pixel to float 32
        img = img.astype('float32')
        # appending the image into the list
        train_img.append(img)

    # converting the list to numpy array
    train_x = np.array(train_img)
    train_x.shape
    

    在这里插入图片描述
    加载这些图像大约花费22秒钟。数据集中有1646张图像作为训练,由于VGG16需要所有此特定形状的图像,因此需要将所有图像重塑为(224,224,3)。现在让我们可视化来自数据集的一些图像:

    # Spot-check one sample and print the meaning of its label
    # (1 = emergency vehicle, 0 = non-emergency).
    index = 10
    plt.imshow(train_x[index])
    if (train['emergency_or_not'][index] == 1):
        print('It is an Emergency vehicle')
    else:
        print('It is a Non-Emergency vehicle')
    

    在这里插入图片描述
    这是一辆普通的汽车,因此显示为非紧急车辆标签。现在将目标值(0 or 1)存储在单独的变量中:

    # defining the target (0 = non-emergency, 1 = emergency)
    train_y = train['emergency_or_not'].values
    

    让我们借助sklearn划分数据集,这里只使用验证集来评估我们的模型,你也可以尝试将数据集划分为:训练/ 验证/ 测试,三个部分。

    # create validation set (10% split, stratified to keep the class balance)
    train_x, val_x, train_y, val_y = train_test_split(train_x, train_y, test_size = 0.1, random_state = 13, stratify=train_y)
    (train_x.shape, train_y.shape), (val_x.shape, val_y.shape)
    

    在这里插入图片描述
    最后划分的训练集中有1,481张图像,验证集中有165张图像。现在,我们要将数据集转换为torch格式:

    # Convert the train/validation arrays to torch tensors in NCHW layout.
    # NOTE(review): reshape() only relabels axes here — a true channels-last
    # (N,H,W,C) -> channels-first (N,C,H,W) conversion would use transpose;
    # confirm the intended memory layout before relying on these tensors.
    # converting training images into torch format
    train_x = train_x.reshape(1481, 3, 224, 224)
    train_x  = torch.from_numpy(train_x)

    # converting the target into torch format
    train_y = train_y.astype(int)
    train_y = torch.from_numpy(train_y)

    # shape of training data
    train_x.shape, train_y.shape

    # converting validation images into torch format
    val_x = val_x.reshape(165, 3, 224, 224)
    val_x  = torch.from_numpy(val_x)

    # converting the target into torch format
    val_y = val_y.astype(int)
    val_y = torch.from_numpy(val_y)

    # shape of validation data
    val_x.shape, val_y.shape
    

    我们的数据已经准备好!在下一部分中,我们将使用预训练模型来解决此问题之前,将建立卷积神经网络(CNN)。

    使用普通CNN卷积神经网络

    终于到了模型构建,在使用迁移学习解决问题之前,先使用普通的CNN模型训练出一个对照组(baseline)。
    我们先构建一个非常简单的CNN架构,该架构具有两个卷积层以从图像中提取特征,最后是一个全连接层以对这些特征进行分类:

    class Net(Module):
        """Baseline CNN: two conv blocks followed by a single linear classifier."""

        def __init__(self):
            super(Net, self).__init__()

            # Feature extractor: 3 -> 4 -> 8 channels, each block halving H/W.
            self.cnn_layers = Sequential(
                Conv2d(3, 4, kernel_size=3, stride=1, padding=1),
                BatchNorm2d(4),
                ReLU(inplace=True),
                MaxPool2d(kernel_size=2, stride=2),
                Conv2d(4, 8, kernel_size=3, stride=1, padding=1),
                BatchNorm2d(8),
                ReLU(inplace=True),
                MaxPool2d(kernel_size=2, stride=2),
            )

            # Classifier over the flattened 8 x 56 x 56 feature map.
            self.linear_layers = Sequential(
                Linear(8 * 56 * 56, 2)
            )

        def forward(self, x):
            """(N, 3, 224, 224) -> (N, 2) logits."""
            features = self.cnn_layers(x)
            flattened = features.view(features.size(0), -1)
            return self.linear_layers(flattened)
    

    现在,为模型定义优化器,学习率和损失函数,并使用GPU训练模型:

    # defining the model
    model = Net()
    # defining the optimizer
    optimizer = Adam(model.parameters(), lr=0.0001)
    # defining the loss function
    criterion = CrossEntropyLoss()
    # move the model and loss to GPU when one is available
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()

    print(model)
    

    在这里插入图片描述
    接下来设置迭代次数和训练批次大小,这里使用 batch_size = 128, epochs = 15 :

    # batch size of the model
    batch_size = 128

    # number of epochs to train the model
    n_epochs = 15

    for epoch in range(1, n_epochs+1):
        # Shuffle sample indices each epoch and iterate mini-batches manually
        # (no DataLoader, since the whole dataset already sits in memory).
        # The unused `train_loss = 0.0` accumulator from the original was
        # removed — `training_loss` below is what actually gets reported.
        permutation = torch.randperm(train_x.size()[0])

        training_loss = []
        for i in tqdm(range(0, train_x.size()[0], batch_size)):

            indices = permutation[i:i+batch_size]
            batch_x, batch_y = train_x[indices], train_y[indices]

            if torch.cuda.is_available():
                batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)

            training_loss.append(loss.item())
            loss.backward()
            optimizer.step()

        # report the mean loss over the epoch
        training_loss = np.average(training_loss)
        print('epoch: \t', epoch, '\t training loss: \t', training_loss)
    

    在这里插入图片描述
    打印了训练进度和损失,正常情况下每次训练后训练损失都在减少。接下来验证准确性:

    # prediction for training set
    prediction = []
    target = []
    permutation = torch.randperm(train_x.size()[0])
    for i in tqdm(range(0, train_x.size()[0], batch_size)):
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = train_x[indices], train_y[indices]

        if torch.cuda.is_available():
            batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

        with torch.no_grad():
            # batch_x is already on the right device; the original's
            # unconditional `batch_x.cuda()` crashed on CPU-only machines.
            output = model(batch_x)

        softmax = torch.exp(output).cpu()
        prob = list(softmax.numpy())
        predictions = np.argmax(prob, axis=1)
        prediction.append(predictions)
        # Keep targets on the CPU so accuracy_score can consume them
        # (consistent with the validation loop below).
        target.append(batch_y.cpu())

    # training accuracy
    accuracy = []
    for i in range(len(prediction)):
        accuracy.append(accuracy_score(target[i], prediction[i]))

    print('training accuracy: \t', np.average(accuracy))
    
    # prediction for validation set
    prediction_val = []
    target_val = []
    permutation = torch.randperm(val_x.size()[0])
    for i in tqdm(range(0, val_x.size()[0], batch_size)):
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = val_x[indices], val_y[indices]

        if torch.cuda.is_available():
            batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

        with torch.no_grad():
            # batch_x is already on the right device; the original's
            # unconditional `batch_x.cuda()` crashed on CPU-only machines.
            output = model(batch_x)

        softmax = torch.exp(output).cpu()
        prob = list(softmax.numpy())
        predictions = np.argmax(prob, axis=1)
        prediction_val.append(predictions)
        target_val.append(batch_y.cpu())

    # validation accuracy
    accuracy_val = []
    for i in range(len(prediction_val)):
        accuracy_val.append(accuracy_score(target_val[i], prediction_val[i]))

    print('validation accuracy: \t', np.average(accuracy_val))
    

    在这里插入图片描述
    验证准确性为69.7%。现在我们有了baseline,接下来我们使用迁移学习来解决此分类问题。

    使用迁移学习进行分类

    我们将使用在ImageNet数据集上训练的VGG16预训练模型。让我们先说下使用迁移学习训练模型的步骤:

    1. 加载预训练模型的权重-在本例中为VGG16
    2. 根据手头的问题对模型进行微调(不更新预训练模型中部分层的参数)
    3. 使用这些预训练的权重来提取我们训练集的图像特征
    4. 最后,使用提取的特征来训练微调模型

    因此,让我们首先尝试加载预训练模型的权重:
    torchvision—使用预训练模型参考

    # Load VGG16 (with batch normalization) pre-trained on ImageNet;
    # downloads the weights on first use.
    # NOTE(review): `pretrained=True` is deprecated in newer torchvision
    # in favour of the `weights=` argument — confirm the pinned version.
    model = models.vgg16_bn(pretrained=True)
    

    在这里插入图片描述
    现在,我们将微调模型。我们不会训练VGG16模型的各个层,因此让我们冻结这些层的权重:

    # Freeze every pre-trained parameter so only layers added later train.
    for weight in model.parameters():
        weight.requires_grad_(False)
    

    由于我们只有2个类别需要预测,并且VGG16在ImageNet上有1000个类别,因此我们需要根据任务更新最后一层,因此我们将只训练最后一层,可以通过设置该层中的requires_grad=True来只对最后一层进行权值更新。让我们将训练设置为GPU训练:

    # Replace the 1000-way ImageNet head with a 2-class head.
    # No extra activation/dropout is added so the comparison with the
    # plain CNN baseline stays fair.
    model.classifier[6] = Sequential(
                          Linear(4096, 2))

    # The freshly created layer already has requires_grad=True by default,
    # but make the intent explicit: only this layer's weights are updated.
    for param in model.classifier[6].parameters():
        param.requires_grad = True

    # Move the model to the GPU only AFTER swapping the head: in the
    # original the move happened first, leaving the new Linear layer on
    # the CPU and causing a device mismatch at forward time.
    if torch.cuda.is_available():
        model = model.cuda()
    

    在这里插入图片描述
    现在,我们将使用预训练模型来提取训练图像和验证图像的特征,将batch_size设置为128(同样,您可以根据需要增加或减少该batch_size):

    # batch_size
    batch_size = 128

    # Run feature extraction on the GPU when available, otherwise the CPU;
    # the original's unconditional .cuda() calls crashed without CUDA.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # extracting features for train data
    data_x = []
    label_x = []

    inputs, labels = train_x, train_y

    for i in tqdm(range(int(train_x.shape[0]/batch_size)+1)):
        # `Variable` has been a no-op since PyTorch 0.4 and is dropped here.
        input_data = inputs[i*batch_size:(i+1)*batch_size].to(device)
        label_data = labels[i*batch_size:(i+1)*batch_size].to(device)
        # No gradients are needed for fixed-feature extraction.
        with torch.no_grad():
            x = model.features(input_data)
        data_x.extend(x.cpu().numpy())
        label_x.extend(label_data.cpu().numpy())


    # extracting features for validation data
    data_y = []
    label_y = []

    inputs, labels = val_x, val_y

    for i in tqdm(range(int(val_x.shape[0]/batch_size)+1)):
        input_data = inputs[i*batch_size:(i+1)*batch_size].to(device)
        label_data = labels[i*batch_size:(i+1)*batch_size].to(device)
        with torch.no_grad():
            x = model.features(input_data)
        data_y.extend(x.cpu().numpy())
        label_y.extend(label_data.cpu().numpy())
    

    接下来,我们将这些数据转换为torch格式:

    # Convert the extracted feature/label lists into torch tensors,
    # flattening each feature map to one row per sample.
    def _as_flat_tensor(feature_list):
        tensor = torch.from_numpy(np.array(feature_list))
        return tensor.view(tensor.size(0), -1)

    x_train = _as_flat_tensor(data_x)
    y_train = torch.from_numpy(np.array(label_x))
    x_val = _as_flat_tensor(data_y)
    y_val = torch.from_numpy(np.array(label_y))
    

    我们仍然需要为模型定义优化器和损失函数:

    import torch.optim as optim
    
    # specify loss function (categorical cross-entropy)
    criterion = CrossEntropyLoss()
    
    # specify optimizer and learning rate. Despite the original comment
    # mentioning SGD, this is Adam, applied only to the new 2-class head.
    optimizer = optim.Adam(model.classifier[6].parameters(), lr=0.0005)
    

    现在需要训练我们的模型,为了公平比较,仍然设置15次迭代周期,并将batch_size设置为128:

    # batch size
    batch_size = 128
    # Train the new head on the CPU: the original notebook hit CUDA errors
    # here on Colab, so training runs on the CPU and the model is moved
    # back to the GPU afterwards.
    model = model.cpu()
    # number of epochs to train the model (15, matching the CNN baseline)
    n_epochs = 15

    for epoch in tqdm(range(1, n_epochs+1)):
        # Reshuffle the pre-extracted features every epoch.
        # (The original also set an unused `train_loss = 0.0` here.)
        permutation = torch.randperm(x_train.size()[0])

        training_loss = []
        for i in range(0, x_train.size()[0], batch_size):
            indices = permutation[i:i+batch_size]
            batch_x, batch_y = x_train[indices], y_train[indices]

            optimizer.zero_grad()
            # Only the classifier head consumes the cached VGG features.
            outputs = model.classifier(batch_x)
            loss = criterion(outputs, batch_y)

            training_loss.append(loss.item())
            loss.backward()
            optimizer.step()

        training_loss = np.average(training_loss)
        print('epoch: \t', epoch, '\t training loss: \t', training_loss)
    

    在这里插入图片描述

    # prediction for training set
    prediction = []
    target = []
    permutation = torch.randperm(x_train.size()[0])
    for i in tqdm(range(0, x_train.size()[0], batch_size)):
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = x_train[indices], y_train[indices]

        # The head was trained on the CPU (see the training cell above);
        # the original moved the batch to CUDA and called
        # model.classifier(batch_x.cuda()), which raises a device-mismatch
        # error whenever a GPU is present. Keep everything on the CPU.
        with torch.no_grad():
            output = model.classifier(batch_x)

        softmax = torch.exp(output).cpu()
        prob = list(softmax.numpy())
        predictions = np.argmax(prob, axis=1)
        prediction.append(predictions)
        target.append(batch_y)

    # training accuracy
    accuracy = []
    for i in range(len(prediction)):
        accuracy.append(accuracy_score(target[i], prediction[i]))

    print('training accuracy: \t', np.average(accuracy))
    

    我们在训练集上的准确度达到82.5%。现在让我们检查验证准确性:

    # prediction for validation set
    prediction_val = []
    target_val = []
    permutation = torch.randperm(x_val.size()[0])
    for i in tqdm(range(0, x_val.size()[0], batch_size)):
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = x_val[indices], y_val[indices]

        # The head lives on the CPU (see the training cell above); the
        # original's model.classifier(batch_x.cuda()) raises a
        # device-mismatch error whenever a GPU is present.
        with torch.no_grad():
            output = model.classifier(batch_x)

        softmax = torch.exp(output).cpu()
        prob = list(softmax.numpy())
        predictions = np.argmax(prob, axis=1)
        prediction_val.append(predictions)
        target_val.append(batch_y)

    # validation accuracy
    accuracy_val = []
    for i in range(len(prediction_val)):
        accuracy_val.append(accuracy_score(target_val[i], prediction_val[i]))

    print('validation accuracy: \t', np.average(accuracy_val))
    

    在这里插入图片描述模型在验证集上的准确性也相似,达到80.2%。训练和验证的准确性几乎是相同的,因此可以说该模型泛化能力较好。以下是我们的结果摘要:我们可以推断,与CNN模型相比,使用VGG16预训练模型提高了精度。

    | Model | Training Accuracy | Validation Accuracy |
    | ----- | ----------------- | ------------------- |
    | CNN   | 87.6%             | 69.7%               |
    | VGG16 | 82.5%             | 80.2%               |

    结论

    我们完成了使用预训练模型和迁移学习方法来解决图像分类问题。首先了解什么是预训练模型,以及如何根据实际问题选择正确的预训练模型。然后,进行了一个案例研究,将车辆图像分类为紧急情况或非紧急情况。我们首先使用CNN模型解决了此案例研究,然后使用VGG16预训练模型解决了相同的问题。发现使用VGG16预训练模型提高了模型性能,且获得了更好的结果。
    现在对使用PyTorch进行迁移学习有一个初步的了解,推荐从图像分类问题入手迁移学习,因为这是一类基础问题,并尝试应用转移学习来解决它们,这将帮助理解迁移学习的工作原理。

    参考:
    Transfer Learning in Pytorch

    Master the Powerful Art of Transfer Learning using PyTorch

    Transfer Learning for Computer Vision Tutorial

    展开全文
  • TensorFlow 实现VGG16图像分类

    千次阅读 2020-06-07 21:50:06
    2014年,Karen Simonyan等人提出VGG-16,夺得ILSVRC 2014的亚军。 通过Tensorflow实现VGG-16图像分类

    VGG网络结构及原理

    VGG16图像分类基于tensorflow实现,主要包含以下四个程序:

    • vgg16.py:读入模型参数构建模型
    • utils.py:读入图片,概率显示
    • nclasses.py:含labels字典
    • app.py:应用程序,实现图像识别

    1. vgg16.py 构建模型

    程序结构如下:
    在这里插入图片描述

    (1) __init __

    加载网络参数到data_dict

    def __init__(self, vgg16_path=None):
        """Load the pre-trained VGG16 parameters into ``self.data_dict``.

        vgg16_path: path to the ``vgg16.npy`` weight file; when None,
        ``./vgg16.npy`` is used.
        """
        if vgg16_path is None:
            vgg16_path = os.path.join(os.getcwd(), "vgg16.npy")
        # Load unconditionally: in the original, the load statement sat
        # inside the `if`, so passing an explicit path left
        # `self.data_dict` unset and every later lookup failed.
        # NOTE(review): newer NumPy needs allow_pickle=True to load this
        # pickled dict — confirm against the pinned NumPy version.
        self.data_dict = np.load(vgg16_path, encoding='latin1').item()
    

    字典key的列表如下所示,分别对应13个卷积层以及3个全连接层的权重 $W$ 和偏置 $b$:

    ['conv1_1_W', 'conv1_1_b',
     'conv1_2_W', 'conv1_2_b',
     'conv2_1_W', 'conv2_1_b',
     'conv2_2_W', 'conv2_2_b',
     'conv3_1_W', 'conv3_1_b',
     'conv3_2_W', 'conv3_2_b',
     'conv3_3_W', 'conv3_3_b',
     'conv4_1_W', 'conv4_1_b',
     'conv4_2_W', 'conv4_2_b',
     'conv4_3_W', 'conv4_3_b',
     'conv5_1_W', 'conv5_1_b',
     'conv5_2_W', 'conv5_2_b',
     'conv5_3_W', 'conv5_3_b',
     'fc6_W', 'fc6_b',
     'fc7_W', 'fc7_b',
     'fc8_W', 'fc8_b']
    

    (2) forward

    复现网络结构

    def forward(self, images):
        """Build the VGG16 inference graph on top of `images` (RGB in [0,1])."""
        rgb_scaled = images * 255.0
        # Convert RGB to BGR and subtract the per-channel mean — the
        # channel order the pre-trained weights were produced with.
        red, green, blue = tf.split(rgb_scaled, 3, 3)
        bgr = tf.concat([
            blue - VGG_MEAN[0],
            green - VGG_MEAN[1],
            red - VGG_MEAN[2]], 3)

        # Five convolutional blocks with 2, 2, 3, 3, 3 conv layers each,
        # every block followed by a 2x2 max pool. Each intermediate
        # tensor is kept as an attribute (self.convN_M / self.poolN),
        # exactly as the hand-unrolled original did.
        net = bgr
        for block, n_convs in enumerate((2, 2, 3, 3, 3), start=1):
            for conv in range(1, n_convs + 1):
                layer_name = "conv%d_%d" % (block, conv)
                net = self.conv_layer(net, layer_name)
                setattr(self, layer_name, net)
            net = self.max_pool_2x2(net, "pool%d" % block)
            setattr(self, "pool%d" % block, net)

        # Fully connected head: fc6/fc7 with ReLU, fc8 with softmax output.
        self.fc6 = self.fc_layer(net, "fc6")
        self.relu6 = tf.nn.relu(self.fc6)

        self.fc7 = self.fc_layer(self.relu6, "fc7")
        self.relu7 = tf.nn.relu(self.fc7)

        self.fc8 = self.fc_layer(self.relu7, "fc8")
        self.prob = tf.nn.softmax(self.fc8, name="prob")

        # Release the raw weight dict once the graph has been built.
        self.data_dict = None
    

    注:需要将图片由RGB转化为BGR格式。这是因为该预训练权重转换自Caffe模型,而Caffe(基于OpenCV)默认使用BGR通道顺序,属于历史遗留问题。

    • RGB代表红绿蓝。R在高位,G在中间,B在低位。
    • BGR是相同的,除了区域顺序颠倒。

    卷积层

    def conv_layer(self, x, name):
        """3x3 convolution (stride 1, SAME padding) + bias + ReLU,
        using the pre-trained kernel and bias stored under `name`."""
        with tf.variable_scope(name):
            kernel = self.get_conv_filter(name)
            biases = self.get_bias(name)
            conv_out = tf.nn.conv2d(x, kernel, [1, 1, 1, 1], padding='SAME')
            return tf.nn.relu(tf.nn.bias_add(conv_out, biases))
    

    池化层

     def max_pool_2x2(self, x, name):
           """2x2 max pooling with stride 2 and SAME padding (halves H and W)."""
           return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
    

    全连接层

      def fc_layer(self, x, name):
          """Flatten `x` and apply the pre-trained fully connected layer `name`."""
          with tf.variable_scope(name):
              # Product of all non-batch dimensions = flattened feature size.
              flat_dim = 1
              for extent in x.get_shape().as_list()[1:]:
                  flat_dim *= extent
              flattened = tf.reshape(x, [-1, flat_dim])

              weights = self.get_fc_weight(name)
              biases = self.get_bias(name)
              return tf.nn.bias_add(tf.matmul(flattened, weights), biases)
    

    2. utils.py 处理图片

    将图片处理成 $1 \times 224 \times 224 \times 3$ 格式

    3. nclasses.py 字典

    格式如下:

     0: 'tench\n Tinca tinca',
     1: 'goldfish\n Carassius auratus',
     2: 'great white shark\n white shark\n man-eater\n man-eating shark\n Carcharodon carcharias',
     3: 'tiger shark\n Galeocerdo cuvieri',
     4: 'hammerhead\n hammerhead shark',
     5: 'electric ray\n crampfish\n numbfish\n torpedo',
    

    4. app.py 主应用程序

    识别程序如下:

    with tf.Session() as sess:
        images = tf.placeholder(tf.float32, [1, 224, 224, 3])
        # Instantiating Vgg16 runs its __init__, which reads the model
        # parameters saved in the .npy file.
        vgg = vgg16.Vgg16() 
        vgg.forward(images) # build the inference graph
        # Run the graph to obtain the probability distribution over the
        # 1000 ImageNet classes for the prepared image `img_ready`.
        probability = sess.run(vgg.prob, feed_dict={images:img_ready})
        # Indices of the five highest-probability classes (descending).
        top5 = np.argsort(probability[0])[-1:-6:-1]
        print("top5:",top5)
        values = []
        bar_label = []  # human-readable class names for the top-5 indices
        for n, i in enumerate(top5): 
            print("n:",n)
            print("i:",i)
            values.append(probability[0][i]) 
            bar_label.append(labels[i]) 
            print(i, ":", labels[i], "----", utils.percent(probability[0][i]) )
    

    关注公众号机器学习Zero,回复模型,下载源代码、测试图片及VGG16的模型。
    在这里插入图片描述

    展开全文
  • 本次图像的分类识别实验采用的是基于 tensorflow+VGG-16图像分类识别模型。vgg是由 Simonyan 和 Zisserman 在文献《Very Deep Convolutional Networks for Large-Scale ImageRecognition》上提出的卷积神经网络...
  • 利用vgg16进行的猫狗图像分类,由于其在模型比普通的cnn的层更加复杂,所以分类效果较优。同时可根据结果添加dropout层避免出现的过拟合现象。训练和测试图像较大没有放上来,可私聊/评论后发送。
  • 里面包含着VGG16VGG19的模型,是对CIFAR10进行一个图像分类的源码,全部可以运行,并且在这里面,如果不想运行那么久,也有一个模型可以直接加载即可,非常全面。包含了可视化,训练模型,测试模型,加载模型,...
  • 能够根据事先训练好模型,对输入的动物图像进行分类识别,而且具有较高的准确率
  • 由于最近项目的需要利用深度学习模型完成图像分类的任务,我的个人数据集比较简单因此选用VGG16深度学习模型,后期数据集增加之后会采用VGG19深度学习模型。 目录 1、VGG16网络 2、训练以及需要注意的地方 3、测试...
  • 这是vgg16神经网络基于python的实现,可以用来识别图像中出现的物体。
  • VGG-图像分类.zip

    2021-04-16 11:18:48
    利用keras实现VGG16模型,对猫狗图片进行二分类,附有24000张图片作为训练测试
  • 首先基于Pytorch实现Vgg16网络,命名为model.py(可为其他任意名字,但是后续导入时要记得更改) import torch import torch.nn as nn class VGG16(nn.Module): def __init__(self): super(VGG16, self).__init...
  • data, batch_size=64) test_dataloader = DataLoader(test_data, batch_size=64) 目录结构 network是写的是vgg16的网络结构 VGG16的架构如下 代码 import torch from torch import nn # 定义网路结构 class VGG16(nn...
  • 目录1 VGG161.1 VGG16简介1.2 VGG16结构1.3 VGG16特点2 数据文件3 代码3.1 ImageDataGenerator和VGG16迁移学习3.2 VGG16迁移学习+转移矩阵3.2.1 训练3.2.2 单张图像预测3.2.3 测试获取准确率3.2.4 批量图像预测实现...
  • Keras-VGG16-图片分类

    千次阅读 2021-04-15 20:21:34
    Keras-VGG16-图片分类 VGG模型的名称,来源于牛津大学的Oxford Visual Geometry Group,该网络模型是他们在ILSVRC 2014上的相关工作,主要是证明了增加网络的深度能够在一定程度上影响网络最终的性能。 目录Keras-...
  • pytorch+VGG16 双分支输入 图像分类

    千次阅读 多人点赞 2021-04-15 14:59:09
    将神经网络改为双输入通道 pytorch+VGG16 图像分类背景介绍解决方法解决步骤一 修改神经网络解决步骤二 实现自己的数据集类解决步骤三 改变训练过程解决效果 背景介绍 本人是个菜鸟,初学神经网络,第一个任务就是...
  • VGG16多图片分类.docx

    2019-11-14 21:47:29
    深度学习算法,vgg16属于经典算法,是python语言,免费下载哦。VGG16多图片分类。深度学习算法,vgg16属于经典算法,是python语言,免费下载哦。VGG16多图片分类
  • Pytorch实现VGG16,在Cifar10上做分类,91%精度
  • pytorch_vgg16_classify.py

    2020-09-07 21:14:28
    pytorch1.5实现的vgg16分类。在真实数据集测试成功 pytorch1.5实现的vgg16分类。在真实数据集测试成功 pytorch1.5实现的vgg16分类。在真实数据集测试成功
  • 基于tensorflow + Vgg16进行图像分类识别

    万次阅读 多人点赞 2018-03-22 11:06:06
    1. VGG-16介绍 vgg是在Very Deep Convolutional Networks for Large-Scale Image Recognition期刊上提出的。...vgg-16是一种深度卷积神经网络模型,16表示其深度,在图像分类等任务中取得了不错的效果。 vgg...
  • VGG16网络进行迁移学习用来进行图像的多分类
  • 医疗的 将vgg16应用到医学图像(视网膜)分类
  • 本文是利用卷积神经网络VGG16模型对花卉和手势进行分类识别,利用卷积神经网络的卷积层进行特征提取,连接层起到分类器的作用,对图像进行分类。所用软件是pycharm,解释器是python3.6。
  • VGG16_Tensorflow版本实现图片分类

    千次阅读 热门讨论 2019-12-11 15:01:53
    start 首先将本文涉及到的代码、预训练模型和数据集上传,大家可以自行下载: ...VGG16代码链接: https://pan.baidu.com/s/1Xy5H3t9SVnQM2OMorH4pmQ 提取码:zju1 预训练模型VGG16.npy链接: ...
  • 转移学习使用VGGNet对花朵图像进行分类
  • resNet50 Vgg16 图像分类

    千次阅读 2017-04-06 22:29:27
    Kera的应用模块Application提供了带有预训练权重的Keras模型,这些模型可以用来进行预测、特征提取和fine...图片分类模型的示例 利用ResNet50网络进行ImageNet分类 from keras.applications.resnet50 import ResNe

空空如也

空空如也

1 2 3 4 5 ... 20
收藏数 27,708
精华内容 11,083
关键字:

vgg16图像分类