Tensorboard val accuracy is 0.83, but manual testing is random

Asked by: Willy Lutz  Asked: 11/16/2023  Last edited by: Willy Lutz  Updated: 11/16/2023  Views: 17

Q:

I am working on a SimCLR/ResNet18 model for contrastive learning.

I have trained it with several batch sizes and, using the data from TensorBoard, obtained the validation accuracy plot shown below, which peaks at 0.83. Note: my training accuracy is 1, so my model is overfitting.

From my understanding, in performance.py I load my fine-tuned model (not the pretext one) from 'logs/fine_tuning_simple_eval/epoch150_batch160/epoch150_batch160_state_dict.ckpt', with the ResNet model trained during the pretext task, saved in 'resnet18_backbone_weights.ckpt', as the backbone.

My problem: when I test the accuracy of my predictions with my performance.py, I get purely random predictions. Any help as to why there is such a huge difference between these two scores?

[Validation accuracy plot from TensorBoard: "Batch optimization"]

Here is the implementation:

Some classes for the pretext task:

import os
from typing import Optional, Tuple, Callable, Any

import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as T
from PIL import Image
from pl_bolts.optimizers import LinearWarmupCosineAnnealingLR
from torch.optim import Adam
from torchvision.datasets import VisionDataset
from torchvision.datasets.utils import verify_str_arg

train_rgb_mean = [0.485, 0.456, 0.406]
train_rgb_std = [0.229, 0.224, 0.225]


class AddProjection(nn.Module):
    def __init__(self, config, model=None, mlp_dim=512):
        super(AddProjection, self).__init__()
        embedding_size = config.embedding_size
        self.backbone = default(model, models.resnet18(pretrained=False, num_classes=config.embedding_size))
        mlp_dim = default(mlp_dim, self.backbone.fc.in_features)
        print('Dim MLP input:', mlp_dim)
        self.backbone.fc = nn.Identity()

        # add mlp projection head
        self.projection = nn.Sequential(
            nn.Linear(in_features=mlp_dim, out_features=mlp_dim),
            nn.BatchNorm1d(mlp_dim),
            nn.ReLU(),
            nn.Linear(in_features=mlp_dim, out_features=embedding_size),
            nn.BatchNorm1d(embedding_size),
        )

    def forward(self, x, return_embedding=False):
        embedding = self.backbone(x)
        if return_embedding:
            return embedding
        return self.projection(embedding)


class SimCLR_pl(pl.LightningModule):
    def __init__(self, config, model=None, feat_dim=512):
        super().__init__()
        self.config = config

        self.model = AddProjection(config, model=model, mlp_dim=feat_dim)

        self.loss = ContrastiveLoss(config.batch_size, temperature=self.config.temperature)

    def forward(self, X):
        return self.model(X)

    def training_step(self, batch, batch_idx):
        (x1, x2), labels = batch
        z1 = self.model(x1)
        z2 = self.model(x2)
        loss = self.loss(z1, z2)
        self.log('Contrastive loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        max_epochs = int(self.config.epochs)
        param_groups = define_param_groups(self.model, self.config.weight_decay, 'adam')
        lr = self.config.lr
        optimizer = Adam(param_groups, lr=lr, weight_decay=self.config.weight_decay)

        print(f'Optimizer Adam, '
              f'Learning Rate {lr}, '
              f'Effective batch size {self.config.batch_size * self.config.gradient_accumulation_steps}')

        scheduler_warmup = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=10, max_epochs=max_epochs,
                                                         warmup_start_lr=0.0)

        return [optimizer], [scheduler_warmup]



class Hparams:
    def __init__(self,
                 log_every_n_steps=1,
                 epochs=100,
                 seed=77777,
                 cuda=True,
                 img_size=224,
                 save="./saved_models/",
                 gradient_accumulation_steps=1,
                 batch_size=32,
                 lr=1e-3,
                 embedding_size=128,
                 temperature=0.5,
                 load=False,
                 weight_decay=1e-6,
                 checkpoint_path='./SimCLR_ResNet18.ckpt'):
        self.log_every_n_steps = log_every_n_steps
        self.epochs = epochs  # number of training epochs
        self.seed = seed  # randomness seed
        self.cuda = cuda  # use nvidia gpu
        self.img_size = img_size  # image shape
        self.save = save  # save checkpoint
        self.load = load  # load pretrained checkpoint
        self.gradient_accumulation_steps = gradient_accumulation_steps  # gradient accumulation steps
        self.batch_size = batch_size
        self.lr = lr  # for Adam only
        self.weight_decay = weight_decay
        self.embedding_size = embedding_size  # papers value is 128
        self.temperature = temperature  # 0.1 or 0.5
        self.checkpoint_path = checkpoint_path  # replace checkpoint path here


class Augment:
    """
    A stochastic data augmentation module
    Transforms any given data example randomly
    resulting in two correlated views of the same example,
    denoted x̃_i and x̃_j, which we consider as a positive pair.
    """

    def __init__(self, img_size, s=1):
        color_jitter = T.ColorJitter(
            0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s
        )
        # 10% of the image
        blur = T.GaussianBlur((3, 3), (0.1, 2.0))

        self.train_transform = T.Compose(
            [
                T.RandomResizedCrop(size=img_size),
                T.RandomHorizontalFlip(p=0.5),  # with 0.5 probability
                T.RandomApply([color_jitter], p=0.8),
                T.RandomApply([blur], p=0.5),
                T.RandomGrayscale(p=0.2),
                # imagenet stats
                T.ToTensor(),
                T.Normalize(mean=train_rgb_mean, std=train_rgb_std)
            ]
        )

        self.test_transform = T.Compose(
            [
                T.ToTensor(),
                T.Normalize(mean=train_rgb_mean, std=train_rgb_std),
            ]
        )

    def __call__(self, x):
        return self.train_transform(x), self.train_transform(x)
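
The default, define_param_groups, and ContrastiveLoss helpers referenced above are not included in the snippet. A minimal sketch of what they typically look like in SimCLR implementations (an assumption, since the actual versions are not posted):

def default(val, def_val):
    # fall back to def_val when val is None
    return def_val if val is None else val


def define_param_groups(model, weight_decay, optimizer_name):
    # exclude batch-norm parameters and biases from weight decay
    decay, no_decay = [], []
    for name, param in model.named_parameters():
        if 'bn' in name or name.endswith('bias'):
            no_decay.append(param)
        else:
            decay.append(param)
    return [{'params': decay, 'weight_decay': weight_decay},
            {'params': no_decay, 'weight_decay': 0.0}]


class ContrastiveLoss(nn.Module):
    # NT-Xent loss from the SimCLR paper; batch_size is kept for API
    # compatibility, the actual size is inferred at runtime
    def __init__(self, batch_size, temperature=0.5):
        super().__init__()
        self.temperature = temperature

    def forward(self, z_i, z_j):
        n = z_i.size(0)
        z = F.normalize(torch.cat([z_i, z_j], dim=0), dim=1)
        sim = z @ z.t() / self.temperature   # (2n, 2n) cosine similarities
        sim.fill_diagonal_(-float('inf'))    # mask self-similarity
        # the positive for row i is row i + n (and vice versa)
        targets = torch.cat([torch.arange(n) + n, torch.arange(n)]).to(z.device)
        return F.cross_entropy(sim, targets)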

My fine-tuning classes:

import pytorch_lightning as pl
import torch
from torch.optim import SGD


class Hparams_eval:
    def __init__(self, log_every_n_steps=1,
                 epochs=150,
                 seed=77777,
                 cuda=True,
                 img_size=224,
                 save="./saved_models/",
                 gradient_accumulation_steps=1,
                 batch_size=32,
                 lr=1e-5,
                 embedding_size=128,
                 temperature=0.5):
        self.log_every_n_steps = log_every_n_steps
        self.epochs = epochs  # number of training epochs
        self.seed = seed  # randomness seed
        self.cuda = cuda  # use nvidia gpu
        self.img_size = img_size  # image shape
        self.save = save  # save checkpoint
        self.gradient_accumulation_steps = gradient_accumulation_steps  # gradient accumulation steps
        self.batch_size = batch_size
        self.lr = lr
        self.embedding_size = embedding_size  # papers value is 128
        self.temperature = temperature  # 0.1 or 0.5


class SimCLR_eval(pl.LightningModule):
    def __init__(self, lr, model=None, linear_eval=False):
        super().__init__()
        self.lr = lr
        self.linear_eval = linear_eval
        if self.linear_eval:
            model.eval()
        self.mlp = torch.nn.Sequential(
            torch.nn.ReLU(),  # todo upgrade MLP
            torch.nn.Dropout(0.1),
            torch.nn.Linear(512, 2)
        )

        self.model = torch.nn.Sequential(
            model, self.mlp
        )
        self.loss = torch.nn.CrossEntropyLoss()

    def forward(self, X):
        return self.model(X)

    def training_step(self, batch, batch_idx):
        x, y = batch
        z = self.forward(x)
        loss = self.loss(z, y)
        self.log('Cross Entropy loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        predicted = z.argmax(1)
        acc = (predicted == y).sum().item() / y.size(0)
        self.log('Train Acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        z = self.forward(x)
        loss = self.loss(z, y)
        self.log('Val CE loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)

        predicted = z.argmax(1)
        acc = (predicted == y).sum().item() / y.size(0)
        self.log('Val Accuracy', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def configure_optimizers(self):
        if self.linear_eval:
            print(f"\n\n Attention! Linear evaluation \n")
            optimizer = SGD(self.mlp.parameters(), lr=self.lr, momentum=0.9)
        else:
            optimizer = SGD(self.model.parameters(), lr=self.lr, momentum=0.9)
        return [optimizer]
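
Note that model.eval() in SimCLR_eval only switches batch-norm and dropout to inference behavior, and Lightning puts the module back into train mode during fitting; the backbone stays fixed during linear evaluation only because the optimizer receives self.mlp.parameters(). A common addition in __init__ to freeze it explicitly (a sketch of the usual linear-evaluation setup, not the code above):

        if self.linear_eval:
            model.eval()
            for param in model.parameters():
                param.requires_grad_(False)  # keep the frozen backbone out of autograd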

performance.py:

from pathlib import Path
from random import shuffle

import torch
import torch.nn as nn
import torchvision.models as models
from PIL import Image


class ModelTester():
    def __init__(self):
        self.train_config = Hparams_eval()
        # load resnet backbone
        self.backbone = models.resnet18(pretrained=False)
        self.backbone.fc = nn.Identity()
        self.backbone.load_state_dict(torch.load('resnet18_backbone_weights.ckpt')['model_state_dict'])
        
        self.model = SimCLR_eval(self.train_config.lr, model=self.backbone, linear_eval=True)
        self.model.load_state_dict(torch.load(Path('logs/fine_tuning_simple_eval/epoch150_batch160/'
                                                   'epoch150_batch160_state_dict.ckpt')))
        
        augment = Augment(224)
        self.transform = augment.train_transform
    
    def test_unseen(self, image):
        image_input = self.transform(image)
        image_input = image_input.unsqueeze(0)
        
        output = self.model(image_input)
        prediction = torch.max(output.data, 1)[1].numpy()
        return int(prediction[0])


tester = ModelTester()
images = ff.get_all_files("/media/wlutz/TOSHIBA EXT/Image Analysis/HIV IMAGES/")
shuffle(images)

for i in images:
    img = Image.open(i)
    output = tester.test_unseen(image=img)
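
For comparison, inference is usually run with the deterministic test_transform, the model in eval mode, and gradients disabled; a minimal sketch of such a path (an illustration, not the code above):

tester.model.eval()                    # disable dropout for prediction
with torch.no_grad():                  # no gradients needed at test time
    image_input = Augment(224).test_transform(img).unsqueeze(0)
    output = tester.model(image_input)
    prediction = output.argmax(dim=1).item()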

Edit: I forgot to include how I fine-tune my model

import os

import torch
import torch.nn as nn
import torchvision.models as models
from pytorch_lightning import Trainer, loggers as pl_loggers
from pytorch_lightning.callbacks import GradientAccumulationScheduler, ModelCheckpoint

available_gpus = len([torch.cuda.device(i) for i in range(torch.cuda.device_count())])
train_config = Hparams_eval(epochs=epoch, batch_size=batch_size)
save_model_path = os.path.join(os.getcwd(), "saved_models/")
print('available_gpus:', available_gpus)
filename = 'SimCLR_ResNet18_finetune_'
reproducibility(train_config)
save_name = filename + '_Final.ckpt'

# load resnet backbone
backbone = models.resnet18(pretrained=False)
backbone.fc = nn.Identity()
checkpoint = torch.load('resnet18_backbone_weights.ckpt')
backbone.load_state_dict(checkpoint['model_state_dict'])
model = SimCLR_eval(train_config.lr, model=backbone, linear_eval=False)

# preprocessing and data loaders
transform_preprocess = Augment(train_config.img_size).test_transform
data_loader = get_idr_dataloader(train_config.batch_size, transform=transform_preprocess,
                                 split='train+finetune')
data_loader_test = get_idr_dataloader(train_config.batch_size, transform=transform_preprocess,
                                      split='test+finetune')

# callbacks and trainer
accumulator = GradientAccumulationScheduler(scheduling={0: train_config.gradient_accumulation_steps})

checkpoint_callback = ModelCheckpoint(filename=filename, dirpath=save_model_path, save_last=True,
                                      save_top_k=2,
                                      monitor='Val Accuracy_epoch', mode='max')

tb_logger = pl_loggers.TensorBoardLogger(save_dir='logs', name='fine_tuning',
                                         version=f"epoch{train_config.epochs}_batch{train_config.batch_size}")
trainer = Trainer(callbacks=[checkpoint_callback, accumulator],
                  gpus=available_gpus,
                  max_epochs=train_config.epochs,
                  log_every_n_steps=train_config.log_every_n_steps,
                  logger=tb_logger)

trainer.fit(model, data_loader, data_loader_test)
trainer.save_checkpoint(save_name)
torch.save(model.state_dict(), f"logs/fine_tuning_simple_eval/epoch{epoch}_batch{batch_size}/"
                               f"epoch{epoch}_batch{batch_size}_state_dict.ckpt")
send_telegram_notification(f"epoch {epoch} batch {batch_size} fine tuning done")

Edit 2: I changed linear_eval to True in performance.py; the problem persists.

Python TensorFlow machine-learning deep-learning ResNet



A: No answers yet