Asked by: Willy Lutz  Asked: 11/16/2023  Last edited by: Willy Lutz  Updated: 11/16/2023  Views: 17
Tensorboard val accuracy is 0.83, but manual testing is random
Q:
I am working on a SimCLR/ResNet18 model for contrastive learning.
I trained it with several batch sizes and, from the TensorBoard data, got the validation accuracy plot shown below, with a maximum accuracy of 0.83. Note: my training accuracy reaches 1, so the model is overfitting.
From my understanding, in performance.py I load my fine-tuned model (not the pretext one) from
logs/fine_tuning_simple_eval/epoch150_batch160/epoch150_batch160_state_dict.ckpt
and use as backbone the ResNet model that I trained during the pretext task and saved in
'resnet18_backbone_weights.ckpt'
My problem: when I test the accuracy of my predictions with my performance.py, I get purely random predictions.
Any help on why I have such a huge difference between these two scores?
Here is the implementation:
Some classes for the pretext task:
import os
from typing import Optional, Tuple, Callable, Any
import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as T
from PIL import Image
from pl_bolts.optimizers import LinearWarmupCosineAnnealingLR
from torch.optim import Adam
from torchvision.datasets import VisionDataset
from torchvision.datasets.utils import verify_str_arg
train_rgb_mean = [0.485, 0.456, 0.406]
train_rgb_std = [0.229, 0.224, 0.225]
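# (default(), ContrastiveLoss and define_param_groups are used below but defined elsewhere in my code; they are omitted from this post)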
class AddProjection(nn.Module):
def __init__(self, config, model=None, mlp_dim=512):
super(AddProjection, self).__init__()
embedding_size = config.embedding_size
self.backbone = default(model, models.resnet18(pretrained=False, num_classes=config.embedding_size))
mlp_dim = default(mlp_dim, self.backbone.fc.in_features)
print('Dim MLP input:', mlp_dim)
self.backbone.fc = nn.Identity()
# add mlp projection head
self.projection = nn.Sequential(
nn.Linear(in_features=mlp_dim, out_features=mlp_dim),
nn.BatchNorm1d(mlp_dim),
nn.ReLU(),
nn.Linear(in_features=mlp_dim, out_features=embedding_size),
nn.BatchNorm1d(embedding_size),
)
def forward(self, x, return_embedding=False):
embedding = self.backbone(x)
if return_embedding:
return embedding
return self.projection(embedding)
class SimCLR_pl(pl.LightningModule):
def __init__(self, config, model=None, feat_dim=512):
super().__init__()
self.config = config
self.model = AddProjection(config, model=model, mlp_dim=feat_dim)
self.loss = ContrastiveLoss(config.batch_size, temperature=self.config.temperature)
def forward(self, X):
return self.model(X)
def training_step(self, batch, batch_idx):
(x1, x2), labels = batch
z1 = self.model(x1)
z2 = self.model(x2)
loss = self.loss(z1, z2)
self.log('Contrastive loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
return loss
def configure_optimizers(self):
max_epochs = int(self.config.epochs)
param_groups = define_param_groups(self.model, self.config.weight_decay, 'adam')
lr = self.config.lr
optimizer = Adam(param_groups, lr=lr, weight_decay=self.config.weight_decay)
print(f'Optimizer Adam, '
f'Learning Rate {lr}, '
f'Effective batch size {self.config.batch_size * self.config.gradient_accumulation_steps}')
scheduler_warmup = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=10, max_epochs=max_epochs,
warmup_start_lr=0.0)
return [optimizer], [scheduler_warmup]
class Hparams:
def __init__(self,
log_every_n_steps=1,
epochs=100,
seed=77777,
cuda=True,
img_size=224,
save="./saved_models/",
gradient_accumulation_steps=1,
batch_size=32,
lr=1e-3,
embedding_size=128,
temperature=0.5,
load=False,
weight_decay=1e-6,
checkpoint_path='./SimCLR_ResNet18.ckpt'):
self.log_every_n_steps = log_every_n_steps
self.epochs = epochs # number of training epochs
self.seed = seed # randomness seed
self.cuda = cuda # use nvidia gpu
self.img_size = img_size # image shape
self.save = save # save checkpoint
self.load = load # load pretrained checkpoint
self.gradient_accumulation_steps = gradient_accumulation_steps # gradient accumulation steps
self.batch_size = batch_size
self.lr = lr # for ADAm only
self.weight_decay = weight_decay
self.embedding_size = embedding_size # papers value is 128
self.temperature = temperature # 0.1 or 0.5
self.checkpoint_path = checkpoint_path # replace checkpoint path here
class Augment:
"""
A stochastic data augmentation module
Transforms any given data example randomly
resulting in two correlated views of the same example,
    denoted x̃i and x̃j, which we consider as a positive pair.
"""
def __init__(self, img_size, s=1):
color_jitter = T.ColorJitter(
0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s
)
# 10% of the image
blur = T.GaussianBlur((3, 3), (0.1, 2.0))
self.train_transform = T.Compose(
[
T.RandomResizedCrop(size=img_size),
T.RandomHorizontalFlip(p=0.5), # with 0.5 probability
T.RandomApply([color_jitter], p=0.8),
T.RandomApply([blur], p=0.5),
T.RandomGrayscale(p=0.2),
# imagenet stats
T.ToTensor(),
T.Normalize(mean=train_rgb_mean, std=train_rgb_std)
]
)
self.test_transform = T.Compose(
[
T.ToTensor(),
T.Normalize(mean=train_rgb_mean, std=train_rgb_std),
]
)
def __call__(self, x):
return self.train_transform(x), self.train_transform(x)
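I have not included the pretext training script itself; the only thing performance.py relies on is that, after pretext training, the ResNet backbone is exported under a 'model_state_dict' key, roughly like this (a sketch, not my exact script):
config = Hparams()
model = SimCLR_pl(config, model=models.resnet18(pretrained=False), feat_dim=512)
trainer = pl.Trainer(max_epochs=config.epochs)
# trainer.fit(model, pretext_loader)  # pretext_loader yields the (x_i, x_j) pairs produced by Augment
# export only the ResNet18 backbone (its fc is already replaced by nn.Identity inside AddProjection)
torch.save({'model_state_dict': model.model.backbone.state_dict()}, 'resnet18_backbone_weights.ckpt')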
My fine-tuning classes:
import pytorch_lightning as pl
import torch
from torch.optim import SGD
class Hparams_eval:
def __init__(self, log_every_n_steps=1,
epochs=150,
seed=77777,
cuda=True,
img_size=224,
save="./saved_models/",
gradient_accumulation_steps=1,
batch_size=32,
lr=1e-5,
embedding_size=128,
temperature=0.5):
self.log_every_n_steps = log_every_n_steps
self.epochs = epochs # number of training epochs
self.seed = seed # randomness seed
self.cuda = cuda # use nvidia gpu
self.img_size = img_size # image shape
self.save = save # save checkpoint
self.gradient_accumulation_steps = gradient_accumulation_steps # gradient accumulation steps
self.batch_size = batch_size
self.lr = lr
self.embedding_size = embedding_size # papers value is 128
self.temperature = temperature # 0.1 or 0.5
class SimCLR_eval(pl.LightningModule):
def __init__(self, lr, model=None, linear_eval=False):
super().__init__()
self.lr = lr
self.linear_eval = linear_eval
if self.linear_eval:
model.eval()
self.mlp = torch.nn.Sequential(
torch.nn.ReLU(), # todo upgrade MLP
torch.nn.Dropout(0.1),
torch.nn.Linear(512, 2)
)
self.model = torch.nn.Sequential(
model, self.mlp
)
self.loss = torch.nn.CrossEntropyLoss()
def forward(self, X):
return self.model(X)
def training_step(self, batch, batch_idx):
x, y = batch
z = self.forward(x)
loss = self.loss(z, y)
self.log('Cross Entropy loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
predicted = z.argmax(1)
acc = (predicted == y).sum().item() / y.size(0)
self.log('Train Acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
return loss
def validation_step(self, batch, batch_idx):
x, y = batch
z = self.forward(x)
loss = self.loss(z, y)
self.log('Val CE loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)
predicted = z.argmax(1)
acc = (predicted == y).sum().item() / y.size(0)
self.log('Val Accuracy', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
return loss
def configure_optimizers(self):
if self.linear_eval:
print(f"\n\n Attention! Linear evaluation \n")
optimizer = SGD(self.mlp.parameters(), lr=self.lr, momentum=0.9)
else:
optimizer = SGD(self.model.parameters(), lr=self.lr, momentum=0.9)
return [optimizer]
performance.py:
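# (imports are omitted in this file: it also uses torch, torch.nn as nn, torchvision.models as models,
#  PIL.Image, pathlib.Path, random.shuffle, plus Hparams_eval, SimCLR_eval, Augment and my helper module ff)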
class ModelTester():
def __init__(self):
self.train_config = Hparams_eval()
# load resnet backbone
self.backbone = models.resnet18(pretrained=False)
self.backbone.fc = nn.Identity()
self.backbone.load_state_dict(torch.load('resnet18_backbone_weights.ckpt')['model_state_dict'])
self.model = SimCLR_eval(self.train_config.lr, model=self.backbone, linear_eval=True)
self.model.load_state_dict(torch.load(Path('logs/fine_tuning_simple_eval/epoch150_batch160/'
'epoch150_batch160_state_dict.ckpt')))
augment = Augment(224)
self.transform = augment.train_transform
def test_unseen(self, image):
image_input = self.transform(image)
image_input = image_input.unsqueeze(0)
output = self.model(image_input)
prediction = torch.max(output.data, 1)[1].numpy()
return int(prediction[0])
tester = ModelTester()
images = ff.get_all_files("/media/wlutz/TOSHIBA EXT/Image Analysis/HIV IMAGES/")
shuffle(images)
for i in images:
img = Image.open(i)
output = tester.test_unseen(image=img)
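As a cross-check, the same checkpoint can also be scored on the loader and preprocessing used during fine-tuning (a sketch; get_idr_dataloader and the 'test+finetune' split come from the fine-tuning script in the edit below, and the model is put in eval mode with gradients disabled, as usual for inference):
tester.model.eval()  # reuse the ModelTester created above; disable Dropout / put BatchNorm in eval mode
eval_loader = get_idr_dataloader(32, transform=Augment(224).test_transform, split='test+finetune')
correct, total = 0, 0
with torch.no_grad():
    for x, y in eval_loader:
        preds = tester.model(x).argmax(1)
        correct += (preds == y).sum().item()
        total += y.size(0)
print('accuracy on the fine-tuning validation split:', correct / total)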
EDIT: I forgot to include how I fine-tune my model:
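# (excerpt: imports, the outer loop that sets epoch and batch_size, and my helpers reproducibility(), get_idr_dataloader() and send_telegram_notification() are omitted)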
available_gpus = len([torch.cuda.device(i) for i in range(torch.cuda.device_count())])
train_config = Hparams_eval(epochs=epoch, batch_size=batch_size)
save_model_path = os.path.join(os.getcwd(), "saved_models/")
print('available_gpus:', available_gpus)
filename = 'SimCLR_ResNet18_finetune_'
reproducibility(train_config)
save_name = filename + '_Final.ckpt'
# load resnet backbone
backbone = models.resnet18(pretrained=False)
backbone.fc = nn.Identity()
checkpoint = torch.load('resnet18_backbone_weights.ckpt')
backbone.load_state_dict(checkpoint['model_state_dict'])
model = SimCLR_eval(train_config.lr, model=backbone, linear_eval=False)
# preprocessing and data loaders
transform_preprocess = Augment(train_config.img_size).test_transform
data_loader = get_idr_dataloader(train_config.batch_size, transform=transform_preprocess,
split='train+finetune')
data_loader_test = get_idr_dataloader(train_config.batch_size, transform=transform_preprocess,
split='test+finetune')
# callbacks and trainer
accumulator = GradientAccumulationScheduler(scheduling={0: train_config.gradient_accumulation_steps})
checkpoint_callback = ModelCheckpoint(filename=filename, dirpath=save_model_path, save_last=True,
save_top_k=2,
monitor='Val Accuracy_epoch', mode='max')
tb_logger = pl_loggers.TensorBoardLogger(save_dir='logs', name='fine_tuning',
version=f"epoch{train_config.epochs}_batch{train_config.batch_size}")
trainer = Trainer(callbacks=[checkpoint_callback, accumulator],
gpus=available_gpus,
max_epochs=train_config.epochs,
log_every_n_steps=train_config.log_every_n_steps,
logger=tb_logger)
trainer.fit(model, data_loader, data_loader_test)
trainer.save_checkpoint(save_name)
torch.save(model.state_dict(), f"logs/fine_tuning_simple_eval/epoch{epoch}_batch{batch_size}/"
f"epoch{epoch}_batch{batch_size}_state_dict.ckpt")
send_telegram_notification(f"epoch {epoch} batch {batch_size} fine tuning done")
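As a sanity check, the keys of this saved state dict can be compared with the keys of a freshly built SimCLR_eval, to make sure load_state_dict in performance.py maps every parameter (a sketch):
backbone_check = models.resnet18(pretrained=False)
backbone_check.fc = nn.Identity()
fresh_keys = set(SimCLR_eval(1e-5, model=backbone_check, linear_eval=True).state_dict())
saved_keys = set(torch.load('logs/fine_tuning_simple_eval/epoch150_batch160/epoch150_batch160_state_dict.ckpt'))
print('missing from checkpoint:', sorted(fresh_keys - saved_keys))
print('unexpected in checkpoint:', sorted(saved_keys - fresh_keys))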
EDIT 2: I changed linear_eval to True in performance.py; the problem persists.
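Concretely, in ModelTester the model is now built with:
self.model = SimCLR_eval(self.train_config.lr, model=self.backbone, linear_eval=True)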
A: No answers yet