Merge a050ffceb0 into 8a70a8cd6f

2025-06-26 18:18:11 +00:00 · 2024-09-06 00:38:57 +08:00 · 2024-09-06 00:38:57 +08:00 · bf4c20b32b
commit bf4c20b32b
parent 8a70a8cd6f a050ffceb0
6 changed files with 5 additions and 186 deletions
--- a/environment.yml
+++ b/environment.yml
@@ -9,7 +9,7 @@ dependencies:
  - python=3.7.13
  - pip=22.3.1
  - pytorch=1.12.1
-  - torchaudio=0.12.1
+  - torchmetrics
  - torchvision=0.13.1
  - tqdm
  - pip:
--- a/lpipsPyTorch/__init__.py
+++ b/lpipsPyTorch/__init__.py
@@ -1,21 +0,0 @@
-import torch
-
-from .modules.lpips import LPIPS
-
-
-def lpips(x: torch.Tensor,
-          y: torch.Tensor,
-          net_type: str = 'alex',
-          version: str = '0.1'):
-    r"""Function that measures
-    Learned Perceptual Image Patch Similarity (LPIPS).
-
-    Arguments:
-        x, y (torch.Tensor): the input tensors to compare.
-        net_type (str): the network type to compare the features: 
-                        'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
-        version (str): the version of LPIPS. Default: 0.1.
-    """
-    device = x.device
-    criterion = LPIPS(net_type, version).to(device)
-    return criterion(x, y)
--- a/lpipsPyTorch/modules/lpips.py
+++ b/lpipsPyTorch/modules/lpips.py
@@ -1,36 +0,0 @@
-import torch
-import torch.nn as nn
-
-from .networks import get_network, LinLayers
-from .utils import get_state_dict
-
-
-class LPIPS(nn.Module):
-    r"""Creates a criterion that measures
-    Learned Perceptual Image Patch Similarity (LPIPS).
-
-    Arguments:
-        net_type (str): the network type to compare the features: 
-                        'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
-        version (str): the version of LPIPS. Default: 0.1.
-    """
-    def __init__(self, net_type: str = 'alex', version: str = '0.1'):
-
-        assert version in ['0.1'], 'v0.1 is only supported now'
-
-        super(LPIPS, self).__init__()
-
-        # pretrained network
-        self.net = get_network(net_type)
-
-        # linear layers
-        self.lin = LinLayers(self.net.n_channels_list)
-        self.lin.load_state_dict(get_state_dict(net_type, version))
-
-    def forward(self, x: torch.Tensor, y: torch.Tensor):
-        feat_x, feat_y = self.net(x), self.net(y)
-
-        diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)]
-        res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)]
-
-        return torch.sum(torch.cat(res, 0), 0, True)
--- a/lpipsPyTorch/modules/networks.py
+++ b/lpipsPyTorch/modules/networks.py
@@ -1,96 +0,0 @@
-from typing import Sequence
-
-from itertools import chain
-
-import torch
-import torch.nn as nn
-from torchvision import models
-
-from .utils import normalize_activation
-
-
-def get_network(net_type: str):
-    if net_type == 'alex':
-        return AlexNet()
-    elif net_type == 'squeeze':
-        return SqueezeNet()
-    elif net_type == 'vgg':
-        return VGG16()
-    else:
-        raise NotImplementedError('choose net_type from [alex, squeeze, vgg].')
-
-
-class LinLayers(nn.ModuleList):
-    def __init__(self, n_channels_list: Sequence[int]):
-        super(LinLayers, self).__init__([
-            nn.Sequential(
-                nn.Identity(),
-                nn.Conv2d(nc, 1, 1, 1, 0, bias=False)
-            ) for nc in n_channels_list
-        ])
-
-        for param in self.parameters():
-            param.requires_grad = False
-
-
-class BaseNet(nn.Module):
-    def __init__(self):
-        super(BaseNet, self).__init__()
-
-        # register buffer
-        self.register_buffer(
-            'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
-        self.register_buffer(
-            'std', torch.Tensor([.458, .448, .450])[None, :, None, None])
-
-    def set_requires_grad(self, state: bool):
-        for param in chain(self.parameters(), self.buffers()):
-            param.requires_grad = state
-
-    def z_score(self, x: torch.Tensor):
-        return (x - self.mean) / self.std
-
-    def forward(self, x: torch.Tensor):
-        x = self.z_score(x)
-
-        output = []
-        for i, (_, layer) in enumerate(self.layers._modules.items(), 1):
-            x = layer(x)
-            if i in self.target_layers:
-                output.append(normalize_activation(x))
-            if len(output) == len(self.target_layers):
-                break
-        return output
-
-
-class SqueezeNet(BaseNet):
-    def __init__(self):
-        super(SqueezeNet, self).__init__()
-
-        self.layers = models.squeezenet1_1(True).features
-        self.target_layers = [2, 5, 8, 10, 11, 12, 13]
-        self.n_channels_list = [64, 128, 256, 384, 384, 512, 512]
-
-        self.set_requires_grad(False)
-
-
-class AlexNet(BaseNet):
-    def __init__(self):
-        super(AlexNet, self).__init__()
-
-        self.layers = models.alexnet(True).features
-        self.target_layers = [2, 5, 8, 10, 12]
-        self.n_channels_list = [64, 192, 384, 256, 256]
-
-        self.set_requires_grad(False)
-
-
-class VGG16(BaseNet):
-    def __init__(self):
-        super(VGG16, self).__init__()
-
-        self.layers = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features
-        self.target_layers = [4, 9, 16, 23, 30]
-        self.n_channels_list = [64, 128, 256, 512, 512]
-
-        self.set_requires_grad(False)
--- a/lpipsPyTorch/modules/utils.py
+++ b/lpipsPyTorch/modules/utils.py
@@ -1,30 +0,0 @@
-from collections import OrderedDict
-
-import torch
-
-
-def normalize_activation(x, eps=1e-10):
-    norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True))
-    return x / (norm_factor + eps)
-
-
-def get_state_dict(net_type: str = 'alex', version: str = '0.1'):
-    # build url
-    url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \
-        + f'master/lpips/weights/v{version}/{net_type}.pth'
-
-    # download
-    old_state_dict = torch.hub.load_state_dict_from_url(
-        url, progress=True,
-        map_location=None if torch.cuda.is_available() else torch.device('cpu')
-    )
-
-    # rename keys
-    new_state_dict = OrderedDict()
-    for key, val in old_state_dict.items():
-        new_key = key
-        new_key = new_key.replace('lin', '')
-        new_key = new_key.replace('model.', '')
-        new_state_dict[new_key] = val
-
-    return new_state_dict
--- a/metrics.py
+++ b/metrics.py
@@ -15,12 +15,14 @@ from PIL import Image
 import torch
 import torchvision.transforms.functional as tf
 from utils.loss_utils import ssim
-from lpipsPyTorch import lpips
+from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
 import json
 from tqdm import tqdm
 from utils.image_utils import psnr
 from argparse import ArgumentParser

+lpips = LearnedPerceptualImagePatchSimilarity(net_type='vgg').cuda()
+
 def readImages(renders_dir, gt_dir):
    renders = []
    gts = []
@@ -71,7 +73,7 @@ def evaluate(model_paths):
                for idx in tqdm(range(len(renders)), desc="Metric evaluation progress"):
                    ssims.append(ssim(renders[idx], gts[idx]))
                    psnrs.append(psnr(renders[idx], gts[idx]))
-                    lpipss.append(lpips(renders[idx], gts[idx], net_type='vgg'))
+                    lpipss.append(lpips(renders[idx], gts[idx]))

                print("  SSIM : {:>12.7f}".format(torch.tensor(ssims).mean(), ".5"))
                print("  PSNR : {:>12.7f}".format(torch.tensor(psnrs).mean(), ".5"))