You are currently viewing 解析深度學習:Grad-CAM 的視覺化技術如何提升模型可解釋性

解析深度學習:Grad-CAM 的視覺化技術如何提升模型可解釋性

歡迎來到這篇關於 Grad-CAM 的教學文章。Grad-CAM 是一種強大的工具,能夠幫助我們理解深度學習模型如何對圖像進行分類。透過視覺化類別激活圖(Class Activation Maps,CAM),我們可以直觀地看到模型在做出預測時主要關注圖像的哪些區域。這不僅可以增強我們對模型學習模式的理解,也能夠幫助我們識別和改進模型的弱點。在這篇文章中,我們將一起探索 Grad-CAM 的原理,並通過實際的程式碼示例,學習如何應用 Grad-CAM 來視覺化深度學習模型的決策過程。無論你是深度學習的新手還是有經驗的專家,這篇文章都能為你提供寶貴的資訊和實用的技巧。讓我們一起深入探索 Grad-CAM 的世界吧!

導入必要的庫

from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, EigenGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image

讀取訓練好的模型

model = build_network(config)()
state_dict = load_checkpoint(args, config)
model.load_state_dict(state_dict['model'])
# Move to the target device and switch to evaluation mode so that layers such
# as Dropout and BatchNorm use their inference behaviour while the CAM is computed.
model = model.to(DEVICE).eval()

選擇目標層(target_layer):Grad-CAM 需要指定模型中的目標層(通常是最後一個卷積層),並利用該層的特徵圖(feature maps)及其梯度來產生熱力圖。請注意 target_layers 參數接受的是由層組成的列表。

# target_layers expects a list of modules, so wrap the single last layer in a list.
# Assumes the model exposes an indexable container attribute named `nets` — confirm for your model.
target_layer = [model.nets[-1]]

初始化 CAM 物件:根據您選擇的視覺化方法初始化 GradCAM 或其他 CAM 類別(此處以 GradCAMPlusPlus 為例)。

# Build the CAM object for the chosen method; any of the CAM classes imported
# above can be substituted for GradCAMPlusPlus here.
cam = GradCAMPlusPlus(
    model=model,
    target_layers=target_layer,
)

處理圖像: 讀取圖像,並進行適當的預處理以適配模型的輸入要求。

bgr_img = cv2.imread(path, cv2.IMREAD_COLOR)  # OpenCV decodes images in BGR channel order
if bgr_img is None:
    # cv2.imread signals a missing or undecodable file by returning None rather than raising.
    raise FileNotFoundError(f"Cannot read image: {path}")
rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)  # convert channels to RGB
rgb_img = cv2.resize(rgb_img, (224, 224))  # resize to the model input size
# show_cam_on_image requires a float image in [0, 1]; scale once here so the same
# array can be used both for the network input and for the overlay.
rgb_img = rgb_img.astype("float32") / 255
input_tensor = preprocess_image(rgb_img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # normalize and add batch dim

生成GradCAM視覺化:使用模型、處理後的圖像及指定的目標層生成GradCAM視覺化。

grayscale_cam = cam(input_tensor=input_tensor, aug_smooth=True, eigen_smooth=True)
# NOTE: show_cam_on_image requires rgb_img to be a float image in the range [0, 1].
# use_rgb=True makes the heat-map use the same RGB channel order as rgb_img;
# grayscale_cam[0] selects the map of the first (only) image in the batch.
cam_image = show_cam_on_image(rgb_img, grayscale_cam[0], use_rgb=True)
# cv2.imwrite expects BGR channel order, so convert the RGB overlay before it is saved.
cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)

保存GradCAM視覺化結果

import os

output_path = f'./gradcam_output/{filename}_gradcam.png'
# cv2.imwrite does not create missing directories; make sure the target folder exists.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# cv2.imwrite reports failure through its boolean return value instead of raising.
if not cv2.imwrite(output_path, cam_image):
    raise OSError(f"Failed to write Grad-CAM image to {output_path}")

完整程式碼

import os

import cv2
import numpy as np
import torch
from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, EigenGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from torchvision import models, transforms

class GradCamVisualizer:
    """Compute a Grad-CAM++ heat-map for an image file and save the overlay.

    The model is switched to evaluation mode on construction and, when
    ``use_cuda`` is true and CUDA is available, moved to the GPU.

    Args:
        model: ``torch.nn.Module`` whose prediction is to be explained.
        target_layer: Layer of ``model`` passed to ``GradCAMPlusPlus`` as the
            single target layer.
        use_cuda: Request GPU execution; falls back to CPU when CUDA is not
            available.
    """

    def __init__(self, model, target_layer, use_cuda=False):
        self.model = model.eval()
        self.target_layer = target_layer
        self.use_cuda = use_cuda and torch.cuda.is_available()

        if self.use_cuda:
            self.model = self.model.cuda()

    @staticmethod
    def _read_rgb(img_path):
        """Load ``img_path`` with OpenCV and return it in RGB channel order.

        Raises:
            FileNotFoundError: ``img_path`` does not point to an existing file.
            ValueError: the file exists but OpenCV could not decode it.
        """
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")
        bgr = cv2.imread(img_path)
        if bgr is None:
            # cv2.imread returns None instead of raising on decode failure.
            raise ValueError(f"Could not decode image: {img_path}")
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    def preprocess_image(self, img_path):
        """Read an image file and convert it to a normalized input batch.

        Args:
            img_path: Path of the image to load.

        Returns:
            Tensor of shape ``(1, 3, 224, 224)`` normalized with the mean/std
            values listed below.

        Raises:
            FileNotFoundError: the image file does not exist.
            ValueError: the image file cannot be decoded.
        """
        # Load first so that a bad path fails before any transform is built.
        img = self._read_rgb(img_path)

        means = [0.485, 0.456, 0.406]
        stds = [0.229, 0.224, 0.225]

        preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=means, std=stds)
        ])

        # unsqueeze(0) adds the batch dimension expected by the model.
        return preprocess(img).unsqueeze(0)

    def apply_gradcam(self, input_tensor):
        """Run Grad-CAM++ on ``input_tensor`` and return the map of the first image.

        Args:
            input_tensor: Batched input tensor for ``self.model``.

        Returns:
            The class-activation map of batch element 0.
        """
        gradcam = GradCAMPlusPlus(model=self.model, target_layers=[self.target_layer])
        grayscale_cam = gradcam(input_tensor=input_tensor)
        return grayscale_cam[0, :]

    def visualize(self, img_path, output_path):
        """Overlay the Grad-CAM++ heat-map on the image and write it to disk.

        Args:
            img_path: Path of the image to explain.
            output_path: Destination file for the overlay image.

        Raises:
            FileNotFoundError: the image file does not exist.
            ValueError: the image file cannot be decoded.
            OSError: OpenCV reported that the overlay could not be written.
        """
        input_tensor = self.preprocess_image(img_path)

        if self.use_cuda:
            input_tensor = input_tensor.cuda()

        grayscale_cam = self.apply_gradcam(input_tensor)

        # show_cam_on_image expects a float image in [0, 1].
        img = self._read_rgb(img_path).astype(np.float32) / 255.0

        # The map is computed for the 224x224 network input while the image is
        # at its original resolution; stretch the map so both shapes agree.
        height, width = img.shape[:2]
        grayscale_cam = cv2.resize(grayscale_cam, (width, height))

        # Blend in RGB (the image's channel order), then convert to the BGR
        # order that cv2.imwrite expects.
        cam_image = show_cam_on_image(img, grayscale_cam, use_rgb=True)
        cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)

        # cv2.imwrite reports failure through its boolean return value.
        if not cv2.imwrite(output_path, cam_image):
            raise OSError(f"Failed to write Grad-CAM image to {output_path}")

# Demo: explain a prediction of an ImageNet-pretrained VGG16.
img_path = "your_pic.jpg"
output_path = 'gradcam_pic.jpg'

model = models.vgg16(pretrained=True)
# Use the last module of the convolutional feature extractor as the CAM target.
target_layer = model.features[-1]

visualizer = GradCamVisualizer(
    model=model,
    target_layer=target_layer,
    use_cuda=False,
)
visualizer.visualize(img_path=img_path, output_path=output_path)
0 0 votes
Article Rating
Subscribe
Notify of
guest

0 Comments
Oldest
Newest Most Voted
Inline Feedbacks
View all comments