In [1]:
from pymongo import MongoClient

# Connect to the MongoDB server on this machine (localhost, port 27017)
client = MongoClient("mongodb://localhost:27017")

# Handle to the database used by the rest of this notebook
db = client["knravish_mwdb_phase_1"]


In [2]:
# Create/access the collection that holds the feature descriptor documents
fd_collection = db["fd_collection"]


### (Task 1's code without visualization)

In [4]:
import cv2
import numpy as np
from scipy.stats import skew

import torch
import torchvision.transforms as transforms

import torchvision.datasets as datasets

# Raw string literal: the Windows path is full of backslashes, and sequences such
# as "\K" or "\A" are invalid escapes in a normal string (they trigger a
# SyntaxWarning/DeprecationWarning). The resulting path value is unchanged.
dataset_path = r"C:\Kaushik\ASU\CSE 515 - Multimedia and Web Databases\Project\Phase 1\Datasets"

# Caltech101 dataset rooted at dataset_path; indexing it yields (image, label) pairs
dataset = datasets.Caltech101(
    root=dataset_path,
    download=False,  # True if you wish to download for first time
)


In [5]:
# Class transform to partition image into rows x cols grid


class GridPartition:
    """Callable transform that cuts an image into a rows x cols grid of crops.

    The image must expose ``size`` as ``(width, height)`` and a ``crop(box)``
    method taking ``(left, top, right, bottom)`` (the PIL image interface).
    Cell dimensions use integer division, so any remainder pixels on the right
    and bottom edges are not covered by a cell.
    """

    def __init__(self, rows, cols):
        self.rows, self.cols = rows, cols

    def __call__(self, img):
        """Return the cells as a flat list, row by row (left to right, top to bottom)."""
        width, height = img.size
        tile_w = width // self.cols
        tile_h = height // self.rows

        return [
            img.crop(
                (
                    col * tile_w,
                    row * tile_h,
                    col * tile_w + tile_w,
                    row * tile_h + tile_h,
                )
            )
            for row in range(self.rows)
            for col in range(self.cols)
        ]


def compute_color_moments(image):
    """Compute mean, standard deviation and skewness of the first three channels.

    Args:
        image: PIL image (or anything ``np.array`` turns into an array indexable
            as ``[:, :, channel]`` with at least three channels).

    Returns:
        A list of three ``[mean, std_dev, skewness]`` lists, one per channel,
        in channel order.

    A channel with zero variance (e.g. a uniformly coloured cell) has undefined
    skewness; ``scipy.stats.skew`` can return NaN and emit a RuntimeWarning for
    it, and that NaN would then end up in the stored descriptor. Such channels
    are reported with a skewness of 0.0 instead.
    """
    pixels = np.array(image)  # Convert PIL Image to NumPy array
    moments = []

    for channel in range(3):  # Iterate over RGB channels
        channel_data = pixels[:, :, channel]
        mean = np.mean(channel_data)
        std_dev = np.std(channel_data)
        if std_dev == 0:
            # Constant channel: no asymmetry, and avoids the NaN/warning from skew()
            skewness = 0.0
        else:
            skewness = skew(channel_data, axis=None)
        moments.append([mean, std_dev, skewness])

    return moments


def compute_color_moments_for_grid(grid):
    """Compute color moments for every cell and return them as one flat array.

    ``grid`` is an iterable of cells; the per-cell results are stacked in
    iteration order and flattened to 1-D so the next step can reshape them.
    """
    per_cell_moments = list(map(compute_color_moments, grid))
    return np.array(per_cell_moments).flatten()


def combine_color_moments(grid_color_moments):
    """Turn the flat color-moment values into a (10, 10, 3, 3) tensor.

    Axes: grid row, grid column, channel, moment - i.e. a 10x10 grid with
    3 channels per cell and 3 moments per channel.
    """
    target_shape = (10, 10, 3, 3)  # resize as needed
    flat_tensor = torch.Tensor(grid_color_moments)
    return flat_tensor.view(*target_shape)


# Color-moments pipeline: resize -> 10x10 grid -> per-cell moments -> tensor
CM_transform = transforms.Compose([
    transforms.Resize(size=(100, 300)),  # height 100, width 300
    GridPartition(10, 10),  # 10 rows x 10 columns, returned as a list of cells
    compute_color_moments_for_grid,  # flat array of every cell's moments
    combine_color_moments,  # reshape into the final descriptor tensor
])


In [6]:
def compute_gradient_histogram(grid_cell):
    """Build a 9-bin, magnitude-weighted histogram of gradient directions for one cell.

    The cell is converted to a uint8 array (HOG_transform passes grayscale
    cells), differentiated along x and y, and every pixel votes for its
    gradient direction with a weight equal to its gradient magnitude.

    Returns:
        A one-element list holding the length-9 histogram array.
    """
    cell_pixels = np.array(grid_cell, dtype=np.uint8)

    # First-order derivatives; ksize=1 selects the plain [-1, 0, 1] kernel
    # (applied horizontally for x, vertically for y)
    grad_x = cv2.Sobel(cell_pixels, cv2.CV_32F, dx=1, dy=0, ksize=1)
    grad_y = cv2.Sobel(cell_pixels, cv2.CV_32F, dx=0, dy=1, ksize=1)

    # Per-pixel gradient strength and signed angle in degrees
    strength = np.sqrt(grad_x**2 + grad_y**2)
    angle_deg = np.arctan2(grad_y, grad_x) * 180 / np.pi

    # 9 equal-width bins spanning -180..180 degrees, weighted by strength
    bin_totals, _bin_edges = np.histogram(
        angle_deg, bins=9, range=(-180, 180), weights=strength
    )

    return [bin_totals]


def compute_histograms_for_grid(grid):
    """Compute the gradient histogram of every cell and return them as one flat array.

    Cells are processed in iteration order; the stacked results are flattened
    to 1-D so the next step can reshape them.
    """
    per_cell_histograms = list(map(compute_gradient_histogram, grid))
    return np.array(per_cell_histograms).flatten()


def combine_histograms(grid_histograms):
    """Turn the flat histogram values into a (10, 10, 9) tensor.

    Axes: grid row, grid column, histogram bin.
    """
    target_shape = (10, 10, 9)
    flat_tensor = torch.Tensor(grid_histograms)
    return flat_tensor.view(*target_shape)


# HOG pipeline: grayscale -> resize -> 10x10 grid -> per-cell histograms -> tensor
HOG_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # single-channel image
    transforms.Resize(size=(100, 300)),  # height 100, width 300
    GridPartition(10, 10),  # 10 rows x 10 columns, returned as a list of cells
    compute_histograms_for_grid,  # flat array of every cell's histogram
    combine_histograms,  # reshape into the final descriptor tensor
])


In [7]:
import torchvision.models as models

# Load ResNet50 with the default pretrained weights
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# use GPU (Nvidia) when available, otherwise fall back to the CPU
if torch.cuda.is_available():
    dev = torch.device("cuda")
    torch.cuda.empty_cache()
else:
    dev = torch.device("cpu")

# The model is used for feature extraction only, so switch it to eval mode.
# A freshly constructed module is in training mode, where BatchNorm normalises
# with the statistics of the current (single-image) batch and updates its
# running statistics on every forward pass - the extracted features would then
# depend on which images were processed before.
model = model.to(dev).eval()


# Feature extractor for all layers at once


class FeatureExtractor(torch.nn.Module):
    """Wrap a model and capture the outputs of selected sub-modules in one pass.

    A forward hook is registered on each requested layer when the extractor is
    constructed. The hooks stay attached to ``model`` until ``remove_hooks()``
    is called, so build one extractor per model and reuse it rather than
    creating a new one for every input.

    Args:
        model: the module to run.
        layers: names of sub-modules, as reported by ``model.named_modules()``.

    Raises:
        KeyError: if a requested layer name does not exist in ``model``. All
            names are checked before any hook is attached.
    """

    def __init__(self, model, layers):
        super().__init__()
        self.model = model
        self.layers = layers
        self._features = {layer: None for layer in layers}  # store layer outputs here
        self._hook_handles = []  # kept so the hooks can be detached again

        # Build the name -> module lookup once instead of once per layer
        modules_by_name = dict(self.model.named_modules())

        unknown = [layer_id for layer_id in layers if layer_id not in modules_by_name]
        if unknown:
            raise KeyError(f"layers not found in model: {unknown}")

        # Create hooks for all specified layers at once
        for layer_id in layers:
            handle = modules_by_name[layer_id].register_forward_hook(
                self.save_outputs_hook(layer_id)
            )
            self._hook_handles.append(handle)

    # Hook to save output of layer
    def save_outputs_hook(self, layer_id):
        """Return a forward hook that stores the layer's output under ``layer_id``."""

        def fn(_module, _input, output):
            self._features[layer_id] = output

        return fn

    def remove_hooks(self):
        """Detach every hook this extractor registered on the wrapped model."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles.clear()

    # Forward pass returns extracted features
    def forward(self, input):
        """Run the model on ``input`` and return the ``{layer name: output}`` dict."""
        _ = self.model(input)
        return self._features


# Layers whose outputs are turned into feature descriptors
_RESNET_FD_LAYERS = ("avgpool", "layer3", "fc")

# Shared FeatureExtractor, built on first use. Constructing one registers
# forward hooks on the model, so creating a new extractor per image would keep
# stacking hooks on the same model.
_resnet_fd_extractor = None


def _get_resnet_fd_extractor():
    """Return the shared extractor, (re)building it if `model` has been replaced."""
    global _resnet_fd_extractor
    if _resnet_fd_extractor is None or _resnet_fd_extractor.model is not model:
        _resnet_fd_extractor = FeatureExtractor(
            model=model, layers=list(_RESNET_FD_LAYERS)
        )
    return _resnet_fd_extractor


def resnet_extractor(image, img_channels):
    """Extract the three ResNet50-based feature descriptors for one image.

    Args:
        image: PIL image; resized to 224x224 before being fed to the model.
        img_channels: number of channels the caller detected; anything other
            than 3 is skipped.

    Returns:
        ``(avgpool_fd, layer3_fd, fc_fd)`` as plain Python lists:
        the avgpool output averaged over consecutive pairs (1024 values),
        the layer3 output averaged per channel (1024 values), and the fc
        output (1000 values). ``(None, None, None)`` when ``img_channels != 3``.

    Note:
        The pixel array is height x width x channel, while the model expects
        channel x height x width. The axes are permuted here; reinterpreting the
        flattened buffer with ``view(1, 3, 224, 224)`` would interleave the
        colour channels. Descriptors produced with the reinterpreted layout
        are not comparable with the ones returned here.
    """
    # ResNet50 expects 3 channel image
    if img_channels != 3:
        return (None, None, None)

    resized = transforms.Resize((224, 224))(image)
    pixels = np.array(resized)[:, :, :3]  # H x W x C, first three channels only

    # NOTE(review): raw 0-255 pixel values are fed to the network, as before.
    # Pretrained weights are normally used with [0, 1] scaling and ImageNet
    # mean/std normalisation - confirm whether that should be applied here.
    input_tensor = (
        torch.as_tensor(pixels, dtype=torch.float32)
        .permute(2, 0, 1)  # H x W x C -> C x H x W
        .unsqueeze(0)  # add the batch dimension
        .contiguous()
        .to(dev)
    )

    # Inference only: no autograd graph needed
    with torch.no_grad():
        features = _get_resnet_fd_extractor()(input_tensor)

        # Reshape the 2048 values into rows of 2 and average each row -> 1024
        avgpool_1024_fd = torch.mean(features["avgpool"].reshape(-1, 2), dim=1)

        # Reshape into 1024 rows (one per channel) of 14*14 values and average each row
        layer3_1024_fd = torch.mean(features["layer3"].reshape(1024, -1), dim=1)

        fc_1000_fd = features["fc"].reshape(1000)

    return (
        avgpool_1024_fd.detach().cpu().tolist(),
        layer3_1024_fd.detach().cpu().tolist(),
        fc_1000_fd.detach().cpu().tolist(),
    )


## Process all images and store in collection (one-time processing)

In [None]:
# Range of dataset indices to process (adjust to resume or process a subset)
start = 0
stop = len(dataset)
step = 1

for idx in range(start, stop, step):
    img, label = dataset[idx]

    img_shape = np.array(img).shape

    if len(img_shape) >= 3 and img_shape[2] >= 3:
        cm_fd = CM_transform(img).tolist()
        img_channels = 3
    else:
        # no color moments for grayscale images
        # TODO: perhaps we could do conversion by stacking channels? or is there some grayscale-to-RGB function?
        cm_fd = None
        img_channels = 1

    hog_fd = HOG_transform(img).tolist()
    avgpool_1024_fd, layer3_1024_fd, fc_1000_fd = resnet_extractor(img, img_channels)

    # Store to collection. Upsert keyed on image_id so that re-running this cell
    # overwrites the existing document instead of inserting a duplicate.
    fd_collection.replace_one(
        {"image_id": idx},
        {
            "image_id": idx,
            "true_label": label,
            "channels": img_channels,
            "cm_fd": cm_fd,
            "hog_fd": hog_fd,
            "avgpool_fd": avgpool_1024_fd,
            "layer3_fd": layer3_1024_fd,
            "fc_fd": fc_1000_fd,
        },
        upsert=True,
    )


  skewness = skew(channel_data, axis=None)


In [21]:
# Count every document in the collection (the empty filter matches all of them)
fd_collection.count_documents({})

8677

In [22]:
# Remove duplicates (accidental re-runs): for every image_id keep the first
# document the query returns and delete each later one
distinct_values = fd_collection.distinct("image_id")

for image_id in distinct_values:
    matches = fd_collection.find({"image_id": image_id})
    for position, doc in enumerate(matches):
        if position == 0:
            continue  # the first match is the copy that stays
        print('deleted _id: '+str(doc["_id"]))
        fd_collection.delete_one({"_id": doc["_id"]})

In [24]:
# Inspect the stored document for image_id 0 as a sanity check
fd_collection.find_one({"image_id":0})

{'_id': ObjectId('64fe6e2d58b6f6d24a66bfa5'),
 'image_id': 0,
 'true_label': 0,
 'channels': 3,
 'cm_fd': [[[[100.82333374023438, 40.358543395996094, 0.13933934271335602],
    [99.3499984741211, 44.04740905761719, 0.15900608897209167],
    [94.0433349609375, 45.91791534423828, 0.09955936670303345]],
   [[130.44667053222656, 24.007648468017578, -0.46499618887901306],
    [134.52667236328125, 24.5846004486084, -0.45613545179367065],
    [128.3433380126953, 23.146896362304688, -0.5018450021743774]],
   [[123.83333587646484, 35.85069274902344, -1.0393890142440796],
    [128.33999633789062, 36.404640197753906, -0.9526290893554688],
    [121.1066665649414, 34.42230987548828, -0.9304346442222595]],
   [[96.08000183105469, 60.86192321777344, 0.06415293365716934],
    [100.71333312988281, 62.73349380493164, 0.06397320330142975],
    [100.71333312988281, 60.03324890136719, -0.007280993741005659]],
   [[159.75999450683594, 34.77627182006836, -1.1367359161376953],
    [164.97999572753906, 36.10115