From 6de50666e87a39aa67bcda279bc3672d0e986823 Mon Sep 17 00:00:00 2001 From: Kaushik Narayan R Date: Sat, 9 Sep 2023 13:24:01 -0700 Subject: [PATCH] task 2 done, task 1 slightly edited --- 1.ipynb | 73 ++++++----- 2.ipynb | 378 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 413 insertions(+), 38 deletions(-) create mode 100644 2.ipynb diff --git a/1.ipynb b/1.ipynb index 79ad982..5a70f24 100644 --- a/1.ipynb +++ b/1.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 13, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ "dataset_path = \"C:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Datasets\"\n", "\n", "dataset = datasets.Caltech101(\n", - " root=\"C:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Datasets\",\n", + " root=dataset_path,\n", " download=False, # True if you wish to download for first time\n", ")\n" ] @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -75,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -114,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -143,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -162,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -617,7 +617,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -674,7 +674,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -701,12 +701,12 @@ "\n", " histograms.append(histogram)\n", "\n", - " return histograms\n" + " return histograms" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -717,7 +717,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -727,7 +727,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -1072,7 +1072,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -1117,7 +1117,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1215,7 +1215,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1279,7 +1279,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1470,7 +1470,7 @@ ")" ] }, - "execution_count": 30, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1504,27 +1504,24 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "# Generalized feature extractor module for any PyTorch model\n", + "# Feature extractor for all layers at once\n", + "\n", "\n", "class FeatureExtractor(torch.nn.Module):\n", " def __init__(self, model, layers):\n", " super().__init__()\n", " 
self.model = model\n", " self.layers = layers\n", - " self._features = {\n", - " layer: torch.empty(0) for layer in layers\n", - " } # store layer outputs here\n", + " self._features = {layer: None for layer in layers} # store layer outputs here\n", "\n", " # Create hooks for all specified layers at once\n", " for layer_id in layers:\n", - " layer = dict([*self.model.named_modules()])[layer_id] # get actual layer in the model\n", - " layer.register_forward_hook(\n", - " self.save_outputs_hook(layer_id)\n", - " ) # register feature extractor hook on layer\n", + " layer = dict(self.model.named_modules())[layer_id] # get actual layer in the model\n", + " layer.register_forward_hook(self.save_outputs_hook(layer_id)) # register feature extractor hook on layer\n", "\n", " # Hook to save output of layer\n", " def save_outputs_hook(self, layer_id):\n", @@ -1534,21 +1531,21 @@ " return fn\n", "\n", " # Forward pass returns extracted features\n", - " def forward(self, x):\n", - " _ = self.model(x)\n", - " return self._features\n" + " def forward(self, input):\n", + " _ = self.model(input)\n", + " return self._features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Resize image to 224x224 with 3 channels for ResNet50" + "Resize image to 224x224 with 3 channels for ResNet50 (no grayscale)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -1561,7 +1558,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1591,7 +1588,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1624,7 +1621,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1705,7 +1702,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1924,7 +1921,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 23, "metadata": {}, "outputs": [ { diff --git a/2.ipynb b/2.ipynb new file mode 100644 index 0000000..57b33b8 --- /dev/null +++ b/2.ipynb @@ -0,0 +1,378 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pymongo import MongoClient\n", + "\n", + "# Connect to local MongoDB database\n", + "client = MongoClient(\"mongodb://localhost:27017\")\n", + "\n", + "db = client[\"knravish_mwdb_phase_1\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Create/access feature descriptor collection\n", + "fd_collection = db[\"fd_collection\"]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### (Task 1's code without visualization)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import numpy as np\n", + "from scipy.stats import skew\n", + "\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "\n", + "import torchvision.datasets as datasets\n", + "\n", + "dataset_path = \"C:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Datasets\"\n", + "\n", + "dataset = datasets.Caltech101(\n", + " root=dataset_path,\n", + " download=False, # True if you wish to download for first time\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Class 
transform to partition image into rows x cols grid\n", + "\n", + "\n", + "class GridPartition:\n", + " def __init__(self, rows, cols):\n", + " self.rows = rows\n", + " self.cols = cols\n", + "\n", + " def __call__(self, img):\n", + " img_width, img_height = img.size\n", + " cell_width = img_width // self.cols\n", + " cell_height = img_height // self.rows\n", + "\n", + " grids = []\n", + " for i in range(self.rows):\n", + " for j in range(self.cols):\n", + " left = j * cell_width\n", + " top = i * cell_height\n", + " right = left + cell_width\n", + " bottom = top + cell_height\n", + " grid = img.crop((left, top, right, bottom))\n", + " grids.append(grid)\n", + "\n", + " return grids\n", + "\n", + "\n", + "def compute_color_moments(image):\n", + " image = np.array(image) # Convert PIL Image to NumPy array\n", + " moments = []\n", + "\n", + " for channel in range(3): # Iterate over RGB channels\n", + " channel_data = image[:, :, channel]\n", + " mean = np.mean(channel_data)\n", + " std_dev = np.std(channel_data)\n", + " skewness = skew(channel_data, axis=None)\n", + " moments.append([mean, std_dev, skewness])\n", + "\n", + " return moments\n", + "\n", + "\n", + "# Iterate over grid cells and return as 1-d array for easier resizing by torch\n", + "def compute_color_moments_for_grid(grid):\n", + " color_moments = [compute_color_moments(grid_cell) for grid_cell in grid]\n", + " return np.array(color_moments).flatten()\n", + "\n", + "\n", + "def combine_color_moments(grid_color_moments):\n", + " return torch.Tensor(grid_color_moments).view(\n", + " 10, 10, 3, 3\n", + " ) # resize as needed: 10x10 grid, 3 channels per cell, 3 moments per channel\n", + "\n", + "\n", + "CM_transform = transforms.Compose(\n", + " [\n", + " transforms.Resize((100, 300)), # resize to H:W=100:300\n", + " GridPartition(\n", + " rows=10, cols=10\n", + " ), # partition into grid of 10 rows, 10 columns as a list\n", + " compute_color_moments_for_grid,\n", + " combine_color_moments,\n", + " ]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def compute_gradient_histogram(grid_cell):\n", + " histograms = []\n", + "\n", + " # Convert grid cell to NumPy array\n", + " grid_array = np.array(grid_cell, dtype=np.uint8)\n", + "\n", + " # Compute the gradient using first-order central differences\n", + " dx = cv2.Sobel(\n", + " grid_array, cv2.CV_32F, dx=1, dy=0, ksize=1\n", + " ) # first order x derivative = [-1, 0, 1]\n", + " dy = cv2.Sobel(\n", + " grid_array, cv2.CV_32F, dx=0, dy=1, ksize=1\n", + " ) # first order y derivative = [-1, 0, 1]^T\n", + "\n", + " # Compute magnitude and direction of gradients\n", + " magnitude = np.sqrt(dx**2 + dy**2)\n", + " direction = np.arctan2(dy, dx) * 180 / np.pi # in degrees\n", + "\n", + " # Compute HOG - 9 bins, counted across the range of -180 to 180 degrees, weighted by gradient magnitude\n", + " histogram, _ = np.histogram(direction, bins=9, range=(-180, 180), weights=magnitude)\n", + "\n", + " histograms.append(histogram)\n", + "\n", + " return histograms\n", + "\n", + "\n", + "def compute_histograms_for_grid(grid):\n", + " histograms = [compute_gradient_histogram(grid_cell) for grid_cell in grid]\n", + " return np.array(histograms).flatten()\n", + "\n", + "\n", + "def combine_histograms(grid_histograms):\n", + " return torch.Tensor(grid_histograms).view(10, 10, 9)\n", + "\n", + "\n", + "HOG_transform = transforms.Compose(\n", + " [\n", + " transforms.Grayscale(num_output_channels=1), # grayscale transform\n", + " 
transforms.Resize((100, 300)), # resize to H:W=100:300\n", + " GridPartition(\n", + " rows=10, cols=10\n", + " ), # partition into grid of 10 rows, 10 columns as a list\n", + " compute_histograms_for_grid,\n", + " combine_histograms,\n", + " ]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision.models as models\n", + "\n", + "# Load model\n", + "model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)\n", + "\n", + "# use GPU (Nvidia)\n", + "if torch.cuda.is_available():\n", + " dev = torch.device(\"cuda\")\n", + " torch.cuda.empty_cache()\n", + "else:\n", + " dev = torch.device(\"cpu\")\n", + "\n", + "model = model.to(dev)\n", + "\n", + "\n", + "# Feature extractor for all layers at once\n", + "\n", + "\n", + "class FeatureExtractor(torch.nn.Module):\n", + " def __init__(self, model, layers):\n", + " super().__init__()\n", + " self.model = model\n", + " self.layers = layers\n", + " self._features = {layer: None for layer in layers} # store layer outputs here\n", + "\n", + " # Create hooks for all specified layers at once\n", + " for layer_id in layers:\n", + " layer = dict(self.model.named_modules())[layer_id] # get actual layer in the model\n", + " layer.register_forward_hook(self.save_outputs_hook(layer_id)) # register feature extractor hook on layer\n", + "\n", + " # Hook to save output of layer\n", + " def save_outputs_hook(self, layer_id):\n", + " def fn(_module, _input, output):\n", + " self._features[layer_id] = output\n", + "\n", + " return fn\n", + "\n", + " # Forward pass returns extracted features\n", + " def forward(self, input):\n", + " _ = self.model(input)\n", + " return self._features\n", + "\n", + "\n", + "def resnet_extractor(image, img_channels):\n", + " # ResNet50 expects 3 channel image\n", + " if img_channels != 3:\n", + " return (None, None, None)\n", + "\n", + " resized_image = (\n", + " torch.Tensor(np.array(transforms.Resize((224, 224))(image)).flatten())\n", + " .view(1, 3, 224, 224)\n", + " .to(dev)\n", + " )\n", + "\n", + " # Attach all hooks on model and extract features\n", + " resnet_features = FeatureExtractor(model=model, layers=[\"avgpool\", \"layer3\", \"fc\"])\n", + " features = resnet_features(resized_image)\n", + "\n", + " avgpool_2048 = features[\"avgpool\"]\n", + " # Reshape the vector into row pairs of elements and average across rows\n", + " avgpool_1024_fd = torch.mean(avgpool_2048.view(-1, 2), axis=1)\n", + "\n", + " layer3_1024_14_14 = features[\"layer3\"]\n", + " # Reshape the vector into 1024 rows of 196 elements and average across rows\n", + " layer3_1024_fd = torch.mean(layer3_1024_14_14.view(1024, -1), axis=1)\n", + "\n", + " fc_1000_fd = features[\"fc\"].view(1000)\n", + "\n", + " return (\n", + " avgpool_1024_fd.detach().cpu().tolist(),\n", + " layer3_1024_fd.detach().cpu().tolist(),\n", + " fc_1000_fd.detach().cpu().tolist(),\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Process all images and store in collection (one-time processing)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\rknar\\AppData\\Local\\Temp\\ipykernel_8384\\3604855272.py:35: RuntimeWarning: Precision loss occurred in moment calculation due to catastrophic cancellation. This occurs when the data are nearly identical. 
Results may be unreliable.\n", + " skewness = skew(channel_data, axis=None)\n" + ] + } + ], + "source": [ + "start = 0\n", + "stop = len(dataset)\n", + "step = 1\n", + "\n", + "for idx in range(start, stop, step):\n", + " img, label = dataset[idx]\n", + "\n", + " img_shape = np.array(img).shape\n", + "\n", + " if len(img_shape) >= 3 and img_shape[2] >= 3:\n", + " cm_fd = CM_transform(img).tolist()\n", + " img_channels = 3\n", + " else:\n", + " # no color moments for grayscale images\n", + " # TODO: perhaps we could do conversion by stacking channels? or is there some grayscale-to-RGB function?\n", + " cm_fd = None\n", + " img_channels = 1\n", + "\n", + " hog_fd = HOG_transform(img).tolist()\n", + " avgpool_1024_fd, layer3_1024_fd, fc_1000_fd = resnet_extractor(img, img_channels)\n", + "\n", + " # Store to collection\n", + " fd_collection.insert_one(\n", + " {\n", + " \"image_id\": idx,\n", + " \"true_label\": label,\n", + " \"channels\": img_channels,\n", + " \"cm_fd\": cm_fd,\n", + " \"hog_fd\": hog_fd,\n", + " \"avgpool_fd\": avgpool_1024_fd,\n", + " \"layer3_fd\": layer3_1024_fd,\n", + " \"fc_fd\": fc_1000_fd,\n", + " }\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1581" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Remove duplicates (accidental re-runs)\n", + "distinct_values = fd_collection.distinct(\"image_id\")\n", + "\n", + "for fieldValue in distinct_values:\n", + " i = 0\n", + " for doc in fd_collection.find({\"image_id\": fieldValue}):\n", + " if i:\n", + " fd_collection.delete_one({\"_id\": doc[\"_id\"]})\n", + " i += 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}
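The rewritten FeatureExtractor (edited in 1.ipynb and reused in 2.ipynb) hinges on two PyTorch mechanisms: named_modules() to resolve a layer name such as "avgpool" to the concrete module, and register_forward_hook() to capture that module's output during a forward pass. A minimal, self-contained sketch of the same pattern, with a dummy batch standing in for a real image (the layer names are the ones the notebooks use; the rest is illustrative):

import torch
import torchvision.models as models

model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT).eval()
captured = {}

def make_hook(name):
    def hook(_module, _input, output):
        captured[name] = output.detach()
    return hook

# named_modules() yields (dotted_name, module) pairs, so a dict lookup
# resolves "layer3"/"avgpool"/"fc" to the actual nn.Module objects
modules = dict(model.named_modules())
handles = [modules[name].register_forward_hook(make_hook(name))
           for name in ("layer3", "avgpool", "fc")]

with torch.no_grad():
    _ = model(torch.randn(1, 3, 224, 224))  # dummy batch in place of an image

print(captured["layer3"].shape)   # torch.Size([1, 1024, 14, 14])
print(captured["avgpool"].shape)  # torch.Size([1, 2048, 1, 1])
print(captured["fc"].shape)       # torch.Size([1, 1000])

# Keeping the returned handles allows unhooking; worth noting because
# resnet_extractor in 2.ipynb constructs a new FeatureExtractor on the
# shared model for every image, so hooks appear to accumulate across calls.
for h in handles:
    h.remove()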
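For the color-moment descriptor, 2.ipynb resizes to 100x300, partitions into a 10x10 grid (so each cell is 10x30 pixels), and computes mean, standard deviation, and skewness per RGB channel per cell. A worked sketch of compute_color_moments on one cell (the random cell is only a stand-in for a real crop):

import numpy as np
from scipy.stats import skew

rng = np.random.default_rng(0)
cell = rng.integers(0, 256, size=(10, 30, 3)).astype(np.float64)  # one 10x30 RGB cell

moments = []
for channel in range(3):  # R, G, B
    data = cell[:, :, channel]
    moments.append([np.mean(data), np.std(data), skew(data, axis=None)])

print(np.array(moments).shape)  # (3, 3): 3 channels x (mean, std, skewness)
# Across all 100 cells this flattens to 10*10*3*3 = 900 values, which is
# exactly what the .view(10, 10, 3, 3) in combine_color_moments expects.

The RuntimeWarning captured in the processing cell's output comes from this skew call: on a nearly constant cell (e.g., flat background), the third moment is computed through catastrophic cancellation and SciPy flags the result as unreliable.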
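The HOG-style descriptor grayscales the image, takes first-order central differences via ksize=1 Sobel calls, and builds a 9-bin, magnitude-weighted histogram of signed gradient directions per grid cell. The same steps on one synthetic cell (bin edges and weighting mirror compute_gradient_histogram):

import cv2
import numpy as np

rng = np.random.default_rng(1)
cell = rng.integers(0, 256, size=(10, 30)).astype(np.uint8)  # one grayscale cell

# ksize=1 reduces the Sobel operator to the plain central-difference
# kernels [-1, 0, 1] and its transpose, as the notebook's comments note
dx = cv2.Sobel(cell, cv2.CV_32F, dx=1, dy=0, ksize=1)
dy = cv2.Sobel(cell, cv2.CV_32F, dx=0, dy=1, ksize=1)

magnitude = np.sqrt(dx**2 + dy**2)
direction = np.degrees(np.arctan2(dy, dx))  # signed angles in [-180, 180]

# nine 40-degree bins over the signed range, weighted by gradient magnitude
hist, edges = np.histogram(direction, bins=9, range=(-180, 180), weights=magnitude)
print(hist.shape)  # (9,)
print(edges[:4])   # [-180. -140. -100.  -60.]
# 100 cells x 9 bins matches the .view(10, 10, 9) in combine_histograms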
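resnet_extractor reduces the hooked outputs to fixed-length vectors: the 2048-d avgpool output is halved by averaging consecutive pairs, and layer3's 1024x14x14 map is collapsed by averaging each channel's 196 spatial values. A shape-level check with dummy tensors in place of real hook outputs:

import torch

# avgpool: 1x2048x1x1 -> view(-1, 2) pairs consecutive elements, and the
# mean over the second dimension halves the vector to 1024 entries
avgpool_2048 = torch.arange(2048, dtype=torch.float32).view(1, 2048, 1, 1)
avgpool_1024 = torch.mean(avgpool_2048.view(-1, 2), axis=1)
print(avgpool_1024.shape)  # torch.Size([1024])
print(avgpool_1024[:3])    # tensor([0.5000, 2.5000, 4.5000]) -> pairwise means

# layer3: 1x1024x14x14 -> view(1024, -1) flattens each channel's 14x14 map
# to 196 values; the mean collapses the spatial dimensions
layer3 = torch.randn(1, 1024, 14, 14)
layer3_1024 = torch.mean(layer3.view(1024, -1), axis=1)
print(layer3_1024.shape)   # torch.Size([1024])

One detail worth double-checking in resnet_extractor itself: np.array(pil_image) is laid out H x W x C, so .flatten().view(1, 3, 224, 224) does not yield the C x H x W tensor ResNet-50 was trained on, and the ImageNet normalization step is skipped; transforms.ToTensor()(image).unsqueeze(0) followed by Normalize would be the conventional route.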
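Two loose ends in the storage loop can be closed with standard PIL/pymongo calls. A hedged sketch (database and collection names match 2.ipynb; the stub document is illustrative only):

import numpy as np
from PIL import Image
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")
fd_collection = client["knravish_mwdb_phase_1"]["fd_collection"]

# (1) The TODO about grayscale-to-RGB: PIL's convert("RGB") replicates the
# single channel three times (transforms.Grayscale(num_output_channels=3)
# does the same), so color moments could be computed for 1-channel images
# too, with the caveat that all three channels would be identical.
gray = Image.new("L", (8, 8), color=128)
print(np.array(gray.convert("RGB")).shape)  # (8, 8, 3)

# (2) Keying writes on image_id with upsert=True makes accidental re-runs
# overwrite the existing document instead of inserting a duplicate, which
# would make the manual cleanup loop in the last cell unnecessary.
doc = {"image_id": 0, "channels": 3}  # illustrative stub
fd_collection.replace_one({"image_id": doc["image_id"]}, doc, upsert=True)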