mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 09:34:07 +00:00
838 lines
222 KiB
Plaintext
838 lines
222 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pymongo import MongoClient\n",
|
|
"\n",
|
|
"# Connect to local MongoDB database\n",
|
|
"client = MongoClient(\"mongodb://localhost:27017\")\n",
|
|
"\n",
|
|
"db = client[\"phase_2_madhura\"]\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create/access feature descriptor collection\n",
|
|
"fd_collection = db[\"fd_collection_madhura\"]\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import math\n",
|
|
"import cv2\n",
|
|
"import numpy as np\n",
|
|
"from scipy.stats import skew\n",
|
|
"\n",
|
|
"import torch\n",
|
|
"import torchvision.transforms as transforms\n",
|
|
"\n",
|
|
"import torchvision.datasets as datasets\n",
|
|
"\n",
|
|
"import os\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"\n",
|
|
"load_dotenv()\n",
|
|
"\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"import warnings\n",
|
|
"warnings.filterwarnings('ignore')\n",
|
|
"%matplotlib inline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"root_directory = \"C:/Users/Shubhi/OneDrive/Desktop/Fall 2023/MWDB/phase1\"\n",
|
|
"# Load as tensors of shape (channels, (img_shape))\n",
|
|
"def datasetTransform(image):\n",
|
|
" \n",
|
|
" return transforms.Compose(\n",
|
|
" [\n",
|
|
" transforms.ToTensor() # ToTensor by default scales to [0,1] range, the input range for ResNet\n",
|
|
" ]\n",
|
|
" )(image)\n",
|
|
"\n",
|
|
"\n",
|
|
"dataset = datasets.Caltech101(\n",
|
|
" root=root_directory,\n",
|
|
" download=False, # True if you wish to download for first time\n",
|
|
" transform=datasetTransform,\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Class transform to partition image into rows x cols grid\n",
|
|
"\n",
|
|
"class GridPartition:\n",
|
|
" def __init__(self, rows, cols):\n",
|
|
" self.rows = rows\n",
|
|
" self.cols = cols\n",
|
|
"\n",
|
|
" def __call__(self, img):\n",
|
|
" img_width, img_height = img.size()[1:] # first element is channel\n",
|
|
" cell_width = img_width // self.cols\n",
|
|
" cell_height = img_height // self.rows\n",
|
|
"\n",
|
|
" grids = []\n",
|
|
" for i in range(self.rows):\n",
|
|
" for j in range(self.cols):\n",
|
|
" left = j * cell_width\n",
|
|
" right = left + cell_width\n",
|
|
"\n",
|
|
" top = i * cell_height\n",
|
|
" bottom = top + cell_height\n",
|
|
"\n",
|
|
" # Slice out\n",
|
|
" grid = img[:, left:right, top:bottom]\n",
|
|
" grids.append(grid)\n",
|
|
"\n",
|
|
" return grids\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def compute_color_moments(image):\n",
|
|
" image = np.array(image) # Convert tensor to NumPy array\n",
|
|
" moments = []\n",
|
|
"\n",
|
|
" for channel in range(3): # Iterate over RGB channels\n",
|
|
" channel_data = image[:, :, channel]\n",
|
|
" mean = np.mean(channel_data)\n",
|
|
" std_dev = np.std(channel_data)\n",
|
|
"\n",
|
|
" # Avoiding NaN values\n",
|
|
" skew_cubed = np.mean((channel_data - mean) ** 3)\n",
|
|
" if skew_cubed > 0:\n",
|
|
" skew = math.pow(skew_cubed, float(1) / 3)\n",
|
|
" elif skew_cubed < 0:\n",
|
|
" skew = -math.pow(abs(skew_cubed), float(1) / 3)\n",
|
|
" else:\n",
|
|
" skew = 0\n",
|
|
"\n",
|
|
" moments.append([mean, std_dev, skew])\n",
|
|
"\n",
|
|
" return moments\n",
|
|
"\n",
|
|
"\n",
|
|
"# Iterate over grid cells and return as 1-d array for easier resizing by torch\n",
|
|
"def compute_color_moments_for_grid(grid):\n",
|
|
" color_moments = [compute_color_moments(grid_cell) for grid_cell in grid]\n",
|
|
" return np.array(color_moments).flatten()\n",
|
|
"\n",
|
|
"\n",
|
|
"def combine_color_moments(grid_color_moments):\n",
|
|
" return torch.Tensor(grid_color_moments).view(\n",
|
|
" 10, 10, 3, 3\n",
|
|
" ) # resize as needed: 10x10 grid, 3 channels per cell, 3 moments per channel\n",
|
|
"\n",
|
|
"\n",
|
|
"CM_transform = transforms.Compose(\n",
|
|
" [\n",
|
|
" transforms.Resize((100, 300)), # resize to H:W=100:300\n",
|
|
" GridPartition(\n",
|
|
" rows=10, cols=10\n",
|
|
" ), # partition into grid of 10 rows, 10 columns as a list\n",
|
|
" compute_color_moments_for_grid,\n",
|
|
" combine_color_moments,\n",
|
|
" ]\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def compute_gradient_histogram(grid_cell):\n",
|
|
" histograms = []\n",
|
|
"\n",
|
|
" # Convert grid cell to NumPy array\n",
|
|
" grid_array = np.array(grid_cell, dtype=np.float32)\n",
|
|
" grid_array = grid_array.reshape(\n",
|
|
" grid_array.shape[1], grid_array.shape[2]\n",
|
|
" ) # ignore extra dimension\n",
|
|
"\n",
|
|
" # Compute the gradient using first-order central differences\n",
|
|
" dx = cv2.Sobel(\n",
|
|
" grid_array, cv2.CV_32F, dx=1, dy=0, ksize=1\n",
|
|
" ) # first order x derivative = [-1, 0, 1]\n",
|
|
" dy = cv2.Sobel(\n",
|
|
" grid_array, cv2.CV_32F, dx=0, dy=1, ksize=1\n",
|
|
" ) # first order y derivative = [-1, 0, 1]^T\n",
|
|
"\n",
|
|
" # Compute magnitude and direction of gradients\n",
|
|
" magnitude = np.sqrt(dx**2 + dy**2)\n",
|
|
" direction = np.arctan2(dy, dx) * 180 / np.pi # in degrees\n",
|
|
"\n",
|
|
" # Compute HOG - 9 bins, counted across the range of -180 to 180 degrees, weighted by gradient magnitude\n",
|
|
" histogram, _ = np.histogram(direction, bins=9, range=(-180, 180), weights=magnitude)\n",
|
|
"\n",
|
|
" histograms.append(histogram)\n",
|
|
"\n",
|
|
" return histograms\n",
|
|
"\n",
|
|
"\n",
|
|
"def compute_histograms_for_grid(grid):\n",
|
|
" histograms = [compute_gradient_histogram(grid_cell) for grid_cell in grid]\n",
|
|
" return np.array(histograms).flatten()\n",
|
|
"\n",
|
|
"\n",
|
|
"def combine_histograms(grid_histograms):\n",
|
|
" return torch.Tensor(grid_histograms).view(10, 10, 9)\n",
|
|
"\n",
|
|
"\n",
|
|
"HOG_transform = transforms.Compose(\n",
|
|
" [\n",
|
|
" transforms.Grayscale(num_output_channels=1), # grayscale transform\n",
|
|
" transforms.Resize((100, 300)), # resize to H:W=100:300\n",
|
|
" GridPartition(\n",
|
|
" rows=10, cols=10\n",
|
|
" ), # partition into grid of 10 rows, 10 columns as a list\n",
|
|
" compute_histograms_for_grid,\n",
|
|
" combine_histograms,\n",
|
|
" ]\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import torchvision.models as models\n",
|
|
"\n",
|
|
"# Load model\n",
|
|
"model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)\n",
|
|
"\n",
|
|
"# use GPU (Nvidia)\n",
|
|
"if torch.cuda.is_available():\n",
|
|
" dev = torch.device(\"cuda\")\n",
|
|
" torch.cuda.empty_cache()\n",
|
|
"else:\n",
|
|
" dev = torch.device(\"cpu\")\n",
|
|
"\n",
|
|
"model = model.to(dev)\n",
|
|
"model.eval() # switch to inference mode - important! since we're using pre-trained model\n",
|
|
"\n",
|
|
"\n",
|
|
"# Feature extractor for all layers at once\n",
|
|
"class FeatureExtractor(torch.nn.Module):\n",
|
|
" def __init__(self, model, layers):\n",
|
|
" super().__init__()\n",
|
|
" self.model = model\n",
|
|
" self.layers = layers\n",
|
|
" self._features = {layer: None for layer in layers} # store layer outputs here\n",
|
|
"\n",
|
|
" # Create hooks for all specified layers at once\n",
|
|
" for layer_id in layers:\n",
|
|
" layer = dict(self.model.named_modules())[\n",
|
|
" layer_id\n",
|
|
" ] # get actual layer in the model\n",
|
|
" layer.register_forward_hook(\n",
|
|
" self.save_outputs_hook(layer_id)\n",
|
|
" ) # register feature extractor hook on layer\n",
|
|
"\n",
|
|
" # Hook to save output of layer\n",
|
|
" def save_outputs_hook(self, layer_id):\n",
|
|
" def fn(_module, _input, output):\n",
|
|
" self._features[layer_id] = output\n",
|
|
" return fn\n",
|
|
"\n",
|
|
" # Forward pass returns extracted features\n",
|
|
" def forward(self, input):\n",
|
|
" _ = self.model(input)\n",
|
|
" return self._features\n",
|
|
"\n",
|
|
"\n",
|
|
"def resnet_extractor(image):\n",
|
|
" resized_image = (\n",
|
|
" torch.Tensor(np.array(transforms.Resize((224, 224))(image)).flatten())\n",
|
|
" .view(1, 3, 224, 224)\n",
|
|
" .to(dev)\n",
|
|
" )\n",
|
|
"\n",
|
|
" #complete_resnet_features = model.predict(image)\n",
|
|
" \n",
|
|
" # Attach all hooks on model and extract features\n",
|
|
" resnet_features = FeatureExtractor(model=model, layers=[\"avgpool\", \"layer3\", \"fc\"])\n",
|
|
" features = resnet_features(resized_image)\n",
|
|
"\n",
|
|
" avgpool_2048 = features[\"avgpool\"]\n",
|
|
" # Reshape the vector into rows of 2 adjacent elements and average each row (pair)\n",
|
|
" avgpool_1024_fd = torch.mean(avgpool_2048.view(-1, 2), axis=1)\n",
|
|
"\n",
|
|
" layer3_1024_14_14 = features[\"layer3\"]\n",
|
|
" # Reshape the vector into 1024 rows of 196 elements and average each row\n",
|
|
" layer3_1024_fd = torch.mean(layer3_1024_14_14.view(1024, -1), axis=1)\n",
|
|
"\n",
|
|
" fc_1000_fd = features[\"fc\"].view(1000)\n",
|
|
"\n",
|
|
"\n",
|
|
" return (\n",
|
|
" avgpool_1024_fd.detach().cpu().tolist(),\n",
|
|
" layer3_1024_fd.detach().cpu().tolist(),\n",
|
|
" fc_1000_fd.detach().cpu().tolist(),\n",
|
|
" #complete_resnet_features.detach().cpu().tolist(),\n",
|
|
" )\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import torchvision.models as models\n",
|
|
"import torch\n",
|
|
"import numpy as np\n",
|
|
"from torchvision import transforms\n",
|
|
"\n",
|
|
"# Load model\n",
|
|
"model = models.resnet50(pretrained=True)\n",
|
|
"\n",
|
|
"# use GPU (Nvidia)\n",
|
|
"if torch.cuda.is_available():\n",
|
|
" dev = torch.device(\"cuda\")\n",
|
|
"else:\n",
|
|
" dev = torch.device(\"cpu\")\n",
|
|
"\n",
|
|
"model = model.to(dev)\n",
|
|
"model.eval() # switch to inference mode - important! since we're using pre-trained model\n",
|
|
"\n",
|
|
"def complete_resnet_extractor(image):\n",
|
|
" #resized_image = transforms.Resize((224, 224))(image)\n",
|
|
" #normalized_image = transforms.ToTensor()(resized_image).unsqueeze(0).to(dev)\n",
|
|
" resized_image = (\n",
|
|
" torch.Tensor(np.array(transforms.Resize((224, 224))(image)).flatten())\n",
|
|
" .view(1, 3, 224, 224)\n",
|
|
" .to(dev)\n",
|
|
" )\n",
|
|
"\n",
|
|
" with torch.no_grad():\n",
|
|
" features = model(resized_image)\n",
|
|
"\n",
|
|
" return features.detach().cpu().tolist()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_all_fd(image_id):\n",
|
|
" img, label = dataset[image_id]\n",
|
|
"\n",
|
|
" img_shape = np.array(img).shape\n",
|
|
"\n",
|
|
" if img_shape[0] >= 3:\n",
|
|
" true_channels = 3\n",
|
|
" else:\n",
|
|
" # stacking the grayscale channel on itself thrice to get RGB dimensions\n",
|
|
" img = torch.tensor(np.stack((np.array(img[0, :, :]),) * 3, axis=0))\n",
|
|
" true_channels = 1\n",
|
|
"\n",
|
|
" cm_fd = CM_transform(img).tolist()\n",
|
|
" hog_fd = HOG_transform(img).tolist()\n",
|
|
" avgpool_1024_fd, layer3_1024_fd, fc_1000_fd = resnet_extractor(img)\n",
|
|
" resnet_fd = complete_resnet_extractor(img)\n",
|
|
"\n",
|
|
" return {\n",
|
|
" \"image_id\": image_id,\n",
|
|
" \"true_label\": label,\n",
|
|
" \"true_channels\": true_channels,\n",
|
|
" \"cm_fd\": cm_fd,\n",
|
|
" \"hog_fd\": hog_fd,\n",
|
|
" \"avgpool_fd\": avgpool_1024_fd,\n",
|
|
" \"layer3_fd\": layer3_1024_fd,\n",
|
|
" \"fc_fd\": fc_1000_fd,\n",
|
|
" \"resnet_fd\": resnet_fd,\n",
|
|
" }\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Feature Extraction for full Database"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'\\nstart = 0\\nstop = len(dataset)\\nstep = 2 # even-numbered image IDs only\\n\\nfor idx in range(start, stop, step):\\n image_fd = get_all_fd(idx)\\n\\n # Store to collection (update if existing)\\n fd_collection.update_one(\\n {\"image_id\": idx},\\n {\"$set\": image_fd},\\n upsert=True,\\n )\\n'"
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"\"\"\"\n",
|
|
"start = 0\n",
|
|
"stop = len(dataset)\n",
|
|
"step = 2 # even-numbered image IDs only\n",
|
|
"\n",
|
|
"for idx in range(start, stop, step):\n",
|
|
" image_fd = get_all_fd(idx)\n",
|
|
"\n",
|
|
" # Store to collection (update if existing)\n",
|
|
" fd_collection.update_one(\n",
|
|
" {\"image_id\": idx},\n",
|
|
" {\"$set\": image_fd},\n",
|
|
" upsert=True,\n",
|
|
" )\n",
|
|
"\"\"\"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import math\n",
|
|
"\n",
|
|
"def euclidean_distance_measure(img_1_fd, img_2_fd):\n",
|
|
" img_1_fd_reshaped = img_1_fd.flatten()\n",
|
|
" img_2_fd_reshaped = img_2_fd.flatten()\n",
|
|
"\n",
|
|
" # Calculate Euclidean distance\n",
|
|
" return math.dist(img_1_fd_reshaped, img_2_fd_reshaped)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def cosine_distance_measure(img_1_fd, img_2_fd):\n",
|
|
" img_1_fd_reshaped = img_1_fd.flatten()\n",
|
|
" img_2_fd_reshaped = img_2_fd.flatten()\n",
|
|
"\n",
|
|
" # Calculate dot product\n",
|
|
" dot_product = np.dot(img_1_fd_reshaped, img_2_fd_reshaped.T)\n",
|
|
"\n",
|
|
" # Calculate magnitude (L2 norm) of the feature descriptor\n",
|
|
" magnitude1 = np.linalg.norm(img_1_fd_reshaped)\n",
|
|
" magnitude2 = np.linalg.norm(img_2_fd_reshaped)\n",
|
|
"\n",
|
|
" # Calculate cosine distance (similarity is higher => distance should be lower, so subtract from 1)\n",
|
|
" cosine_similarity = dot_product / (magnitude1 * magnitude2)\n",
|
|
" return 1 - cosine_similarity\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from scipy.stats import pearsonr\n",
|
|
"\n",
|
|
"def pearson_distance_measure(img_1_fd, img_2_fd):\n",
|
|
" # Flatten both descriptors to 1-D (NOTE: no NaN replacement is actually performed here)\n",
|
|
" img_1_fd_reshaped = img_1_fd.flatten()\n",
|
|
" img_2_fd_reshaped = img_2_fd.flatten()\n",
|
|
"\n",
|
|
" # Invert and scale in half to fit the actual range [-1, 1] into the new range [0, 1]\n",
|
|
" # such that lower distance implies more similarity\n",
|
|
" return 0.5 * (1 - pearsonr(img_1_fd_reshaped, img_2_fd_reshaped).statistic)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"valid_feature_models = [\"cm\", \"hog\", \"avgpool\", \"layer3\", \"fc\", \"resnet\"]\n",
|
|
"valid_distance_measures = {\n",
|
|
" \"euclidean\": euclidean_distance_measure,\n",
|
|
" \"cosine\": cosine_distance_measure,\n",
|
|
" \"pearson\": pearson_distance_measure,\n",
|
|
"}\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#function to display similar images\n",
|
|
"def show_similar_images(target_image_id, k, feature_model, distance_measure, save_plots=False):\n",
|
|
" print(\n",
|
|
" \"Showing {} similar images for image ID {}, using {} for {} feature descriptor...\".format(\n",
|
|
" k, target_image_id, distance_measure.__name__, feature_model\n",
|
|
" )\n",
|
|
" )\n",
|
|
"\n",
|
|
" # store target_image itself\n",
|
|
" min_dists = {target_image_id: 0}\n",
|
|
"\n",
|
|
" if target_image_id % 2 == 0:\n",
|
|
" # Get target image's feature descriptors from database\n",
|
|
" target_image = fd_collection.find_one({\"image_id\": target_image_id})\n",
|
|
" else:\n",
|
|
" # Calculate target image's feature descriptors\n",
|
|
" target_image = get_all_fd(target_image_id)\n",
|
|
"\n",
|
|
" target_image_fd = np.array(target_image[feature_model + \"_fd\"])\n",
|
|
"\n",
|
|
" assert (\n",
|
|
" feature_model in valid_feature_models\n",
|
|
" ), \"feature_model should be one of \" + str(valid_feature_models)\n",
|
|
"\n",
|
|
" assert (\n",
|
|
" distance_measure in valid_distance_measures.values()\n",
|
|
" ), \"distance_measure should be one of \" + str(list(valid_distance_measures.keys()))\n",
|
|
"\n",
|
|
" # compare against RGB images only, except for HOG (computed on grayscale, so all images are used)\n",
|
|
" if feature_model != \"hog\":\n",
|
|
" all_images = fd_collection.find({\"true_channels\": 3})\n",
|
|
" else:\n",
|
|
" all_images = fd_collection.find()\n",
|
|
"\n",
|
|
" for cur_img in all_images:\n",
|
|
" cur_img_id = cur_img[\"image_id\"]\n",
|
|
" # skip target itself\n",
|
|
" if cur_img_id == target_image_id:\n",
|
|
" continue\n",
|
|
" cur_img_fd = np.array(cur_img[feature_model + \"_fd\"])\n",
|
|
" cur_dist = distance_measure(\n",
|
|
" cur_img_fd,\n",
|
|
" target_image_fd,\n",
|
|
" )\n",
|
|
"\n",
|
|
" # store first k images irrespective of distance (so that we store no more than k minimum distances)\n",
|
|
" if len(min_dists) < k + 1:\n",
|
|
" min_dists[cur_img_id] = cur_dist\n",
|
|
"\n",
|
|
" # if lower distance:\n",
|
|
" elif cur_dist < max(min_dists.values()):\n",
|
|
" # add to min_dists\n",
|
|
" min_dists.update({cur_img_id: cur_dist})\n",
|
|
" # remove greatest distance by index\n",
|
|
" min_dists.pop(max(min_dists, key=min_dists.get))\n",
|
|
"\n",
|
|
" min_dists = dict(sorted(min_dists.items(), key=lambda item: item[1]))\n",
|
|
"\n",
|
|
" fig, axs = plt.subplots(1, k + 1, figsize=(32, 12))\n",
|
|
" for idx, (img_id, distance) in enumerate(min_dists.items()):\n",
|
|
" cur_img, _cur_label = dataset[img_id]\n",
|
|
" axs[idx].imshow(transforms.ToPILImage()(cur_img))\n",
|
|
" if idx == 0:\n",
|
|
" axs[idx].set_title(f\"Target image\")\n",
|
|
" else:\n",
|
|
" axs[idx].set_title(f\"Distance: {round(distance, 3)}\")\n",
|
|
" axs[idx].axis(\"off\")\n",
|
|
"\n",
|
|
" if save_plots:\n",
|
|
" plt.savefig(\n",
|
|
" f\"Plots/Image_{target_image_id}_{feature_model}_{distance_measure.__name__}_k{k}.png\"\n",
|
|
" )\n",
|
|
"\n",
|
|
" plt.show()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'\\nselected_image_id = int(input(\"Enter image ID: \"))\\nsample_image, sample_label = dataset[selected_image_id]\\nplt.imshow(transforms.ToPILImage()(sample_image))\\nplt.show()\\n\\nk = int(input(\"Enter value of k: \"))\\nif k < 1:\\n raise ValueError(\"k should be positive integer\")\\n\\nselected_feature_model = str(\\n input(\"Enter feature model - one of \" + str(valid_feature_models))\\n)\\n\\nselected_distance_measure = valid_distance_measures[str(\\n input(\"Enter distance measure - one of \" + str(list(valid_distance_measures.keys())))\\n)]\\nshow_similar_images(selected_image_id, k, selected_feature_model, selected_distance_measure, save_plots=False)\\n'"
|
|
]
|
|
},
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"#TASK 0\n",
|
|
"\"\"\"\n",
|
|
"selected_image_id = int(input(\"Enter image ID: \"))\n",
|
|
"sample_image, sample_label = dataset[selected_image_id]\n",
|
|
"plt.imshow(transforms.ToPILImage()(sample_image))\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"k = int(input(\"Enter value of k: \"))\n",
|
|
"if k < 1:\n",
|
|
" raise ValueError(\"k should be positive integer\")\n",
|
|
"\n",
|
|
"selected_feature_model = str(\n",
|
|
" input(\"Enter feature model - one of \" + str(valid_feature_models))\n",
|
|
")\n",
|
|
"\n",
|
|
"selected_distance_measure = valid_distance_measures[str(\n",
|
|
" input(\"Enter distance measure - one of \" + str(list(valid_distance_measures.keys())))\n",
|
|
")]\n",
|
|
"show_similar_images(selected_image_id, k, selected_feature_model, selected_distance_measure, save_plots=False)\n",
|
|
"\"\"\"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'\\ndataset_label = int(input(\"Enter label no. (between 0 to 100): \"))\\nif dataset_label < 0 or dataset_label>100:\\n raise ValueError(\"dataset_label should be between 0 and 100 only\")\\nselected_image_id=-1\\nquery = {\"true_label\": dataset_label} \\ncursor = fd_collection.find(query).limit(1)\\nselected_image_id=cursor[0][\"image_id\"]\\n\\nsample_image, sample_label = dataset[selected_image_id]\\nplt.imshow(transforms.ToPILImage()(sample_image))\\nplt.show()\\n\\nk = int(input(\"Enter value of k: \"))\\nif k < 1:\\n raise ValueError(\"k should be positive integer\")\\n\\nselected_feature_model = str(\\n input(\"Enter feature model - one of \" + str(valid_feature_models))\\n)\\n\\nselected_distance_measure = valid_distance_measures[str(\\n input(\"Enter distance measure - one of \" + str(list(valid_distance_measures.keys())))\\n)]\\n\\nshow_similar_images(selected_image_id, k, selected_feature_model, selected_distance_measure, save_plots=False)\\n'"
|
|
]
|
|
},
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"#TASK 1-we'll be using pranav's code (mean method)\n",
|
|
"\"\"\"\n",
|
|
"dataset_label = int(input(\"Enter label no. (between 0 to 100): \"))\n",
|
|
"if dataset_label < 0 or dataset_label>100:\n",
|
|
" raise ValueError(\"dataset_label should be between 0 and 100 only\")\n",
|
|
"selected_image_id=-1\n",
|
|
"query = {\"true_label\": dataset_label} \n",
|
|
"cursor = fd_collection.find(query).limit(1)\n",
|
|
"selected_image_id=cursor[0][\"image_id\"]\n",
|
|
"\n",
|
|
"sample_image, sample_label = dataset[selected_image_id]\n",
|
|
"plt.imshow(transforms.ToPILImage()(sample_image))\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"k = int(input(\"Enter value of k: \"))\n",
|
|
"if k < 1:\n",
|
|
" raise ValueError(\"k should be positive integer\")\n",
|
|
"\n",
|
|
"selected_feature_model = str(\n",
|
|
" input(\"Enter feature model - one of \" + str(valid_feature_models))\n",
|
|
")\n",
|
|
"\n",
|
|
"selected_distance_measure = valid_distance_measures[str(\n",
|
|
" input(\"Enter distance measure - one of \" + str(list(valid_distance_measures.keys())))\n",
|
|
")]\n",
|
|
"\n",
|
|
"show_similar_images(selected_image_id, k, selected_feature_model, selected_distance_measure, save_plots=False)\n",
|
|
"\"\"\"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Displaying 5 similar labels for image ID 1, using cosine_distance_measure for resnet feature descriptor...\n",
|
|
"Label of target image: 0\n",
|
|
"Label: 1 ; distance: 0.08507879924049078\n",
|
|
"Label: 83 ; distance: 0.39863399244364184\n",
|
|
"Label: 96 ; distance: 0.4092512330836007\n",
|
|
"Label: 100 ; distance: 0.42865075274386166\n",
|
|
"Label: 31 ; distance: 0.4400374717970962\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"#TASK 2a\n",
|
|
"\n",
|
|
"#function to display similar labels\n",
|
|
"def show_similar_labels(target_image_id, k, feature_model, distance_measure, save_plots=False):\n",
|
|
" print(\n",
|
|
" \"Displaying {} similar labels for image ID {}, using {} for {} feature descriptor...\".format(\n",
|
|
" k, target_image_id, distance_measure.__name__, feature_model\n",
|
|
" #k, target_image_id, distance_measure, feature_model\n",
|
|
" )\n",
|
|
" )\n",
|
|
"\n",
|
|
" # store target_image itself\n",
|
|
" min_dists = {target_image_id: 0}\n",
|
|
" \n",
|
|
"\n",
|
|
" if target_image_id % 2 == 0:\n",
|
|
" # Get target image's feature descriptors from database\n",
|
|
" target_image = fd_collection.find_one({\"image_id\": target_image_id})\n",
|
|
" else:\n",
|
|
" # Calculate target image's feature descriptors\n",
|
|
" target_image = get_all_fd(target_image_id)\n",
|
|
" \n",
|
|
" #cursor = fd_collection.find({\"image_id\": target_image_id})\n",
|
|
" #print(\"cursor\", cursor)\n",
|
|
" label=target_image[\"true_label\"]\n",
|
|
" print(\"Label of target image: \", label)\n",
|
|
" label_dict = {target_image_id: label}\n",
|
|
" \n",
|
|
" target_image_fd = np.array(target_image[feature_model + \"_fd\"])\n",
|
|
"\n",
|
|
" assert (\n",
|
|
" feature_model in valid_feature_models\n",
|
|
" ), \"feature_model should be one of \" + str(valid_feature_models)\n",
|
|
"\n",
|
|
" assert (\n",
|
|
" distance_measure in valid_distance_measures.values()\n",
|
|
" ), \"distance_measure should be one of \" + str(list(valid_distance_measures.keys()))\n",
|
|
"\n",
|
|
" # compare against RGB images only, except for HOG (computed on grayscale, so all images are used)\n",
|
|
" if feature_model != \"hog\":\n",
|
|
" all_images = fd_collection.find({\"true_channels\": 3})\n",
|
|
" else:\n",
|
|
" all_images = fd_collection.find()\n",
|
|
"\n",
|
|
" for cur_img in all_images:\n",
|
|
" cur_img_id = cur_img[\"image_id\"]\n",
|
|
" # skip target itself\n",
|
|
" if cur_img_id == target_image_id:\n",
|
|
" continue\n",
|
|
" cur_img_fd = np.array(cur_img[feature_model + \"_fd\"])\n",
|
|
" cur_dist = distance_measure(\n",
|
|
" cur_img_fd,\n",
|
|
" target_image_fd,\n",
|
|
" )\n",
|
|
" cursor = fd_collection.find({\"image_id\": cur_img_id})\n",
|
|
" label=cursor[0][\"true_label\"]\n",
|
|
"\n",
|
|
" # store first k images irrespective of distance (so that we store no more than k minimum distances)\n",
|
|
" if len(min_dists) < k + 1 and label not in label_dict.values():\n",
|
|
" min_dists[cur_img_id] = cur_dist\n",
|
|
" label_dict[cur_img_id] = label\n",
|
|
"\n",
|
|
" # if lower distance:\n",
|
|
" elif cur_dist < max(min_dists.values()) and label not in label_dict.values():\n",
|
|
" # add to min_dists\n",
|
|
" min_dists.update({cur_img_id: cur_dist})\n",
|
|
" label_dict.update({cur_img_id: label})\n",
|
|
" # remove greatest distance by index\n",
|
|
" pop_key=max(min_dists, key=min_dists.get)\n",
|
|
" min_dists.pop(pop_key)\n",
|
|
" label_dict.pop(pop_key)\n",
|
|
"\n",
|
|
" min_dists = dict(sorted(min_dists.items(), key=lambda item: item[1]))\n",
|
|
"\n",
|
|
" for image_id in min_dists.keys():\n",
|
|
" if image_id==target_image_id:\n",
|
|
" continue\n",
|
|
" else:\n",
|
|
" print(\"Label: \", label_dict[image_id], \"; distance: \", min_dists[image_id])\n",
|
|
" \n",
|
|
"#---------------------------------------------------------------------------------------------------------------------------\n",
|
|
"\n",
|
|
"selected_image_id = int(input(\"Enter image ID: \"))\n",
|
|
"sample_image, sample_label = dataset[selected_image_id]\n",
|
|
"plt.imshow(transforms.ToPILImage()(sample_image))\n",
|
|
"plt.show()\n",
|
|
"\n",
|
|
"k = int(input(\"Enter value of k: \"))\n",
|
|
"if k < 1:\n",
|
|
" raise ValueError(\"k should be positive integer\")\n",
|
|
"\"\"\"\n",
|
|
"selected_feature_model = str(\n",
|
|
" input(\"Enter feature model - one of \" + str(valid_feature_models))\n",
|
|
")\n",
|
|
"\n",
|
|
"selected_distance_measure = valid_distance_measures[str(\n",
|
|
" input(\"Enter distance measure - one of \" + str(list(valid_distance_measures.keys())))\n",
|
|
")]\n",
|
|
"show_similar_labels(selected_image_id, k, selected_feature_model, selected_distance_measure, save_plots=False)\n",
|
|
"\"\"\"\n",
|
|
"\n",
|
|
"#TASK 2b\n",
|
|
"\n",
|
|
"selected_feature_model = \"resnet\"\n",
|
|
"selected_distance_measure = valid_distance_measures[\"cosine\"]\n",
|
|
"show_similar_labels(selected_image_id, k, selected_feature_model, selected_distance_measure, save_plots=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.11"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|