From 6adc8bcf70eca85d38cf7ecb8fc948ce39841722 Mon Sep 17 00:00:00 2001
From: pranavbrkr <pranavbrkr@gmail.com>
Date: Sat, 14 Oct 2023 05:59:55 -0700
Subject: [PATCH] task 8

---
 Phase 2/task_8.ipynb | 342 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 342 insertions(+)
 create mode 100644 Phase 2/task_8.ipynb

diff --git a/Phase 2/task_8.ipynb b/Phase 2/task_8.ipynb
new file mode 100644
index 0000000..da19b0b
--- /dev/null
+++ b/Phase 2/task_8.ipynb	
@@ -0,0 +1,342 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 140,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 141,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "import numpy as np\n",
+    "from utils import *\n",
+    "import math\n",
+    "import heapq\n",
+    "import random"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 142,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n",
+    "all_images = list(fd_collection.find())  # materialised once: all_images[i] is indexed inside loops later (assumes find() yields an iterable cursor - confirm)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 143,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "image_sim-cm_fd-kmeans-10-semantics.json loaded\n"
+     ]
+    }
+   ],
+   "source": [
+    "selected_latent_space = valid_latent_spaces[\n",
+    "    str(input(\"Enter latent space - one of \" + str(list(valid_latent_spaces.keys()))))\n",
+    "]\n",
+    "\n",
+    "selected_feature_model = valid_feature_models[\n",
+    "    str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
+    "]\n",
+    "\n",
+    "k = int(input(\"Enter value of k: \"))\n",
+    "if k < 1:\n",
+    "    raise ValueError(\"k should be a positive integer\")\n",
+    "\n",
+    "selected_dim_reduction_method = str(\n",
+    "    input(\n",
+    "        \"Enter dimensionality reduction method - one of \"\n",
+    "        + str(list(valid_dim_reduction_methods.keys()))\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "image_id = int(input(\"Enter image ID: \"))\n",
+    "# Reject the ID as soon as any one check fails: the three checks are\n",
+    "# alternatives, so they are combined with `or` (with `and` none could fire).\n",
+    "if image_id < 0 or image_id > 8676 or image_id % 2 != 0:\n",
+    "    raise ValueError(\"image id should be even number between 0 and 8676\")\n",
+    "\n",
+    "# all_images is indexed by image_id // 2 (only even IDs are accepted above).\n",
+    "img_label = all_images[image_id // 2][\"true_label\"]\n",
+    "\n",
+    "knum = int(input(\"Enter value of knum: \"))\n",
+    "if knum < 1:\n",
+    "    raise ValueError(\"knum should be a positive integer\")\n",
+    "\n",
+    "match selected_latent_space:\n",
+    "    case \"\":\n",
+    "        semantics_file = f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"\n",
+    "    case \"cp\":\n",
+    "        semantics_file = f\"{selected_feature_model}-cp-{k}-semantics.json\"\n",
+    "    case _:\n",
+    "        semantics_file = f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"\n",
+    "\n",
+    "# Fail loudly when the file is missing so that later cells cannot run with an\n",
+    "# undefined `data`, or with a stale one left in the kernel by an earlier run.\n",
+    "if not os.path.exists(semantics_file):\n",
+    "    raise FileNotFoundError(f\"{semantics_file} does not exist\")\n",
+    "# The context manager closes the handle that json.load(open(...)) would leak.\n",
+    "with open(semantics_file, encoding=\"utf-8\") as semantics_fp:\n",
+    "    data = json.load(semantics_fp)\n",
+    "print(f\"{semantics_file} loaded\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_similarities_ls1_ls4(latent_space, dim_reduction, selected_feature_model, data, image_id):\n",
+    "  \"\"\"Print the knum closest distinct labels to a query image (LS1 / LS4).\n",
+    "\n",
+    "  latent_space: \"image_sim\" takes the query from the stored data (its row of the\n",
+    "    latent space, or of \"sim-matrix\" for kmeans); otherwise its descriptor is used.\n",
+    "  dim_reduction: \"svd\", \"nmf\" or \"kmeans\"; anything else raises ValueError.\n",
+    "  selected_feature_model: key of the descriptor in each all_images document.\n",
+    "  data: loaded semantics JSON (\"image-semantic\", \"semantic-feature\", ...).\n",
+    "  image_id: query image ID; image_id // 2 is used as its row index.\n",
+    "  Reads the notebook globals all_images and knum; prints and returns None.\n",
+    "  \"\"\"\n",
+    "  query_index = image_id // 2\n",
+    "  image_fd = np.array(all_images[query_index][selected_feature_model]).flatten()\n",
+    "\n",
+    "  match dim_reduction:\n",
+    "    case 'svd':\n",
+    "      U = np.array(data[\"image-semantic\"])\n",
+    "      S = np.array(data[\"semantics-core\"])\n",
+    "      if len(S.shape) == 1:\n",
+    "        S = np.diag(S)  # core stored as a vector: expand to a diagonal matrix\n",
+    "      V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
+    "      comparison_feature_space = np.matmul(U, S)\n",
+    "      if latent_space == \"image_sim\":\n",
+    "        comparison_vector = comparison_feature_space[query_index]\n",
+    "      else:\n",
+    "        # NOTE(review): if 'semantic-feature' holds V^T, image_fd @ V is already\n",
+    "        # in the U.S space and the extra S factor may over-scale it - confirm.\n",
+    "        comparison_vector = np.matmul(np.matmul(image_fd, V), S)\n",
+    "    case \"nmf\":\n",
+    "      H = np.array(data['semantic-feature'])\n",
+    "      comparison_feature_space = np.array(data['image-semantic'])\n",
+    "      if latent_space == \"image_sim\":\n",
+    "        comparison_vector = comparison_feature_space[query_index]\n",
+    "      else:\n",
+    "        comparison_vector = np.matmul(image_fd, np.transpose(H))\n",
+    "    case \"kmeans\":\n",
+    "      comparison_feature_space = np.array(data[\"image-semantic\"])\n",
+    "      centroids = np.array(data[\"semantic-feature\"])\n",
+    "      # Pick the query once (not per centroid); the comparison vector is its distance to each centroid.\n",
+    "      query = np.array(data[\"sim-matrix\"])[query_index] if latent_space == \"image_sim\" else image_fd\n",
+    "      comparison_vector = [math.dist(query, centroid) for centroid in centroids]\n",
+    "    case _:\n",
+    "      raise ValueError(f\"Unsupported dimensionality reduction method: {dim_reduction}\")\n",
+    "\n",
+    "  # Report the real image ID (row index * 2, as extract_similarities_ls2 does), not the row index.\n",
+    "  distances = [\n",
+    "    {\"image_id\": i * 2, \"label\": all_images[i][\"true_label\"], \"distance\": math.dist(comparison_vector, row)}\n",
+    "    for i, row in enumerate(comparison_feature_space)\n",
+    "    if (i * 2) != image_id\n",
+    "  ]\n",
+    "  distances.sort(key=lambda x: x[\"distance\"])\n",
+    "\n",
+    "  # Keep only the nearest image of each label until knum labels are collected.\n",
+    "  similar_labels = []\n",
+    "  unique_labels = set()\n",
+    "  for img in distances:\n",
+    "    if img['label'] not in unique_labels:\n",
+    "      similar_labels.append(img)\n",
+    "      unique_labels.add(img[\"label\"])\n",
+    "      if len(similar_labels) == knum:\n",
+    "        break\n",
+    "\n",
+    "  for x in similar_labels:\n",
+    "    print(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 144,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_similarities_ls2(data, image_id):\n",
+    "  \"\"\"Print the knum nearest distinct labels to a query image (LS2 / \"cp\" data).\n",
+    "\n",
+    "  data: loaded semantics JSON with \"image-semantic\" and \"semantics-core\".\n",
+    "  image_id: query image ID; row image_id // 2 of the latent space is the query.\n",
+    "  The query's own label is skipped. Reads the globals all_images and knum; returns None.\n",
+    "  \"\"\"\n",
+    "  query_index = image_id // 2\n",
+    "  # Derive the label from the image_id argument instead of the notebook-level img_label, which may be stale.\n",
+    "  query_label = all_images[query_index][\"true_label\"]\n",
+    "  IS = np.array(data[\"image-semantic\"])\n",
+    "  S = np.array(data[\"semantics-core\"])\n",
+    "  if len(S.shape) == 1:\n",
+    "    S = np.diag(S)  # core stored as a vector: expand to a diagonal matrix\n",
+    "  comparison_feature_space = np.matmul(IS, S)\n",
+    "  comparison_vector = comparison_feature_space[query_index]\n",
+    "\n",
+    "  distances = [\n",
+    "    {\"image_id\": i * 2, \"label\": all_images[i][\"true_label\"], \"distance\": math.dist(comparison_vector, row)}\n",
+    "    for i, row in enumerate(comparison_feature_space)\n",
+    "    if (i * 2) != image_id\n",
+    "  ]\n",
+    "  distances.sort(key=lambda x: x[\"distance\"])\n",
+    "\n",
+    "  similar_labels = []\n",
+    "  unique_labels = {query_label}  # pre-seeded so the query's own label is never reported\n",
+    "  for img in distances:\n",
+    "    if img[\"label\"] not in unique_labels:\n",
+    "      similar_labels.append(img)\n",
+    "      unique_labels.add(img[\"label\"])\n",
+    "      if len(similar_labels) == knum:\n",
+    "        break\n",
+    "  for x in similar_labels:\n",
+    "    print(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 145,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_similarities_ls3(dim_reduction, data, image_id):\n",
+    "  \"\"\"Print the knum labels closest to the query image's label (LS3).\n",
+    "\n",
+    "  dim_reduction: \"svd\", \"nmf\" or \"kmeans\"; anything else raises ValueError.\n",
+    "  data: loaded semantics JSON; rows of \"image-semantic\" are indexed by label here.\n",
+    "  image_id: query image ID; its label is read from all_images[image_id // 2].\n",
+    "\n",
+    "  Reads the notebook globals all_images and knum; prints and returns None.\n",
+    "  \"\"\"\n",
+    "  img_label = all_images[image_id // 2][\"true_label\"]\n",
+    "\n",
+    "  match dim_reduction:\n",
+    "    case 'svd':\n",
+    "      U = np.array(data[\"image-semantic\"])\n",
+    "      S = np.array(data[\"semantics-core\"])\n",
+    "      # A core stored as a 1-D vector must become a diagonal matrix (as the\n",
+    "      # other extract_similarities_* helpers do); otherwise matmul collapses\n",
+    "      # the space to a vector and the row lookup below yields a scalar.\n",
+    "      if len(S.shape) == 1:\n",
+    "        S = np.diag(S)\n",
+    "      comparison_feature_space = np.matmul(U, S)\n",
+    "\n",
+    "    case \"nmf\" | \"kmeans\":\n",
+    "      # Both methods use the stored \"image-semantic\" matrix as-is.\n",
+    "      comparison_feature_space = np.array(data['image-semantic'])\n",
+    "\n",
+    "    case _:\n",
+    "      raise ValueError(f\"Unsupported dimensionality reduction method: {dim_reduction}\")\n",
+    "\n",
+    "  # The row at position img_label is the query; every other row is a candidate.\n",
+    "  comparison_vector = comparison_feature_space[img_label]\n",
+    "\n",
+    "  distances = [\n",
+    "    {\"label\": i, \"distance\": math.dist(comparison_vector, row)}\n",
+    "    for i, row in enumerate(comparison_feature_space)\n",
+    "    if i != img_label\n",
+    "  ]\n",
+    "  # Nearest labels first; keep the knum closest.\n",
+    "  distances = sorted(distances, key=lambda x: x[\"distance\"])[:knum]\n",
+    "\n",
+    "  for img in distances:\n",
+    "    print(img)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 147,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'image_id': 2457, 'label': 39, 'distance': 5.400083378408386}\n",
+      "{'image_id': 2629, 'label': 46, 'distance': 6.360136822031199}\n",
+      "{'image_id': 1916, 'label': 23, 'distance': 8.279651870400942}\n",
+      "{'image_id': 1975, 'label': 24, 'distance': 9.305370097143731}\n",
+      "{'image_id': 3287, 'label': 65, 'distance': 9.696792665660324}\n",
+      "{'image_id': 292, 'label': 1, 'distance': 10.198675122162054}\n",
+      "{'image_id': 3965, 'label': 90, 'distance': 11.544874878013612}\n",
+      "{'image_id': 4018, 'label': 92, 'distance': 12.064116415014514}\n",
+      "{'image_id': 4307, 'label': 99, 'distance': 14.448284626506538}\n",
+      "{'image_id': 2329, 'label': 35, 'distance': 14.742475318290913}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Route the query to the helper that matches the chosen latent space.\n",
+    "if selected_latent_space in (\"\", \"image_sim\"):\n",
+    "  extract_similarities_ls1_ls4(\n",
+    "    selected_latent_space,\n",
+    "    selected_dim_reduction_method,\n",
+    "    selected_feature_model,\n",
+    "    data,\n",
+    "    image_id,\n",
+    "  )\n",
+    "elif selected_latent_space == \"label_sim\":\n",
+    "  extract_similarities_ls3(selected_dim_reduction_method, data, image_id)\n",
+    "elif selected_latent_space == \"cp\":\n",
+    "  extract_similarities_ls2(data, image_id)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}