refactored pranav's code for task 5

2025-12-06 12:44:06 +00:00 · 2023-10-11 16:56:23 -07:00 · 2023-10-11 16:56:23 -07:00 · 6e21bc168a
commit 6e21bc168a
parent 9e05228e94
3 changed files with 274 additions and 154 deletions
--- a/2/task3.ipynb
+++ b/2/task3.ipynb
@ -1,142 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from pymongo import MongoClient\n",
    "from task0a import *\n",
    "import scipy\n",
    "import numpy as np\n",
    "from sklearn.decomposition import NMF\n",
    "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
    "from sklearn.cluster import KMeans\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client = MongoClient()\n",
    "client = MongoClient(host=\"localhost\", port=27017)\n",
    "\n",
    "# Select the database\n",
    "db = client.Multimedia_Web_DBs\n",
    "\n",
    "# Fetch all documents from the collection and then sort them by \"_id\"\n",
    "feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n",
    "feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n",
    "\n",
    "num_labels = 101"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extractKLatentSemantics(k, feature_model, dim_reduction):\n",
    "\n",
    "  feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
    "  feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
    "  feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
    "\n",
    "  filename = ''\n",
    "\n",
    "\n",
    "  match dim_reduction:\n",
    "\n",
    "    case 1:\n",
    "      filename = f'{feature_model}-svd-semantics.json'\n",
    "      U, S, Vh = scipy.sparse.linalg.svds(np.array(feature_vectors), k=k)\n",
    "      k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
    "\n",
    "    case 2:\n",
    "      filename = f'{feature_model}-nnmf-semantics.json'\n",
    "      model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n",
    "      min_value = np.min(feature_vectors)\n",
    "      feature_vectors_shifted = feature_vectors - min_value\n",
    "      U = model.fit_transform(np.array(feature_vectors_shifted))\n",
    "      k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
    "\n",
    "    case 3:\n",
    "      filename = f'{feature_model}-lda-semantics.json'\n",
    "      U = LinearDiscriminantAnalysis(n_components = k).fit_transform(feature_vectors, feature_labels)\n",
    "      k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
    "\n",
    "    case 4:\n",
    "      filename = f'{feature_model}-kmeans-semantics.json'\n",
    "      kmeans = KMeans(n_clusters = k)\n",
    "      kmeans.fit(feature_vectors)\n",
    "      U = kmeans.transform(feature_vectors)\n",
    "      k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
    "  \n",
    "  k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n",
    "  with open(filename, 'w', encoding='utf-8') as f:\n",
    "    json.dump(k_latent_semantics, f, ensure_ascii = False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main():\n",
    "\n",
    "  # Load dataset\n",
    "\n",
    "  # User input for k (number of latent semantics to extract)\n",
    "  k = int(input(\"Enter k: \"))\n",
    "\n",
    "  features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n",
    "\n",
    "  # User input for feature model to extract\n",
    "  print(\"\\n1: Color moments\")\n",
    "  print(\"2: HOG\")\n",
    "  print(\"3: Resnet50 Avgpool layer\")\n",
    "  print(\"4: Resnet50 Layer 3\")\n",
    "  print(\"5: Resnet50 FC layer\")\n",
    "  feature_model = features[int(input(\"Select the feature model: \")) - 1]\n",
    "\n",
    "  print(\"\\n1. SVD\")\n",
    "  print(\"2. NNMF\")\n",
    "  print(\"3. LDA\")\n",
    "  print(\"4. k-means\")\n",
    "  dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n",
    "\n",
    "  extractKLatentSemantics(k, feature_model, dim_reduction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if __name__ == \"__main__\":\n",
    "   main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/2/task_5.ipynb
+++ b/2/task_5.ipynb
@ -0,0 +1,214 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils import *\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Applying svd on the fc_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n",
      "Latent semantic no. 0\n",
      "Image_ID\t80\t-\tWeight\t0.2614097705550824\n",
      "Image_ID\t74\t-\tWeight\t0.255431983850539\n",
      "Image_ID\t72\t-\tWeight\t0.24329045773521019\n",
      "Image_ID\t76\t-\tWeight\t0.22867416408250565\n",
      "Image_ID\t38\t-\tWeight\t0.19933358228759127\n",
      "Image_ID\t70\t-\tWeight\t0.18697368408982706\n",
      "Image_ID\t78\t-\tWeight\t0.13796715203849405\n",
      "Image_ID\t130\t-\tWeight\t0.12802644225327572\n",
      "Image_ID\t128\t-\tWeight\t0.12766513481071043\n",
      "Image_ID\t116\t-\tWeight\t0.12432195172872901\n",
      "Latent semantic no. 1\n",
      "Image_ID\t42\t-\tWeight\t0.24451953308549035\n",
      "Image_ID\t104\t-\tWeight\t0.17513827022527176\n",
      "Image_ID\t2\t-\tWeight\t0.17502495949250704\n",
      "Image_ID\t0\t-\tWeight\t0.17209867451969002\n",
      "Image_ID\t170\t-\tWeight\t0.16656363902027468\n",
      "Image_ID\t96\t-\tWeight\t0.15318453472976815\n",
      "Image_ID\t40\t-\tWeight\t0.1432149719665029\n",
      "Image_ID\t44\t-\tWeight\t0.1429496131499582\n",
      "Image_ID\t160\t-\tWeight\t0.13479710738132986\n",
      "Image_ID\t6\t-\tWeight\t0.1264545662660414\n",
      "Latent semantic no. 2\n",
      "Image_ID\t86\t-\tWeight\t0.21244971577008848\n",
      "Image_ID\t96\t-\tWeight\t0.19744514449239337\n",
      "Image_ID\t90\t-\tWeight\t0.19463642108355275\n",
      "Image_ID\t32\t-\tWeight\t0.18145091969843855\n",
      "Image_ID\t42\t-\tWeight\t0.16316970985189788\n",
      "Image_ID\t26\t-\tWeight\t0.15711519451212017\n",
      "Image_ID\t184\t-\tWeight\t0.14991640994990046\n",
      "Image_ID\t134\t-\tWeight\t0.1462330756631442\n",
      "Image_ID\t40\t-\tWeight\t0.14437675159652016\n",
      "Image_ID\t182\t-\tWeight\t0.1383518461119224\n",
      "Latent semantic no. 3\n",
      "Image_ID\t90\t-\tWeight\t0.1720078267722524\n",
      "Image_ID\t156\t-\tWeight\t0.16000154385617743\n",
      "Image_ID\t158\t-\tWeight\t0.1512646317732056\n",
      "Image_ID\t160\t-\tWeight\t0.14646801598350143\n",
      "Image_ID\t152\t-\tWeight\t0.1464352560589073\n",
      "Image_ID\t150\t-\tWeight\t0.14619374900432364\n",
      "Image_ID\t30\t-\tWeight\t0.14143498327111978\n",
      "Image_ID\t36\t-\tWeight\t0.14028252934190766\n",
      "Image_ID\t92\t-\tWeight\t0.14010606099568526\n",
      "Image_ID\t96\t-\tWeight\t0.12878454015856147\n",
      "Latent semantic no. 4\n",
      "Image_ID\t0\t-\tWeight\t0.1851068625752792\n",
      "Image_ID\t68\t-\tWeight\t0.18233577289211206\n",
      "Image_ID\t70\t-\tWeight\t0.17658848660973384\n",
      "Image_ID\t2\t-\tWeight\t0.1740864069632969\n",
      "Image_ID\t64\t-\tWeight\t0.1652208125636303\n",
      "Image_ID\t144\t-\tWeight\t0.1473307832877541\n",
      "Image_ID\t140\t-\tWeight\t0.13555748295430797\n",
      "Image_ID\t142\t-\tWeight\t0.12823249250147356\n",
      "Image_ID\t86\t-\tWeight\t0.12718092599165637\n",
      "Image_ID\t76\t-\tWeight\t0.1252879989162334\n",
      "Latent semantic no. 5\n",
      "Image_ID\t38\t-\tWeight\t0.18831453133913492\n",
      "Image_ID\t44\t-\tWeight\t0.17741038115946053\n",
      "Image_ID\t42\t-\tWeight\t0.16444727858214978\n",
      "Image_ID\t130\t-\tWeight\t0.15436113645002744\n",
      "Image_ID\t40\t-\tWeight\t0.1536450181907607\n",
      "Image_ID\t132\t-\tWeight\t0.14964910372393345\n",
      "Image_ID\t46\t-\tWeight\t0.147369630386678\n",
      "Image_ID\t36\t-\tWeight\t0.14003912645014002\n",
      "Image_ID\t128\t-\tWeight\t0.13864439525825356\n",
      "Image_ID\t138\t-\tWeight\t0.13770732538821512\n",
      "Latent semantic no. 6\n",
      "Image_ID\t114\t-\tWeight\t0.15664448468019831\n",
      "Image_ID\t2\t-\tWeight\t0.15491061836983144\n",
      "Image_ID\t0\t-\tWeight\t0.1530303208538504\n",
      "Image_ID\t6\t-\tWeight\t0.15295162665264536\n",
      "Image_ID\t106\t-\tWeight\t0.14505207452002586\n",
      "Image_ID\t110\t-\tWeight\t0.14364619871330633\n",
      "Image_ID\t104\t-\tWeight\t0.14360445482307752\n",
      "Image_ID\t116\t-\tWeight\t0.14309751290704328\n",
      "Image_ID\t108\t-\tWeight\t0.14103122187663494\n",
      "Image_ID\t112\t-\tWeight\t0.13936814882577545\n",
      "Latent semantic no. 7\n",
      "Image_ID\t158\t-\tWeight\t0.15332739573127638\n",
      "Image_ID\t152\t-\tWeight\t0.15027095321242787\n",
      "Image_ID\t2\t-\tWeight\t0.148228537938103\n",
      "Image_ID\t0\t-\tWeight\t0.14693245027728857\n",
      "Image_ID\t156\t-\tWeight\t0.1439438847861891\n",
      "Image_ID\t8\t-\tWeight\t0.14356918947005834\n",
      "Image_ID\t10\t-\tWeight\t0.1431162549061445\n",
      "Image_ID\t6\t-\tWeight\t0.14277108702825383\n",
      "Image_ID\t150\t-\tWeight\t0.1424099571884803\n",
      "Image_ID\t164\t-\tWeight\t0.13731169848767164\n",
      "Latent semantic no. 8\n",
      "Image_ID\t136\t-\tWeight\t0.14826723874051348\n",
      "Image_ID\t142\t-\tWeight\t0.1444905135922577\n",
      "Image_ID\t116\t-\tWeight\t0.14310970423245634\n",
      "Image_ID\t132\t-\tWeight\t0.13967210710664973\n",
      "Image_ID\t152\t-\tWeight\t0.13699976834141417\n",
      "Image_ID\t114\t-\tWeight\t0.13649814331495427\n",
      "Image_ID\t138\t-\tWeight\t0.13624706512987708\n",
      "Image_ID\t106\t-\tWeight\t0.13620952950667425\n",
      "Image_ID\t110\t-\tWeight\t0.1346054901033104\n",
      "Image_ID\t144\t-\tWeight\t0.13436573258693213\n",
      "Latent semantic no. 9\n",
      "Image_ID\t38\t-\tWeight\t0.15911686596038474\n",
      "Image_ID\t2\t-\tWeight\t0.15207108925634513\n",
      "Image_ID\t0\t-\tWeight\t0.15116756158498235\n",
      "Image_ID\t6\t-\tWeight\t0.15009399187071035\n",
      "Image_ID\t10\t-\tWeight\t0.14437025978168486\n",
      "Image_ID\t4\t-\tWeight\t0.14315858315130434\n",
      "Image_ID\t34\t-\tWeight\t0.14296451776950192\n",
      "Image_ID\t22\t-\tWeight\t0.14272703151065388\n",
      "Image_ID\t24\t-\tWeight\t0.14254462871698045\n",
      "Image_ID\t20\t-\tWeight\t0.14096073579756538\n"
     ]
    }
   ],
   "source": [
    "selected_feature_model = valid_feature_models[\n",
    "    str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
    "]\n",
    "\n",
    "k = int(input(\"Enter value of k: \"))\n",
    "if k < 1:\n",
    "    raise ValueError(\"k should be a positive integer\")\n",
    "\n",
    "selected_dim_reduction_method = str(\n",
    "    input(\n",
    "        \"Enter dimensionality reduction method - one of \"\n",
    "        + str(list(valid_dim_reduction_methods.keys()))\n",
    "    )\n",
    ")\n",
    "\n",
    "label_sim_matrix = find_label_label_similarity(fd_collection,selected_feature_model)\n",
    "\n",
    "extract_latent_semantics(\n",
    "    fd_collection,\n",
    "    k,\n",
    "    selected_feature_model,\n",
    "    selected_dim_reduction_method,\n",
    "    sim_matrix=label_sim_matrix,\n",
    "    top_images=10,\n",
    "    fn_prefix='label_sim-'\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/2/utils.py
+++ b/2/utils.py
@ -641,9 +641,9 @@ class KMeans:
            for c in self.cluster_centers_:
                prev_centroid = prev_centroids[c]
                current_centroid = self.cluster_centers_[c]
-                convergence_tol = np.sum(abs(
+                convergence_tol = np.sum(
-                    (prev_centroid - current_centroid) / prev_centroid * 100.0
+                    abs((prev_centroid - current_centroid) / prev_centroid * 100.0)
-                ))
+                )
                if convergence_tol > self.tol:
                    optimized = False
                    if self.verbose > 0:
@ -676,11 +676,19 @@ class KMeans:
 def extract_latent_semantics(
-    fd_collection, k, feature_model, dim_reduction_method, top_images=None
+    fd_collection,
    k,
    feature_model,
    dim_reduction_method,
    sim_matrix=None,
    top_images=None,
    fn_prefix="",
 ):
    """
    Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs
    Use `sim_matrix` to manually give similarity matrix instead of feature space
    Leave `top_images` blank to display all imageID-weight pairs
    """
@ -694,18 +702,28 @@ def extract_latent_semantics(
    )
    all_images = list(fd_collection.find())
    feature_vectors = np.array([img[feature_model] for img in all_images])
    feature_labels = [img["true_label"] for img in all_images]
    feature_ids = [img["image_id"] for img in all_images]
    top_img_str = ""
    if top_images is not None:
        top_img_str = f" (showing only top {top_images} image-weight pairs for each latent semantic)"
-    print(
+
-        "Applying {} on the {} space to get {} latent semantics{}...".format(
+    # if similarity matrix is provided
-            dim_reduction_method, feature_model, k, top_img_str
+    if sim_matrix is not None:
        feature_vectors = sim_matrix
        print(
            "Applying {} on the {} space to get {} latent semantics{}...".format(
                dim_reduction_method, feature_model, k, top_img_str
            )
        )
    # else take feature space from database
    else:
        feature_vectors = np.array([img[feature_model] for img in all_images])
        print(
            "Applying {} on the given similarity matrix to get {} latent semantics{}...".format(
                dim_reduction_method, k, top_img_str
            )
        )
    )
    displayed_latent_semantics = {}
    all_latent_semantics = {}
@ -827,8 +845,38 @@ def extract_latent_semantics(
            print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}")
    with open(
-        f"{feature_model}-{dim_reduction_method}-{k}-semantics.json",
+        f"{fn_prefix}{feature_model}-{dim_reduction_method}-{k}-semantics.json",
        "w",
        encoding="utf-8",
    ) as output_file:
        json.dump(all_latent_semantics, output_file, ensure_ascii=False)
 def find_label_label_similarity(fd_collection, feature_model, num_labels=101):
    """
    Calculate the pairwise label-label distance matrix for a given feature_model.
    Lower values indicate higher similarities (entries are distances, the diagonal stays 0)

    Each label in `range(num_labels)` is represented by whatever
    `calculate_label_representatives` returns for it, and each pair of labels is
    compared with the distance function registered for `feature_model` in
    `feature_distance_matches`.

    Use `num_labels` to change how many labels are compared (defaults to 101)
    Returns a `(num_labels, num_labels)` numpy array, filled symmetrically
    Raises AssertionError if `feature_model` is not one of the values of `valid_feature_models`
    """
    assert (
        feature_model in valid_feature_models.values()
    ), "feature_model should be one of " + str(list(valid_feature_models.keys()))

    # look the distance function up once instead of once per label pair
    distance_fn = feature_distance_matches[feature_model]

    # get representative vectors for each label, converted to arrays only once
    label_mean_vectors = [
        np.array(calculate_label_representatives(fd_collection, label, feature_model))
        for label in range(num_labels)
    ]

    label_sim_matrix = np.zeros((num_labels, num_labels))
    for i in range(num_labels):
        for j in range(i + 1, num_labels):
            # Note: lower the value, lower the distance => higher the similarity
            distance = distance_fn(label_mean_vectors[i], label_mean_vectors[j])
            # Mirror the value into the lower triangle: leaving it at 0 would make
            # label j look identical to every label i < j.
            # NOTE(review): assumes the registered distance function is symmetric - confirm
            label_sim_matrix[i][j] = distance
            label_sim_matrix[j][i] = distance

    return label_sim_matrix