diff --git a/Phase 2/task6.ipynb b/Phase 2/task6.ipynb new file mode 100644 index 0000000..8c08a17 --- /dev/null +++ b/Phase 2/task6.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import math\n", + "from pymongo import MongoClient\n", + "import scipy\n", + "import numpy as np\n", + "from sklearn.decomposition import NMF\n", + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "from sklearn.cluster import KMeans" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = MongoClient()\n", + "client = MongoClient(host = \"localhost\", port = 27017)\n", + "\n", + "# Select the database\n", + "db = client.Multimedia_Web_DBs\n", + "\n", + "# Fetch all documents from the collection and then sort them by \"_id\"\n", + "feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n", + "feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "def extractKLatentSemantics(k, image_sim_matrix, feature_model, dim_reduction):\n", + "\n", + " feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + " feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + "\n", + " filename = 'ls4.json'\n", + "\n", + " match dim_reduction:\n", + "\n", + " case 1:\n", + " U, S, Vh = scipy.sparse.linalg.svds(np.array(image_sim_matrix), k=k)\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + "\n", + " case 2:\n", + " model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n", + " min_value = np.min(image_sim_matrix)\n", + " 
feature_vectors_shifted = image_sim_matrix - min_value\n", + " U = model.fit_transform(np.array(feature_vectors_shifted))\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + "\n", + " case 3:\n", + " U = LinearDiscriminantAnalysis(n_components = k).fit_transform(image_sim_matrix, feature_labels)\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + "\n", + " case 4:\n", + " kmeans = KMeans(n_clusters = k)\n", + " kmeans.fit(image_sim_matrix)\n", + " U = kmeans.transform(image_sim_matrix)\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + " \n", + " k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n", + " with open(filename, 'w', encoding='utf-8') as f:\n", + " json.dump(k_latent_semantics, f, ensure_ascii = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def findImageImageSimMatrix(feature_model):\n", + " \n", + " feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + "\n", + " n = len(feature_vectors)\n", + "\n", + " image_sim_matrix = np.zeros((n, n))\n", + "\n", + " for i in range(n):\n", + " for j in range(i + 1, n):\n", + "\n", + " match feature_model:\n", + "\n", + " case \"color_moments\":\n", + " image_sim_matrix[i][j] = image_sim_matrix[j][i] = math.dist(feature_vectors[i], feature_vectors[j])\n", + " \n", + " case \"hog\":\n", + " image_sim_matrix[i][j] = image_sim_matrix[j][i] = (np.dot(feature_vectors[i], feature_vectors[j]) / (np.linalg.norm(feature_vectors[i]) * np.linalg.norm(feature_vectors[j])))\n", + "\n", + " case \"avgpool\" | \"layer3\" | \"fc\":\n", + " image_sim_matrix[i][j] = image_sim_matrix[j][i] = scipy.stats.pearsonr(feature_vectors[i], feature_vectors[j]).statistic\n", + " \n", + " 
return image_sim_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def main():\n", + "\n", + " k = int(input(\"Enter k: \"))\n", + "\n", + " features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n", + "\n", + " # User input for feature model to extract\n", + " print(\"\\n1: Color moments\")\n", + " print(\"2: HOG\")\n", + " print(\"3: Resnet50 Avgpool layer\")\n", + " print(\"4: Resnet50 Layer 3\")\n", + " print(\"5: Resnet50 FC layer\")\n", + " feature_model = features[int(input(\"Select the feature model: \")) - 1]\n", + "\n", + " print(\"\\n1. SVD\")\n", + " print(\"2. NNMF\")\n", + " print(\"3. LDA\")\n", + " print(\"4. k-means\")\n", + " dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n", + "\n", + " image_sim_matrix = findImageImageSimMatrix(feature_model)\n", + " print(image_sim_matrix)\n", + "\n", + " extractKLatentSemantics(k, image_sim_matrix, feature_model, dim_reduction)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Phase 2/task_3.ipynb b/Phase 2/task_3.ipynb index 34c0ca1..b8fb165 100644 --- a/Phase 2/task_3.ipynb +++ b/Phase 2/task_3.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ 
@@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -31,115 +31,115 @@ "text": [ "Applying svd on the cm_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n", "Latent semantic no. 0\n", - "Image_ID\t7654\t-\tWeight\t0.08162189274964751\n", - "Image_ID\t8634\t-\tWeight\t0.06673589485778451\n", - "Image_ID\t5740\t-\tWeight\t0.060058821201972104\n", - "Image_ID\t6106\t-\tWeight\t0.05306661393931607\n", - "Image_ID\t5456\t-\tWeight\t0.05170171570330845\n", - "Image_ID\t7814\t-\tWeight\t0.04997978865116185\n", - "Image_ID\t6248\t-\tWeight\t0.04946683639815072\n", - "Image_ID\t5354\t-\tWeight\t0.04864381025793171\n", - "Image_ID\t6108\t-\tWeight\t0.04796763934338538\n", - "Image_ID\t5438\t-\tWeight\t0.047874747600689466\n", + "Image_ID\t7654\t-\tWeight\t0.0816218927496473\n", + "Image_ID\t8634\t-\tWeight\t0.0667358948577843\n", + "Image_ID\t5740\t-\tWeight\t0.06005882120197204\n", + "Image_ID\t6106\t-\tWeight\t0.0530666139393161\n", + "Image_ID\t5456\t-\tWeight\t0.051701715703308504\n", + "Image_ID\t7814\t-\tWeight\t0.04997978865116192\n", + "Image_ID\t6248\t-\tWeight\t0.04946683639815059\n", + "Image_ID\t5354\t-\tWeight\t0.04864381025793159\n", + "Image_ID\t6108\t-\tWeight\t0.0479676393433854\n", + "Image_ID\t5438\t-\tWeight\t0.04787474760068962\n", "Latent semantic no. 
1\n", - "Image_ID\t8026\t-\tWeight\t0.06478360955460367\n", - "Image_ID\t6016\t-\tWeight\t0.0632709906607753\n", - "Image_ID\t3744\t-\tWeight\t0.05347414608321652\n", - "Image_ID\t3720\t-\tWeight\t0.0517124023583583\n", - "Image_ID\t7896\t-\tWeight\t0.049366978424645006\n", - "Image_ID\t6014\t-\tWeight\t0.047637173390389816\n", - "Image_ID\t6768\t-\tWeight\t0.04742408995375774\n", - "Image_ID\t4050\t-\tWeight\t0.0456343920101654\n", - "Image_ID\t6000\t-\tWeight\t0.04535273415975713\n", - "Image_ID\t6552\t-\tWeight\t0.04525300117499444\n", + "Image_ID\t7654\t-\tWeight\t0.05566187740909836\n", + "Image_ID\t7880\t-\tWeight\t0.05304265128270742\n", + "Image_ID\t5132\t-\tWeight\t0.052802620405367526\n", + "Image_ID\t4516\t-\tWeight\t0.05032667794065215\n", + "Image_ID\t3064\t-\tWeight\t0.04996389545581616\n", + "Image_ID\t7808\t-\tWeight\t0.04885211523705829\n", + "Image_ID\t8102\t-\tWeight\t0.04821048869059779\n", + "Image_ID\t5336\t-\tWeight\t0.047392911537133244\n", + "Image_ID\t3058\t-\tWeight\t0.04622961181395915\n", + "Image_ID\t7484\t-\tWeight\t0.04563242634411927\n", "Latent semantic no. 
2\n", - "Image_ID\t7654\t-\tWeight\t0.0704670166327785\n", - "Image_ID\t2804\t-\tWeight\t0.059682344110996065\n", - "Image_ID\t2710\t-\tWeight\t0.059199111598090534\n", - "Image_ID\t3436\t-\tWeight\t0.05368202357324355\n", - "Image_ID\t7936\t-\tWeight\t0.053276991496894154\n", - "Image_ID\t2708\t-\tWeight\t0.048527019795007204\n", - "Image_ID\t3764\t-\tWeight\t0.04835537239641643\n", - "Image_ID\t7928\t-\tWeight\t0.047998989024259496\n", - "Image_ID\t5684\t-\tWeight\t0.04723047448150771\n", - "Image_ID\t5126\t-\tWeight\t0.04720498270016634\n", + "Image_ID\t7654\t-\tWeight\t0.07046701663277787\n", + "Image_ID\t2804\t-\tWeight\t0.059682344110995336\n", + "Image_ID\t2710\t-\tWeight\t0.05919911159809061\n", + "Image_ID\t3436\t-\tWeight\t0.05368202357324448\n", + "Image_ID\t7936\t-\tWeight\t0.05327699149689366\n", + "Image_ID\t2708\t-\tWeight\t0.04852701979500758\n", + "Image_ID\t3764\t-\tWeight\t0.04835537239641772\n", + "Image_ID\t7928\t-\tWeight\t0.04799898902425922\n", + "Image_ID\t5684\t-\tWeight\t0.04723047448150721\n", + "Image_ID\t5126\t-\tWeight\t0.04720498270016626\n", "Latent semantic no. 
3\n", - "Image_ID\t6356\t-\tWeight\t0.0754447261688377\n", - "Image_ID\t6480\t-\tWeight\t0.06540890240964665\n", - "Image_ID\t4756\t-\tWeight\t0.06075370676621832\n", - "Image_ID\t8656\t-\tWeight\t0.060505116069252685\n", - "Image_ID\t6050\t-\tWeight\t0.058111632773274836\n", - "Image_ID\t6324\t-\tWeight\t0.056492568599917435\n", - "Image_ID\t8138\t-\tWeight\t0.0557967464751822\n", - "Image_ID\t3460\t-\tWeight\t0.05508818833516222\n", - "Image_ID\t200\t-\tWeight\t0.05459477384213874\n", - "Image_ID\t7220\t-\tWeight\t0.05376222500332758\n", + "Image_ID\t8654\t-\tWeight\t0.08668332932816088\n", + "Image_ID\t8618\t-\tWeight\t0.08568859853566119\n", + "Image_ID\t8658\t-\tWeight\t0.0777605087520117\n", + "Image_ID\t3306\t-\tWeight\t0.0745220591779124\n", + "Image_ID\t8620\t-\tWeight\t0.07351843281590886\n", + "Image_ID\t8638\t-\tWeight\t0.06948884666766826\n", + "Image_ID\t6754\t-\tWeight\t0.06896434951935482\n", + "Image_ID\t8676\t-\tWeight\t0.06623938393792103\n", + "Image_ID\t4650\t-\tWeight\t0.06566930583744507\n", + "Image_ID\t8636\t-\tWeight\t0.06499098805246775\n", "Latent semantic no. 
4\n", - "Image_ID\t7370\t-\tWeight\t0.05281026462493995\n", - "Image_ID\t6528\t-\tWeight\t0.05252803707219332\n", - "Image_ID\t8056\t-\tWeight\t0.05175019567880743\n", - "Image_ID\t2958\t-\tWeight\t0.05123118911737749\n", - "Image_ID\t4614\t-\tWeight\t0.05061302210733273\n", - "Image_ID\t8292\t-\tWeight\t0.05000577057549489\n", - "Image_ID\t7888\t-\tWeight\t0.04905059301012787\n", - "Image_ID\t6540\t-\tWeight\t0.048139958875035395\n", - "Image_ID\t6064\t-\tWeight\t0.04605896293857696\n", - "Image_ID\t2974\t-\tWeight\t0.04488429099909397\n", + "Image_ID\t7370\t-\tWeight\t0.05281026462494081\n", + "Image_ID\t6528\t-\tWeight\t0.05252803707219361\n", + "Image_ID\t8056\t-\tWeight\t0.0517501956788071\n", + "Image_ID\t2958\t-\tWeight\t0.051231189117377514\n", + "Image_ID\t4614\t-\tWeight\t0.05061302210733084\n", + "Image_ID\t8292\t-\tWeight\t0.05000577057549516\n", + "Image_ID\t7888\t-\tWeight\t0.04905059301012733\n", + "Image_ID\t6540\t-\tWeight\t0.048139958875035006\n", + "Image_ID\t6064\t-\tWeight\t0.04605896293857509\n", + "Image_ID\t2974\t-\tWeight\t0.04488429099909442\n", "Latent semantic no. 
5\n", - "Image_ID\t8570\t-\tWeight\t0.08379938013632145\n", - "Image_ID\t7784\t-\tWeight\t0.0723847258804912\n", - "Image_ID\t4152\t-\tWeight\t0.060769224719766333\n", - "Image_ID\t5114\t-\tWeight\t0.053872121517690504\n", - "Image_ID\t7774\t-\tWeight\t0.05324887247523992\n", - "Image_ID\t8614\t-\tWeight\t0.05319742868629013\n", - "Image_ID\t3072\t-\tWeight\t0.05083994521792821\n", - "Image_ID\t7798\t-\tWeight\t0.05059807413594892\n", - "Image_ID\t5118\t-\tWeight\t0.05022770477320976\n", - "Image_ID\t7040\t-\tWeight\t0.04996996742218053\n", + "Image_ID\t8570\t-\tWeight\t0.08379938013632153\n", + "Image_ID\t7784\t-\tWeight\t0.07238472588049127\n", + "Image_ID\t4152\t-\tWeight\t0.06076922471976642\n", + "Image_ID\t5114\t-\tWeight\t0.05387212151769057\n", + "Image_ID\t7774\t-\tWeight\t0.05324887247524\n", + "Image_ID\t8614\t-\tWeight\t0.05319742868629018\n", + "Image_ID\t3072\t-\tWeight\t0.05083994521792827\n", + "Image_ID\t7798\t-\tWeight\t0.050598074135949\n", + "Image_ID\t5118\t-\tWeight\t0.05022770477320978\n", + "Image_ID\t7040\t-\tWeight\t0.04996996742218058\n", "Latent semantic no. 
6\n", - "Image_ID\t8570\t-\tWeight\t0.07082421149695754\n", - "Image_ID\t7774\t-\tWeight\t0.06546594547486781\n", - "Image_ID\t4152\t-\tWeight\t0.06440870014673936\n", + "Image_ID\t8570\t-\tWeight\t0.07082421149695753\n", + "Image_ID\t7774\t-\tWeight\t0.06546594547486784\n", + "Image_ID\t4152\t-\tWeight\t0.06440870014673937\n", "Image_ID\t5118\t-\tWeight\t0.06264436903974217\n", - "Image_ID\t7784\t-\tWeight\t0.06203552824772956\n", - "Image_ID\t7798\t-\tWeight\t0.05899354962287134\n", - "Image_ID\t7896\t-\tWeight\t0.05648444493570963\n", + "Image_ID\t7784\t-\tWeight\t0.06203552824772957\n", + "Image_ID\t7798\t-\tWeight\t0.05899354962287138\n", + "Image_ID\t7896\t-\tWeight\t0.056484444935709706\n", "Image_ID\t7766\t-\tWeight\t0.056063042928801675\n", - "Image_ID\t7792\t-\tWeight\t0.055578803018497686\n", - "Image_ID\t7834\t-\tWeight\t0.055567509183302555\n", + "Image_ID\t7792\t-\tWeight\t0.05557880301849769\n", + "Image_ID\t7834\t-\tWeight\t0.05556750918330256\n", "Latent semantic no. 7\n", - "Image_ID\t7912\t-\tWeight\t0.06634864556518678\n", - "Image_ID\t5534\t-\tWeight\t0.05913926717735747\n", - "Image_ID\t5550\t-\tWeight\t0.049468125695492526\n", - "Image_ID\t2106\t-\tWeight\t0.048274676516220805\n", - "Image_ID\t7804\t-\tWeight\t0.04822832951751611\n", - "Image_ID\t6198\t-\tWeight\t0.04795521082538372\n", - "Image_ID\t6728\t-\tWeight\t0.04729135404469566\n", - "Image_ID\t5588\t-\tWeight\t0.04715637083533252\n", - "Image_ID\t7276\t-\tWeight\t0.04637482601331893\n", - "Image_ID\t6730\t-\tWeight\t0.045930617636659\n", + "Image_ID\t1140\t-\tWeight\t0.05317423066517462\n", + "Image_ID\t5510\t-\tWeight\t0.052651188836683724\n", + "Image_ID\t5282\t-\tWeight\t0.05122146559887229\n", + "Image_ID\t1260\t-\tWeight\t0.050478632782130786\n", + "Image_ID\t1692\t-\tWeight\t0.05043911725770527\n", + "Image_ID\t8656\t-\tWeight\t0.04943228673655803\n", + "Image_ID\t1242\t-\tWeight\t0.04886689682608001\n", + "Image_ID\t7844\t-\tWeight\t0.048768495445578465\n", + 
"Image_ID\t5100\t-\tWeight\t0.04867702517715619\n", + "Image_ID\t5300\t-\tWeight\t0.048353062438932816\n", "Latent semantic no. 8\n", - "Image_ID\t1798\t-\tWeight\t0.04586412291217343\n", - "Image_ID\t1802\t-\tWeight\t0.044772142290101236\n", - "Image_ID\t1806\t-\tWeight\t0.044448676280621977\n", - "Image_ID\t1202\t-\tWeight\t0.043679466488681935\n", - "Image_ID\t1786\t-\tWeight\t0.04351371229636818\n", - "Image_ID\t1784\t-\tWeight\t0.04346765741634348\n", - "Image_ID\t1790\t-\tWeight\t0.04288750664761761\n", - "Image_ID\t1642\t-\tWeight\t0.041863484069841805\n", - "Image_ID\t1788\t-\tWeight\t0.04089406629514228\n", - "Image_ID\t1796\t-\tWeight\t0.04068815222347919\n", + "Image_ID\t1798\t-\tWeight\t0.0458641229121734\n", + "Image_ID\t1802\t-\tWeight\t0.044772142290101194\n", + "Image_ID\t1806\t-\tWeight\t0.044448676280621935\n", + "Image_ID\t1202\t-\tWeight\t0.043679466488681894\n", + "Image_ID\t1786\t-\tWeight\t0.043513712296368134\n", + "Image_ID\t1784\t-\tWeight\t0.043467657416343425\n", + "Image_ID\t1790\t-\tWeight\t0.04288750664761759\n", + "Image_ID\t1642\t-\tWeight\t0.041863484069841764\n", + "Image_ID\t1788\t-\tWeight\t0.04089406629514224\n", + "Image_ID\t1796\t-\tWeight\t0.04068815222347914\n", "Latent semantic no. 
9\n", - "Image_ID\t8582\t-\tWeight\t0.02577153311253718\n", - "Image_ID\t8612\t-\tWeight\t0.025608143819276445\n", - "Image_ID\t7290\t-\tWeight\t0.025578071187110543\n", - "Image_ID\t7298\t-\tWeight\t0.025350467801040884\n", - "Image_ID\t7302\t-\tWeight\t0.02531661140938117\n", - "Image_ID\t7318\t-\tWeight\t0.025212779767014252\n", - "Image_ID\t8580\t-\tWeight\t0.025201323062899284\n", - "Image_ID\t6392\t-\tWeight\t0.02517086205642468\n", - "Image_ID\t2738\t-\tWeight\t0.025106516897995135\n", - "Image_ID\t6420\t-\tWeight\t0.02510499876667641\n" + "Image_ID\t8616\t-\tWeight\t-0.001110683188398373\n", + "Image_ID\t5234\t-\tWeight\t-0.001470742377963864\n", + "Image_ID\t3838\t-\tWeight\t-0.0018268938101953923\n", + "Image_ID\t7428\t-\tWeight\t-0.001978912864613778\n", + "Image_ID\t4664\t-\tWeight\t-0.0020551982165007863\n", + "Image_ID\t2754\t-\tWeight\t-0.002091620047637018\n", + "Image_ID\t2806\t-\tWeight\t-0.0021702921217260757\n", + "Image_ID\t3820\t-\tWeight\t-0.002247214027498397\n", + "Image_ID\t3786\t-\tWeight\t-0.002360567100195792\n", + "Image_ID\t4928\t-\tWeight\t-0.002395118791388935\n" ] } ], @@ -159,7 +159,7 @@ " )\n", ")\n", "\n", - "extract_latent_semantics(\n", + "extract_latent_semantics_from_feature_model(\n", " fd_collection,\n", " k,\n", " selected_feature_model,\n", diff --git a/Phase 2/task_5.ipynb b/Phase 2/task_5.ipynb index 0b1cdb0..bddcc7b 100644 --- a/Phase 2/task_5.ipynb +++ b/Phase 2/task_5.ipynb @@ -14,16 +14,6 @@ ] } ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], "source": [ "from utils import *\n", "warnings.filterwarnings('ignore')\n", @@ -32,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -41,124 +31,124 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ - "Applying svd on the given similarity matrix to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n", + "Applying svd on the given similarity matrix to get 10 latent semantics (showing only top 10 label-weight pairs for each latent semantic)...\n", "Latent semantic no. 0\n", - "Image_ID\t200\t-\tWeight\t0.0\n", - "Image_ID\t198\t-\tWeight\t-0.004684806351746236\n", - "Image_ID\t196\t-\tWeight\t-0.007271577414375871\n", - "Image_ID\t194\t-\tWeight\t-0.011073051177514079\n", - "Image_ID\t192\t-\tWeight\t-0.011680371639188197\n", - "Image_ID\t188\t-\tWeight\t-0.014876024947438421\n", - "Image_ID\t186\t-\tWeight\t-0.017327189984007427\n", - "Image_ID\t190\t-\tWeight\t-0.021143262428570023\n", - "Image_ID\t182\t-\tWeight\t-0.026835375354998945\n", - "Image_ID\t180\t-\tWeight\t-0.030539133156424272\n", + "label\t28\t-\tWeight\t0.2583354411312026\n", + "label\t29\t-\tWeight\t0.2301362547676974\n", + "label\t33\t-\tWeight\t0.2129183683279978\n", + "label\t9\t-\tWeight\t0.17625685452423093\n", + "label\t95\t-\tWeight\t0.16277551497836534\n", + "label\t47\t-\tWeight\t0.1424860388015467\n", + "label\t39\t-\tWeight\t0.1349747704005884\n", + "label\t30\t-\tWeight\t0.13251434767496492\n", + "label\t52\t-\tWeight\t0.12669069496270755\n", + "label\t8\t-\tWeight\t0.1257730807471899\n", "Latent semantic no. 
1\n", - "Image_ID\t130\t-\tWeight\t0.21209688019072415\n", - "Image_ID\t138\t-\tWeight\t0.20392427070510372\n", - "Image_ID\t120\t-\tWeight\t0.1528415927574225\n", - "Image_ID\t132\t-\tWeight\t0.14995762877608315\n", - "Image_ID\t160\t-\tWeight\t0.1488052541453248\n", - "Image_ID\t136\t-\tWeight\t0.14309946283137032\n", - "Image_ID\t164\t-\tWeight\t0.1374261619484733\n", - "Image_ID\t140\t-\tWeight\t0.13528239495542024\n", - "Image_ID\t128\t-\tWeight\t0.12811923299406092\n", - "Image_ID\t152\t-\tWeight\t0.12752116772697258\n", + "label\t96\t-\tWeight\t0.2666765976054894\n", + "label\t97\t-\tWeight\t0.19087869496500426\n", + "label\t25\t-\tWeight\t0.17776094778851348\n", + "label\t3\t-\tWeight\t0.1759798805642099\n", + "label\t98\t-\tWeight\t0.16951497899752574\n", + "label\t22\t-\tWeight\t0.1667032655640346\n", + "label\t24\t-\tWeight\t0.16034180060184824\n", + "label\t19\t-\tWeight\t0.15345532912389587\n", + "label\t52\t-\tWeight\t0.13271640119612757\n", + "label\t29\t-\tWeight\t0.12856388746021633\n", "Latent semantic no. 
2\n", - "Image_ID\t4\t-\tWeight\t0.2518749001016952\n", - "Image_ID\t8\t-\tWeight\t0.24177133880298157\n", - "Image_ID\t58\t-\tWeight\t0.1467873881626323\n", - "Image_ID\t0\t-\tWeight\t0.1384139791414865\n", - "Image_ID\t56\t-\tWeight\t0.11818058158618501\n", - "Image_ID\t20\t-\tWeight\t0.1102967668802325\n", - "Image_ID\t84\t-\tWeight\t0.1044376029159064\n", - "Image_ID\t18\t-\tWeight\t0.10262843674760519\n", - "Image_ID\t138\t-\tWeight\t0.10181762652349924\n", - "Image_ID\t70\t-\tWeight\t0.10127861659022899\n", + "label\t46\t-\tWeight\t0.21813474254675366\n", + "label\t79\t-\tWeight\t0.19091788352587957\n", + "label\t55\t-\tWeight\t0.1871080482210247\n", + "label\t56\t-\tWeight\t0.18322792605578184\n", + "label\t78\t-\tWeight\t0.17506936966351683\n", + "label\t98\t-\tWeight\t0.1733164832137484\n", + "label\t22\t-\tWeight\t0.17114312653027375\n", + "label\t38\t-\tWeight\t0.16928636840289424\n", + "label\t45\t-\tWeight\t0.1567042877228484\n", + "label\t4\t-\tWeight\t0.15108693899889344\n", "Latent semantic no. 
3\n", - "Image_ID\t84\t-\tWeight\t0.16299489544466675\n", - "Image_ID\t94\t-\tWeight\t0.155336350677209\n", - "Image_ID\t70\t-\tWeight\t0.14011002627071287\n", - "Image_ID\t102\t-\tWeight\t0.13701247594788535\n", - "Image_ID\t88\t-\tWeight\t0.1320753872066342\n", - "Image_ID\t82\t-\tWeight\t0.1320716816148611\n", - "Image_ID\t86\t-\tWeight\t0.12902969925360877\n", - "Image_ID\t72\t-\tWeight\t0.12610296358207826\n", - "Image_ID\t92\t-\tWeight\t0.12596461453701044\n", - "Image_ID\t66\t-\tWeight\t0.12532841063277217\n", + "label\t96\t-\tWeight\t0.2736613529052896\n", + "label\t98\t-\tWeight\t0.218185914155306\n", + "label\t22\t-\tWeight\t0.1963451355822489\n", + "label\t3\t-\tWeight\t0.17627732148468614\n", + "label\t39\t-\tWeight\t0.1728992502839298\n", + "label\t52\t-\tWeight\t0.15597562436756945\n", + "label\t51\t-\tWeight\t0.1291470561734402\n", + "label\t30\t-\tWeight\t0.12453129554714541\n", + "label\t18\t-\tWeight\t0.1236867360720947\n", + "label\t38\t-\tWeight\t0.12184856229773917\n", "Latent semantic no. 
4\n", - "Image_ID\t176\t-\tWeight\t0.17418620419170064\n", - "Image_ID\t184\t-\tWeight\t0.16284491366511475\n", - "Image_ID\t178\t-\tWeight\t0.15835141260945226\n", - "Image_ID\t182\t-\tWeight\t0.1563230190106094\n", - "Image_ID\t180\t-\tWeight\t0.14992527858819726\n", - "Image_ID\t170\t-\tWeight\t0.1461798073190985\n", - "Image_ID\t174\t-\tWeight\t0.13541698801645058\n", - "Image_ID\t166\t-\tWeight\t0.12423630035289784\n", - "Image_ID\t172\t-\tWeight\t0.1234361443074221\n", - "Image_ID\t52\t-\tWeight\t0.12074682250121946\n", + "label\t6\t-\tWeight\t0.23875690719216863\n", + "label\t67\t-\tWeight\t0.21007869938490106\n", + "label\t63\t-\tWeight\t0.18822840034389135\n", + "label\t14\t-\tWeight\t0.18738002200878218\n", + "label\t87\t-\tWeight\t0.17508576062247283\n", + "label\t23\t-\tWeight\t0.167492867766091\n", + "label\t15\t-\tWeight\t0.15522709562173342\n", + "label\t61\t-\tWeight\t0.13244353806854162\n", + "label\t45\t-\tWeight\t0.12833204093005665\n", + "label\t68\t-\tWeight\t0.12622315521729294\n", "Latent semantic no. 
5\n", - "Image_ID\t184\t-\tWeight\t0.25060450796637307\n", - "Image_ID\t96\t-\tWeight\t0.19653319773940384\n", - "Image_ID\t4\t-\tWeight\t0.1927615510140044\n", - "Image_ID\t190\t-\tWeight\t0.1823467475920773\n", - "Image_ID\t104\t-\tWeight\t0.17232402315708764\n", - "Image_ID\t176\t-\tWeight\t0.15944267571419668\n", - "Image_ID\t2\t-\tWeight\t0.15830010074390483\n", - "Image_ID\t180\t-\tWeight\t0.15710086389623582\n", - "Image_ID\t86\t-\tWeight\t0.1531972222034532\n", - "Image_ID\t178\t-\tWeight\t0.14864580852650564\n", + "label\t30\t-\tWeight\t0.17385975982344382\n", + "label\t25\t-\tWeight\t0.14655711054814133\n", + "label\t39\t-\tWeight\t0.13307896633493813\n", + "label\t68\t-\tWeight\t0.12851498788897622\n", + "label\t24\t-\tWeight\t0.12828250585375986\n", + "label\t0\t-\tWeight\t0.12500243174429157\n", + "label\t1\t-\tWeight\t0.12371257574727512\n", + "label\t77\t-\tWeight\t0.12370279647800499\n", + "label\t89\t-\tWeight\t0.12233344688386875\n", + "label\t83\t-\tWeight\t0.11445596984835589\n", "Latent semantic no. 
6\n", - "Image_ID\t160\t-\tWeight\t0.2664558477429268\n", - "Image_ID\t86\t-\tWeight\t0.22964178511691158\n", - "Image_ID\t4\t-\tWeight\t0.2027946708731003\n", - "Image_ID\t8\t-\tWeight\t0.17594388183949075\n", - "Image_ID\t96\t-\tWeight\t0.15932731178540344\n", - "Image_ID\t150\t-\tWeight\t0.1557669882841681\n", - "Image_ID\t42\t-\tWeight\t0.15015687757605228\n", - "Image_ID\t70\t-\tWeight\t0.14221366935133106\n", - "Image_ID\t166\t-\tWeight\t0.13822990110337333\n", - "Image_ID\t170\t-\tWeight\t0.136006921209686\n", + "label\t17\t-\tWeight\t0.2335282879255542\n", + "label\t48\t-\tWeight\t0.19418795795666355\n", + "label\t21\t-\tWeight\t0.19013440200231033\n", + "label\t85\t-\tWeight\t0.17503295059460947\n", + "label\t11\t-\tWeight\t0.14933372636956993\n", + "label\t1\t-\tWeight\t0.1384254243377172\n", + "label\t0\t-\tWeight\t0.13078647401074162\n", + "label\t57\t-\tWeight\t0.11374248801163754\n", + "label\t10\t-\tWeight\t0.10468223841103744\n", + "label\t99\t-\tWeight\t0.10191451131216464\n", "Latent semantic no. 
7\n", - "Image_ID\t0\t-\tWeight\t0.18579423291522054\n", - "Image_ID\t160\t-\tWeight\t0.15838043091994455\n", - "Image_ID\t12\t-\tWeight\t0.1569899414230264\n", - "Image_ID\t16\t-\tWeight\t0.15348073631252238\n", - "Image_ID\t20\t-\tWeight\t0.14749435830520785\n", - "Image_ID\t18\t-\tWeight\t0.14710442040625207\n", - "Image_ID\t14\t-\tWeight\t0.14572307182896904\n", - "Image_ID\t2\t-\tWeight\t0.135886756644037\n", - "Image_ID\t158\t-\tWeight\t0.12716375063129493\n", - "Image_ID\t154\t-\tWeight\t0.11653475862758583\n", + "label\t82\t-\tWeight\t0.23372455436757703\n", + "label\t95\t-\tWeight\t0.21795238756371887\n", + "label\t60\t-\tWeight\t0.18080422229063045\n", + "label\t16\t-\tWeight\t0.1806105172209771\n", + "label\t27\t-\tWeight\t0.17365150902149876\n", + "label\t59\t-\tWeight\t0.17250044548228938\n", + "label\t26\t-\tWeight\t0.1661853291143862\n", + "label\t13\t-\tWeight\t0.16331211225170805\n", + "label\t34\t-\tWeight\t0.1523080193090529\n", + "label\t67\t-\tWeight\t0.13577900574984025\n", "Latent semantic no. 
8\n", - "Image_ID\t128\t-\tWeight\t0.20162255290912043\n", - "Image_ID\t64\t-\tWeight\t0.2013551710742827\n", - "Image_ID\t76\t-\tWeight\t0.19200691322367733\n", - "Image_ID\t68\t-\tWeight\t0.183262211696717\n", - "Image_ID\t2\t-\tWeight\t0.17626949463475755\n", - "Image_ID\t126\t-\tWeight\t0.17260073717551033\n", - "Image_ID\t130\t-\tWeight\t0.16679745247386799\n", - "Image_ID\t0\t-\tWeight\t0.15145696367688846\n", - "Image_ID\t80\t-\tWeight\t0.13382645234168947\n", - "Image_ID\t132\t-\tWeight\t0.12607547198838437\n", + "label\t53\t-\tWeight\t0.2259481751468642\n", + "label\t37\t-\tWeight\t0.21583443408756542\n", + "label\t76\t-\tWeight\t0.20483376297311964\n", + "label\t44\t-\tWeight\t0.1690198227623472\n", + "label\t68\t-\tWeight\t0.1650723880318989\n", + "label\t28\t-\tWeight\t0.15689929414378492\n", + "label\t14\t-\tWeight\t0.1564371673909956\n", + "label\t54\t-\tWeight\t0.1553627917623035\n", + "label\t51\t-\tWeight\t0.14380435363337046\n", + "label\t36\t-\tWeight\t0.13510425005259438\n", "Latent semantic no. 
9\n", - "Image_ID\t110\t-\tWeight\t0.2380313932091839\n", - "Image_ID\t126\t-\tWeight\t0.22284705922022288\n", - "Image_ID\t170\t-\tWeight\t0.20294066349000953\n", - "Image_ID\t58\t-\tWeight\t0.19271846291888434\n", - "Image_ID\t166\t-\tWeight\t0.16710379029940944\n", - "Image_ID\t118\t-\tWeight\t0.16159034411481996\n", - "Image_ID\t42\t-\tWeight\t0.1585043891315177\n", - "Image_ID\t120\t-\tWeight\t0.15529190621970054\n", - "Image_ID\t56\t-\tWeight\t0.1484578124120866\n", - "Image_ID\t160\t-\tWeight\t0.13578707023661948\n" + "label\t19\t-\tWeight\t0.11741024839079275\n", + "label\t40\t-\tWeight\t0.11107319334138463\n", + "label\t53\t-\tWeight\t0.11058750626248925\n", + "label\t51\t-\tWeight\t0.10794606425819818\n", + "label\t96\t-\tWeight\t0.10735468567860716\n", + "label\t55\t-\tWeight\t0.10731282010915796\n", + "label\t50\t-\tWeight\t0.10703093662670059\n", + "label\t1\t-\tWeight\t0.10651036503732043\n", + "label\t79\t-\tWeight\t0.10640855392103846\n", + "label\t47\t-\tWeight\t0.10594110421348357\n" ] } ], @@ -180,14 +170,13 @@ "\n", "label_sim_matrix = find_label_label_similarity(fd_collection,selected_feature_model)\n", "\n", - "extract_latent_semantics(\n", - " fd_collection,\n", - " k,\n", + "extract_latent_semantics_from_sim_matrix(\n", + " label_sim_matrix,\n", " selected_feature_model,\n", + " \"label\",\n", + " k,\n", " selected_dim_reduction_method,\n", - " sim_matrix=label_sim_matrix,\n", " top_images=10,\n", - " fn_prefix='label_sim-'\n", ")\n" ] }, diff --git a/Phase 2/task_6.ipynb b/Phase 2/task_6.ipynb new file mode 100644 index 0000000..15a3afb --- /dev/null +++ b/Phase 2/task_6.ipynb @@ -0,0 +1,78 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import *\n", + "warnings.filterwarnings('ignore')\n", + "%matplotlib inline\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fd_collection = 
getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "selected_feature_model = valid_feature_models[\n", + " str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n", + "]\n", + "\n", + "k = int(input(\"Enter value of k: \"))\n", + "if k < 1:\n", + " raise ValueError(\"k should be a positive integer\")\n", + "\n", + "selected_dim_reduction_method = str(\n", + " input(\n", + " \"Enter dimensionality reduction method - one of \"\n", + " + str(list(valid_dim_reduction_methods.keys()))\n", + " )\n", + ")\n", + "\n", + "image_sim_matrix = find_image_image_similarity(fd_collection,selected_feature_model)\n", + "\n", + "extract_latent_semantics_from_sim_matrix(\n", + " image_sim_matrix,\n", + " selected_feature_model,\n", + " \"image\",\n", + "\tk,\n", + " selected_dim_reduction_method,\n", + " top_images=10,\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Phase 2/utils.py b/Phase 2/utils.py index fa749ff..6dbb8ed 100644 --- a/Phase 2/utils.py +++ b/Phase 2/utils.py @@ -523,7 +523,9 @@ def calculate_label_representatives(fd_collection, label, feature_model): """Calculate representative feature vector of a label as the mean of all feature vectors under a feature model""" label_fds = [ - np.array(img_fds[feature_model]).flatten() # get the specific feature model's feature vector + np.array( + img_fds[feature_model] + ).flatten() # get the specific feature model's feature vector for img_fds in fd_collection.find( 
{"true_label": label} ) # repeat for all images @@ -569,7 +571,7 @@ def show_similar_images_for_label( for cur_img in all_images: cur_img_id = cur_img["image_id"] - cur_img_fd = np.array(cur_img[feature_model]) + cur_img_fd = np.array(cur_img[feature_model]).flatten() cur_dist = distance_measure( cur_img_fd, @@ -658,15 +660,13 @@ def show_similar_labels_for_image( label_dict = {target_image_id: target_label} - target_image_fd = np.array(target_image[feature_model]) - all_images = fd_collection.find({}) for cur_img in all_images: cur_img_id = cur_img["image_id"] # skip target itself if cur_img_id == target_image_id: continue - cur_img_fd = np.array(cur_img[feature_model]) + cur_img_fd = np.array(cur_img[feature_model]).flatten() cur_dist = distance_measure( cur_img_fd, target_image_fd, @@ -698,11 +698,11 @@ def show_similar_labels_for_image( continue else: sample_image, sample_label = dataset[image_id] - axs[idx-1].imshow(transforms.ToPILImage()(sample_image)) - axs[idx-1].set_title( + axs[idx - 1].imshow(transforms.ToPILImage()(sample_image)) + axs[idx - 1].set_title( f"Label: {label_dict[image_id]}; Distance: {min_dists[image_id]}" ) - axs[idx-1].axis("off") + axs[idx - 1].axis("off") if save_plots: plt.savefig( @@ -841,20 +841,16 @@ def svd(matrix, k): return left_singular_vectors, np.diag(singular_values), right_singular_vectors.T -def extract_latent_semantics( +def extract_latent_semantics_from_feature_model( fd_collection, k, feature_model, dim_reduction_method, - sim_matrix=None, top_images=None, - fn_prefix="", ): """ Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs - Use `sim_matrix` to manually give similarity matrix instead of feature space - Leave `top_images` blank to display all imageID-weight pairs """ @@ -874,22 +870,14 @@ def extract_latent_semantics( if top_images is not None: top_img_str = f" (showing only top {top_images} image-weight pairs for 
each latent semantic)" - # if similarity matrix is provided - if sim_matrix is not None: - feature_vectors = sim_matrix - print( - "Applying {} on the given similarity matrix to get {} latent semantics{}...".format( - dim_reduction_method, k, top_img_str - ) - ) - # else take feature space from database - else: - feature_vectors = np.array([np.array(img[feature_model]).flatten() for img in all_images]) - print( - "Applying {} on the {} space to get {} latent semantics{}...".format( - dim_reduction_method, feature_model, k, top_img_str - ) + feature_vectors = np.array( + [np.array(img[feature_model]).flatten() for img in all_images] + ) + print( + "Applying {} on the {} space to get {} latent semantics{}...".format( + dim_reduction_method, feature_model, k, top_img_str ) + ) displayed_latent_semantics = {} all_latent_semantics = {} @@ -1011,12 +999,180 @@ def extract_latent_semantics( print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}") with open( - f"{fn_prefix}{feature_model}-{dim_reduction_method}-{k}-semantics.json", + f"{feature_model}-{dim_reduction_method}-{k}-semantics.json", "w", encoding="utf-8", ) as output_file: json.dump(all_latent_semantics, output_file, ensure_ascii=False) +def extract_latent_semantics_from_sim_matrix( + sim_matrix, + feature_model, + sim_type, + k, + dim_reduction_method, + top_images=None, +): + """ + Extract latent semantics for a given similarity matrix for a given dim_reduction_method, and display the object-semantic weight pairs + + Leave `top_images` blank to display all imageID-weight pairs + """ + + assert sim_type in ["image", "label"], "sim_type should be one of " + str( + ["image", "label"] + ) + assert ( + feature_model in valid_feature_models.values() + ), "feature_model should be one of " + str(list(valid_feature_models.keys())) + assert ( + dim_reduction_method in valid_dim_reduction_methods.keys() + ), "dim_reduction_method should be one of " + str( + list(valid_dim_reduction_methods.keys()) + ) + assert 
len(sim_matrix) == len(sim_matrix[0]), "sim_matrix must be square matrix" + + top_img_str = "" + if top_images is not None: + top_img_str = f" (showing only top {top_images} {sim_type}-weight pairs for each latent semantic)" + + feature_vectors = sim_matrix + feature_ids = list(range(len(sim_matrix))) + + print( + "Applying {} on the given similarity matrix to get {} latent semantics{}...".format( + dim_reduction_method, k, top_img_str + ) + ) + + displayed_latent_semantics = {} + all_latent_semantics = {} + + match valid_dim_reduction_methods[dim_reduction_method]: + # singular value decomposition + # sparse version of SVD to get only k singular values + case 1: + U, S, V_T = svds(feature_vectors, k=k) + + all_latent_semantics = { + "image-semantic": U.tolist(), + "semantics-core": S.tolist(), + "semantic-feature": V_T.tolist(), + } + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=True, + )[:top_images] + for latent_semantic in U.T + ] + + # non-negative matrix factorization + case 2: + # NNMF requires non-negative input data + # so shift the input by subtracting the smallest value + min_value = np.min(feature_vectors) + feature_vectors_shifted = feature_vectors - min_value + + model = NMF( + n_components=k, + init="random", + solver="cd", + alpha_H=0.01, + alpha_W=0.01, + max_iter=10000, + ) + model.fit(feature_vectors_shifted) + + W = model.transform(feature_vectors_shifted) + H = model.components_ + + all_latent_semantics = { + "image-semantic": W.tolist(), + "semantic-feature": H.tolist(), + } + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=True, + )[:top_images] + for latent_semantic in W.T + ] + + # unsupervised LDA to extract topics (Latent 
Dirichlet Allocation) + # Note: LDA takes a bit of time + case 3: + # LDA requires non-negative input data + # so shift the input by subtracting the smallest value + min_value = np.min(feature_vectors) + feature_vectors_shifted = feature_vectors - min_value + + model = LatentDirichletAllocation( + n_components=k, learning_method="online", verbose=4 + ) + model.fit(feature_vectors_shifted) + + # K (k x fd_dim) is the factor matrix for latent semantic-feature pairs + K = model.components_ + # X (4339 x k) is the other factor matrix for image ID-latent semantic pairs + X = model.transform(feature_vectors_shifted) + + all_latent_semantics = { + "image-semantic": X.tolist(), + "semantic-feature": K.tolist(), + } + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=True, + )[:top_images] + for latent_semantic in X.T + ] + + # k-means clustering to reduce to k clusters/dimensions + case 4: + model = KMeans(n_clusters=k, verbose=2).fit(feature_vectors) + CC = model.cluster_centers_ + Y = model.transform(feature_vectors) + + all_latent_semantics = { + "image-semantic": Y.tolist(), + "semantic-feature": list(CC.values()), + } + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=False, + )[:top_images] + for latent_semantic in Y.T + ] + + for idx, latent_semantic in enumerate(displayed_latent_semantics): + print(f"Latent semantic no. 
{idx}") + for obj_id, weight in latent_semantic: + print(f"{sim_type}\t{obj_id}\t-\tWeight\t{weight}") + + # Finally also save sim_matrix + all_latent_semantics["sim-matrix"] = sim_matrix.tolist() + + with open( + f"{sim_type}_sim-{feature_model}-{dim_reduction_method}-{k}-semantics.json", + "w", + encoding="utf-8", + ) as output_file: + json.dump(all_latent_semantics, output_file, ensure_ascii=False) def find_label_label_similarity(fd_collection, feature_model): """ @@ -1039,10 +1195,38 @@ def find_label_label_similarity(fd_collection, feature_model): label_sim_matrix = np.zeros((num_labels, num_labels)) + # Calculate half and fill the other for i in range(num_labels): for j in range(i + 1, num_labels): # Note: lower the value, lower the distance => higher the similarity - label_sim_matrix[i][j] = feature_distance_matches[feature_model]( - np.array(label_mean_vectors[i]), np.array(label_mean_vectors[j]) - ) + label_sim_matrix[i][j] = label_sim_matrix[j][i] = feature_distance_matches[ + feature_model + ](np.array(label_mean_vectors[i]), np.array(label_mean_vectors[j])) return label_sim_matrix + + +def find_image_image_similarity(fd_collection, feature_model): + """ + Calculate similarity between images. 
def find_image_image_similarity(fd_collection, feature_model):
    """
    Calculate similarity between images. Lower values indicate higher similarities

    Parameters:
        fd_collection: MongoDB collection of per-image feature-descriptor documents
                       (each document is presumably keyed by feature-model name — confirm schema)
        feature_model: one of valid_feature_models.values(); selects both the stored
                       vector and the distance function from feature_distance_matches

    Returns:
        (n x n) symmetric numpy array of pairwise distances, zero diagonal.
    """
    assert (
        feature_model in valid_feature_models.values()
    ), "feature_model should be one of " + str(list(valid_feature_models.keys()))

    feature_vectors = [
        np.array(
            img_fds[feature_model]
        ).flatten()  # get the specific feature model's feature vector
        for img_fds in fd_collection.find()  # repeat for all images
    ]
    num_images = len(feature_vectors)
    image_sim_matrix = np.zeros((num_images, num_images))

    # Hoist the loop-invariant distance-function lookup out of the O(n^2) loop.
    distance_fn = feature_distance_matches[feature_model]

    # Calculate half and fill the other (matrix is symmetric, diagonal stays 0)
    for i in range(num_images):
        for j in range(i + 1, num_images):
            # Note: lower the value, lower the distance => higher the similarity
            # (feature_vectors[i] is already an ndarray; no need to re-wrap in np.array)
            image_sim_matrix[i][j] = image_sim_matrix[j][i] = distance_fn(
                feature_vectors[i], feature_vectors[j]
            )
    return image_sim_matrix