mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 09:24:07 +00:00
Merge branch 'master' of https://github.com/20kaushik02/CSE515_MWDB_Project into task9
This commit is contained in:
commit
950d4237b3
@ -11,3 +11,4 @@ ipython
|
||||
notebook
|
||||
ipykernel
|
||||
python-dotenv
|
||||
tensorly
|
||||
|
||||
52
Phase 2/task_4.ipynb
Normal file
52
Phase 2/task_4.ipynb
Normal file
@ -0,0 +1,52 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from utils import *\n",
|
||||
"warnings.filterwarnings('ignore')\n",
|
||||
"%matplotlib inline\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
64
Phase 2/task_4.py
Normal file
64
Phase 2/task_4.py
Normal file
@ -0,0 +1,64 @@
|
||||
import json
|
||||
import tensorly as tl
|
||||
import numpy as np
|
||||
from pymongo import MongoClient
|
||||
from phase1_mongodb import *
|
||||
|
||||
# MongoDB connection (explicit host/port; previously a default client was
# constructed and immediately discarded — only one client is needed).
client = MongoClient(host="localhost", port=27017)

# Select the database
db = client.Multimedia_Web_DBs

caltechDataset = loadDataset()
# Number of Caltech-101 categories.
num_labels = 101

# Fetch all documents from the collection once, sorted by "_id" ascending.
# (Previously the full collection was fetched twice and the first result
# discarded — one query suffices.)
feature_descriptors = sorted(db.Feature_Descriptors.find({}), key=lambda x: x["_id"], reverse=False)
label_ids = [x["label"] for x in feature_descriptors]
|
||||
|
||||
def compute_cp_decomposition(feature_model, rank):
    """Build a 3-mode (document x feature x label) tensor and CP-decompose it.

    Each selected document's feature vector is written into the slice of its
    label along the third mode; all other label slices stay zero.

    Args:
        feature_model: key of the feature vector to read from each document
            (e.g. 'color_moments', 'hog', ...).
        rank: number of latent semantics (CP rank).

    Returns:
        (weights, factors) as produced by tensorly's parafac with
        normalize_factors=True — factors[0] is the document-mode factor matrix.
    """
    # Only documents with even "_id" are used — presumably a train/eval split;
    # TODO(review): confirm this filter is intentional.
    label_vectors = [(x["label"], x[feature_model]) for x in feature_descriptors if x["_id"] % 2 == 0]

    # Uses the module-level num_labels (101); the previous local of the same
    # value shadowed it redundantly.
    tensor_shape = (len(label_vectors), len(feature_descriptors[0][feature_model]), num_labels)
    tensor = np.zeros(tensor_shape)
    # `idx` instead of `id` (which shadows the builtin).
    for idx, (label, vector) in enumerate(label_vectors):
        tensor[idx, :, label] = vector

    weights, factors = tl.decomposition.parafac(tensor, rank=rank, normalize_factors=True)
    return weights, factors
|
||||
|
||||
|
||||
def main():
    """Task 4: let the user pick a feature model and k, run CP-decomposition
    (parafac) to extract k latent semantics, save them as JSON, and print the
    label-weight pairs sorted by the first latent semantic's weight.
    """
    # Step 4: Perform CP-decomposition (parafac) to extract latent semantics

    # Menu option n maps to features[n - 1].
    features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']

    # User input for feature model to extract.
    print("1: Color moments")
    print("2: HOG")
    # BUG FIX: the menu previously listed Avgpool as 3 and Layer 3 as 4 —
    # the reverse of the `features` list — so choices 3 and 4 selected the
    # wrong model. The labels below now match the list order.
    print("3: Resnet50 Layer 3")
    print("4: Resnet50 Avgpool layer")
    print("5: Resnet50 FC layer")
    feature_model = features[int(input("Select the feature model: ")) - 1]
    k = int(input("Enter k: "))

    weights, factors = compute_cp_decomposition(feature_model, k)

    # NOTE(review): label_ids covers every document, but factors[0] only has
    # rows for the documents actually used in the decomposition; zip silently
    # truncates, so this pairing may be misaligned — verify against
    # compute_cp_decomposition's document filter.
    k_latent_semantics = [
        {"_id": label_id, "semantics": semantics}
        for label_id, semantics in zip(label_ids, factors[0].tolist())
    ]

    # Display copy: same records, sorted by the first latent semantic's weight
    # in descending order.
    k_latent_semantics_display = sorted(
        k_latent_semantics, key=lambda item: item["semantics"][0], reverse=True
    )

    filename = f'{feature_model}-CP-semantics-{k}.json'
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(k_latent_semantics, f, ensure_ascii=False)

    print(k_latent_semantics_display)
|
||||
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -13,7 +13,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -22,159 +22,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Applying kmeans on the given similarity matrix to get 10 latent semantics (showing only top 10 label-weight pairs for each latent semantic)...\n",
|
||||
"Initialized centroids\n",
|
||||
"Iteration 0\n",
|
||||
"Iteration 1\n",
|
||||
"Iteration 2\n",
|
||||
"Iteration 3\n",
|
||||
"Iteration 4\n",
|
||||
"Iteration 5\n",
|
||||
"Iteration 6\n",
|
||||
"Iteration 7\n",
|
||||
"Iteration 8\n",
|
||||
"Iteration 9\n",
|
||||
"Iteration 10\n",
|
||||
"Iteration 11\n",
|
||||
"Iter 11 - Converged\n",
|
||||
"Latent semantic no. 0\n",
|
||||
"label\t84\t-\tWeight\t16.953715652557495\n",
|
||||
"label\t34\t-\tWeight\t17.25164883471016\n",
|
||||
"label\t1\t-\tWeight\t17.379970016799952\n",
|
||||
"label\t72\t-\tWeight\t17.439397073433092\n",
|
||||
"label\t32\t-\tWeight\t17.447297173030393\n",
|
||||
"label\t31\t-\tWeight\t17.448932606262144\n",
|
||||
"label\t40\t-\tWeight\t17.561159943630802\n",
|
||||
"label\t79\t-\tWeight\t17.572813876633166\n",
|
||||
"label\t5\t-\tWeight\t17.721278660592027\n",
|
||||
"label\t56\t-\tWeight\t17.731177291838822\n",
|
||||
"Latent semantic no. 1\n",
|
||||
"label\t84\t-\tWeight\t19.27643729221191\n",
|
||||
"label\t5\t-\tWeight\t19.449814613173483\n",
|
||||
"label\t32\t-\tWeight\t19.684592406270944\n",
|
||||
"label\t63\t-\tWeight\t19.911988624963808\n",
|
||||
"label\t79\t-\tWeight\t19.930151237028223\n",
|
||||
"label\t38\t-\tWeight\t19.948477661871497\n",
|
||||
"label\t89\t-\tWeight\t19.965086791647906\n",
|
||||
"label\t94\t-\tWeight\t19.990956583854018\n",
|
||||
"label\t72\t-\tWeight\t19.99680017871235\n",
|
||||
"label\t45\t-\tWeight\t20.058898160614795\n",
|
||||
"Latent semantic no. 2\n",
|
||||
"label\t0\t-\tWeight\tnan\n",
|
||||
"label\t1\t-\tWeight\tnan\n",
|
||||
"label\t2\t-\tWeight\tnan\n",
|
||||
"label\t3\t-\tWeight\tnan\n",
|
||||
"label\t4\t-\tWeight\tnan\n",
|
||||
"label\t5\t-\tWeight\tnan\n",
|
||||
"label\t6\t-\tWeight\tnan\n",
|
||||
"label\t7\t-\tWeight\tnan\n",
|
||||
"label\t8\t-\tWeight\tnan\n",
|
||||
"label\t9\t-\tWeight\tnan\n",
|
||||
"Latent semantic no. 3\n",
|
||||
"label\t0\t-\tWeight\tnan\n",
|
||||
"label\t1\t-\tWeight\tnan\n",
|
||||
"label\t2\t-\tWeight\tnan\n",
|
||||
"label\t3\t-\tWeight\tnan\n",
|
||||
"label\t4\t-\tWeight\tnan\n",
|
||||
"label\t5\t-\tWeight\tnan\n",
|
||||
"label\t6\t-\tWeight\tnan\n",
|
||||
"label\t7\t-\tWeight\tnan\n",
|
||||
"label\t8\t-\tWeight\tnan\n",
|
||||
"label\t9\t-\tWeight\tnan\n",
|
||||
"Latent semantic no. 4\n",
|
||||
"label\t32\t-\tWeight\t18.607843379925203\n",
|
||||
"label\t89\t-\tWeight\t18.671771165930238\n",
|
||||
"label\t84\t-\tWeight\t18.83858895833768\n",
|
||||
"label\t79\t-\tWeight\t18.84775924713071\n",
|
||||
"label\t55\t-\tWeight\t18.88614269359777\n",
|
||||
"label\t34\t-\tWeight\t18.891433443455583\n",
|
||||
"label\t11\t-\tWeight\t19.034715149675442\n",
|
||||
"label\t63\t-\tWeight\t19.042445031693624\n",
|
||||
"label\t5\t-\tWeight\t19.075471660855772\n",
|
||||
"label\t59\t-\tWeight\t19.096232354525338\n",
|
||||
"Latent semantic no. 5\n",
|
||||
"label\t88\t-\tWeight\t17.332684081151356\n",
|
||||
"label\t100\t-\tWeight\t17.414638052725692\n",
|
||||
"label\t89\t-\tWeight\t17.64193670680817\n",
|
||||
"label\t46\t-\tWeight\t17.663677856892257\n",
|
||||
"label\t5\t-\tWeight\t17.750606635854105\n",
|
||||
"label\t11\t-\tWeight\t17.921812162626082\n",
|
||||
"label\t17\t-\tWeight\t17.99728875058849\n",
|
||||
"label\t64\t-\tWeight\t18.20535869665654\n",
|
||||
"label\t84\t-\tWeight\t18.280826365832894\n",
|
||||
"label\t59\t-\tWeight\t18.48939095974247\n",
|
||||
"Latent semantic no. 6\n",
|
||||
"label\t0\t-\tWeight\tnan\n",
|
||||
"label\t1\t-\tWeight\tnan\n",
|
||||
"label\t2\t-\tWeight\tnan\n",
|
||||
"label\t3\t-\tWeight\tnan\n",
|
||||
"label\t4\t-\tWeight\tnan\n",
|
||||
"label\t5\t-\tWeight\tnan\n",
|
||||
"label\t6\t-\tWeight\tnan\n",
|
||||
"label\t7\t-\tWeight\tnan\n",
|
||||
"label\t8\t-\tWeight\tnan\n",
|
||||
"label\t9\t-\tWeight\tnan\n",
|
||||
"Latent semantic no. 7\n",
|
||||
"label\t59\t-\tWeight\t19.676597202857955\n",
|
||||
"label\t72\t-\tWeight\t19.687934144875545\n",
|
||||
"label\t89\t-\tWeight\t19.830805124280474\n",
|
||||
"label\t90\t-\tWeight\t20.021426354120276\n",
|
||||
"label\t77\t-\tWeight\t20.05776182294002\n",
|
||||
"label\t34\t-\tWeight\t20.058245159709028\n",
|
||||
"label\t70\t-\tWeight\t20.117786048649382\n",
|
||||
"label\t68\t-\tWeight\t20.139598145778074\n",
|
||||
"label\t88\t-\tWeight\t20.185751240083068\n",
|
||||
"label\t38\t-\tWeight\t20.208902223231863\n",
|
||||
"Latent semantic no. 8\n",
|
||||
"label\t0\t-\tWeight\tnan\n",
|
||||
"label\t1\t-\tWeight\tnan\n",
|
||||
"label\t2\t-\tWeight\tnan\n",
|
||||
"label\t3\t-\tWeight\tnan\n",
|
||||
"label\t4\t-\tWeight\tnan\n",
|
||||
"label\t5\t-\tWeight\tnan\n",
|
||||
"label\t6\t-\tWeight\tnan\n",
|
||||
"label\t7\t-\tWeight\tnan\n",
|
||||
"label\t8\t-\tWeight\tnan\n",
|
||||
"label\t9\t-\tWeight\tnan\n",
|
||||
"Latent semantic no. 9\n",
|
||||
"label\t0\t-\tWeight\tnan\n",
|
||||
"label\t1\t-\tWeight\tnan\n",
|
||||
"label\t2\t-\tWeight\tnan\n",
|
||||
"label\t3\t-\tWeight\tnan\n",
|
||||
"label\t4\t-\tWeight\tnan\n",
|
||||
"label\t5\t-\tWeight\tnan\n",
|
||||
"label\t6\t-\tWeight\tnan\n",
|
||||
"label\t7\t-\tWeight\tnan\n",
|
||||
"label\t8\t-\tWeight\tnan\n",
|
||||
"label\t9\t-\tWeight\tnan\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "Object of type ndarray is not JSON serializable",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task_5.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m selected_dim_reduction_method \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m \u001b[39minput\u001b[39m(\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mEnter dimensionality reduction method - one of \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m \u001b[39m+\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mlist\u001b[39m(valid_dim_reduction_methods\u001b[39m.\u001b[39mkeys()))\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=12'>13</a>\u001b[0m )\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m )\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=15'>16</a>\u001b[0m label_sim_matrix \u001b[39m=\u001b[39m find_label_label_similarity(fd_collection,selected_feature_model)\n\u001b[1;32m---> <a 
href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=17'>18</a>\u001b[0m extract_latent_semantics_from_sim_matrix(\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m label_sim_matrix,\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=19'>20</a>\u001b[0m selected_feature_model,\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=20'>21</a>\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mlabel\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=21'>22</a>\u001b[0m k,\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=22'>23</a>\u001b[0m selected_dim_reduction_method,\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=23'>24</a>\u001b[0m top_images\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task_5.ipynb#W6sZmlsZQ%3D%3D?line=24'>25</a>\u001b[0m )\n",
|
||||
"File \u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\utils.py:1193\u001b[0m, in \u001b[0;36mextract_latent_semantics_from_sim_matrix\u001b[1;34m(sim_matrix, feature_model, sim_type, k, dim_reduction_method, top_images)\u001b[0m\n\u001b[0;32m 1187\u001b[0m \u001b[39mfor\u001b[39;00m label \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(num_labels):\n\u001b[0;32m 1188\u001b[0m \u001b[39m# get representative vectors for the label\u001b[39;00m\n\u001b[0;32m 1189\u001b[0m label_mean_vectors\u001b[39m.\u001b[39mappend(\n\u001b[0;32m 1190\u001b[0m calculate_label_representatives(fd_collection, label, feature_model)\n\u001b[0;32m 1191\u001b[0m )\n\u001b[1;32m-> 1193\u001b[0m label_sim_matrix \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mzeros((num_labels, num_labels))\n\u001b[0;32m 1195\u001b[0m \u001b[39m# Calculate half and fill the other\u001b[39;00m\n\u001b[0;32m 1196\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(num_labels):\n",
|
||||
"File \u001b[1;32mc:\\Users\\Pranav\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\json\\__init__.py:179\u001b[0m, in \u001b[0;36mdump\u001b[1;34m(obj, fp, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001b[0m\n\u001b[0;32m 173\u001b[0m iterable \u001b[39m=\u001b[39m \u001b[39mcls\u001b[39m(skipkeys\u001b[39m=\u001b[39mskipkeys, ensure_ascii\u001b[39m=\u001b[39mensure_ascii,\n\u001b[0;32m 174\u001b[0m check_circular\u001b[39m=\u001b[39mcheck_circular, allow_nan\u001b[39m=\u001b[39mallow_nan, indent\u001b[39m=\u001b[39mindent,\n\u001b[0;32m 175\u001b[0m separators\u001b[39m=\u001b[39mseparators,\n\u001b[0;32m 176\u001b[0m default\u001b[39m=\u001b[39mdefault, sort_keys\u001b[39m=\u001b[39msort_keys, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkw)\u001b[39m.\u001b[39miterencode(obj)\n\u001b[0;32m 177\u001b[0m \u001b[39m# could accelerate with writelines in some versions of Python, at\u001b[39;00m\n\u001b[0;32m 178\u001b[0m \u001b[39m# a debuggability cost\u001b[39;00m\n\u001b[1;32m--> 179\u001b[0m \u001b[39mfor\u001b[39;49;00m chunk \u001b[39min\u001b[39;49;00m iterable:\n\u001b[0;32m 180\u001b[0m fp\u001b[39m.\u001b[39;49mwrite(chunk)\n",
|
||||
"File \u001b[1;32mc:\\Users\\Pranav\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\json\\encoder.py:432\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode\u001b[1;34m(o, _current_indent_level)\u001b[0m\n\u001b[0;32m 430\u001b[0m \u001b[39myield from\u001b[39;00m _iterencode_list(o, _current_indent_level)\n\u001b[0;32m 431\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(o, \u001b[39mdict\u001b[39m):\n\u001b[1;32m--> 432\u001b[0m \u001b[39myield from\u001b[39;00m _iterencode_dict(o, _current_indent_level)\n\u001b[0;32m 433\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 434\u001b[0m \u001b[39mif\u001b[39;00m markers \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
|
||||
"File \u001b[1;32mc:\\Users\\Pranav\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\json\\encoder.py:406\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode_dict\u001b[1;34m(dct, _current_indent_level)\u001b[0m\n\u001b[0;32m 404\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 405\u001b[0m chunks \u001b[39m=\u001b[39m _iterencode(value, _current_indent_level)\n\u001b[1;32m--> 406\u001b[0m \u001b[39myield from\u001b[39;00m chunks\n\u001b[0;32m 407\u001b[0m \u001b[39mif\u001b[39;00m newline_indent \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 408\u001b[0m _current_indent_level \u001b[39m-\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n",
|
||||
"File \u001b[1;32mc:\\Users\\Pranav\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\json\\encoder.py:326\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode_list\u001b[1;34m(lst, _current_indent_level)\u001b[0m\n\u001b[0;32m 324\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 325\u001b[0m chunks \u001b[39m=\u001b[39m _iterencode(value, _current_indent_level)\n\u001b[1;32m--> 326\u001b[0m \u001b[39myield from\u001b[39;00m chunks\n\u001b[0;32m 327\u001b[0m \u001b[39mif\u001b[39;00m newline_indent \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 328\u001b[0m _current_indent_level \u001b[39m-\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n",
|
||||
"File \u001b[1;32mc:\\Users\\Pranav\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\json\\encoder.py:439\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode\u001b[1;34m(o, _current_indent_level)\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mCircular reference detected\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 438\u001b[0m markers[markerid] \u001b[39m=\u001b[39m o\n\u001b[1;32m--> 439\u001b[0m o \u001b[39m=\u001b[39m _default(o)\n\u001b[0;32m 440\u001b[0m \u001b[39myield from\u001b[39;00m _iterencode(o, _current_indent_level)\n\u001b[0;32m 441\u001b[0m \u001b[39mif\u001b[39;00m markers \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
|
||||
"File \u001b[1;32mc:\\Users\\Pranav\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\json\\encoder.py:180\u001b[0m, in \u001b[0;36mJSONEncoder.default\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdefault\u001b[39m(\u001b[39mself\u001b[39m, o):\n\u001b[0;32m 162\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Implement this method in a subclass such that it returns\u001b[39;00m\n\u001b[0;32m 163\u001b[0m \u001b[39m a serializable object for ``o``, or calls the base implementation\u001b[39;00m\n\u001b[0;32m 164\u001b[0m \u001b[39m (to raise a ``TypeError``).\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 178\u001b[0m \n\u001b[0;32m 179\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 180\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTypeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mObject of type \u001b[39m\u001b[39m{\u001b[39;00mo\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 181\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mis not JSON serializable\u001b[39m\u001b[39m'\u001b[39m)\n",
|
||||
"\u001b[1;31mTypeError\u001b[0m: Object of type ndarray is not JSON serializable"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"selected_feature_model = valid_feature_models[\n",
|
||||
" str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
|
||||
@ -200,7 +50,7 @@
|
||||
" k,\n",
|
||||
" selected_dim_reduction_method,\n",
|
||||
" top_images=10,\n",
|
||||
")\n"
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@ -18,6 +18,8 @@ import torchvision.transforms as transforms
|
||||
from torchvision.datasets import Caltech101
|
||||
from torchvision.models import resnet50, ResNet50_Weights
|
||||
|
||||
import tensorly as tl
|
||||
|
||||
# OS and env
|
||||
import json
|
||||
from os import getenv
|
||||
@ -58,6 +60,8 @@ def loadDataset(dataset):
|
||||
|
||||
|
||||
dataset = loadDataset(Caltech101)
|
||||
NUM_LABELS = 101
|
||||
NUM_IMAGES = 4339
|
||||
|
||||
|
||||
class GridPartition:
|
||||
@ -1054,7 +1058,7 @@ def extract_latent_semantics_from_sim_matrix(
|
||||
"semantic-feature": V_T.tolist(),
|
||||
}
|
||||
|
||||
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||
# for each latent semantic, sort object-weight pairs by weights in descending order
|
||||
displayed_latent_semantics = [
|
||||
sorted(
|
||||
list(zip(feature_ids, latent_semantic)),
|
||||
@ -1089,7 +1093,7 @@ def extract_latent_semantics_from_sim_matrix(
|
||||
"semantic-feature": H.tolist(),
|
||||
}
|
||||
|
||||
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||
# for each latent semantic, sort object-weight pairs by weights in descending order
|
||||
displayed_latent_semantics = [
|
||||
sorted(
|
||||
list(zip(feature_ids, latent_semantic)),
|
||||
@ -1122,7 +1126,7 @@ def extract_latent_semantics_from_sim_matrix(
|
||||
"semantic-feature": K.tolist(),
|
||||
}
|
||||
|
||||
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||
# for each latent semantic, sort object-weight pairs by weights in descending order
|
||||
displayed_latent_semantics = [
|
||||
sorted(
|
||||
list(zip(feature_ids, latent_semantic)),
|
||||
@ -1140,10 +1144,10 @@ def extract_latent_semantics_from_sim_matrix(
|
||||
|
||||
all_latent_semantics = {
|
||||
"image-semantic": Y.tolist(),
|
||||
"semantic-feature": list(CC.values()),
|
||||
"semantic-feature": CC.tolist(),
|
||||
}
|
||||
|
||||
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||
# for each latent semantic, sort object-weight pairs by weights in ascending order
|
||||
displayed_latent_semantics = [
|
||||
sorted(
|
||||
list(zip(feature_ids, latent_semantic)),
|
||||
@ -1152,9 +1156,8 @@ def extract_latent_semantics_from_sim_matrix(
|
||||
)[:top_images]
|
||||
for latent_semantic in Y.T
|
||||
]
|
||||
|
||||
if valid_dim_reduction_methods[dim_reduction_method] == 4:
|
||||
print("Note: for K-Means we display distances, in ascending order")
|
||||
|
||||
for idx, latent_semantic in enumerate(displayed_latent_semantics):
|
||||
print(f"Latent semantic no. {idx}")
|
||||
for obj_id, weight in latent_semantic:
|
||||
@ -1185,19 +1188,17 @@ def find_label_label_similarity(fd_collection, feature_model):
|
||||
label_sim_matrix = []
|
||||
label_mean_vectors = []
|
||||
|
||||
num_labels = 101
|
||||
|
||||
for label in range(num_labels):
|
||||
for label in range(NUM_LABELS):
|
||||
# get representative vectors for the label
|
||||
label_mean_vectors.append(
|
||||
calculate_label_representatives(fd_collection, label, feature_model)
|
||||
)
|
||||
|
||||
label_sim_matrix = np.zeros((num_labels, num_labels))
|
||||
label_sim_matrix = np.zeros((NUM_LABELS, NUM_LABELS))
|
||||
|
||||
# Calculate half and fill the other
|
||||
for i in range(num_labels):
|
||||
for j in range(i + 1, num_labels):
|
||||
for i in range(NUM_LABELS):
|
||||
for j in range(i + 1, NUM_LABELS):
|
||||
# Note: lower the value, lower the distance => higher the similarity
|
||||
label_sim_matrix[i][j] = label_sim_matrix[j][i] = feature_distance_matches[
|
||||
feature_model
|
||||
@ -1219,14 +1220,30 @@ def find_image_image_similarity(fd_collection, feature_model):
|
||||
).flatten() # get the specific feature model's feature vector
|
||||
for img_fds in fd_collection.find() # repeat for all images
|
||||
]
|
||||
num_images = len(feature_vectors)
|
||||
image_sim_matrix = np.zeros((num_images, num_images))
|
||||
image_sim_matrix = np.zeros((NUM_IMAGES, NUM_IMAGES))
|
||||
|
||||
# Calculate half and fill the other
|
||||
for i in range(num_images):
|
||||
for j in range(i + 1, num_images):
|
||||
for i in range(NUM_IMAGES):
|
||||
for j in range(i + 1, NUM_IMAGES):
|
||||
# Note: lower the value, lower the distance => higher the similarity
|
||||
image_sim_matrix[i][j] = image_sim_matrix[j][i] = feature_distance_matches[
|
||||
feature_model
|
||||
](np.array(feature_vectors[i]), np.array(feature_vectors[j]))
|
||||
return image_sim_matrix
|
||||
|
||||
def compute_cp_decomposition(fd_collection, feature_model, rank):
    """Build a 3-mode (image x feature x label) data tensor from the stored
    feature descriptors and factorize it via CP (PARAFAC) decomposition.

    Each image's feature vector is placed in the slice of its true label
    along the third mode; every other label slice stays zero.

    Args:
        fd_collection: MongoDB collection holding one document per image,
            with the feature vector under `feature_model` and the label
            under "true_label".
        feature_model: one of valid_feature_models' values.
        rank: CP rank (number of latent semantics).

    Returns:
        (weights_tensor, factor_matrices) from tensorly's parafac with
        normalize_factors=True.
    """
    assert (
        feature_model in valid_feature_models.values()
    ), "feature_model should be one of " + str(list(valid_feature_models.keys()))

    all_images = list(fd_collection.find())

    # (images, features, labels) — sizes from module-level constants; assumes
    # the collection holds exactly NUM_IMAGES documents (TODO confirm).
    data_tensor_shape = (NUM_IMAGES, len(all_images[0][feature_model]), NUM_LABELS)
    data_tensor = np.zeros(data_tensor_shape)
    # `img_idx` instead of `id`, which shadowed the builtin.
    for img_idx in range(NUM_IMAGES):
        label = all_images[img_idx]["true_label"]
        data_tensor[img_idx, :, label] = all_images[img_idx][feature_model]

    weights_tensor, factor_matrices = tl.decomposition.parafac(
        data_tensor, rank=rank, normalize_factors=True
    )
    return weights_tensor, factor_matrices
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user