pranavbrkr 2023-10-13 11:32:41 -07:00
commit 950d4237b3
5 changed files with 158 additions and 174 deletions

requirements.txt

@@ -11,3 +11,4 @@ ipython
 notebook
 ipykernel
 python-dotenv
+tensorly

Phase 2/task_4.ipynb (new file, 52 lines)

@@ -0,0 +1,52 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils import *\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
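The notebook's setup cell calls a getCollection helper, presumably pulled in by the wildcard import from utils. A minimal sketch of what such a helper plausibly does, assuming a local MongoDB instance; the host, port, and body here are assumptions, not the project's confirmed implementation:

from pymongo import MongoClient

def getCollection(db_name, collection_name):
    # Hypothetical sketch: connect to a local MongoDB server and hand back
    # the named collection; the real helper in utils.py may instead read
    # connection settings from the environment (python-dotenv is a dependency).
    client = MongoClient(host="localhost", port=27017)
    return client[db_name][collection_name]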

Phase 2/task_4.py (new file, 64 lines)

@@ -0,0 +1,64 @@
import json
import tensorly as tl
import numpy as np
from pymongo import MongoClient
from phase1_mongodb import *

# Connect to the local MongoDB instance and select the database
client = MongoClient(host="localhost", port=27017)
db = client.Multimedia_Web_DBs

caltechDataset = loadDataset()
num_labels = 101

# Fetch all documents from the collection, sorted by "_id" in ascending order
feature_descriptors = sorted(db.Feature_Descriptors.find({}), key=lambda x: x["_id"])
label_ids = [x["label"] for x in feature_descriptors]


def compute_cp_decomposition(feature_model, rank):
    label_vectors = [
        (x["label"], x[feature_model])
        for x in feature_descriptors
        if x["_id"] % 2 == 0
    ]
    # Build a three-mode (image, feature, label) tensor: each image's feature
    # vector occupies the slice belonging to its label
    tensor_shape = (len(label_vectors), len(feature_descriptors[0][feature_model]), num_labels)
    tensor = np.zeros(tensor_shape)
    for idx in range(len(label_vectors)):
        label = label_vectors[idx][0]
        tensor[idx, :, label] = label_vectors[idx][1]
    # CP-decompose (PARAFAC) the tensor into `rank` components
    weights, factors = tl.decomposition.parafac(tensor, rank=rank, normalize_factors=True)
    return weights, factors


def main():
    # Step 4: Perform CP-decomposition (parafac) to extract latent semantics
    features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']
    # User input for feature model to extract
    print("1: Color moments")
    print("2: HOG")
    print("3: Resnet50 Layer 3")
    print("4: Resnet50 Avgpool layer")
    print("5: Resnet50 FC layer")
    feature_model = features[int(input("Select the feature model: ")) - 1]
    k = int(input("Enter k: "))

    weights, factors = compute_cp_decomposition(feature_model, k)
    # factors[0] is the image-mode factor matrix: one k-dimensional latent
    # vector per image
    k_latent_semantics = list(zip(label_ids, factors[0].tolist()))
    # For display, sort by the first latent-semantic weight in descending order
    k_latent_semantics_display = sorted(k_latent_semantics, key=lambda x: x[1][0], reverse=True)
    k_latent_semantics_display = [{"_id": item[0], "semantics": item[1]} for item in k_latent_semantics_display]

    k_latent_semantics = [{"_id": item[0], "semantics": item[1]} for item in k_latent_semantics]
    filename = f'{feature_model}-CP-semantics-{k}.json'
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(k_latent_semantics, f, ensure_ascii=False)
    print(k_latent_semantics_display)


if __name__ == "__main__":
    main()
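As a quick sanity check on what tl.decomposition.parafac returns in task_4.py, here is a small self-contained sketch on a synthetic random tensor (illustrative shapes only, not project data). With normalize_factors=True, parafac yields a weights vector of length rank plus one factor matrix per tensor mode, each with rank columns; factors[0] is the image-mode matrix whose rows become the latent semantics the script writes to JSON.

import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac

# Synthetic stand-in for the (images, features, labels) data tensor
tensor = tl.tensor(np.random.rand(20, 8, 5))
weights, factors = parafac(tensor, rank=3, normalize_factors=True)

print(weights.shape)           # (3,): one weight per rank-1 component
for mode, factor in enumerate(factors):
    print(mode, factor.shape)  # (20, 3), (8, 3), (5, 3): one factor matrix per mode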

Phase 2/task_5.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -13,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -22,159 +22,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Applying kmeans on the given similarity matrix to get 10 latent semantics (showing only top 10 label-weight pairs for each latent semantic)...\n",
-      "Initialized centroids\n",
-      "Iteration 0\n",
-      "Iteration 1\n",
-      "Iteration 2\n",
-      "Iteration 3\n",
-      "Iteration 4\n",
-      "Iteration 5\n",
-      "Iteration 6\n",
-      "Iteration 7\n",
-      "Iteration 8\n",
-      "Iteration 9\n",
-      "Iteration 10\n",
-      "Iteration 11\n",
-      "Iter 11 - Converged\n",
-      "Latent semantic no. 0\n",
-      "label\t84\t-\tWeight\t16.953715652557495\n",
-      "label\t34\t-\tWeight\t17.25164883471016\n",
-      "label\t1\t-\tWeight\t17.379970016799952\n",
-      "label\t72\t-\tWeight\t17.439397073433092\n",
-      "label\t32\t-\tWeight\t17.447297173030393\n",
-      "label\t31\t-\tWeight\t17.448932606262144\n",
-      "label\t40\t-\tWeight\t17.561159943630802\n",
-      "label\t79\t-\tWeight\t17.572813876633166\n",
-      "label\t5\t-\tWeight\t17.721278660592027\n",
-      "label\t56\t-\tWeight\t17.731177291838822\n",
-      "Latent semantic no. 1\n",
-      "label\t84\t-\tWeight\t19.27643729221191\n",
-      "label\t5\t-\tWeight\t19.449814613173483\n",
-      "label\t32\t-\tWeight\t19.684592406270944\n",
-      "label\t63\t-\tWeight\t19.911988624963808\n",
-      "label\t79\t-\tWeight\t19.930151237028223\n",
-      "label\t38\t-\tWeight\t19.948477661871497\n",
-      "label\t89\t-\tWeight\t19.965086791647906\n",
-      "label\t94\t-\tWeight\t19.990956583854018\n",
-      "label\t72\t-\tWeight\t19.99680017871235\n",
-      "label\t45\t-\tWeight\t20.058898160614795\n",
-      "Latent semantic no. 2\n",
-      "label\t0\t-\tWeight\tnan\n",
-      "label\t1\t-\tWeight\tnan\n",
-      "label\t2\t-\tWeight\tnan\n",
-      "label\t3\t-\tWeight\tnan\n",
-      "label\t4\t-\tWeight\tnan\n",
-      "label\t5\t-\tWeight\tnan\n",
-      "label\t6\t-\tWeight\tnan\n",
-      "label\t7\t-\tWeight\tnan\n",
-      "label\t8\t-\tWeight\tnan\n",
-      "label\t9\t-\tWeight\tnan\n",
-      "Latent semantic no. 3\n",
-      "label\t0\t-\tWeight\tnan\n",
-      "label\t1\t-\tWeight\tnan\n",
-      "label\t2\t-\tWeight\tnan\n",
-      "label\t3\t-\tWeight\tnan\n",
-      "label\t4\t-\tWeight\tnan\n",
-      "label\t5\t-\tWeight\tnan\n",
-      "label\t6\t-\tWeight\tnan\n",
-      "label\t7\t-\tWeight\tnan\n",
-      "label\t8\t-\tWeight\tnan\n",
-      "label\t9\t-\tWeight\tnan\n",
-      "Latent semantic no. 4\n",
-      "label\t32\t-\tWeight\t18.607843379925203\n",
-      "label\t89\t-\tWeight\t18.671771165930238\n",
-      "label\t84\t-\tWeight\t18.83858895833768\n",
-      "label\t79\t-\tWeight\t18.84775924713071\n",
-      "label\t55\t-\tWeight\t18.88614269359777\n",
-      "label\t34\t-\tWeight\t18.891433443455583\n",
-      "label\t11\t-\tWeight\t19.034715149675442\n",
-      "label\t63\t-\tWeight\t19.042445031693624\n",
-      "label\t5\t-\tWeight\t19.075471660855772\n",
-      "label\t59\t-\tWeight\t19.096232354525338\n",
-      "Latent semantic no. 5\n",
-      "label\t88\t-\tWeight\t17.332684081151356\n",
-      "label\t100\t-\tWeight\t17.414638052725692\n",
-      "label\t89\t-\tWeight\t17.64193670680817\n",
-      "label\t46\t-\tWeight\t17.663677856892257\n",
-      "label\t5\t-\tWeight\t17.750606635854105\n",
-      "label\t11\t-\tWeight\t17.921812162626082\n",
-      "label\t17\t-\tWeight\t17.99728875058849\n",
-      "label\t64\t-\tWeight\t18.20535869665654\n",
-      "label\t84\t-\tWeight\t18.280826365832894\n",
-      "label\t59\t-\tWeight\t18.48939095974247\n",
-      "Latent semantic no. 6\n",
-      "label\t0\t-\tWeight\tnan\n",
-      "label\t1\t-\tWeight\tnan\n",
-      "label\t2\t-\tWeight\tnan\n",
-      "label\t3\t-\tWeight\tnan\n",
-      "label\t4\t-\tWeight\tnan\n",
-      "label\t5\t-\tWeight\tnan\n",
-      "label\t6\t-\tWeight\tnan\n",
-      "label\t7\t-\tWeight\tnan\n",
-      "label\t8\t-\tWeight\tnan\n",
-      "label\t9\t-\tWeight\tnan\n",
-      "Latent semantic no. 7\n",
-      "label\t59\t-\tWeight\t19.676597202857955\n",
-      "label\t72\t-\tWeight\t19.687934144875545\n",
-      "label\t89\t-\tWeight\t19.830805124280474\n",
-      "label\t90\t-\tWeight\t20.021426354120276\n",
-      "label\t77\t-\tWeight\t20.05776182294002\n",
-      "label\t34\t-\tWeight\t20.058245159709028\n",
-      "label\t70\t-\tWeight\t20.117786048649382\n",
-      "label\t68\t-\tWeight\t20.139598145778074\n",
-      "label\t88\t-\tWeight\t20.185751240083068\n",
-      "label\t38\t-\tWeight\t20.208902223231863\n",
-      "Latent semantic no. 8\n",
-      "label\t0\t-\tWeight\tnan\n",
-      "label\t1\t-\tWeight\tnan\n",
-      "label\t2\t-\tWeight\tnan\n",
-      "label\t3\t-\tWeight\tnan\n",
-      "label\t4\t-\tWeight\tnan\n",
-      "label\t5\t-\tWeight\tnan\n",
-      "label\t6\t-\tWeight\tnan\n",
-      "label\t7\t-\tWeight\tnan\n",
-      "label\t8\t-\tWeight\tnan\n",
-      "label\t9\t-\tWeight\tnan\n",
-      "Latent semantic no. 9\n",
-      "label\t0\t-\tWeight\tnan\n",
-      "label\t1\t-\tWeight\tnan\n",
-      "label\t2\t-\tWeight\tnan\n",
-      "label\t3\t-\tWeight\tnan\n",
-      "label\t4\t-\tWeight\tnan\n",
-      "label\t5\t-\tWeight\tnan\n",
-      "label\t6\t-\tWeight\tnan\n",
-      "label\t7\t-\tWeight\tnan\n",
-      "label\t8\t-\tWeight\tnan\n",
-      "label\t9\t-\tWeight\tnan\n"
-     ]
-    },
-    {
-     "ename": "TypeError",
-     "evalue": "Object of type ndarray is not JSON serializable",
-     "output_type": "error",
-     "traceback": [ ...ANSI-escaped traceback: the TypeError is raised by json.dump, called from extract_latent_semantics_from_sim_matrix in Phase 2/utils.py, when it is handed a raw NumPy ndarray... ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "selected_feature_model = valid_feature_models[\n",
     "    str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
@@ -200,7 +50,7 @@
     "    k,\n",
     "    selected_dim_reduction_method,\n",
     "    top_images=10,\n",
-    ")\n"
+    ")"
    ]
   },
   {
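The error output deleted above comes from handing raw NumPy arrays to json.dump. A minimal sketch of the standard fix (variable names here are illustrative; the project instead calls .tolist() on each array before serializing, as the utils.py diff below shows):

import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    # Fall back to .tolist() for ndarrays so json.dump can serialize them
    def default(self, o):
        if isinstance(o, np.ndarray):
            return o.tolist()
        return super().default(o)

latent_semantics = {"image-semantic": np.eye(3)}
with open("semantics.json", "w", encoding="utf-8") as f:
    json.dump(latent_semantics, f, ensure_ascii=False, cls=NumpyEncoder)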

Phase 2/utils.py

@@ -18,6 +18,8 @@ import torchvision.transforms as transforms
 from torchvision.datasets import Caltech101
 from torchvision.models import resnet50, ResNet50_Weights
 
+import tensorly as tl
+
 # OS and env
 import json
 from os import getenv
@@ -58,6 +60,8 @@ def loadDataset(dataset):
 dataset = loadDataset(Caltech101)
 
+NUM_LABELS = 101
+NUM_IMAGES = 4339
 
 class GridPartition:
@@ -1054,7 +1058,7 @@ def extract_latent_semantics_from_sim_matrix(
             "semantic-feature": V_T.tolist(),
         }
-        # for each latent semantic, sort imageID-weight pairs by weights in descending order
+        # for each latent semantic, sort object-weight pairs by weights in descending order
         displayed_latent_semantics = [
             sorted(
                 list(zip(feature_ids, latent_semantic)),
@@ -1089,7 +1093,7 @@ def extract_latent_semantics_from_sim_matrix(
             "semantic-feature": H.tolist(),
         }
-        # for each latent semantic, sort imageID-weight pairs by weights in descending order
+        # for each latent semantic, sort object-weight pairs by weights in descending order
         displayed_latent_semantics = [
             sorted(
                 list(zip(feature_ids, latent_semantic)),
@@ -1122,7 +1126,7 @@ def extract_latent_semantics_from_sim_matrix(
             "semantic-feature": K.tolist(),
         }
-        # for each latent semantic, sort imageID-weight pairs by weights in descending order
+        # for each latent semantic, sort object-weight pairs by weights in descending order
         displayed_latent_semantics = [
             sorted(
                 list(zip(feature_ids, latent_semantic)),
@@ -1140,10 +1144,10 @@ def extract_latent_semantics_from_sim_matrix(
         all_latent_semantics = {
             "image-semantic": Y.tolist(),
-            "semantic-feature": list(CC.values()),
+            "semantic-feature": CC.tolist(),
         }
-        # for each latent semantic, sort imageID-weight pairs by weights in descending order
+        # for each latent semantic, sort object-weight pairs by weights in ascending order
         displayed_latent_semantics = [
             sorted(
                 list(zip(feature_ids, latent_semantic)),
@@ -1152,9 +1156,8 @@ def extract_latent_semantics_from_sim_matrix(
             )[:top_images]
             for latent_semantic in Y.T
         ]
-        if valid_dim_reduction_methods[dim_reduction_method] == 4:
-            print("Note: for K-Means we display distances, in ascending order")
+        print("Note: for K-Means we display distances, in ascending order")
     for idx, latent_semantic in enumerate(displayed_latent_semantics):
         print(f"Latent semantic no. {idx}")
         for obj_id, weight in latent_semantic:
@@ -1185,19 +1188,17 @@ def find_label_label_similarity(fd_collection, feature_model):
     label_sim_matrix = []
     label_mean_vectors = []
-    num_labels = 101
-    for label in range(num_labels):
+    for label in range(NUM_LABELS):
         # get representative vectors for the label
         label_mean_vectors.append(
             calculate_label_representatives(fd_collection, label, feature_model)
         )
-    label_sim_matrix = np.zeros((num_labels, num_labels))
+    label_sim_matrix = np.zeros((NUM_LABELS, NUM_LABELS))
     # Calculate half and fill the other
-    for i in range(num_labels):
-        for j in range(i + 1, num_labels):
+    for i in range(NUM_LABELS):
+        for j in range(i + 1, NUM_LABELS):
             # Note: lower the value, lower the distance => higher the similarity
             label_sim_matrix[i][j] = label_sim_matrix[j][i] = feature_distance_matches[
                 feature_model
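The "calculate half and fill the other" pattern in this function (and in find_image_image_similarity below) exploits the symmetry of the distance matrix: each pair is computed once and mirrored across the diagonal. A toy sketch of the same idea, with plain Euclidean distance standing in for the project's per-model distance functions (all names here are illustrative):

import numpy as np

vectors = np.random.rand(4, 8)  # toy stand-in for label representative vectors
n = len(vectors)
sim = np.zeros((n, n))
for i in range(n):
    for j in range(i + 1, n):
        # compute each pairwise distance once and mirror it across the diagonal
        sim[i][j] = sim[j][i] = np.linalg.norm(vectors[i] - vectors[j])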
@@ -1219,14 +1220,30 @@ def find_image_image_similarity(fd_collection, feature_model):
         ).flatten()  # get the specific feature model's feature vector
         for img_fds in fd_collection.find()  # repeat for all images
     ]
-    num_images = len(feature_vectors)
-    image_sim_matrix = np.zeros((num_images, num_images))
+    image_sim_matrix = np.zeros((NUM_IMAGES, NUM_IMAGES))
     # Calculate half and fill the other
-    for i in range(num_images):
-        for j in range(i + 1, num_images):
+    for i in range(NUM_IMAGES):
+        for j in range(i + 1, NUM_IMAGES):
             # Note: lower the value, lower the distance => higher the similarity
             image_sim_matrix[i][j] = image_sim_matrix[j][i] = feature_distance_matches[
                 feature_model
             ](np.array(feature_vectors[i]), np.array(feature_vectors[j]))
     return image_sim_matrix
+
+
+def compute_cp_decomposition(fd_collection, feature_model, rank):
+    assert (
+        feature_model in valid_feature_models.values()
+    ), "feature_model should be one of " + str(list(valid_feature_models.keys()))
+    all_images = list(fd_collection.find())
+    # Build a three-mode data tensor of shape (images, features, labels)
+    data_tensor_shape = (NUM_IMAGES, len(all_images[0][feature_model]), NUM_LABELS)
+    data_tensor = np.zeros(data_tensor_shape)
+    for idx in range(NUM_IMAGES):
+        # each image's feature vector occupies the slice of its true label
+        label = all_images[idx]["true_label"]
+        data_tensor[idx, :, label] = all_images[idx][feature_model]
+    # CP-decompose (PARAFAC) the data tensor into `rank` components
+    weights_tensor, factor_matrices = tl.decomposition.parafac(
+        data_tensor, rank=rank, normalize_factors=True
+    )
+    return weights_tensor, factor_matrices
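A hedged usage sketch for the new compute_cp_decomposition helper, assuming a connected fd_collection handle like the one the task_4 notebook obtains via getCollection, and a feature-model name from valid_feature_models. With normalize_factors=True the component magnitudes live in weights_tensor, and the three factor matrices correspond to the image, feature, and label modes of the data tensor:

# Illustrative call; the collection handle and the model name are assumptions
weights_tensor, factor_matrices = compute_cp_decomposition(
    fd_collection, "color_moments", rank=5
)
image_factors = factor_matrices[0]    # shape (NUM_IMAGES, 5): per-image latent vectors
feature_factors = factor_matrices[1]  # shape (feature vector length, 5)
label_factors = factor_matrices[2]    # shape (NUM_LABELS, 5): per-label latent vectors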