{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Latent semantics from label-label similarity\n",
    "\n",
    "Prompts for a feature model, a value of `k` and a dimensionality reduction method, builds the label-label similarity matrix with `find_label_label_similarity`, and hands it to `extract_latent_semantics_from_sim_matrix`.\n",
    "\n",
    "**Known issue (seen in the last saved run: kmeans, 10 latent semantics):** latent semantics 2, 3, 6, 8 and 9 were printed with `nan` weights, and the call then failed inside `utils.py` with `TypeError: Object of type ndarray is not JSON serializable`. Both problems originate outside this notebook; the `nan` rows look like empty k-means clusters - TODO confirm in `utils.py`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "\n",
    "# The star import is the only source of getCollection, valid_feature_models,\n",
    "# valid_dim_reduction_methods, find_label_label_similarity and\n",
    "# extract_latent_semantics_from_sim_matrix used below.\n",
    "from utils import *\n",
    "\n",
    "# NOTE(review): this silences every warning, including numerical ones that\n",
    "# could explain nan weights - consider narrowing the filter.\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune lives here.\n",
    "DB_NAME = \"team_5_mwdb_phase_2\"\n",
    "FD_COLLECTION_NAME = \"fd_collection\"\n",
    "TOP_N_WEIGHTS = 10  # value passed as top_images to the extraction call\n",
    "\n",
    "fd_collection = getCollection(DB_NAME, FD_COLLECTION_NAME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prompt_choice(prompt, valid_options):\n",
    "    \"\"\"Ask the user to pick one key of `valid_options` and return that key.\n",
    "\n",
    "    Args:\n",
    "        prompt: Text shown to the user; the list of valid keys is appended to it.\n",
    "        valid_options: Mapping whose keys are the accepted answers.\n",
    "\n",
    "    Returns:\n",
    "        The chosen key with surrounding whitespace removed.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the answer is not a key of `valid_options`.\n",
    "    \"\"\"\n",
    "    options = list(valid_options.keys())\n",
    "    choice = input(prompt + str(options)).strip()\n",
    "    if choice not in valid_options:\n",
    "        raise ValueError(f\"{choice!r} is not one of {options}\")\n",
    "    return choice"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "selected_feature_model = valid_feature_models[\n",
    "    prompt_choice(\"Enter feature model - one of \", valid_feature_models)\n",
    "]\n",
    "\n",
    "k = int(input(\"Enter value of k: \"))\n",
    "if k < 1:\n",
    "    raise ValueError(\"k should be a positive integer\")\n",
    "\n",
    "# The method is passed on by name (the dictionary key), exactly as typed by the user.\n",
    "selected_dim_reduction_method = prompt_choice(\n",
    "    \"Enter dimensionality reduction method - one of \",\n",
    "    valid_dim_reduction_methods,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extract latent semantics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "label_sim_matrix = find_label_label_similarity(fd_collection, selected_feature_model)\n",
    "\n",
    "extract_latent_semantics_from_sim_matrix(\n",
    "    label_sim_matrix,\n",
    "    selected_feature_model,\n",
    "    \"label\",\n",
    "    k,\n",
    "    selected_dim_reduction_method,\n",
    "    top_images=TOP_N_WEIGHTS,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}