diff --git a/Phase 2/task6.ipynb b/Phase 2/task6.ipynb new file mode 100644 index 0000000..8c08a17 --- /dev/null +++ b/Phase 2/task6.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import math\n", + "from pymongo import MongoClient\n", + "import scipy\n", + "import numpy as np\n", + "from sklearn.decomposition import NMF\n", + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "from sklearn.cluster import KMeans" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = MongoClient()\n", + "client = MongoClient(host = \"localhost\", port = 27017)\n", + "\n", + "# Select the database\n", + "db = client.Multimedia_Web_DBs\n", + "\n", + "# Fetch all documents from the collection and then sort them by \"_id\"\n", + "feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n", + "feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "def extractKLatentSemantics(k, image_sim_matrix, feature_model, dim_reduction):\n", + "\n", + " feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + " feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + "\n", + " filename = 'ls4.json'\n", + "\n", + " match dim_reduction:\n", + "\n", + " case 1:\n", + " U, S, Vh = scipy.sparse.linalg.svds(np.array(image_sim_matrix), k=k)\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + "\n", + " case 2:\n", + " model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n", + " min_value = np.min(image_sim_matrix)\n", + " 
feature_vectors_shifted = image_sim_matrix - min_value\n", + " U = model.fit_transform(np.array(feature_vectors_shifted))\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + "\n", + " case 3:\n", + " U = LinearDiscriminantAnalysis(n_components = k).fit_transform(image_sim_matrix, feature_labels)\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + "\n", + " case 4:\n", + " kmeans = KMeans(n_clusters = k)\n", + " kmeans.fit(image_sim_matrix)\n", + " U = kmeans.transform(image_sim_matrix)\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + " \n", + " k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n", + " with open(filename, 'w', encoding='utf-8') as f:\n", + " json.dump(k_latent_semantics, f, ensure_ascii = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def findImageImageSimMatrix(feature_model):\n", + " \n", + " feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + "\n", + " n = len(feature_vectors)\n", + "\n", + " image_sim_matrix = np.zeros((n, n))\n", + "\n", + " for i in range(n):\n", + " for j in range(i + 1, n):\n", + "\n", + " match feature_model:\n", + "\n", + " case \"color_moments\":\n", + " image_sim_matrix[i][j] = image_sim_matrix[j][i] = math.dist(feature_vectors[i], feature_vectors[j])\n", + " \n", + " case \"hog\":\n", + " image_sim_matrix[i][j] = image_sim_matrix[j][i] = (np.dot(feature_vectors[i], feature_vectors[j]) / (np.linalg.norm(feature_vectors[i]) * np.linalg.norm(feature_vectors[j])))\n", + "\n", + " case \"avgpool\" | \"layer3\" | \"fc\":\n", + " image_sim_matrix[i][j] = image_sim_matrix[j][i] = scipy.stats.pearsonr(feature_vectors[i], feature_vectors[j]).statistic\n", + " \n", + " 
return image_sim_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def main():\n", + "\n", + " k = int(input(\"Enter k: \"))\n", + "\n", + " features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n", + "\n", + " # User input for feature model to extract\n", + " print(\"\\n1: Color moments\")\n", + " print(\"2: HOG\")\n", + " print(\"3: Resnet50 Avgpool layer\")\n", + " print(\"4: Resnet50 Layer 3\")\n", + " print(\"5: Resnet50 FC layer\")\n", + " feature_model = features[int(input(\"Select the feature model: \")) - 1]\n", + "\n", + " print(\"\\n1. SVD\")\n", + " print(\"2. NNMF\")\n", + " print(\"3. LDA\")\n", + " print(\"4. k-means\")\n", + " dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n", + "\n", + " image_sim_matrix = findImageImageSimMatrix(feature_model)\n", + " print(image_sim_matrix)\n", + "\n", + " extractKLatentSemantics(k, image_sim_matrix, feature_model, dim_reduction)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Phase 2/task_3.ipynb b/Phase 2/task_3.ipynb index 34c0ca1..b8fb165 100644 --- a/Phase 2/task_3.ipynb +++ b/Phase 2/task_3.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ 
@@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -31,115 +31,115 @@ "text": [ "Applying svd on the cm_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n", "Latent semantic no. 0\n", - "Image_ID\t7654\t-\tWeight\t0.08162189274964751\n", - "Image_ID\t8634\t-\tWeight\t0.06673589485778451\n", - "Image_ID\t5740\t-\tWeight\t0.060058821201972104\n", - "Image_ID\t6106\t-\tWeight\t0.05306661393931607\n", - "Image_ID\t5456\t-\tWeight\t0.05170171570330845\n", - "Image_ID\t7814\t-\tWeight\t0.04997978865116185\n", - "Image_ID\t6248\t-\tWeight\t0.04946683639815072\n", - "Image_ID\t5354\t-\tWeight\t0.04864381025793171\n", - "Image_ID\t6108\t-\tWeight\t0.04796763934338538\n", - "Image_ID\t5438\t-\tWeight\t0.047874747600689466\n", + "Image_ID\t7654\t-\tWeight\t0.0816218927496473\n", + "Image_ID\t8634\t-\tWeight\t0.0667358948577843\n", + "Image_ID\t5740\t-\tWeight\t0.06005882120197204\n", + "Image_ID\t6106\t-\tWeight\t0.0530666139393161\n", + "Image_ID\t5456\t-\tWeight\t0.051701715703308504\n", + "Image_ID\t7814\t-\tWeight\t0.04997978865116192\n", + "Image_ID\t6248\t-\tWeight\t0.04946683639815059\n", + "Image_ID\t5354\t-\tWeight\t0.04864381025793159\n", + "Image_ID\t6108\t-\tWeight\t0.0479676393433854\n", + "Image_ID\t5438\t-\tWeight\t0.04787474760068962\n", "Latent semantic no. 
1\n", - "Image_ID\t8026\t-\tWeight\t0.06478360955460367\n", - "Image_ID\t6016\t-\tWeight\t0.0632709906607753\n", - "Image_ID\t3744\t-\tWeight\t0.05347414608321652\n", - "Image_ID\t3720\t-\tWeight\t0.0517124023583583\n", - "Image_ID\t7896\t-\tWeight\t0.049366978424645006\n", - "Image_ID\t6014\t-\tWeight\t0.047637173390389816\n", - "Image_ID\t6768\t-\tWeight\t0.04742408995375774\n", - "Image_ID\t4050\t-\tWeight\t0.0456343920101654\n", - "Image_ID\t6000\t-\tWeight\t0.04535273415975713\n", - "Image_ID\t6552\t-\tWeight\t0.04525300117499444\n", + "Image_ID\t7654\t-\tWeight\t0.05566187740909836\n", + "Image_ID\t7880\t-\tWeight\t0.05304265128270742\n", + "Image_ID\t5132\t-\tWeight\t0.052802620405367526\n", + "Image_ID\t4516\t-\tWeight\t0.05032667794065215\n", + "Image_ID\t3064\t-\tWeight\t0.04996389545581616\n", + "Image_ID\t7808\t-\tWeight\t0.04885211523705829\n", + "Image_ID\t8102\t-\tWeight\t0.04821048869059779\n", + "Image_ID\t5336\t-\tWeight\t0.047392911537133244\n", + "Image_ID\t3058\t-\tWeight\t0.04622961181395915\n", + "Image_ID\t7484\t-\tWeight\t0.04563242634411927\n", "Latent semantic no. 
2\n", - "Image_ID\t7654\t-\tWeight\t0.0704670166327785\n", - "Image_ID\t2804\t-\tWeight\t0.059682344110996065\n", - "Image_ID\t2710\t-\tWeight\t0.059199111598090534\n", - "Image_ID\t3436\t-\tWeight\t0.05368202357324355\n", - "Image_ID\t7936\t-\tWeight\t0.053276991496894154\n", - "Image_ID\t2708\t-\tWeight\t0.048527019795007204\n", - "Image_ID\t3764\t-\tWeight\t0.04835537239641643\n", - "Image_ID\t7928\t-\tWeight\t0.047998989024259496\n", - "Image_ID\t5684\t-\tWeight\t0.04723047448150771\n", - "Image_ID\t5126\t-\tWeight\t0.04720498270016634\n", + "Image_ID\t7654\t-\tWeight\t0.07046701663277787\n", + "Image_ID\t2804\t-\tWeight\t0.059682344110995336\n", + "Image_ID\t2710\t-\tWeight\t0.05919911159809061\n", + "Image_ID\t3436\t-\tWeight\t0.05368202357324448\n", + "Image_ID\t7936\t-\tWeight\t0.05327699149689366\n", + "Image_ID\t2708\t-\tWeight\t0.04852701979500758\n", + "Image_ID\t3764\t-\tWeight\t0.04835537239641772\n", + "Image_ID\t7928\t-\tWeight\t0.04799898902425922\n", + "Image_ID\t5684\t-\tWeight\t0.04723047448150721\n", + "Image_ID\t5126\t-\tWeight\t0.04720498270016626\n", "Latent semantic no. 
3\n", - "Image_ID\t6356\t-\tWeight\t0.0754447261688377\n", - "Image_ID\t6480\t-\tWeight\t0.06540890240964665\n", - "Image_ID\t4756\t-\tWeight\t0.06075370676621832\n", - "Image_ID\t8656\t-\tWeight\t0.060505116069252685\n", - "Image_ID\t6050\t-\tWeight\t0.058111632773274836\n", - "Image_ID\t6324\t-\tWeight\t0.056492568599917435\n", - "Image_ID\t8138\t-\tWeight\t0.0557967464751822\n", - "Image_ID\t3460\t-\tWeight\t0.05508818833516222\n", - "Image_ID\t200\t-\tWeight\t0.05459477384213874\n", - "Image_ID\t7220\t-\tWeight\t0.05376222500332758\n", + "Image_ID\t8654\t-\tWeight\t0.08668332932816088\n", + "Image_ID\t8618\t-\tWeight\t0.08568859853566119\n", + "Image_ID\t8658\t-\tWeight\t0.0777605087520117\n", + "Image_ID\t3306\t-\tWeight\t0.0745220591779124\n", + "Image_ID\t8620\t-\tWeight\t0.07351843281590886\n", + "Image_ID\t8638\t-\tWeight\t0.06948884666766826\n", + "Image_ID\t6754\t-\tWeight\t0.06896434951935482\n", + "Image_ID\t8676\t-\tWeight\t0.06623938393792103\n", + "Image_ID\t4650\t-\tWeight\t0.06566930583744507\n", + "Image_ID\t8636\t-\tWeight\t0.06499098805246775\n", "Latent semantic no. 
4\n", - "Image_ID\t7370\t-\tWeight\t0.05281026462493995\n", - "Image_ID\t6528\t-\tWeight\t0.05252803707219332\n", - "Image_ID\t8056\t-\tWeight\t0.05175019567880743\n", - "Image_ID\t2958\t-\tWeight\t0.05123118911737749\n", - "Image_ID\t4614\t-\tWeight\t0.05061302210733273\n", - "Image_ID\t8292\t-\tWeight\t0.05000577057549489\n", - "Image_ID\t7888\t-\tWeight\t0.04905059301012787\n", - "Image_ID\t6540\t-\tWeight\t0.048139958875035395\n", - "Image_ID\t6064\t-\tWeight\t0.04605896293857696\n", - "Image_ID\t2974\t-\tWeight\t0.04488429099909397\n", + "Image_ID\t7370\t-\tWeight\t0.05281026462494081\n", + "Image_ID\t6528\t-\tWeight\t0.05252803707219361\n", + "Image_ID\t8056\t-\tWeight\t0.0517501956788071\n", + "Image_ID\t2958\t-\tWeight\t0.051231189117377514\n", + "Image_ID\t4614\t-\tWeight\t0.05061302210733084\n", + "Image_ID\t8292\t-\tWeight\t0.05000577057549516\n", + "Image_ID\t7888\t-\tWeight\t0.04905059301012733\n", + "Image_ID\t6540\t-\tWeight\t0.048139958875035006\n", + "Image_ID\t6064\t-\tWeight\t0.04605896293857509\n", + "Image_ID\t2974\t-\tWeight\t0.04488429099909442\n", "Latent semantic no. 
5\n", - "Image_ID\t8570\t-\tWeight\t0.08379938013632145\n", - "Image_ID\t7784\t-\tWeight\t0.0723847258804912\n", - "Image_ID\t4152\t-\tWeight\t0.060769224719766333\n", - "Image_ID\t5114\t-\tWeight\t0.053872121517690504\n", - "Image_ID\t7774\t-\tWeight\t0.05324887247523992\n", - "Image_ID\t8614\t-\tWeight\t0.05319742868629013\n", - "Image_ID\t3072\t-\tWeight\t0.05083994521792821\n", - "Image_ID\t7798\t-\tWeight\t0.05059807413594892\n", - "Image_ID\t5118\t-\tWeight\t0.05022770477320976\n", - "Image_ID\t7040\t-\tWeight\t0.04996996742218053\n", + "Image_ID\t8570\t-\tWeight\t0.08379938013632153\n", + "Image_ID\t7784\t-\tWeight\t0.07238472588049127\n", + "Image_ID\t4152\t-\tWeight\t0.06076922471976642\n", + "Image_ID\t5114\t-\tWeight\t0.05387212151769057\n", + "Image_ID\t7774\t-\tWeight\t0.05324887247524\n", + "Image_ID\t8614\t-\tWeight\t0.05319742868629018\n", + "Image_ID\t3072\t-\tWeight\t0.05083994521792827\n", + "Image_ID\t7798\t-\tWeight\t0.050598074135949\n", + "Image_ID\t5118\t-\tWeight\t0.05022770477320978\n", + "Image_ID\t7040\t-\tWeight\t0.04996996742218058\n", "Latent semantic no. 
6\n", - "Image_ID\t8570\t-\tWeight\t0.07082421149695754\n", - "Image_ID\t7774\t-\tWeight\t0.06546594547486781\n", - "Image_ID\t4152\t-\tWeight\t0.06440870014673936\n", + "Image_ID\t8570\t-\tWeight\t0.07082421149695753\n", + "Image_ID\t7774\t-\tWeight\t0.06546594547486784\n", + "Image_ID\t4152\t-\tWeight\t0.06440870014673937\n", "Image_ID\t5118\t-\tWeight\t0.06264436903974217\n", - "Image_ID\t7784\t-\tWeight\t0.06203552824772956\n", - "Image_ID\t7798\t-\tWeight\t0.05899354962287134\n", - "Image_ID\t7896\t-\tWeight\t0.05648444493570963\n", + "Image_ID\t7784\t-\tWeight\t0.06203552824772957\n", + "Image_ID\t7798\t-\tWeight\t0.05899354962287138\n", + "Image_ID\t7896\t-\tWeight\t0.056484444935709706\n", "Image_ID\t7766\t-\tWeight\t0.056063042928801675\n", - "Image_ID\t7792\t-\tWeight\t0.055578803018497686\n", - "Image_ID\t7834\t-\tWeight\t0.055567509183302555\n", + "Image_ID\t7792\t-\tWeight\t0.05557880301849769\n", + "Image_ID\t7834\t-\tWeight\t0.05556750918330256\n", "Latent semantic no. 7\n", - "Image_ID\t7912\t-\tWeight\t0.06634864556518678\n", - "Image_ID\t5534\t-\tWeight\t0.05913926717735747\n", - "Image_ID\t5550\t-\tWeight\t0.049468125695492526\n", - "Image_ID\t2106\t-\tWeight\t0.048274676516220805\n", - "Image_ID\t7804\t-\tWeight\t0.04822832951751611\n", - "Image_ID\t6198\t-\tWeight\t0.04795521082538372\n", - "Image_ID\t6728\t-\tWeight\t0.04729135404469566\n", - "Image_ID\t5588\t-\tWeight\t0.04715637083533252\n", - "Image_ID\t7276\t-\tWeight\t0.04637482601331893\n", - "Image_ID\t6730\t-\tWeight\t0.045930617636659\n", + "Image_ID\t1140\t-\tWeight\t0.05317423066517462\n", + "Image_ID\t5510\t-\tWeight\t0.052651188836683724\n", + "Image_ID\t5282\t-\tWeight\t0.05122146559887229\n", + "Image_ID\t1260\t-\tWeight\t0.050478632782130786\n", + "Image_ID\t1692\t-\tWeight\t0.05043911725770527\n", + "Image_ID\t8656\t-\tWeight\t0.04943228673655803\n", + "Image_ID\t1242\t-\tWeight\t0.04886689682608001\n", + "Image_ID\t7844\t-\tWeight\t0.048768495445578465\n", + 
"Image_ID\t5100\t-\tWeight\t0.04867702517715619\n", + "Image_ID\t5300\t-\tWeight\t0.048353062438932816\n", "Latent semantic no. 8\n", - "Image_ID\t1798\t-\tWeight\t0.04586412291217343\n", - "Image_ID\t1802\t-\tWeight\t0.044772142290101236\n", - "Image_ID\t1806\t-\tWeight\t0.044448676280621977\n", - "Image_ID\t1202\t-\tWeight\t0.043679466488681935\n", - "Image_ID\t1786\t-\tWeight\t0.04351371229636818\n", - "Image_ID\t1784\t-\tWeight\t0.04346765741634348\n", - "Image_ID\t1790\t-\tWeight\t0.04288750664761761\n", - "Image_ID\t1642\t-\tWeight\t0.041863484069841805\n", - "Image_ID\t1788\t-\tWeight\t0.04089406629514228\n", - "Image_ID\t1796\t-\tWeight\t0.04068815222347919\n", + "Image_ID\t1798\t-\tWeight\t0.0458641229121734\n", + "Image_ID\t1802\t-\tWeight\t0.044772142290101194\n", + "Image_ID\t1806\t-\tWeight\t0.044448676280621935\n", + "Image_ID\t1202\t-\tWeight\t0.043679466488681894\n", + "Image_ID\t1786\t-\tWeight\t0.043513712296368134\n", + "Image_ID\t1784\t-\tWeight\t0.043467657416343425\n", + "Image_ID\t1790\t-\tWeight\t0.04288750664761759\n", + "Image_ID\t1642\t-\tWeight\t0.041863484069841764\n", + "Image_ID\t1788\t-\tWeight\t0.04089406629514224\n", + "Image_ID\t1796\t-\tWeight\t0.04068815222347914\n", "Latent semantic no. 
9\n", - "Image_ID\t8582\t-\tWeight\t0.02577153311253718\n", - "Image_ID\t8612\t-\tWeight\t0.025608143819276445\n", - "Image_ID\t7290\t-\tWeight\t0.025578071187110543\n", - "Image_ID\t7298\t-\tWeight\t0.025350467801040884\n", - "Image_ID\t7302\t-\tWeight\t0.02531661140938117\n", - "Image_ID\t7318\t-\tWeight\t0.025212779767014252\n", - "Image_ID\t8580\t-\tWeight\t0.025201323062899284\n", - "Image_ID\t6392\t-\tWeight\t0.02517086205642468\n", - "Image_ID\t2738\t-\tWeight\t0.025106516897995135\n", - "Image_ID\t6420\t-\tWeight\t0.02510499876667641\n" + "Image_ID\t8616\t-\tWeight\t-0.001110683188398373\n", + "Image_ID\t5234\t-\tWeight\t-0.001470742377963864\n", + "Image_ID\t3838\t-\tWeight\t-0.0018268938101953923\n", + "Image_ID\t7428\t-\tWeight\t-0.001978912864613778\n", + "Image_ID\t4664\t-\tWeight\t-0.0020551982165007863\n", + "Image_ID\t2754\t-\tWeight\t-0.002091620047637018\n", + "Image_ID\t2806\t-\tWeight\t-0.0021702921217260757\n", + "Image_ID\t3820\t-\tWeight\t-0.002247214027498397\n", + "Image_ID\t3786\t-\tWeight\t-0.002360567100195792\n", + "Image_ID\t4928\t-\tWeight\t-0.002395118791388935\n" ] } ], @@ -159,7 +159,7 @@ " )\n", ")\n", "\n", - "extract_latent_semantics(\n", + "extract_latent_semantics_from_feature_model(\n", " fd_collection,\n", " k,\n", " selected_feature_model,\n", diff --git a/Phase 2/task_5.ipynb b/Phase 2/task_5.ipynb index 0b1cdb0..bddcc7b 100644 --- a/Phase 2/task_5.ipynb +++ b/Phase 2/task_5.ipynb @@ -14,16 +14,6 @@ ] } ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], "source": [ "from utils import *\n", "warnings.filterwarnings('ignore')\n", @@ -32,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -41,124 +31,124 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ - "Applying svd on the given similarity matrix to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n", + "Applying svd on the given similarity matrix to get 10 latent semantics (showing only top 10 label-weight pairs for each latent semantic)...\n", "Latent semantic no. 0\n", - "Image_ID\t200\t-\tWeight\t0.0\n", - "Image_ID\t198\t-\tWeight\t-0.004684806351746236\n", - "Image_ID\t196\t-\tWeight\t-0.007271577414375871\n", - "Image_ID\t194\t-\tWeight\t-0.011073051177514079\n", - "Image_ID\t192\t-\tWeight\t-0.011680371639188197\n", - "Image_ID\t188\t-\tWeight\t-0.014876024947438421\n", - "Image_ID\t186\t-\tWeight\t-0.017327189984007427\n", - "Image_ID\t190\t-\tWeight\t-0.021143262428570023\n", - "Image_ID\t182\t-\tWeight\t-0.026835375354998945\n", - "Image_ID\t180\t-\tWeight\t-0.030539133156424272\n", + "label\t28\t-\tWeight\t0.2583354411312026\n", + "label\t29\t-\tWeight\t0.2301362547676974\n", + "label\t33\t-\tWeight\t0.2129183683279978\n", + "label\t9\t-\tWeight\t0.17625685452423093\n", + "label\t95\t-\tWeight\t0.16277551497836534\n", + "label\t47\t-\tWeight\t0.1424860388015467\n", + "label\t39\t-\tWeight\t0.1349747704005884\n", + "label\t30\t-\tWeight\t0.13251434767496492\n", + "label\t52\t-\tWeight\t0.12669069496270755\n", + "label\t8\t-\tWeight\t0.1257730807471899\n", "Latent semantic no. 
1\n", - "Image_ID\t130\t-\tWeight\t0.21209688019072415\n", - "Image_ID\t138\t-\tWeight\t0.20392427070510372\n", - "Image_ID\t120\t-\tWeight\t0.1528415927574225\n", - "Image_ID\t132\t-\tWeight\t0.14995762877608315\n", - "Image_ID\t160\t-\tWeight\t0.1488052541453248\n", - "Image_ID\t136\t-\tWeight\t0.14309946283137032\n", - "Image_ID\t164\t-\tWeight\t0.1374261619484733\n", - "Image_ID\t140\t-\tWeight\t0.13528239495542024\n", - "Image_ID\t128\t-\tWeight\t0.12811923299406092\n", - "Image_ID\t152\t-\tWeight\t0.12752116772697258\n", + "label\t96\t-\tWeight\t0.2666765976054894\n", + "label\t97\t-\tWeight\t0.19087869496500426\n", + "label\t25\t-\tWeight\t0.17776094778851348\n", + "label\t3\t-\tWeight\t0.1759798805642099\n", + "label\t98\t-\tWeight\t0.16951497899752574\n", + "label\t22\t-\tWeight\t0.1667032655640346\n", + "label\t24\t-\tWeight\t0.16034180060184824\n", + "label\t19\t-\tWeight\t0.15345532912389587\n", + "label\t52\t-\tWeight\t0.13271640119612757\n", + "label\t29\t-\tWeight\t0.12856388746021633\n", "Latent semantic no. 
2\n", - "Image_ID\t4\t-\tWeight\t0.2518749001016952\n", - "Image_ID\t8\t-\tWeight\t0.24177133880298157\n", - "Image_ID\t58\t-\tWeight\t0.1467873881626323\n", - "Image_ID\t0\t-\tWeight\t0.1384139791414865\n", - "Image_ID\t56\t-\tWeight\t0.11818058158618501\n", - "Image_ID\t20\t-\tWeight\t0.1102967668802325\n", - "Image_ID\t84\t-\tWeight\t0.1044376029159064\n", - "Image_ID\t18\t-\tWeight\t0.10262843674760519\n", - "Image_ID\t138\t-\tWeight\t0.10181762652349924\n", - "Image_ID\t70\t-\tWeight\t0.10127861659022899\n", + "label\t46\t-\tWeight\t0.21813474254675366\n", + "label\t79\t-\tWeight\t0.19091788352587957\n", + "label\t55\t-\tWeight\t0.1871080482210247\n", + "label\t56\t-\tWeight\t0.18322792605578184\n", + "label\t78\t-\tWeight\t0.17506936966351683\n", + "label\t98\t-\tWeight\t0.1733164832137484\n", + "label\t22\t-\tWeight\t0.17114312653027375\n", + "label\t38\t-\tWeight\t0.16928636840289424\n", + "label\t45\t-\tWeight\t0.1567042877228484\n", + "label\t4\t-\tWeight\t0.15108693899889344\n", "Latent semantic no. 
3\n", - "Image_ID\t84\t-\tWeight\t0.16299489544466675\n", - "Image_ID\t94\t-\tWeight\t0.155336350677209\n", - "Image_ID\t70\t-\tWeight\t0.14011002627071287\n", - "Image_ID\t102\t-\tWeight\t0.13701247594788535\n", - "Image_ID\t88\t-\tWeight\t0.1320753872066342\n", - "Image_ID\t82\t-\tWeight\t0.1320716816148611\n", - "Image_ID\t86\t-\tWeight\t0.12902969925360877\n", - "Image_ID\t72\t-\tWeight\t0.12610296358207826\n", - "Image_ID\t92\t-\tWeight\t0.12596461453701044\n", - "Image_ID\t66\t-\tWeight\t0.12532841063277217\n", + "label\t96\t-\tWeight\t0.2736613529052896\n", + "label\t98\t-\tWeight\t0.218185914155306\n", + "label\t22\t-\tWeight\t0.1963451355822489\n", + "label\t3\t-\tWeight\t0.17627732148468614\n", + "label\t39\t-\tWeight\t0.1728992502839298\n", + "label\t52\t-\tWeight\t0.15597562436756945\n", + "label\t51\t-\tWeight\t0.1291470561734402\n", + "label\t30\t-\tWeight\t0.12453129554714541\n", + "label\t18\t-\tWeight\t0.1236867360720947\n", + "label\t38\t-\tWeight\t0.12184856229773917\n", "Latent semantic no. 
4\n", - "Image_ID\t176\t-\tWeight\t0.17418620419170064\n", - "Image_ID\t184\t-\tWeight\t0.16284491366511475\n", - "Image_ID\t178\t-\tWeight\t0.15835141260945226\n", - "Image_ID\t182\t-\tWeight\t0.1563230190106094\n", - "Image_ID\t180\t-\tWeight\t0.14992527858819726\n", - "Image_ID\t170\t-\tWeight\t0.1461798073190985\n", - "Image_ID\t174\t-\tWeight\t0.13541698801645058\n", - "Image_ID\t166\t-\tWeight\t0.12423630035289784\n", - "Image_ID\t172\t-\tWeight\t0.1234361443074221\n", - "Image_ID\t52\t-\tWeight\t0.12074682250121946\n", + "label\t6\t-\tWeight\t0.23875690719216863\n", + "label\t67\t-\tWeight\t0.21007869938490106\n", + "label\t63\t-\tWeight\t0.18822840034389135\n", + "label\t14\t-\tWeight\t0.18738002200878218\n", + "label\t87\t-\tWeight\t0.17508576062247283\n", + "label\t23\t-\tWeight\t0.167492867766091\n", + "label\t15\t-\tWeight\t0.15522709562173342\n", + "label\t61\t-\tWeight\t0.13244353806854162\n", + "label\t45\t-\tWeight\t0.12833204093005665\n", + "label\t68\t-\tWeight\t0.12622315521729294\n", "Latent semantic no. 
5\n", - "Image_ID\t184\t-\tWeight\t0.25060450796637307\n", - "Image_ID\t96\t-\tWeight\t0.19653319773940384\n", - "Image_ID\t4\t-\tWeight\t0.1927615510140044\n", - "Image_ID\t190\t-\tWeight\t0.1823467475920773\n", - "Image_ID\t104\t-\tWeight\t0.17232402315708764\n", - "Image_ID\t176\t-\tWeight\t0.15944267571419668\n", - "Image_ID\t2\t-\tWeight\t0.15830010074390483\n", - "Image_ID\t180\t-\tWeight\t0.15710086389623582\n", - "Image_ID\t86\t-\tWeight\t0.1531972222034532\n", - "Image_ID\t178\t-\tWeight\t0.14864580852650564\n", + "label\t30\t-\tWeight\t0.17385975982344382\n", + "label\t25\t-\tWeight\t0.14655711054814133\n", + "label\t39\t-\tWeight\t0.13307896633493813\n", + "label\t68\t-\tWeight\t0.12851498788897622\n", + "label\t24\t-\tWeight\t0.12828250585375986\n", + "label\t0\t-\tWeight\t0.12500243174429157\n", + "label\t1\t-\tWeight\t0.12371257574727512\n", + "label\t77\t-\tWeight\t0.12370279647800499\n", + "label\t89\t-\tWeight\t0.12233344688386875\n", + "label\t83\t-\tWeight\t0.11445596984835589\n", "Latent semantic no. 
6\n", - "Image_ID\t160\t-\tWeight\t0.2664558477429268\n", - "Image_ID\t86\t-\tWeight\t0.22964178511691158\n", - "Image_ID\t4\t-\tWeight\t0.2027946708731003\n", - "Image_ID\t8\t-\tWeight\t0.17594388183949075\n", - "Image_ID\t96\t-\tWeight\t0.15932731178540344\n", - "Image_ID\t150\t-\tWeight\t0.1557669882841681\n", - "Image_ID\t42\t-\tWeight\t0.15015687757605228\n", - "Image_ID\t70\t-\tWeight\t0.14221366935133106\n", - "Image_ID\t166\t-\tWeight\t0.13822990110337333\n", - "Image_ID\t170\t-\tWeight\t0.136006921209686\n", + "label\t17\t-\tWeight\t0.2335282879255542\n", + "label\t48\t-\tWeight\t0.19418795795666355\n", + "label\t21\t-\tWeight\t0.19013440200231033\n", + "label\t85\t-\tWeight\t0.17503295059460947\n", + "label\t11\t-\tWeight\t0.14933372636956993\n", + "label\t1\t-\tWeight\t0.1384254243377172\n", + "label\t0\t-\tWeight\t0.13078647401074162\n", + "label\t57\t-\tWeight\t0.11374248801163754\n", + "label\t10\t-\tWeight\t0.10468223841103744\n", + "label\t99\t-\tWeight\t0.10191451131216464\n", "Latent semantic no. 
7\n", - "Image_ID\t0\t-\tWeight\t0.18579423291522054\n", - "Image_ID\t160\t-\tWeight\t0.15838043091994455\n", - "Image_ID\t12\t-\tWeight\t0.1569899414230264\n", - "Image_ID\t16\t-\tWeight\t0.15348073631252238\n", - "Image_ID\t20\t-\tWeight\t0.14749435830520785\n", - "Image_ID\t18\t-\tWeight\t0.14710442040625207\n", - "Image_ID\t14\t-\tWeight\t0.14572307182896904\n", - "Image_ID\t2\t-\tWeight\t0.135886756644037\n", - "Image_ID\t158\t-\tWeight\t0.12716375063129493\n", - "Image_ID\t154\t-\tWeight\t0.11653475862758583\n", + "label\t82\t-\tWeight\t0.23372455436757703\n", + "label\t95\t-\tWeight\t0.21795238756371887\n", + "label\t60\t-\tWeight\t0.18080422229063045\n", + "label\t16\t-\tWeight\t0.1806105172209771\n", + "label\t27\t-\tWeight\t0.17365150902149876\n", + "label\t59\t-\tWeight\t0.17250044548228938\n", + "label\t26\t-\tWeight\t0.1661853291143862\n", + "label\t13\t-\tWeight\t0.16331211225170805\n", + "label\t34\t-\tWeight\t0.1523080193090529\n", + "label\t67\t-\tWeight\t0.13577900574984025\n", "Latent semantic no. 
8\n", - "Image_ID\t128\t-\tWeight\t0.20162255290912043\n", - "Image_ID\t64\t-\tWeight\t0.2013551710742827\n", - "Image_ID\t76\t-\tWeight\t0.19200691322367733\n", - "Image_ID\t68\t-\tWeight\t0.183262211696717\n", - "Image_ID\t2\t-\tWeight\t0.17626949463475755\n", - "Image_ID\t126\t-\tWeight\t0.17260073717551033\n", - "Image_ID\t130\t-\tWeight\t0.16679745247386799\n", - "Image_ID\t0\t-\tWeight\t0.15145696367688846\n", - "Image_ID\t80\t-\tWeight\t0.13382645234168947\n", - "Image_ID\t132\t-\tWeight\t0.12607547198838437\n", + "label\t53\t-\tWeight\t0.2259481751468642\n", + "label\t37\t-\tWeight\t0.21583443408756542\n", + "label\t76\t-\tWeight\t0.20483376297311964\n", + "label\t44\t-\tWeight\t0.1690198227623472\n", + "label\t68\t-\tWeight\t0.1650723880318989\n", + "label\t28\t-\tWeight\t0.15689929414378492\n", + "label\t14\t-\tWeight\t0.1564371673909956\n", + "label\t54\t-\tWeight\t0.1553627917623035\n", + "label\t51\t-\tWeight\t0.14380435363337046\n", + "label\t36\t-\tWeight\t0.13510425005259438\n", "Latent semantic no. 
9\n", - "Image_ID\t110\t-\tWeight\t0.2380313932091839\n", - "Image_ID\t126\t-\tWeight\t0.22284705922022288\n", - "Image_ID\t170\t-\tWeight\t0.20294066349000953\n", - "Image_ID\t58\t-\tWeight\t0.19271846291888434\n", - "Image_ID\t166\t-\tWeight\t0.16710379029940944\n", - "Image_ID\t118\t-\tWeight\t0.16159034411481996\n", - "Image_ID\t42\t-\tWeight\t0.1585043891315177\n", - "Image_ID\t120\t-\tWeight\t0.15529190621970054\n", - "Image_ID\t56\t-\tWeight\t0.1484578124120866\n", - "Image_ID\t160\t-\tWeight\t0.13578707023661948\n" + "label\t19\t-\tWeight\t0.11741024839079275\n", + "label\t40\t-\tWeight\t0.11107319334138463\n", + "label\t53\t-\tWeight\t0.11058750626248925\n", + "label\t51\t-\tWeight\t0.10794606425819818\n", + "label\t96\t-\tWeight\t0.10735468567860716\n", + "label\t55\t-\tWeight\t0.10731282010915796\n", + "label\t50\t-\tWeight\t0.10703093662670059\n", + "label\t1\t-\tWeight\t0.10651036503732043\n", + "label\t79\t-\tWeight\t0.10640855392103846\n", + "label\t47\t-\tWeight\t0.10594110421348357\n" ] } ], @@ -180,14 +170,13 @@ "\n", "label_sim_matrix = find_label_label_similarity(fd_collection,selected_feature_model)\n", "\n", - "extract_latent_semantics(\n", - " fd_collection,\n", - " k,\n", + "extract_latent_semantics_from_sim_matrix(\n", + " label_sim_matrix,\n", " selected_feature_model,\n", + " \"label\",\n", + " k,\n", " selected_dim_reduction_method,\n", - " sim_matrix=label_sim_matrix,\n", " top_images=10,\n", - " fn_prefix='label_sim-'\n", ")\n" ] }, diff --git a/Phase 2/task_6.ipynb b/Phase 2/task_6.ipynb new file mode 100644 index 0000000..15a3afb --- /dev/null +++ b/Phase 2/task_6.ipynb @@ -0,0 +1,78 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import *\n", + "warnings.filterwarnings('ignore')\n", + "%matplotlib inline\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fd_collection = 
getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "selected_feature_model = valid_feature_models[\n", + " str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n", + "]\n", + "\n", + "k = int(input(\"Enter value of k: \"))\n", + "if k < 1:\n", + " raise ValueError(\"k should be a positive integer\")\n", + "\n", + "selected_dim_reduction_method = str(\n", + " input(\n", + " \"Enter dimensionality reduction method - one of \"\n", + " + str(list(valid_dim_reduction_methods.keys()))\n", + " )\n", + ")\n", + "\n", + "image_sim_matrix = find_image_image_similarity(fd_collection,selected_feature_model)\n", + "\n", + "extract_latent_semantics_from_sim_matrix(\n", + " image_sim_matrix,\n", + " selected_feature_model,\n", + " \"image\",\n", + "\tk,\n", + " selected_dim_reduction_method,\n", + " top_images=10,\n", + ")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Phase 2/utils.py b/Phase 2/utils.py index fa749ff..6dbb8ed 100644 --- a/Phase 2/utils.py +++ b/Phase 2/utils.py @@ -523,7 +523,9 @@ def calculate_label_representatives(fd_collection, label, feature_model): """Calculate representative feature vector of a label as the mean of all feature vectors under a feature model""" label_fds = [ - np.array(img_fds[feature_model]).flatten() # get the specific feature model's feature vector + np.array( + img_fds[feature_model] + ).flatten() # get the specific feature model's feature vector for img_fds in fd_collection.find( 
{"true_label": label} ) # repeat for all images @@ -569,7 +571,7 @@ def show_similar_images_for_label( for cur_img in all_images: cur_img_id = cur_img["image_id"] - cur_img_fd = np.array(cur_img[feature_model]) + cur_img_fd = np.array(cur_img[feature_model]).flatten() cur_dist = distance_measure( cur_img_fd, @@ -658,15 +660,13 @@ def show_similar_labels_for_image( label_dict = {target_image_id: target_label} - target_image_fd = np.array(target_image[feature_model]) - all_images = fd_collection.find({}) for cur_img in all_images: cur_img_id = cur_img["image_id"] # skip target itself if cur_img_id == target_image_id: continue - cur_img_fd = np.array(cur_img[feature_model]) + cur_img_fd = np.array(cur_img[feature_model]).flatten() cur_dist = distance_measure( cur_img_fd, target_image_fd, @@ -698,11 +698,11 @@ def show_similar_labels_for_image( continue else: sample_image, sample_label = dataset[image_id] - axs[idx-1].imshow(transforms.ToPILImage()(sample_image)) - axs[idx-1].set_title( + axs[idx - 1].imshow(transforms.ToPILImage()(sample_image)) + axs[idx - 1].set_title( f"Label: {label_dict[image_id]}; Distance: {min_dists[image_id]}" ) - axs[idx-1].axis("off") + axs[idx - 1].axis("off") if save_plots: plt.savefig( @@ -841,20 +841,16 @@ def svd(matrix, k): return left_singular_vectors, np.diag(singular_values), right_singular_vectors.T -def extract_latent_semantics( +def extract_latent_semantics_from_feature_model( fd_collection, k, feature_model, dim_reduction_method, - sim_matrix=None, top_images=None, - fn_prefix="", ): """ Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs - Use `sim_matrix` to manually give similarity matrix instead of feature space - Leave `top_images` blank to display all imageID-weight pairs """ @@ -874,22 +870,14 @@ def extract_latent_semantics( if top_images is not None: top_img_str = f" (showing only top {top_images} image-weight pairs for 
each latent semantic)" - # if similarity matrix is provided - if sim_matrix is not None: - feature_vectors = sim_matrix - print( - "Applying {} on the given similarity matrix to get {} latent semantics{}...".format( - dim_reduction_method, k, top_img_str - ) - ) - # else take feature space from database - else: - feature_vectors = np.array([np.array(img[feature_model]).flatten() for img in all_images]) - print( - "Applying {} on the {} space to get {} latent semantics{}...".format( - dim_reduction_method, feature_model, k, top_img_str - ) + feature_vectors = np.array( + [np.array(img[feature_model]).flatten() for img in all_images] + ) + print( + "Applying {} on the {} space to get {} latent semantics{}...".format( + dim_reduction_method, feature_model, k, top_img_str ) + ) displayed_latent_semantics = {} all_latent_semantics = {} @@ -1011,12 +999,180 @@ def extract_latent_semantics( print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}") with open( - f"{fn_prefix}{feature_model}-{dim_reduction_method}-{k}-semantics.json", + f"{feature_model}-{dim_reduction_method}-{k}-semantics.json", "w", encoding="utf-8", ) as output_file: json.dump(all_latent_semantics, output_file, ensure_ascii=False) +def extract_latent_semantics_from_sim_matrix( + sim_matrix, + feature_model, + sim_type, + k, + dim_reduction_method, + top_images=None, +): + """ + Extract latent semantics for a given similarity matrix for a given dim_reduction_method, and display the object-semantic weight pairs + + Leave `top_images` blank to display all imageID-weight pairs + """ + + assert sim_type in ["image", "label"], "sim_type should be one of " + str( + ["image", "label"] + ) + assert ( + feature_model in valid_feature_models.values() + ), "feature_model should be one of " + str(list(valid_feature_models.keys())) + assert ( + dim_reduction_method in valid_dim_reduction_methods.keys() + ), "dim_reduction_method should be one of " + str( + list(valid_dim_reduction_methods.keys()) + ) + assert 
len(sim_matrix) == len(sim_matrix[0]), "sim_matrix must be square matrix" + + top_img_str = "" + if top_images is not None: + top_img_str = f" (showing only top {top_images} {sim_type}-weight pairs for each latent semantic)" + + feature_vectors = sim_matrix + feature_ids = list(range(len(sim_matrix))) + + print( + "Applying {} on the given similarity matrix to get {} latent semantics{}...".format( + dim_reduction_method, k, top_img_str + ) + ) + + displayed_latent_semantics = {} + all_latent_semantics = {} + + match valid_dim_reduction_methods[dim_reduction_method]: + # singular value decomposition + # sparse version of SVD to get only k singular values + case 1: + U, S, V_T = svds(feature_vectors, k=k) + + all_latent_semantics = { + "image-semantic": U.tolist(), + "semantics-core": S.tolist(), + "semantic-feature": V_T.tolist(), + } + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=True, + )[:top_images] + for latent_semantic in U.T + ] + + # non-negative matrix factorization + case 2: + # NNMF requires non-negative input data + # so shift the input by subtracting the smallest value + min_value = np.min(feature_vectors) + feature_vectors_shifted = feature_vectors - min_value + + model = NMF( + n_components=k, + init="random", + solver="cd", + alpha_H=0.01, + alpha_W=0.01, + max_iter=10000, + ) + model.fit(feature_vectors_shifted) + + W = model.transform(feature_vectors_shifted) + H = model.components_ + + all_latent_semantics = { + "image-semantic": W.tolist(), + "semantic-feature": H.tolist(), + } + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=True, + )[:top_images] + for latent_semantic in W.T + ] + + # unsupervised LDA to extract topics (Latent 
Dirichlet Allocation) + # Note: LDA takes a bit of time + case 3: + # LDA requires non-negative input data + # so shift the input by subtracting the smallest value + min_value = np.min(feature_vectors) + feature_vectors_shifted = feature_vectors - min_value + + model = LatentDirichletAllocation( + n_components=k, learning_method="online", verbose=4 + ) + model.fit(feature_vectors_shifted) + + # K (k x fd_dim) is the factor matrix for latent semantic-feature pairs + K = model.components_ + # X (4339 x k) is the other factor matrix for image ID-latent semantic pairs + X = model.transform(feature_vectors_shifted) + + all_latent_semantics = { + "image-semantic": X.tolist(), + "semantic-feature": K.tolist(), + } + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=True, + )[:top_images] + for latent_semantic in X.T + ] + + # k-means clustering to reduce to k clusters/dimensions + case 4: + model = KMeans(n_clusters=k, verbose=2).fit(feature_vectors) + CC = model.cluster_centers_ + Y = model.transform(feature_vectors) + + all_latent_semantics = { + "image-semantic": Y.tolist(), + "semantic-feature": list(CC.values()), + } + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=False, + )[:top_images] + for latent_semantic in Y.T + ] + + for idx, latent_semantic in enumerate(displayed_latent_semantics): + print(f"Latent semantic no. 
{idx}") + for obj_id, weight in latent_semantic: + print(f"{sim_type}\t{obj_id}\t-\tWeight\t{weight}") + + # Finally also save sim_matrix + all_latent_semantics["sim-matrix"] = sim_matrix.tolist() + + with open( + f"{sim_type}_sim-{feature_model}-{dim_reduction_method}-{k}-semantics.json", + "w", + encoding="utf-8", + ) as output_file: + json.dump(all_latent_semantics, output_file, ensure_ascii=False) def find_label_label_similarity(fd_collection, feature_model): """ @@ -1039,10 +1195,38 @@ def find_label_label_similarity(fd_collection, feature_model): label_sim_matrix = np.zeros((num_labels, num_labels)) + # Calculate half and fill the other for i in range(num_labels): for j in range(i + 1, num_labels): # Note: lower the value, lower the distance => higher the similarity - label_sim_matrix[i][j] = feature_distance_matches[feature_model]( - np.array(label_mean_vectors[i]), np.array(label_mean_vectors[j]) - ) + label_sim_matrix[i][j] = label_sim_matrix[j][i] = feature_distance_matches[ + feature_model + ](np.array(label_mean_vectors[i]), np.array(label_mean_vectors[j])) return label_sim_matrix + + +def find_image_image_similarity(fd_collection, feature_model): + """ + Calculate similarity between images. 
def find_image_image_similarity(fd_collection, feature_model):
    """
    Calculate similarity between images. Lower values indicate higher similarities

    Parameters:
        fd_collection: MongoDB collection of per-image feature-descriptor documents
                       (each document is presumably keyed by feature-model name — confirm schema)
        feature_model: one of valid_feature_models.values(); selects both the stored
                       vector and the distance function from feature_distance_matches

    Returns:
        (n x n) symmetric numpy array of pairwise distances, zero diagonal.
    """
    assert (
        feature_model in valid_feature_models.values()
    ), "feature_model should be one of " + str(list(valid_feature_models.keys()))

    feature_vectors = [
        np.array(
            img_fds[feature_model]
        ).flatten()  # get the specific feature model's feature vector
        for img_fds in fd_collection.find()  # repeat for all images
    ]
    num_images = len(feature_vectors)
    image_sim_matrix = np.zeros((num_images, num_images))

    # Hoist the loop-invariant distance-function lookup out of the O(n^2) loop.
    distance_fn = feature_distance_matches[feature_model]

    # Calculate half and fill the other (matrix is symmetric, diagonal stays 0)
    for i in range(num_images):
        for j in range(i + 1, num_images):
            # Note: lower the value, lower the distance => higher the similarity
            # (feature_vectors[i] is already an ndarray; no need to re-wrap in np.array)
            image_sim_matrix[i][j] = image_sim_matrix[j][i] = distance_fn(
                feature_vectors[i], feature_vectors[j]
            )
    return image_sim_matrix