diff --git a/Phase 2/task3.ipynb b/Phase 2/task3.ipynb deleted file mode 100644 index ba60981..0000000 --- a/Phase 2/task3.ipynb +++ /dev/null @@ -1,142 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from pymongo import MongoClient\n", - "from task0a import *\n", - "import scipy\n", - "import numpy as np\n", - "from sklearn.decomposition import NMF\n", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", - "from sklearn.cluster import KMeans\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client = MongoClient()\n", - "client = MongoClient(host=\"localhost\", port=27017)\n", - "\n", - "# Select the database\n", - "db = client.Multimedia_Web_DBs\n", - "\n", - "# Fetch all documents from the collection and then sort them by \"_id\"\n", - "feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n", - "feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n", - "\n", - "num_labels = 101" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def extractKLatentSemantics(k, feature_model, dim_reduction):\n", - "\n", - " feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", - " feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", - " feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", - "\n", - " filename = ''\n", - "\n", - "\n", - " match dim_reduction:\n", - "\n", - " case 1:\n", - " filename = f'{feature_model}-svd-semantics.json'\n", - " U, S, Vh = scipy.sparse.linalg.svds(np.array(feature_vectors), k=k)\n", - " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", - "\n", - " case 2:\n", - " filename = f'{feature_model}-nnmf-semantics.json'\n", - " model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n", - " min_value = np.min(feature_vectors)\n", - " feature_vectors_shifted = feature_vectors - min_value\n", - " U = model.fit_transform(np.array(feature_vectors_shifted))\n", - " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", - "\n", - " case 3:\n", - " filename = f'{feature_model}-lda-semantics.json'\n", - " U = LinearDiscriminantAnalysis(n_components = k).fit_transform(feature_vectors, feature_labels)\n", - " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", - "\n", - " case 4:\n", - " filename = f'{feature_model}-kmeans-semantics.json'\n", - " kmeans = KMeans(n_clusters = k)\n", - " kmeans.fit(feature_vectors)\n", - " U = kmeans.transform(feature_vectors)\n", - " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", - " \n", - " k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n", - " with open(filename, 'w', encoding='utf-8') as f:\n", - " json.dump(k_latent_semantics, f, ensure_ascii = False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def main():\n", - "\n", - " # Load dataset\n", - "\n", - " # User input for Image ID\n", - " k = int(input(\"Enter k: \"))\n", - "\n", - " features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n", - "\n", - " # User input for feature model to extract\n", - " print(\"\\n1: Color moments\")\n", - " print(\"2: HOG\")\n", - " print(\"3: Resnet50 Avgpool layer\")\n", - " print(\"4: Resnet50 Layer 3\")\n", - " print(\"5: Resnet50 FC layer\")\n", - " feature_model = features[int(input(\"Select the feature model: \")) - 1]\n", - "\n", - " print(\"\\n1. SVD\")\n", - " print(\"2. NNMF\")\n", - " print(\"3. LDA\")\n", - " print(\"4. k-means\")\n", - " dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n", - "\n", - " extractKLatentSemantics(k, feature_model, dim_reduction)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if __name__ == \"__main__\":\n", - " main()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Phase 2/task_5.ipynb b/Phase 2/task_5.ipynb new file mode 100644 index 0000000..7f025ea --- /dev/null +++ b/Phase 2/task_5.ipynb @@ -0,0 +1,214 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import *\n", + "warnings.filterwarnings('ignore')\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Applying svd on the fc_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n", + "Latent semantic no. 0\n", + "Image_ID\t80\t-\tWeight\t0.2614097705550824\n", + "Image_ID\t74\t-\tWeight\t0.255431983850539\n", + "Image_ID\t72\t-\tWeight\t0.24329045773521019\n", + "Image_ID\t76\t-\tWeight\t0.22867416408250565\n", + "Image_ID\t38\t-\tWeight\t0.19933358228759127\n", + "Image_ID\t70\t-\tWeight\t0.18697368408982706\n", + "Image_ID\t78\t-\tWeight\t0.13796715203849405\n", + "Image_ID\t130\t-\tWeight\t0.12802644225327572\n", + "Image_ID\t128\t-\tWeight\t0.12766513481071043\n", + "Image_ID\t116\t-\tWeight\t0.12432195172872901\n", + "Latent semantic no. 1\n", + "Image_ID\t42\t-\tWeight\t0.24451953308549035\n", + "Image_ID\t104\t-\tWeight\t0.17513827022527176\n", + "Image_ID\t2\t-\tWeight\t0.17502495949250704\n", + "Image_ID\t0\t-\tWeight\t0.17209867451969002\n", + "Image_ID\t170\t-\tWeight\t0.16656363902027468\n", + "Image_ID\t96\t-\tWeight\t0.15318453472976815\n", + "Image_ID\t40\t-\tWeight\t0.1432149719665029\n", + "Image_ID\t44\t-\tWeight\t0.1429496131499582\n", + "Image_ID\t160\t-\tWeight\t0.13479710738132986\n", + "Image_ID\t6\t-\tWeight\t0.1264545662660414\n", + "Latent semantic no. 2\n", + "Image_ID\t86\t-\tWeight\t0.21244971577008848\n", + "Image_ID\t96\t-\tWeight\t0.19744514449239337\n", + "Image_ID\t90\t-\tWeight\t0.19463642108355275\n", + "Image_ID\t32\t-\tWeight\t0.18145091969843855\n", + "Image_ID\t42\t-\tWeight\t0.16316970985189788\n", + "Image_ID\t26\t-\tWeight\t0.15711519451212017\n", + "Image_ID\t184\t-\tWeight\t0.14991640994990046\n", + "Image_ID\t134\t-\tWeight\t0.1462330756631442\n", + "Image_ID\t40\t-\tWeight\t0.14437675159652016\n", + "Image_ID\t182\t-\tWeight\t0.1383518461119224\n", + "Latent semantic no. 3\n", + "Image_ID\t90\t-\tWeight\t0.1720078267722524\n", + "Image_ID\t156\t-\tWeight\t0.16000154385617743\n", + "Image_ID\t158\t-\tWeight\t0.1512646317732056\n", + "Image_ID\t160\t-\tWeight\t0.14646801598350143\n", + "Image_ID\t152\t-\tWeight\t0.1464352560589073\n", + "Image_ID\t150\t-\tWeight\t0.14619374900432364\n", + "Image_ID\t30\t-\tWeight\t0.14143498327111978\n", + "Image_ID\t36\t-\tWeight\t0.14028252934190766\n", + "Image_ID\t92\t-\tWeight\t0.14010606099568526\n", + "Image_ID\t96\t-\tWeight\t0.12878454015856147\n", + "Latent semantic no. 4\n", + "Image_ID\t0\t-\tWeight\t0.1851068625752792\n", + "Image_ID\t68\t-\tWeight\t0.18233577289211206\n", + "Image_ID\t70\t-\tWeight\t0.17658848660973384\n", + "Image_ID\t2\t-\tWeight\t0.1740864069632969\n", + "Image_ID\t64\t-\tWeight\t0.1652208125636303\n", + "Image_ID\t144\t-\tWeight\t0.1473307832877541\n", + "Image_ID\t140\t-\tWeight\t0.13555748295430797\n", + "Image_ID\t142\t-\tWeight\t0.12823249250147356\n", + "Image_ID\t86\t-\tWeight\t0.12718092599165637\n", + "Image_ID\t76\t-\tWeight\t0.1252879989162334\n", + "Latent semantic no. 5\n", + "Image_ID\t38\t-\tWeight\t0.18831453133913492\n", + "Image_ID\t44\t-\tWeight\t0.17741038115946053\n", + "Image_ID\t42\t-\tWeight\t0.16444727858214978\n", + "Image_ID\t130\t-\tWeight\t0.15436113645002744\n", + "Image_ID\t40\t-\tWeight\t0.1536450181907607\n", + "Image_ID\t132\t-\tWeight\t0.14964910372393345\n", + "Image_ID\t46\t-\tWeight\t0.147369630386678\n", + "Image_ID\t36\t-\tWeight\t0.14003912645014002\n", + "Image_ID\t128\t-\tWeight\t0.13864439525825356\n", + "Image_ID\t138\t-\tWeight\t0.13770732538821512\n", + "Latent semantic no. 6\n", + "Image_ID\t114\t-\tWeight\t0.15664448468019831\n", + "Image_ID\t2\t-\tWeight\t0.15491061836983144\n", + "Image_ID\t0\t-\tWeight\t0.1530303208538504\n", + "Image_ID\t6\t-\tWeight\t0.15295162665264536\n", + "Image_ID\t106\t-\tWeight\t0.14505207452002586\n", + "Image_ID\t110\t-\tWeight\t0.14364619871330633\n", + "Image_ID\t104\t-\tWeight\t0.14360445482307752\n", + "Image_ID\t116\t-\tWeight\t0.14309751290704328\n", + "Image_ID\t108\t-\tWeight\t0.14103122187663494\n", + "Image_ID\t112\t-\tWeight\t0.13936814882577545\n", + "Latent semantic no. 7\n", + "Image_ID\t158\t-\tWeight\t0.15332739573127638\n", + "Image_ID\t152\t-\tWeight\t0.15027095321242787\n", + "Image_ID\t2\t-\tWeight\t0.148228537938103\n", + "Image_ID\t0\t-\tWeight\t0.14693245027728857\n", + "Image_ID\t156\t-\tWeight\t0.1439438847861891\n", + "Image_ID\t8\t-\tWeight\t0.14356918947005834\n", + "Image_ID\t10\t-\tWeight\t0.1431162549061445\n", + "Image_ID\t6\t-\tWeight\t0.14277108702825383\n", + "Image_ID\t150\t-\tWeight\t0.1424099571884803\n", + "Image_ID\t164\t-\tWeight\t0.13731169848767164\n", + "Latent semantic no. 8\n", + "Image_ID\t136\t-\tWeight\t0.14826723874051348\n", + "Image_ID\t142\t-\tWeight\t0.1444905135922577\n", + "Image_ID\t116\t-\tWeight\t0.14310970423245634\n", + "Image_ID\t132\t-\tWeight\t0.13967210710664973\n", + "Image_ID\t152\t-\tWeight\t0.13699976834141417\n", + "Image_ID\t114\t-\tWeight\t0.13649814331495427\n", + "Image_ID\t138\t-\tWeight\t0.13624706512987708\n", + "Image_ID\t106\t-\tWeight\t0.13620952950667425\n", + "Image_ID\t110\t-\tWeight\t0.1346054901033104\n", + "Image_ID\t144\t-\tWeight\t0.13436573258693213\n", + "Latent semantic no. 9\n", + "Image_ID\t38\t-\tWeight\t0.15911686596038474\n", + "Image_ID\t2\t-\tWeight\t0.15207108925634513\n", + "Image_ID\t0\t-\tWeight\t0.15116756158498235\n", + "Image_ID\t6\t-\tWeight\t0.15009399187071035\n", + "Image_ID\t10\t-\tWeight\t0.14437025978168486\n", + "Image_ID\t4\t-\tWeight\t0.14315858315130434\n", + "Image_ID\t34\t-\tWeight\t0.14296451776950192\n", + "Image_ID\t22\t-\tWeight\t0.14272703151065388\n", + "Image_ID\t24\t-\tWeight\t0.14254462871698045\n", + "Image_ID\t20\t-\tWeight\t0.14096073579756538\n" + ] + } + ], + "source": [ + "selected_feature_model = valid_feature_models[\n", + " str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n", + "]\n", + "\n", + "k = int(input(\"Enter value of k: \"))\n", + "if k < 1:\n", + " raise ValueError(\"k should be a positive integer\")\n", + "\n", + "selected_dim_reduction_method = str(\n", + " input(\n", + " \"Enter dimensionality reduction method - one of \"\n", + " + str(list(valid_dim_reduction_methods.keys()))\n", + " )\n", + ")\n", + "\n", + "label_sim_matrix = find_label_label_similarity(fd_collection,selected_feature_model)\n", + "\n", + "extract_latent_semantics(\n", + " fd_collection,\n", + " k,\n", + " selected_feature_model,\n", + " selected_dim_reduction_method,\n", + " sim_matrix=label_sim_matrix,\n", + " top_images=10,\n", + " fn_prefix='label_sim-'\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Phase 2/utils.py b/Phase 2/utils.py index 3e822bd..312d324 100644 --- a/Phase 2/utils.py +++ b/Phase 2/utils.py @@ -641,9 +641,9 @@ class KMeans: for c in self.cluster_centers_: prev_centroid = prev_centroids[c] current_centroid = self.cluster_centers_[c] - convergence_tol = np.sum(abs( - (prev_centroid - current_centroid) / prev_centroid * 100.0 - )) + convergence_tol = np.sum( + abs((prev_centroid - current_centroid) / prev_centroid * 100.0) + ) if convergence_tol > self.tol: optimized = False if self.verbose > 0: @@ -676,11 +676,19 @@ class KMeans: def extract_latent_semantics( - fd_collection, k, feature_model, dim_reduction_method, top_images=None + fd_collection, + k, + feature_model, + dim_reduction_method, + sim_matrix=None, + top_images=None, + fn_prefix="", ): """ Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs + Use `sim_matrix` to manually give similarity matrix instead of feature space + Leave `top_images` blank to display all imageID-weight pairs """ @@ -694,18 +702,28 @@ def extract_latent_semantics( ) all_images = list(fd_collection.find()) - feature_vectors = np.array([img[feature_model] for img in all_images]) - feature_labels = [img["true_label"] for img in all_images] feature_ids = [img["image_id"] for img in all_images] - + top_img_str = "" if top_images is not None: top_img_str = f" (showing only top {top_images} image-weight pairs for each latent semantic)" - print( - "Applying {} on the {} space to get {} latent semantics{}...".format( - dim_reduction_method, feature_model, k, top_img_str + + # if similarity matrix is provided + if sim_matrix is not None: + feature_vectors = sim_matrix + print( + "Applying {} on the {} space to get {} latent semantics{}...".format( + dim_reduction_method, feature_model, k, top_img_str + ) + ) + # else take feature space from database + else: + feature_vectors = np.array([img[feature_model] for img in all_images]) + print( + "Applying {} on the given similarity matrix to get {} latent semantics{}...".format( + dim_reduction_method, k, top_img_str + ) ) - ) displayed_latent_semantics = {} all_latent_semantics = {} @@ -827,8 +845,38 @@ def extract_latent_semantics( print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}") with open( - f"{feature_model}-{dim_reduction_method}-{k}-semantics.json", + f"{fn_prefix}{feature_model}-{dim_reduction_method}-{k}-semantics.json", "w", encoding="utf-8", ) as output_file: json.dump(all_latent_semantics, output_file, ensure_ascii=False) + + +def find_label_label_similarity(fd_collection, feature_model): + """ + Calculate similarity between labels. Lower values indicate higher similarities + """ + assert ( + feature_model in valid_feature_models.values() + ), "feature_model should be one of " + str(list(valid_feature_models.keys())) + + label_sim_matrix = [] + label_mean_vectors = [] + + num_labels = 101 + + for label in range(num_labels): + # get representative vectors for the label + label_mean_vectors.append( + calculate_label_representatives(fd_collection, label, feature_model) + ) + + label_sim_matrix = np.zeros((num_labels, num_labels)) + + for i in range(num_labels): + for j in range(i + 1, num_labels): + # Note: lower the value, lower the distance => higher the similarity + label_sim_matrix[i][j] = feature_distance_matches[feature_model]( + np.array(label_mean_vectors[i]), np.array(label_mean_vectors[j]) + ) + return label_sim_matrix