From 78be91a0ca81c91c29bbbd2a0c0bd2ec0e261746 Mon Sep 17 00:00:00 2001 From: Kaushik Narayan R Date: Tue, 10 Oct 2023 14:58:28 -0700 Subject: [PATCH] refactored pranav's task 3 code changed latent semantic storage, LDA is latent dirichlet allocation and image-weight arrangement is reversed --- Phase 2/task1.ipynb | 218 ------------------------------------------- Phase 2/task3.ipynb | 142 ++++++++++++++++++++++++++++ Phase 2/task_3.ipynb | 174 ++++++++++++++-------------------- Phase 2/utils.py | 154 ++++++++++++++++++++++++++++++ 4 files changed, 368 insertions(+), 320 deletions(-) delete mode 100644 Phase 2/task1.ipynb create mode 100644 Phase 2/task3.ipynb diff --git a/Phase 2/task1.ipynb b/Phase 2/task1.ipynb deleted file mode 100644 index 90c6840..0000000 --- a/Phase 2/task1.ipynb +++ /dev/null @@ -1,218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'task0a'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 1\u001b[0m line \u001b[0;36m4\n\u001b[0;32m 2\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmath\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmatplotlib\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpyplot\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mplt\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtask0a\u001b[39;00m \u001b[39mimport\u001b[39;00m \u001b[39m*\u001b[39m\n\u001b[0;32m 5\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mscipy\u001b[39;00m\n", - "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'task0a'" - ] - } - ], - "source": [ - "from pymongo import MongoClient\n", - "import math\n", - "import matplotlib.pyplot as plt\n", - "# This was imported for the loadDataset function in the cell below\n", - "from task0a import *\n", - "import scipy" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'loadDataset' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 2\u001b[0m line \u001b[0;36m7\n\u001b[0;32m 4\u001b[0m \u001b[39m# Select the database\u001b[39;00m\n\u001b[0;32m 5\u001b[0m db \u001b[39m=\u001b[39m client\u001b[39m.\u001b[39mMultimedia_Web_DBs\n\u001b[1;32m----> 7\u001b[0m caltechDataset \u001b[39m=\u001b[39m loadDataset()\n\u001b[0;32m 9\u001b[0m \u001b[39m# Fetch all documents from the collection and then sort them by \"_id\"\u001b[39;00m\n\u001b[0;32m 10\u001b[0m feature_descriptors \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(db\u001b[39m.\u001b[39mCaltech101_Feature_Descriptors\u001b[39m.\u001b[39mfind({}))\n", - "\u001b[1;31mNameError\u001b[0m: name 'loadDataset' is not defined" - ] - } - ], - "source": [ - "client = MongoClient()\n", - "client = MongoClient(host=\"localhost\", port=27017)\n", - "\n", - "# Select the database\n", - "db = client.Multimedia_Web_DBs\n", - "\n", - "# This function was the part of task 1 in my project directory. \n", - "# caltechDataset is in format (_id, image_pixels, label)\n", - "caltechDataset = loadDataset()\n", - "\n", - "# Fetch all documents from the collection and then sort them by \"_id\"\n", - "feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n", - "feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n", - "\n", - "num_labels = 101" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def calculate_label_means(l, feature_model):\n", - " \n", - " # Just picking the feature vector for that particular label from even _id rows in the dataset\n", - " label_vectors = [x[feature_model] for x in feature_descriptors if x[\"label\"] == l and x[\"_id\"] % 2 == 0]\n", - " \n", - " label_mean_vector = [sum(col)/len(col) for col in zip(*label_vectors)]\n", - " return label_mean_vector" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "def findKRelevantImages(mean_vector, feature_model, l):\n", - "\n", - " # Same as in above function, but took ids as well.\n", - " # Redundant step.\n", - " label_vectors = [(x[\"_id\"], x[feature_model]) for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", - "\n", - " n = len(label_vectors)\n", - "\n", - " similarities = []\n", - "\n", - " # Use the appropriate similarity based on feature model selected by the user\n", - " match feature_model:\n", - "\n", - " case \"color_moments\":\n", - "\n", - " for i in range(n):\n", - " similarities.append({\"_id\": label_vectors[i][0], \"similarity\": math.dist(mean_vector, label_vectors[i][1])})\n", - " similarities = sorted(similarities, key=lambda x: x[\"similarity\"], reverse=False)\n", - "\n", - " case \"hog\":\n", - "\n", - " for i in range(n):\n", - " similarities.append({\"_id\": label_vectors[i][0], \"similarity\": (np.dot(mean_vector, label_vectors[i][1]) / (np.linalg.norm(mean_vector) * np.linalg.norm(label_vectors[i][1])))})\n", - " similarities = sorted(similarities, key=lambda x: x[\"similarity\"], reverse=True)\n", - " \n", - " case \"layer3\" | \"avgpool\" | \"fc\":\n", - "\n", - " for i in range(n):\n", - " similarities.append({\"_id\": label_vectors[i][0], \"similarity\": scipy.stats.pearsonr(mean_vector, label_vectors[i][1]).statistic})\n", - " similarities = sorted(similarities, key=lambda x: x[\"similarity\"], reverse=True)\n", - " \n", - " return similarities\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "def main():\n", - "\n", - " # Load dataset\n", - "\n", - " # User input for Image ID\n", - " l = int(input(\"Enter query label: \"))\n", - " k = int(input(\"Enter k: \"))\n", - "\n", - " features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n", - "\n", - " # User input for feature model to extract\n", - " print(\"1: Color moments\")\n", - " print(\"2: HOG\")\n", - " print(\"3: Resnet50 Avgpool layer\")\n", - " print(\"4: Resnet50 Layer 3\")\n", - " print(\"5: Resnet50 FC layer\")\n", - " feature_model = features[int(input(\"Select the feature model: \")) - 1]\n", - "\n", - " mean_vector = calculate_label_means(l, feature_model)\n", - "\n", - " similar_images = findKRelevantImages(mean_vector, feature_model, l)\n", - "\n", - " for i in range(k):\n", - " print(similar_images[i])\n", - "\n", - " # Show the \"k relevant images\"\n", - " fig, axes = plt.subplots(1, k, figsize=(15, 5))\n", - "\n", - " for i in range(k):\n", - " # caltechDataset[similar_images[i][\"_id\"]][1] because\n", - " # similar_images[i][\"_id\"] will provide me the image id\n", - " # [1] will be image pixel values since caltechDataset is in format (id, pixels, label)\n", - " axes[i].imshow(caltechDataset[similar_images[i][\"_id\"]][1].permute(1, 2, 0))\n", - " axes[i].set_title(f'id: {similar_images[i][\"_id\"]}')\n", - "\n", - " # Show the figure with all the images\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "ename": "KeyboardInterrupt", - "evalue": "Interrupted by user", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 6\u001b[0m line \u001b[0;36m2\n\u001b[0;32m 1\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m__name__\u001b[39m \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m__main__\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m----> 2\u001b[0m main()\n", - "\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 6\u001b[0m line \u001b[0;36m6\n\u001b[0;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mmain\u001b[39m():\n\u001b[0;32m 2\u001b[0m \n\u001b[0;32m 3\u001b[0m \u001b[39m# Load dataset\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \n\u001b[0;32m 5\u001b[0m \u001b[39m# User input for Image ID\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m l \u001b[39m=\u001b[39m \u001b[39mint\u001b[39m(\u001b[39minput\u001b[39;49m(\u001b[39m\"\u001b[39;49m\u001b[39mEnter query label: \u001b[39;49m\u001b[39m\"\u001b[39;49m))\n\u001b[0;32m 7\u001b[0m k \u001b[39m=\u001b[39m \u001b[39mint\u001b[39m(\u001b[39minput\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mEnter k: \u001b[39m\u001b[39m\"\u001b[39m))\n\u001b[0;32m 9\u001b[0m features \u001b[39m=\u001b[39m [\u001b[39m'\u001b[39m\u001b[39mcolor_moments\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mhog\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mlayer3\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mavgpool\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mfc\u001b[39m\u001b[39m'\u001b[39m]\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py:1202\u001b[0m, in \u001b[0;36mKernel.raw_input\u001b[1;34m(self, prompt)\u001b[0m\n\u001b[0;32m 1200\u001b[0m msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mraw_input was called, but this frontend does not support input requests.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1201\u001b[0m \u001b[39mraise\u001b[39;00m StdinNotImplementedError(msg)\n\u001b[1;32m-> 1202\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_input_request(\n\u001b[0;32m 1203\u001b[0m \u001b[39mstr\u001b[39;49m(prompt),\n\u001b[0;32m 1204\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parent_ident[\u001b[39m\"\u001b[39;49m\u001b[39mshell\u001b[39;49m\u001b[39m\"\u001b[39;49m],\n\u001b[0;32m 1205\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_parent(\u001b[39m\"\u001b[39;49m\u001b[39mshell\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m 1206\u001b[0m password\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[0;32m 1207\u001b[0m )\n", - "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py:1245\u001b[0m, in \u001b[0;36mKernel._input_request\u001b[1;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[0;32m 1242\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyboardInterrupt\u001b[39;00m:\n\u001b[0;32m 1243\u001b[0m \u001b[39m# re-raise KeyboardInterrupt, to truncate traceback\u001b[39;00m\n\u001b[0;32m 1244\u001b[0m msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mInterrupted by user\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m-> 1245\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyboardInterrupt\u001b[39;00m(msg) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 1246\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[0;32m 1247\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlog\u001b[39m.\u001b[39mwarning(\u001b[39m\"\u001b[39m\u001b[39mInvalid Message:\u001b[39m\u001b[39m\"\u001b[39m, exc_info\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n", - "\u001b[1;31mKeyboardInterrupt\u001b[0m: Interrupted by user" - ] - } - ], - "source": [ - "if __name__ == \"__main__\":\n", - " main()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Phase 2/task3.ipynb b/Phase 2/task3.ipynb new file mode 100644 index 0000000..ba60981 --- /dev/null +++ b/Phase 2/task3.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from pymongo import MongoClient\n", + "from task0a import *\n", + "import scipy\n", + "import numpy as np\n", + "from sklearn.decomposition import NMF\n", + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "from sklearn.cluster import KMeans\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = MongoClient()\n", + "client = MongoClient(host=\"localhost\", port=27017)\n", + "\n", + "# Select the database\n", + "db = client.Multimedia_Web_DBs\n", + "\n", + "# Fetch all documents from the collection and then sort them by \"_id\"\n", + "feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n", + "feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n", + "\n", + "num_labels = 101" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def extractKLatentSemantics(k, feature_model, dim_reduction):\n", + "\n", + " feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + " feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + " feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + "\n", + " filename = ''\n", + "\n", + "\n", + " match dim_reduction:\n", + "\n", + " case 1:\n", + " filename = f'{feature_model}-svd-semantics.json'\n", + " U, S, Vh = scipy.sparse.linalg.svds(np.array(feature_vectors), k=k)\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + "\n", + " case 2:\n", + " filename = f'{feature_model}-nnmf-semantics.json'\n", + " model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n", + " min_value = np.min(feature_vectors)\n", + " feature_vectors_shifted = feature_vectors - min_value\n", + " U = model.fit_transform(np.array(feature_vectors_shifted))\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + "\n", + " case 3:\n", + " filename = f'{feature_model}-lda-semantics.json'\n", + " U = LinearDiscriminantAnalysis(n_components = k).fit_transform(feature_vectors, feature_labels)\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + "\n", + " case 4:\n", + " filename = f'{feature_model}-kmeans-semantics.json'\n", + " kmeans = KMeans(n_clusters = k)\n", + " kmeans.fit(feature_vectors)\n", + " U = kmeans.transform(feature_vectors)\n", + " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", + " \n", + " k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n", + " with open(filename, 'w', encoding='utf-8') as f:\n", + " json.dump(k_latent_semantics, f, ensure_ascii = False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def main():\n", + "\n", + " # Load dataset\n", + "\n", + " # User input for Image ID\n", + " k = int(input(\"Enter k: \"))\n", + "\n", + " features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n", + "\n", + " # User input for feature model to extract\n", + " print(\"\\n1: Color moments\")\n", + " print(\"2: HOG\")\n", + " print(\"3: Resnet50 Avgpool layer\")\n", + " print(\"4: Resnet50 Layer 3\")\n", + " print(\"5: Resnet50 FC layer\")\n", + " feature_model = features[int(input(\"Select the feature model: \")) - 1]\n", + "\n", + " print(\"\\n1. SVD\")\n", + " print(\"2. NNMF\")\n", + " print(\"3. LDA\")\n", + " print(\"4. k-means\")\n", + " dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n", + "\n", + " extractKLatentSemantics(k, feature_model, dim_reduction)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Phase 2/task_3.ipynb b/Phase 2/task_3.ipynb index ba60981..477e772 100644 --- a/Phase 2/task_3.ipynb +++ b/Phase 2/task_3.ipynb @@ -2,126 +2,82 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "import json\n", - "from pymongo import MongoClient\n", - "from task0a import *\n", - "import scipy\n", - "import numpy as np\n", - "from sklearn.decomposition import NMF\n", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", - "from sklearn.cluster import KMeans\n" + "from utils import *\n", + "warnings.filterwarnings('ignore')\n", + "%matplotlib inline" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "client = MongoClient()\n", - "client = MongoClient(host=\"localhost\", port=27017)\n", - "\n", - "# Select the database\n", - "db = client.Multimedia_Web_DBs\n", - "\n", - "# Fetch all documents from the collection and then sort them by \"_id\"\n", - "feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n", - "feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n", - "\n", - "num_labels = 101" + "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Applying lda on the fc_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Phase 2\\task_3.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[0;32m 7\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mk should be a positive integer\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 9\u001b[0m selected_dim_reduction_method \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\n\u001b[0;32m 10\u001b[0m \u001b[39minput\u001b[39m(\n\u001b[0;32m 11\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mEnter dimensionality reduction method - one of \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 12\u001b[0m \u001b[39m+\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mlist\u001b[39m(valid_dim_reduction_methods\u001b[39m.\u001b[39mkeys()))\n\u001b[0;32m 13\u001b[0m )\n\u001b[0;32m 14\u001b[0m )\n\u001b[1;32m---> 16\u001b[0m extract_latent_semantics(\n\u001b[0;32m 17\u001b[0m fd_collection,\n\u001b[0;32m 18\u001b[0m k,\n\u001b[0;32m 19\u001b[0m selected_feature_model,\n\u001b[0;32m 20\u001b[0m selected_dim_reduction_method,\n\u001b[0;32m 21\u001b[0m top_images\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,\n\u001b[0;32m 22\u001b[0m )\n", + "File \u001b[1;32mc:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Phase 2\\utils.py:674\u001b[0m, in \u001b[0;36mextract_latent_semantics\u001b[1;34m(fd_collection, k, feature_model, dim_reduction_method, top_images)\u001b[0m\n\u001b[0;32m 669\u001b[0m \u001b[39m# unsupervised LDA to extract topics (Latent Dirichlet Allocation)\u001b[39;00m\n\u001b[0;32m 670\u001b[0m \u001b[39m# Note: LDA takes a bit of time\u001b[39;00m\n\u001b[0;32m 671\u001b[0m \u001b[39mcase\u001b[39;00m \u001b[39m3\u001b[39m:\n\u001b[0;32m 672\u001b[0m \u001b[39m# LDA requires non-negative input data\u001b[39;00m\n\u001b[0;32m 673\u001b[0m \u001b[39m# so shift the input by subtracting the smallest value\u001b[39;00m\n\u001b[1;32m--> 674\u001b[0m min_value \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mmin(feature_vectors)\n\u001b[0;32m 675\u001b[0m feature_vectors_shifted \u001b[39m=\u001b[39m feature_vectors \u001b[39m-\u001b[39m min_value\n\u001b[0;32m 677\u001b[0m model \u001b[39m=\u001b[39m LatentDirichletAllocation(n_components\u001b[39m=\u001b[39mk, learning_method\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39monline\u001b[39m\u001b[39m\"\u001b[39m, verbose\u001b[39m=\u001b[39m\u001b[39m4\u001b[39m)\n", + "File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\base.py:1151\u001b[0m, in \u001b[0;36m_fit_context..decorator..wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1144\u001b[0m estimator\u001b[39m.\u001b[39m_validate_params()\n\u001b[0;32m 1146\u001b[0m \u001b[39mwith\u001b[39;00m config_context(\n\u001b[0;32m 1147\u001b[0m skip_parameter_validation\u001b[39m=\u001b[39m(\n\u001b[0;32m 1148\u001b[0m prefer_skip_nested_validation \u001b[39mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1149\u001b[0m )\n\u001b[0;32m 1150\u001b[0m ):\n\u001b[1;32m-> 1151\u001b[0m \u001b[39mreturn\u001b[39;00m fit_method(estimator, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\decomposition\\_lda.py:665\u001b[0m, in \u001b[0;36mLatentDirichletAllocation.fit\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 663\u001b[0m \u001b[39mif\u001b[39;00m learning_method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39monline\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m 664\u001b[0m \u001b[39mfor\u001b[39;00m idx_slice \u001b[39min\u001b[39;00m gen_batches(n_samples, batch_size):\n\u001b[1;32m--> 665\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_em_step(\n\u001b[0;32m 666\u001b[0m X[idx_slice, :],\n\u001b[0;32m 667\u001b[0m total_samples\u001b[39m=\u001b[39;49mn_samples,\n\u001b[0;32m 668\u001b[0m batch_update\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[0;32m 669\u001b[0m parallel\u001b[39m=\u001b[39;49mparallel,\n\u001b[0;32m 670\u001b[0m )\n\u001b[0;32m 671\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 672\u001b[0m \u001b[39m# batch update\u001b[39;00m\n\u001b[0;32m 673\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_em_step(\n\u001b[0;32m 674\u001b[0m X, total_samples\u001b[39m=\u001b[39mn_samples, batch_update\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, parallel\u001b[39m=\u001b[39mparallel\n\u001b[0;32m 675\u001b[0m )\n", + "File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\decomposition\\_lda.py:524\u001b[0m, in \u001b[0;36mLatentDirichletAllocation._em_step\u001b[1;34m(self, X, total_samples, batch_update, parallel)\u001b[0m\n\u001b[0;32m 497\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"EM update for 1 iteration.\u001b[39;00m\n\u001b[0;32m 498\u001b[0m \n\u001b[0;32m 499\u001b[0m \u001b[39mupdate `_component` by batch VB or online VB.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 520\u001b[0m \u001b[39m Unnormalized document topic distribution.\u001b[39;00m\n\u001b[0;32m 521\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 523\u001b[0m \u001b[39m# E-step\u001b[39;00m\n\u001b[1;32m--> 524\u001b[0m _, suff_stats \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_e_step(\n\u001b[0;32m 525\u001b[0m X, cal_sstats\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, random_init\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, parallel\u001b[39m=\u001b[39;49mparallel\n\u001b[0;32m 526\u001b[0m )\n\u001b[0;32m 528\u001b[0m \u001b[39m# M-step\u001b[39;00m\n\u001b[0;32m 529\u001b[0m \u001b[39mif\u001b[39;00m batch_update:\n", + "File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\decomposition\\_lda.py:467\u001b[0m, in \u001b[0;36mLatentDirichletAllocation._e_step\u001b[1;34m(self, X, cal_sstats, random_init, parallel)\u001b[0m\n\u001b[0;32m 465\u001b[0m \u001b[39mif\u001b[39;00m parallel \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 466\u001b[0m parallel \u001b[39m=\u001b[39m Parallel(n_jobs\u001b[39m=\u001b[39mn_jobs, verbose\u001b[39m=\u001b[39m\u001b[39mmax\u001b[39m(\u001b[39m0\u001b[39m, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mverbose \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m))\n\u001b[1;32m--> 467\u001b[0m results \u001b[39m=\u001b[39m parallel(\n\u001b[0;32m 468\u001b[0m delayed(_update_doc_distribution)(\n\u001b[0;32m 469\u001b[0m X[idx_slice, :],\n\u001b[0;32m 470\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mexp_dirichlet_component_,\n\u001b[0;32m 471\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdoc_topic_prior_,\n\u001b[0;32m 472\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_doc_update_iter,\n\u001b[0;32m 473\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmean_change_tol,\n\u001b[0;32m 474\u001b[0m cal_sstats,\n\u001b[0;32m 475\u001b[0m random_state,\n\u001b[0;32m 476\u001b[0m )\n\u001b[0;32m 477\u001b[0m \u001b[39mfor\u001b[39;49;00m idx_slice \u001b[39min\u001b[39;49;00m gen_even_slices(X\u001b[39m.\u001b[39;49mshape[\u001b[39m0\u001b[39;49m], n_jobs)\n\u001b[0;32m 478\u001b[0m )\n\u001b[0;32m 480\u001b[0m \u001b[39m# merge result\u001b[39;00m\n\u001b[0;32m 481\u001b[0m doc_topics, sstats_list \u001b[39m=\u001b[39m \u001b[39mzip\u001b[39m(\u001b[39m*\u001b[39mresults)\n", + "File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\utils\\parallel.py:65\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 60\u001b[0m config \u001b[39m=\u001b[39m get_config()\n\u001b[0;32m 61\u001b[0m iterable_with_config \u001b[39m=\u001b[39m (\n\u001b[0;32m 62\u001b[0m (_with_config(delayed_func, config), args, kwargs)\n\u001b[0;32m 63\u001b[0m \u001b[39mfor\u001b[39;00m delayed_func, args, kwargs \u001b[39min\u001b[39;00m iterable\n\u001b[0;32m 64\u001b[0m )\n\u001b[1;32m---> 65\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__call__\u001b[39;49m(iterable_with_config)\n", + "File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\joblib\\parallel.py:1863\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 1861\u001b[0m output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_sequential_output(iterable)\n\u001b[0;32m 1862\u001b[0m \u001b[39mnext\u001b[39m(output)\n\u001b[1;32m-> 1863\u001b[0m \u001b[39mreturn\u001b[39;00m output \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreturn_generator \u001b[39melse\u001b[39;00m \u001b[39mlist\u001b[39;49m(output)\n\u001b[0;32m 1865\u001b[0m \u001b[39m# Let's create an ID that uniquely identifies the current call. If the\u001b[39;00m\n\u001b[0;32m 1866\u001b[0m \u001b[39m# call is interrupted early and that the same instance is immediately\u001b[39;00m\n\u001b[0;32m 1867\u001b[0m \u001b[39m# re-used, this id will be used to prevent workers that were\u001b[39;00m\n\u001b[0;32m 1868\u001b[0m \u001b[39m# concurrently finalizing a task from the previous call to run the\u001b[39;00m\n\u001b[0;32m 1869\u001b[0m \u001b[39m# callback.\u001b[39;00m\n\u001b[0;32m 1870\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock:\n", + "File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\joblib\\parallel.py:1792\u001b[0m, in \u001b[0;36mParallel._get_sequential_output\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 1790\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_dispatched_batches \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m 1791\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_dispatched_tasks \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[1;32m-> 1792\u001b[0m res \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 1793\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_completed_tasks \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m 1794\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprint_progress()\n", + "File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\utils\\parallel.py:127\u001b[0m, in \u001b[0;36m_FuncWrapper.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 125\u001b[0m config \u001b[39m=\u001b[39m {}\n\u001b[0;32m 126\u001b[0m \u001b[39mwith\u001b[39;00m config_context(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mconfig):\n\u001b[1;32m--> 127\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfunction(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\decomposition\\_lda.py:144\u001b[0m, in \u001b[0;36m_update_doc_distribution\u001b[1;34m(X, exp_topic_word_distr, doc_topic_prior, max_doc_update_iter, mean_change_tol, cal_sstats, random_state)\u001b[0m\n\u001b[0;32m 140\u001b[0m last_d \u001b[39m=\u001b[39m doc_topic_d\n\u001b[0;32m 142\u001b[0m \u001b[39m# The optimal phi_{dwk} is proportional to\u001b[39;00m\n\u001b[0;32m 143\u001b[0m \u001b[39m# exp(E[log(theta_{dk})]) * exp(E[log(beta_{dw})]).\u001b[39;00m\n\u001b[1;32m--> 144\u001b[0m norm_phi \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39;49mdot(exp_doc_topic_d, exp_topic_word_d) \u001b[39m+\u001b[39m eps\n\u001b[0;32m 146\u001b[0m doc_topic_d \u001b[39m=\u001b[39m exp_doc_topic_d \u001b[39m*\u001b[39m np\u001b[39m.\u001b[39mdot(cnts \u001b[39m/\u001b[39m norm_phi, exp_topic_word_d\u001b[39m.\u001b[39mT)\n\u001b[0;32m 147\u001b[0m \u001b[39m# Note: adds doc_topic_prior to doc_topic_d, in-place.\u001b[39;00m\n", + "File \u001b[1;32m<__array_function__ internals>:180\u001b[0m, in \u001b[0;36mdot\u001b[1;34m(*args, **kwargs)\u001b[0m\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], "source": [ - "def extractKLatentSemantics(k, feature_model, dim_reduction):\n", + "selected_feature_model = valid_feature_models[\n", + " str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n", + "]\n", "\n", - " feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", - " feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", - " feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", + "k = int(input(\"Enter value of k: \"))\n", + "if k < 1:\n", + " raise ValueError(\"k should be a positive integer\")\n", "\n", - " filename = ''\n", + "selected_dim_reduction_method = str(\n", + " input(\n", + " \"Enter dimensionality reduction method - one of \"\n", + " + str(list(valid_dim_reduction_methods.keys()))\n", + " )\n", + ")\n", "\n", - "\n", - " match dim_reduction:\n", - "\n", - " case 1:\n", - " filename = f'{feature_model}-svd-semantics.json'\n", - " U, S, Vh = scipy.sparse.linalg.svds(np.array(feature_vectors), k=k)\n", - " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", - "\n", - " case 2:\n", - " filename = f'{feature_model}-nnmf-semantics.json'\n", - " model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n", - " min_value = np.min(feature_vectors)\n", - " feature_vectors_shifted = feature_vectors - min_value\n", - " U = model.fit_transform(np.array(feature_vectors_shifted))\n", - " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", - "\n", - " case 3:\n", - " filename = f'{feature_model}-lda-semantics.json'\n", - " U = LinearDiscriminantAnalysis(n_components = k).fit_transform(feature_vectors, feature_labels)\n", - " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", - "\n", - " case 4:\n", - " filename = f'{feature_model}-kmeans-semantics.json'\n", - " kmeans = KMeans(n_clusters = k)\n", - " kmeans.fit(feature_vectors)\n", - " U = kmeans.transform(feature_vectors)\n", - " k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", - " \n", - " k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n", - " with open(filename, 'w', encoding='utf-8') as f:\n", - " json.dump(k_latent_semantics, f, ensure_ascii = False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def main():\n", - "\n", - " # Load dataset\n", - "\n", - " # User input for Image ID\n", - " k = int(input(\"Enter k: \"))\n", - "\n", - " features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n", - "\n", - " # User input for feature model to extract\n", - " print(\"\\n1: Color moments\")\n", - " print(\"2: HOG\")\n", - " print(\"3: Resnet50 Avgpool layer\")\n", - " print(\"4: Resnet50 Layer 3\")\n", - " print(\"5: Resnet50 FC layer\")\n", - " feature_model = features[int(input(\"Select the feature model: \")) - 1]\n", - "\n", - " print(\"\\n1. SVD\")\n", - " print(\"2. NNMF\")\n", - " print(\"3. LDA\")\n", - " print(\"4. k-means\")\n", - " dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n", - "\n", - " extractKLatentSemantics(k, feature_model, dim_reduction)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if __name__ == \"__main__\":\n", - " main()" + "extract_latent_semantics(\n", + " fd_collection,\n", + " k,\n", + " selected_feature_model,\n", + " selected_dim_reduction_method,\n", + " top_images=10,\n", + ")\n" ] }, { @@ -133,8 +89,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" } }, "nbformat": 4, diff --git a/Phase 2/utils.py b/Phase 2/utils.py index 8e6d031..1c70c5c 100644 --- a/Phase 2/utils.py +++ b/Phase 2/utils.py @@ -4,6 +4,11 @@ import math import cv2 import numpy as np from scipy.stats import pearsonr +from scipy.sparse.linalg import svds +from sklearn.decomposition import NMF +from sklearn.decomposition import LatentDirichletAllocation +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.cluster import KMeans # Torch import torch @@ -12,6 +17,7 @@ from torchvision.datasets import Caltech101 from torchvision.models import resnet50, ResNet50_Weights # OS and env +import json from os import getenv from dotenv import load_dotenv import warnings @@ -566,3 +572,151 @@ def show_similar_images_for_label( f"Plots/Label_{target_label}_{feature_model}_{distance_measure.__name__}_k{k}.png" ) plt.show() + + +valid_dim_reduction_methods = { + "svd": 1, + "nmf": 2, + "lda": 3, + "kmeans": 4, +} + + +def extract_latent_semantics( + fd_collection, k, feature_model, dim_reduction_method, top_images=None +): + """ + Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs + + Leave `top_images` blank to display all imageID-weight pairs + """ + + assert ( + feature_model in valid_feature_models.values() + ), "feature_model should be one of " + str(list(valid_feature_models.keys())) + assert ( + dim_reduction_method in valid_dim_reduction_methods.keys() + ), "dim_reduction_method should be one of " + str( + list(valid_dim_reduction_methods.keys()) + ) + + all_images = list(fd_collection.find()) + feature_vectors = np.array([img[feature_model] for img in all_images]) + feature_labels = [img["true_label"] for img in all_images] + feature_ids = [img["image_id"] for img in all_images] + + top_img_str = "" + if top_images is not None: + top_img_str = f" (showing only top {top_images} image-weight pairs for each latent semantic)" + print( + "Applying {} on the {} space to get {} latent semantics{}...".format( + dim_reduction_method, feature_model, k, top_img_str + ) + ) + + displayed_latent_semantics = {} + all_latent_semantics = {} + + match valid_dim_reduction_methods[dim_reduction_method]: + # singular value decomposition + # sparse version of SVD to get only k singular values + case 1: + U, S, V_T = svds(feature_vectors, k=k) + + all_latent_semantics = { + "image-semantic": U.tolist(), + "semantics-core": S.tolist(), + "semantic-feature": V_T.tolist(), + } + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=True, + )[:top_images] + for latent_semantic in U.T + ] + + # non-negative matrix factorization + case 2: + # NNMF requires non-negative input data + # so shift the input by subtracting the smallest value + min_value = np.min(feature_vectors) + feature_vectors_shifted = feature_vectors - min_value + + model = NMF( + n_components=k, + init="random", + solver="cd", + alpha_H=0.01, + alpha_W=0.01, + max_iter=10000, + ) + model.fit(feature_vectors_shifted) + + W = model.transform(feature_vectors_shifted) + H = model.components_ + + all_latent_semantics = {"image-semantic": W, "semantic-feature": H} + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=True, + )[:top_images] + for latent_semantic in W.T + ] + + # unsupervised LDA to extract topics (Latent Dirichlet Allocation) + # Note: LDA takes a bit of time + case 3: + # LDA requires non-negative input data + # so shift the input by subtracting the smallest value + min_value = np.min(feature_vectors) + feature_vectors_shifted = feature_vectors - min_value + + model = LatentDirichletAllocation( + n_components=k, learning_method="online", verbose=4 + ) + model.fit(feature_vectors_shifted) + + # K (k x fd_dim) is the factor matrix for latent semantic-feature pairs + K = model.components_ + # X (4339 x k) is the other factor matrix for image ID-latent semantic pairs + X = model.transform(feature_vectors_shifted) + + all_latent_semantics = {"image-semantic": X, "semantic-feature": K} + + # for each latent semantic, sort imageID-weight pairs by weights in descending order + displayed_latent_semantics = [ + sorted( + list(zip(feature_ids, latent_semantic)), + key=lambda x: x[1], + reverse=True, + )[:top_images] + for latent_semantic in X.T + ] + + # k-means clustering to reduce to k clusters/dimensions + case 4: + model = KMeans(n_clusters=k).fit(feature_vectors) + CC = model.cluster_centers_ + U = model.transform(feature_vectors) + + all_latent_semantics = {"image-semantic": U, "semantic_feature": CC} + + for idx, latent_semantic in enumerate(displayed_latent_semantics): + print(f"Latent semantic no. {idx}") + for image_id, weight in latent_semantic: + print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}") + + with open( + f"{feature_model}-{dim_reduction_method}-{k}-semantics.json", + "w", + encoding="utf-8", + ) as output_file: + json.dump(all_latent_semantics, output_file, ensure_ascii=False)