Refactored Pranav's task 3 code

Changed how latent semantics are stored; "LDA" now refers to Latent Dirichlet Allocation (the earlier code used scikit-learn's LinearDiscriminantAnalysis), and the image-weight arrangement is reversed.
This commit is contained in:
Kaushik Narayan R 2023-10-10 14:58:28 -07:00
parent 5580611ba4
commit 78be91a0ca
4 changed files with 368 additions and 320 deletions

View File

@ -1,218 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'task0a'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 1\u001b[0m line \u001b[0;36m4\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W0sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmath\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W0sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmatplotlib\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpyplot\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mplt\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W0sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtask0a\u001b[39;00m \u001b[39mimport\u001b[39;00m \u001b[39m*\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W0sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mscipy\u001b[39;00m\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'task0a'"
]
}
],
"source": [
"from pymongo import MongoClient\n",
"import math\n",
"import matplotlib.pyplot as plt\n",
"# This was imported for the loadDataset function in the cell below\n",
"from task0a import *\n",
"import scipy"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'loadDataset' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 2\u001b[0m line \u001b[0;36m7\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W1sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39m# Select the database\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W1sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m db \u001b[39m=\u001b[39m client\u001b[39m.\u001b[39mMultimedia_Web_DBs\n\u001b[1;32m----> <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W1sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m caltechDataset \u001b[39m=\u001b[39m loadDataset()\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W1sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m \u001b[39m# Fetch all documents from the collection and then sort them by \"_id\"\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W1sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m feature_descriptors \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(db\u001b[39m.\u001b[39mCaltech101_Feature_Descriptors\u001b[39m.\u001b[39mfind({}))\n",
"\u001b[1;31mNameError\u001b[0m: name 'loadDataset' is not defined"
]
}
],
"source": [
"client = MongoClient()\n",
"client = MongoClient(host=\"localhost\", port=27017)\n",
"\n",
"# Select the database\n",
"db = client.Multimedia_Web_DBs\n",
"\n",
"# This function was the part of task 1 in my project directory. \n",
"# caltechDataset is in format (_id, image_pixels, label)\n",
"caltechDataset = loadDataset()\n",
"\n",
"# Fetch all documents from the collection and then sort them by \"_id\"\n",
"feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n",
"feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n",
"\n",
"num_labels = 101"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def calculate_label_means(l, feature_model):\n",
" \n",
" # Just picking the feature vector for that particular label from even _id rows in the dataset\n",
" label_vectors = [x[feature_model] for x in feature_descriptors if x[\"label\"] == l and x[\"_id\"] % 2 == 0]\n",
" \n",
" label_mean_vector = [sum(col)/len(col) for col in zip(*label_vectors)]\n",
" return label_mean_vector"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def findKRelevantImages(mean_vector, feature_model, l):\n",
"\n",
" # Same as in above function, but took ids as well.\n",
" # Redundant step.\n",
" label_vectors = [(x[\"_id\"], x[feature_model]) for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
"\n",
" n = len(label_vectors)\n",
"\n",
" similarities = []\n",
"\n",
" # Use the appropriate similarity based on feature model selected by the user\n",
" match feature_model:\n",
"\n",
" case \"color_moments\":\n",
"\n",
" for i in range(n):\n",
" similarities.append({\"_id\": label_vectors[i][0], \"similarity\": math.dist(mean_vector, label_vectors[i][1])})\n",
" similarities = sorted(similarities, key=lambda x: x[\"similarity\"], reverse=False)\n",
"\n",
" case \"hog\":\n",
"\n",
" for i in range(n):\n",
" similarities.append({\"_id\": label_vectors[i][0], \"similarity\": (np.dot(mean_vector, label_vectors[i][1]) / (np.linalg.norm(mean_vector) * np.linalg.norm(label_vectors[i][1])))})\n",
" similarities = sorted(similarities, key=lambda x: x[\"similarity\"], reverse=True)\n",
" \n",
" case \"layer3\" | \"avgpool\" | \"fc\":\n",
"\n",
" for i in range(n):\n",
" similarities.append({\"_id\": label_vectors[i][0], \"similarity\": scipy.stats.pearsonr(mean_vector, label_vectors[i][1]).statistic})\n",
" similarities = sorted(similarities, key=lambda x: x[\"similarity\"], reverse=True)\n",
" \n",
" return similarities\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def main():\n",
"\n",
" # Load dataset\n",
"\n",
" # User input for Image ID\n",
" l = int(input(\"Enter query label: \"))\n",
" k = int(input(\"Enter k: \"))\n",
"\n",
" features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n",
"\n",
" # User input for feature model to extract\n",
" print(\"1: Color moments\")\n",
" print(\"2: HOG\")\n",
" print(\"3: Resnet50 Avgpool layer\")\n",
" print(\"4: Resnet50 Layer 3\")\n",
" print(\"5: Resnet50 FC layer\")\n",
" feature_model = features[int(input(\"Select the feature model: \")) - 1]\n",
"\n",
" mean_vector = calculate_label_means(l, feature_model)\n",
"\n",
" similar_images = findKRelevantImages(mean_vector, feature_model, l)\n",
"\n",
" for i in range(k):\n",
" print(similar_images[i])\n",
"\n",
" # Show the \"k relevant images\"\n",
" fig, axes = plt.subplots(1, k, figsize=(15, 5))\n",
"\n",
" for i in range(k):\n",
" # caltechDataset[similar_images[i][\"_id\"]][1] because\n",
" # similar_images[i][\"_id\"] will provide me the image id\n",
" # [1] will be image pixel values since caltechDataset is in format (id, pixels, label)\n",
" axes[i].imshow(caltechDataset[similar_images[i][\"_id\"]][1].permute(1, 2, 0))\n",
" axes[i].set_title(f'id: {similar_images[i][\"_id\"]}')\n",
"\n",
" # Show the figure with all the images\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "Interrupted by user",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 6\u001b[0m line \u001b[0;36m2\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m__name__\u001b[39m \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m__main__\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m----> <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m main()\n",
"\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 6\u001b[0m line \u001b[0;36m6\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mmain\u001b[39m():\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39m# Load dataset\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39m# User input for Image ID\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m l \u001b[39m=\u001b[39m \u001b[39mint\u001b[39m(\u001b[39minput\u001b[39;49m(\u001b[39m\"\u001b[39;49m\u001b[39mEnter query label: \u001b[39;49m\u001b[39m\"\u001b[39;49m))\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m k \u001b[39m=\u001b[39m \u001b[39mint\u001b[39m(\u001b[39minput\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mEnter k: \u001b[39m\u001b[39m\"\u001b[39m))\n\u001b[0;32m <a 
href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W5sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m features \u001b[39m=\u001b[39m [\u001b[39m'\u001b[39m\u001b[39mcolor_moments\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mhog\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mlayer3\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mavgpool\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mfc\u001b[39m\u001b[39m'\u001b[39m]\n",
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py:1202\u001b[0m, in \u001b[0;36mKernel.raw_input\u001b[1;34m(self, prompt)\u001b[0m\n\u001b[0;32m 1200\u001b[0m msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mraw_input was called, but this frontend does not support input requests.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1201\u001b[0m \u001b[39mraise\u001b[39;00m StdinNotImplementedError(msg)\n\u001b[1;32m-> 1202\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_input_request(\n\u001b[0;32m 1203\u001b[0m \u001b[39mstr\u001b[39;49m(prompt),\n\u001b[0;32m 1204\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parent_ident[\u001b[39m\"\u001b[39;49m\u001b[39mshell\u001b[39;49m\u001b[39m\"\u001b[39;49m],\n\u001b[0;32m 1205\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_parent(\u001b[39m\"\u001b[39;49m\u001b[39mshell\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m 1206\u001b[0m password\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[0;32m 1207\u001b[0m )\n",
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py:1245\u001b[0m, in \u001b[0;36mKernel._input_request\u001b[1;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[0;32m 1242\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyboardInterrupt\u001b[39;00m:\n\u001b[0;32m 1243\u001b[0m \u001b[39m# re-raise KeyboardInterrupt, to truncate traceback\u001b[39;00m\n\u001b[0;32m 1244\u001b[0m msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mInterrupted by user\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m-> 1245\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyboardInterrupt\u001b[39;00m(msg) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 1246\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[0;32m 1247\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlog\u001b[39m.\u001b[39mwarning(\u001b[39m\"\u001b[39m\u001b[39mInvalid Message:\u001b[39m\u001b[39m\"\u001b[39m, exc_info\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: Interrupted by user"
]
}
],
"source": [
"if __name__ == \"__main__\":\n",
" main()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

142
Phase 2/task3.ipynb Normal file
View File

@ -0,0 +1,142 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from pymongo import MongoClient\n",
"from task0a import *\n",
"import scipy\n",
"import numpy as np\n",
"from sklearn.decomposition import NMF\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.cluster import KMeans\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client = MongoClient()\n",
"client = MongoClient(host=\"localhost\", port=27017)\n",
"\n",
"# Select the database\n",
"db = client.Multimedia_Web_DBs\n",
"\n",
"# Fetch all documents from the collection and then sort them by \"_id\"\n",
"feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n",
"feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n",
"\n",
"num_labels = 101"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def extractKLatentSemantics(k, feature_model, dim_reduction):\n",
"\n",
" feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
" feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
" feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
"\n",
" filename = ''\n",
"\n",
"\n",
" match dim_reduction:\n",
"\n",
" case 1:\n",
" filename = f'{feature_model}-svd-semantics.json'\n",
" U, S, Vh = scipy.sparse.linalg.svds(np.array(feature_vectors), k=k)\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
"\n",
" case 2:\n",
" filename = f'{feature_model}-nnmf-semantics.json'\n",
" model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n",
" min_value = np.min(feature_vectors)\n",
" feature_vectors_shifted = feature_vectors - min_value\n",
" U = model.fit_transform(np.array(feature_vectors_shifted))\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
"\n",
" case 3:\n",
" filename = f'{feature_model}-lda-semantics.json'\n",
" U = LinearDiscriminantAnalysis(n_components = k).fit_transform(feature_vectors, feature_labels)\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
"\n",
" case 4:\n",
" filename = f'{feature_model}-kmeans-semantics.json'\n",
" kmeans = KMeans(n_clusters = k)\n",
" kmeans.fit(feature_vectors)\n",
" U = kmeans.transform(feature_vectors)\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
" \n",
" k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n",
" with open(filename, 'w', encoding='utf-8') as f:\n",
" json.dump(k_latent_semantics, f, ensure_ascii = False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def main():\n",
"\n",
" # Load dataset\n",
"\n",
" # User input for Image ID\n",
" k = int(input(\"Enter k: \"))\n",
"\n",
" features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n",
"\n",
" # User input for feature model to extract\n",
" print(\"\\n1: Color moments\")\n",
" print(\"2: HOG\")\n",
" print(\"3: Resnet50 Avgpool layer\")\n",
" print(\"4: Resnet50 Layer 3\")\n",
" print(\"5: Resnet50 FC layer\")\n",
" feature_model = features[int(input(\"Select the feature model: \")) - 1]\n",
"\n",
" print(\"\\n1. SVD\")\n",
" print(\"2. NNMF\")\n",
" print(\"3. LDA\")\n",
" print(\"4. k-means\")\n",
" dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n",
"\n",
" extractKLatentSemantics(k, feature_model, dim_reduction)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if __name__ == \"__main__\":\n",
" main()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -2,126 +2,82 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import json\n", "from utils import *\n",
"from pymongo import MongoClient\n", "warnings.filterwarnings('ignore')\n",
"from task0a import *\n", "%matplotlib inline"
"import scipy\n",
"import numpy as np\n",
"from sklearn.decomposition import NMF\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.cluster import KMeans\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"client = MongoClient()\n", "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n"
"client = MongoClient(host=\"localhost\", port=27017)\n",
"\n",
"# Select the database\n",
"db = client.Multimedia_Web_DBs\n",
"\n",
"# Fetch all documents from the collection and then sort them by \"_id\"\n",
"feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n",
"feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n",
"\n",
"num_labels = 101"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Applying lda on the fc_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Phase 2\\task_3.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mk should be a positive integer\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m selected_dim_reduction_method \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m \u001b[39minput\u001b[39m(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mEnter dimensionality reduction method - one of \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m \u001b[39m+\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mlist\u001b[39m(valid_dim_reduction_methods\u001b[39m.\u001b[39mkeys()))\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=12'>13</a>\u001b[0m )\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m )\n\u001b[1;32m---> <a 
href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=15'>16</a>\u001b[0m extract_latent_semantics(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=16'>17</a>\u001b[0m fd_collection,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=17'>18</a>\u001b[0m k,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m selected_feature_model,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=19'>20</a>\u001b[0m selected_dim_reduction_method,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=20'>21</a>\u001b[0m top_images\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_3.ipynb#W2sZmlsZQ%3D%3D?line=21'>22</a>\u001b[0m )\n",
"File \u001b[1;32mc:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Phase 2\\utils.py:674\u001b[0m, in \u001b[0;36mextract_latent_semantics\u001b[1;34m(fd_collection, k, feature_model, dim_reduction_method, top_images)\u001b[0m\n\u001b[0;32m 669\u001b[0m \u001b[39m# unsupervised LDA to extract topics (Latent Dirichlet Allocation)\u001b[39;00m\n\u001b[0;32m 670\u001b[0m \u001b[39m# Note: LDA takes a bit of time\u001b[39;00m\n\u001b[0;32m 671\u001b[0m \u001b[39mcase\u001b[39;00m \u001b[39m3\u001b[39m:\n\u001b[0;32m 672\u001b[0m \u001b[39m# LDA requires non-negative input data\u001b[39;00m\n\u001b[0;32m 673\u001b[0m \u001b[39m# so shift the input by subtracting the smallest value\u001b[39;00m\n\u001b[1;32m--> 674\u001b[0m min_value \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mmin(feature_vectors)\n\u001b[0;32m 675\u001b[0m feature_vectors_shifted \u001b[39m=\u001b[39m feature_vectors \u001b[39m-\u001b[39m min_value\n\u001b[0;32m 677\u001b[0m model \u001b[39m=\u001b[39m LatentDirichletAllocation(n_components\u001b[39m=\u001b[39mk, learning_method\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39monline\u001b[39m\u001b[39m\"\u001b[39m, verbose\u001b[39m=\u001b[39m\u001b[39m4\u001b[39m)\n",
"File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\base.py:1151\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1144\u001b[0m estimator\u001b[39m.\u001b[39m_validate_params()\n\u001b[0;32m 1146\u001b[0m \u001b[39mwith\u001b[39;00m config_context(\n\u001b[0;32m 1147\u001b[0m skip_parameter_validation\u001b[39m=\u001b[39m(\n\u001b[0;32m 1148\u001b[0m prefer_skip_nested_validation \u001b[39mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1149\u001b[0m )\n\u001b[0;32m 1150\u001b[0m ):\n\u001b[1;32m-> 1151\u001b[0m \u001b[39mreturn\u001b[39;00m fit_method(estimator, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
"File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\decomposition\\_lda.py:665\u001b[0m, in \u001b[0;36mLatentDirichletAllocation.fit\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 663\u001b[0m \u001b[39mif\u001b[39;00m learning_method \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39monline\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m 664\u001b[0m \u001b[39mfor\u001b[39;00m idx_slice \u001b[39min\u001b[39;00m gen_batches(n_samples, batch_size):\n\u001b[1;32m--> 665\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_em_step(\n\u001b[0;32m 666\u001b[0m X[idx_slice, :],\n\u001b[0;32m 667\u001b[0m total_samples\u001b[39m=\u001b[39;49mn_samples,\n\u001b[0;32m 668\u001b[0m batch_update\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[0;32m 669\u001b[0m parallel\u001b[39m=\u001b[39;49mparallel,\n\u001b[0;32m 670\u001b[0m )\n\u001b[0;32m 671\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 672\u001b[0m \u001b[39m# batch update\u001b[39;00m\n\u001b[0;32m 673\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_em_step(\n\u001b[0;32m 674\u001b[0m X, total_samples\u001b[39m=\u001b[39mn_samples, batch_update\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, parallel\u001b[39m=\u001b[39mparallel\n\u001b[0;32m 675\u001b[0m )\n",
"File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\decomposition\\_lda.py:524\u001b[0m, in \u001b[0;36mLatentDirichletAllocation._em_step\u001b[1;34m(self, X, total_samples, batch_update, parallel)\u001b[0m\n\u001b[0;32m 497\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"EM update for 1 iteration.\u001b[39;00m\n\u001b[0;32m 498\u001b[0m \n\u001b[0;32m 499\u001b[0m \u001b[39mupdate `_component` by batch VB or online VB.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 520\u001b[0m \u001b[39m Unnormalized document topic distribution.\u001b[39;00m\n\u001b[0;32m 521\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 523\u001b[0m \u001b[39m# E-step\u001b[39;00m\n\u001b[1;32m--> 524\u001b[0m _, suff_stats \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_e_step(\n\u001b[0;32m 525\u001b[0m X, cal_sstats\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, random_init\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, parallel\u001b[39m=\u001b[39;49mparallel\n\u001b[0;32m 526\u001b[0m )\n\u001b[0;32m 528\u001b[0m \u001b[39m# M-step\u001b[39;00m\n\u001b[0;32m 529\u001b[0m \u001b[39mif\u001b[39;00m batch_update:\n",
"File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\decomposition\\_lda.py:467\u001b[0m, in \u001b[0;36mLatentDirichletAllocation._e_step\u001b[1;34m(self, X, cal_sstats, random_init, parallel)\u001b[0m\n\u001b[0;32m 465\u001b[0m \u001b[39mif\u001b[39;00m parallel \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 466\u001b[0m parallel \u001b[39m=\u001b[39m Parallel(n_jobs\u001b[39m=\u001b[39mn_jobs, verbose\u001b[39m=\u001b[39m\u001b[39mmax\u001b[39m(\u001b[39m0\u001b[39m, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mverbose \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m))\n\u001b[1;32m--> 467\u001b[0m results \u001b[39m=\u001b[39m parallel(\n\u001b[0;32m 468\u001b[0m delayed(_update_doc_distribution)(\n\u001b[0;32m 469\u001b[0m X[idx_slice, :],\n\u001b[0;32m 470\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mexp_dirichlet_component_,\n\u001b[0;32m 471\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdoc_topic_prior_,\n\u001b[0;32m 472\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_doc_update_iter,\n\u001b[0;32m 473\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmean_change_tol,\n\u001b[0;32m 474\u001b[0m cal_sstats,\n\u001b[0;32m 475\u001b[0m random_state,\n\u001b[0;32m 476\u001b[0m )\n\u001b[0;32m 477\u001b[0m \u001b[39mfor\u001b[39;49;00m idx_slice \u001b[39min\u001b[39;49;00m gen_even_slices(X\u001b[39m.\u001b[39;49mshape[\u001b[39m0\u001b[39;49m], n_jobs)\n\u001b[0;32m 478\u001b[0m )\n\u001b[0;32m 480\u001b[0m \u001b[39m# merge result\u001b[39;00m\n\u001b[0;32m 481\u001b[0m doc_topics, sstats_list \u001b[39m=\u001b[39m \u001b[39mzip\u001b[39m(\u001b[39m*\u001b[39mresults)\n",
"File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\utils\\parallel.py:65\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 60\u001b[0m config \u001b[39m=\u001b[39m get_config()\n\u001b[0;32m 61\u001b[0m iterable_with_config \u001b[39m=\u001b[39m (\n\u001b[0;32m 62\u001b[0m (_with_config(delayed_func, config), args, kwargs)\n\u001b[0;32m 63\u001b[0m \u001b[39mfor\u001b[39;00m delayed_func, args, kwargs \u001b[39min\u001b[39;00m iterable\n\u001b[0;32m 64\u001b[0m )\n\u001b[1;32m---> 65\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__call__\u001b[39;49m(iterable_with_config)\n",
"File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\joblib\\parallel.py:1863\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 1861\u001b[0m output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_sequential_output(iterable)\n\u001b[0;32m 1862\u001b[0m \u001b[39mnext\u001b[39m(output)\n\u001b[1;32m-> 1863\u001b[0m \u001b[39mreturn\u001b[39;00m output \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreturn_generator \u001b[39melse\u001b[39;00m \u001b[39mlist\u001b[39;49m(output)\n\u001b[0;32m 1865\u001b[0m \u001b[39m# Let's create an ID that uniquely identifies the current call. If the\u001b[39;00m\n\u001b[0;32m 1866\u001b[0m \u001b[39m# call is interrupted early and that the same instance is immediately\u001b[39;00m\n\u001b[0;32m 1867\u001b[0m \u001b[39m# re-used, this id will be used to prevent workers that were\u001b[39;00m\n\u001b[0;32m 1868\u001b[0m \u001b[39m# concurrently finalizing a task from the previous call to run the\u001b[39;00m\n\u001b[0;32m 1869\u001b[0m \u001b[39m# callback.\u001b[39;00m\n\u001b[0;32m 1870\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock:\n",
"File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\joblib\\parallel.py:1792\u001b[0m, in \u001b[0;36mParallel._get_sequential_output\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 1790\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_dispatched_batches \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m 1791\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_dispatched_tasks \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[1;32m-> 1792\u001b[0m res \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 1793\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_completed_tasks \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m 1794\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprint_progress()\n",
"File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\utils\\parallel.py:127\u001b[0m, in \u001b[0;36m_FuncWrapper.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 125\u001b[0m config \u001b[39m=\u001b[39m {}\n\u001b[0;32m 126\u001b[0m \u001b[39mwith\u001b[39;00m config_context(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mconfig):\n\u001b[1;32m--> 127\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfunction(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
"File \u001b[1;32mc:\\Users\\rknar\\.pyenv\\pyenv-win\\versions\\3.10.5\\lib\\site-packages\\sklearn\\decomposition\\_lda.py:144\u001b[0m, in \u001b[0;36m_update_doc_distribution\u001b[1;34m(X, exp_topic_word_distr, doc_topic_prior, max_doc_update_iter, mean_change_tol, cal_sstats, random_state)\u001b[0m\n\u001b[0;32m 140\u001b[0m last_d \u001b[39m=\u001b[39m doc_topic_d\n\u001b[0;32m 142\u001b[0m \u001b[39m# The optimal phi_{dwk} is proportional to\u001b[39;00m\n\u001b[0;32m 143\u001b[0m \u001b[39m# exp(E[log(theta_{dk})]) * exp(E[log(beta_{dw})]).\u001b[39;00m\n\u001b[1;32m--> 144\u001b[0m norm_phi \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39;49mdot(exp_doc_topic_d, exp_topic_word_d) \u001b[39m+\u001b[39m eps\n\u001b[0;32m 146\u001b[0m doc_topic_d \u001b[39m=\u001b[39m exp_doc_topic_d \u001b[39m*\u001b[39m np\u001b[39m.\u001b[39mdot(cnts \u001b[39m/\u001b[39m norm_phi, exp_topic_word_d\u001b[39m.\u001b[39mT)\n\u001b[0;32m 147\u001b[0m \u001b[39m# Note: adds doc_topic_prior to doc_topic_d, in-place.\u001b[39;00m\n",
"File \u001b[1;32m<__array_function__ internals>:180\u001b[0m, in \u001b[0;36mdot\u001b[1;34m(*args, **kwargs)\u001b[0m\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [ "source": [
"def extractKLatentSemantics(k, feature_model, dim_reduction):\n", "selected_feature_model = valid_feature_models[\n",
" str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
"]\n",
"\n", "\n",
" feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", "k = int(input(\"Enter value of k: \"))\n",
" feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", "if k < 1:\n",
" feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n", " raise ValueError(\"k should be a positive integer\")\n",
"\n", "\n",
" filename = ''\n", "selected_dim_reduction_method = str(\n",
" input(\n",
" \"Enter dimensionality reduction method - one of \"\n",
" + str(list(valid_dim_reduction_methods.keys()))\n",
" )\n",
")\n",
"\n", "\n",
"\n", "extract_latent_semantics(\n",
" match dim_reduction:\n", " fd_collection,\n",
"\n", " k,\n",
" case 1:\n", " selected_feature_model,\n",
" filename = f'{feature_model}-svd-semantics.json'\n", " selected_dim_reduction_method,\n",
" U, S, Vh = scipy.sparse.linalg.svds(np.array(feature_vectors), k=k)\n", " top_images=10,\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n", ")\n"
"\n",
" case 2:\n",
" filename = f'{feature_model}-nnmf-semantics.json'\n",
" model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n",
" min_value = np.min(feature_vectors)\n",
" feature_vectors_shifted = feature_vectors - min_value\n",
" U = model.fit_transform(np.array(feature_vectors_shifted))\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
"\n",
" case 3:\n",
" filename = f'{feature_model}-lda-semantics.json'\n",
" U = LinearDiscriminantAnalysis(n_components = k).fit_transform(feature_vectors, feature_labels)\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
"\n",
" case 4:\n",
" filename = f'{feature_model}-kmeans-semantics.json'\n",
" kmeans = KMeans(n_clusters = k)\n",
" kmeans.fit(feature_vectors)\n",
" U = kmeans.transform(feature_vectors)\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
" \n",
" k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n",
" with open(filename, 'w', encoding='utf-8') as f:\n",
" json.dump(k_latent_semantics, f, ensure_ascii = False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def main():\n",
"\n",
" # Load dataset\n",
"\n",
" # User input for k (number of latent semantics to extract)\n",
" k = int(input(\"Enter k: \"))\n",
"\n",
" features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n",
"\n",
" # User input for feature model to extract\n",
" print(\"\\n1: Color moments\")\n",
" print(\"2: HOG\")\n",
" print(\"3: Resnet50 Layer 3\")\n",
" print(\"4: Resnet50 Avgpool layer\")\n",
" print(\"5: Resnet50 FC layer\")\n",
" feature_model = features[int(input(\"Select the feature model: \")) - 1]\n",
"\n",
" print(\"\\n1. SVD\")\n",
" print(\"2. NNMF\")\n",
" print(\"3. LDA\")\n",
" print(\"4. k-means\")\n",
" dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n",
"\n",
" extractKLatentSemantics(k, feature_model, dim_reduction)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if __name__ == \"__main__\":\n",
" main()"
] ]
}, },
{ {
@ -133,8 +89,22 @@
} }
], ],
"metadata": { "metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": { "language_info": {
"name": "python" "codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -4,6 +4,11 @@ import math
import cv2 import cv2
import numpy as np import numpy as np
from scipy.stats import pearsonr from scipy.stats import pearsonr
from scipy.sparse.linalg import svds
from sklearn.decomposition import NMF
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.cluster import KMeans
# Torch # Torch
import torch import torch
@ -12,6 +17,7 @@ from torchvision.datasets import Caltech101
from torchvision.models import resnet50, ResNet50_Weights from torchvision.models import resnet50, ResNet50_Weights
# OS and env # OS and env
import json
from os import getenv from os import getenv
from dotenv import load_dotenv from dotenv import load_dotenv
import warnings import warnings
@ -566,3 +572,151 @@ def show_similar_images_for_label(
f"Plots/Label_{target_label}_{feature_model}_{distance_measure.__name__}_k{k}.png" f"Plots/Label_{target_label}_{feature_model}_{distance_measure.__name__}_k{k}.png"
) )
plt.show() plt.show()
# Supported dimensionality reduction techniques: user-facing name -> integer code
valid_dim_reduction_methods = dict(svd=1, nmf=2, lda=3, kmeans=4)
def extract_latent_semantics(
fd_collection, k, feature_model, dim_reduction_method, top_images=None
):
"""
Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs
Leave `top_images` blank to display all imageID-weight pairs
"""
assert (
feature_model in valid_feature_models.values()
), "feature_model should be one of " + str(list(valid_feature_models.keys()))
assert (
dim_reduction_method in valid_dim_reduction_methods.keys()
), "dim_reduction_method should be one of " + str(
list(valid_dim_reduction_methods.keys())
)
all_images = list(fd_collection.find())
feature_vectors = np.array([img[feature_model] for img in all_images])
feature_labels = [img["true_label"] for img in all_images]
feature_ids = [img["image_id"] for img in all_images]
top_img_str = ""
if top_images is not None:
top_img_str = f" (showing only top {top_images} image-weight pairs for each latent semantic)"
print(
"Applying {} on the {} space to get {} latent semantics{}...".format(
dim_reduction_method, feature_model, k, top_img_str
)
)
displayed_latent_semantics = {}
all_latent_semantics = {}
match valid_dim_reduction_methods[dim_reduction_method]:
# singular value decomposition
# sparse version of SVD to get only k singular values
case 1:
U, S, V_T = svds(feature_vectors, k=k)
all_latent_semantics = {
"image-semantic": U.tolist(),
"semantics-core": S.tolist(),
"semantic-feature": V_T.tolist(),
}
# for each latent semantic, sort imageID-weight pairs by weights in descending order
displayed_latent_semantics = [
sorted(
list(zip(feature_ids, latent_semantic)),
key=lambda x: x[1],
reverse=True,
)[:top_images]
for latent_semantic in U.T
]
# non-negative matrix factorization
case 2:
# NNMF requires non-negative input data
# so shift the input by subtracting the smallest value
min_value = np.min(feature_vectors)
feature_vectors_shifted = feature_vectors - min_value
model = NMF(
n_components=k,
init="random",
solver="cd",
alpha_H=0.01,
alpha_W=0.01,
max_iter=10000,
)
model.fit(feature_vectors_shifted)
W = model.transform(feature_vectors_shifted)
H = model.components_
all_latent_semantics = {"image-semantic": W, "semantic-feature": H}
# for each latent semantic, sort imageID-weight pairs by weights in descending order
displayed_latent_semantics = [
sorted(
list(zip(feature_ids, latent_semantic)),
key=lambda x: x[1],
reverse=True,
)[:top_images]
for latent_semantic in W.T
]
# unsupervised LDA to extract topics (Latent Dirichlet Allocation)
# Note: LDA takes a bit of time
case 3:
# LDA requires non-negative input data
# so shift the input by subtracting the smallest value
min_value = np.min(feature_vectors)
feature_vectors_shifted = feature_vectors - min_value
model = LatentDirichletAllocation(
n_components=k, learning_method="online", verbose=4
)
model.fit(feature_vectors_shifted)
# K (k x fd_dim) is the factor matrix for latent semantic-feature pairs
K = model.components_
# X (4339 x k) is the other factor matrix for image ID-latent semantic pairs
X = model.transform(feature_vectors_shifted)
all_latent_semantics = {"image-semantic": X, "semantic-feature": K}
# for each latent semantic, sort imageID-weight pairs by weights in descending order
displayed_latent_semantics = [
sorted(
list(zip(feature_ids, latent_semantic)),
key=lambda x: x[1],
reverse=True,
)[:top_images]
for latent_semantic in X.T
]
# k-means clustering to reduce to k clusters/dimensions
case 4:
model = KMeans(n_clusters=k).fit(feature_vectors)
CC = model.cluster_centers_
U = model.transform(feature_vectors)
all_latent_semantics = {"image-semantic": U, "semantic_feature": CC}
for idx, latent_semantic in enumerate(displayed_latent_semantics):
print(f"Latent semantic no. {idx}")
for image_id, weight in latent_semantic:
print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}")
with open(
f"{feature_model}-{dim_reduction_method}-{k}-semantics.json",
"w",
encoding="utf-8",
) as output_file:
json.dump(all_latent_semantics, output_file, ensure_ascii=False)