mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 09:24:07 +00:00
Merge branch 'master' of https://github.com/20kaushik02/CSE515_MWDB_Project into task9
This commit is contained in:
commit
a0d7b500b3
181
Phase 2/task6.ipynb
Normal file
181
Phase 2/task6.ipynb
Normal file
@ -0,0 +1,181 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import math\n",
|
||||
"from pymongo import MongoClient\n",
|
||||
"import scipy\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.decomposition import NMF\n",
|
||||
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
|
||||
"from sklearn.cluster import KMeans"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client = MongoClient()\n",
|
||||
"client = MongoClient(host = \"localhost\", port = 27017)\n",
|
||||
"\n",
|
||||
"# Select the database\n",
|
||||
"db = client.Multimedia_Web_DBs\n",
|
||||
"\n",
|
||||
"# Fetch all documents from the collection and then sort them by \"_id\"\n",
|
||||
"feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n",
|
||||
"feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def extractKLatentSemantics(k, image_sim_matrix, feature_model, dim_reduction):\n",
|
||||
"\n",
|
||||
" feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
|
||||
" feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
|
||||
"\n",
|
||||
" filename = 'ls4.json'\n",
|
||||
"\n",
|
||||
" match dim_reduction:\n",
|
||||
"\n",
|
||||
" case 1:\n",
|
||||
" U, S, Vh = scipy.sparse.linalg.svds(np.array(image_sim_matrix), k=k)\n",
|
||||
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
|
||||
"\n",
|
||||
" case 2:\n",
|
||||
" model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n",
|
||||
" min_value = np.min(image_sim_matrix)\n",
|
||||
" feature_vectors_shifted = image_sim_matrix - min_value\n",
|
||||
" U = model.fit_transform(np.array(feature_vectors_shifted))\n",
|
||||
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
|
||||
"\n",
|
||||
" case 3:\n",
|
||||
" U = LinearDiscriminantAnalysis(n_components = k).fit_transform(image_sim_matrix, feature_labels)\n",
|
||||
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
|
||||
"\n",
|
||||
" case 4:\n",
|
||||
" kmeans = KMeans(n_clusters = k)\n",
|
||||
" kmeans.fit(image_sim_matrix)\n",
|
||||
" U = kmeans.transform(image_sim_matrix)\n",
|
||||
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
|
||||
" \n",
|
||||
" k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n",
|
||||
" with open(filename, 'w', encoding='utf-8') as f:\n",
|
||||
" json.dump(k_latent_semantics, f, ensure_ascii = False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def findImageImageSimMatrix(feature_model):\n",
|
||||
" \n",
|
||||
" feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
|
||||
"\n",
|
||||
" n = len(feature_vectors)\n",
|
||||
"\n",
|
||||
" image_sim_matrix = np.zeros((n, n))\n",
|
||||
"\n",
|
||||
" for i in range(n):\n",
|
||||
" for j in range(i + 1, n):\n",
|
||||
"\n",
|
||||
" match feature_model:\n",
|
||||
"\n",
|
||||
" case \"color_moments\":\n",
|
||||
" image_sim_matrix[i][j] = image_sim_matrix[j][i] = math.dist(feature_vectors[i], feature_vectors[j])\n",
|
||||
" \n",
|
||||
" case \"hog\":\n",
|
||||
" image_sim_matrix[i][j] = image_sim_matrix[j][i] = (np.dot(feature_vectors[i], feature_vectors[j]) / (np.linalg.norm(feature_vectors[i]) * np.linalg.norm(feature_vectors[j])))\n",
|
||||
"\n",
|
||||
" case \"avgpool\" | \"layer3\" | \"fc\":\n",
|
||||
" image_sim_matrix[i][j] = image_sim_matrix[j][i] = scipy.stats.pearsonr(feature_vectors[i], feature_vectors[j]).statistic\n",
|
||||
" \n",
|
||||
" return image_sim_matrix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"def main():\n",
|
||||
"\n",
|
||||
" k = int(input(\"Enter k: \"))\n",
|
||||
"\n",
|
||||
" features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n",
|
||||
"\n",
|
||||
" # User input for feature model to extract\n",
|
||||
" print(\"\\n1: Color moments\")\n",
|
||||
" print(\"2: HOG\")\n",
|
||||
" print(\"3: Resnet50 Avgpool layer\")\n",
|
||||
" print(\"4: Resnet50 Layer 3\")\n",
|
||||
" print(\"5: Resnet50 FC layer\")\n",
|
||||
" feature_model = features[int(input(\"Select the feature model: \")) - 1]\n",
|
||||
"\n",
|
||||
" print(\"\\n1. SVD\")\n",
|
||||
" print(\"2. NNMF\")\n",
|
||||
" print(\"3. LDA\")\n",
|
||||
" print(\"4. k-means\")\n",
|
||||
" dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n",
|
||||
"\n",
|
||||
" image_sim_matrix = findImageImageSimMatrix(feature_model)\n",
|
||||
" print(image_sim_matrix)\n",
|
||||
"\n",
|
||||
" extractKLatentSemantics(k, image_sim_matrix, feature_model, dim_reduction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" main()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -13,7 +13,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -22,7 +22,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -31,115 +31,115 @@
|
||||
"text": [
|
||||
"Applying svd on the cm_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n",
|
||||
"Latent semantic no. 0\n",
|
||||
"Image_ID\t7654\t-\tWeight\t0.08162189274964751\n",
|
||||
"Image_ID\t8634\t-\tWeight\t0.06673589485778451\n",
|
||||
"Image_ID\t5740\t-\tWeight\t0.060058821201972104\n",
|
||||
"Image_ID\t6106\t-\tWeight\t0.05306661393931607\n",
|
||||
"Image_ID\t5456\t-\tWeight\t0.05170171570330845\n",
|
||||
"Image_ID\t7814\t-\tWeight\t0.04997978865116185\n",
|
||||
"Image_ID\t6248\t-\tWeight\t0.04946683639815072\n",
|
||||
"Image_ID\t5354\t-\tWeight\t0.04864381025793171\n",
|
||||
"Image_ID\t6108\t-\tWeight\t0.04796763934338538\n",
|
||||
"Image_ID\t5438\t-\tWeight\t0.047874747600689466\n",
|
||||
"Image_ID\t7654\t-\tWeight\t0.0816218927496473\n",
|
||||
"Image_ID\t8634\t-\tWeight\t0.0667358948577843\n",
|
||||
"Image_ID\t5740\t-\tWeight\t0.06005882120197204\n",
|
||||
"Image_ID\t6106\t-\tWeight\t0.0530666139393161\n",
|
||||
"Image_ID\t5456\t-\tWeight\t0.051701715703308504\n",
|
||||
"Image_ID\t7814\t-\tWeight\t0.04997978865116192\n",
|
||||
"Image_ID\t6248\t-\tWeight\t0.04946683639815059\n",
|
||||
"Image_ID\t5354\t-\tWeight\t0.04864381025793159\n",
|
||||
"Image_ID\t6108\t-\tWeight\t0.0479676393433854\n",
|
||||
"Image_ID\t5438\t-\tWeight\t0.04787474760068962\n",
|
||||
"Latent semantic no. 1\n",
|
||||
"Image_ID\t8026\t-\tWeight\t0.06478360955460367\n",
|
||||
"Image_ID\t6016\t-\tWeight\t0.0632709906607753\n",
|
||||
"Image_ID\t3744\t-\tWeight\t0.05347414608321652\n",
|
||||
"Image_ID\t3720\t-\tWeight\t0.0517124023583583\n",
|
||||
"Image_ID\t7896\t-\tWeight\t0.049366978424645006\n",
|
||||
"Image_ID\t6014\t-\tWeight\t0.047637173390389816\n",
|
||||
"Image_ID\t6768\t-\tWeight\t0.04742408995375774\n",
|
||||
"Image_ID\t4050\t-\tWeight\t0.0456343920101654\n",
|
||||
"Image_ID\t6000\t-\tWeight\t0.04535273415975713\n",
|
||||
"Image_ID\t6552\t-\tWeight\t0.04525300117499444\n",
|
||||
"Image_ID\t7654\t-\tWeight\t0.05566187740909836\n",
|
||||
"Image_ID\t7880\t-\tWeight\t0.05304265128270742\n",
|
||||
"Image_ID\t5132\t-\tWeight\t0.052802620405367526\n",
|
||||
"Image_ID\t4516\t-\tWeight\t0.05032667794065215\n",
|
||||
"Image_ID\t3064\t-\tWeight\t0.04996389545581616\n",
|
||||
"Image_ID\t7808\t-\tWeight\t0.04885211523705829\n",
|
||||
"Image_ID\t8102\t-\tWeight\t0.04821048869059779\n",
|
||||
"Image_ID\t5336\t-\tWeight\t0.047392911537133244\n",
|
||||
"Image_ID\t3058\t-\tWeight\t0.04622961181395915\n",
|
||||
"Image_ID\t7484\t-\tWeight\t0.04563242634411927\n",
|
||||
"Latent semantic no. 2\n",
|
||||
"Image_ID\t7654\t-\tWeight\t0.0704670166327785\n",
|
||||
"Image_ID\t2804\t-\tWeight\t0.059682344110996065\n",
|
||||
"Image_ID\t2710\t-\tWeight\t0.059199111598090534\n",
|
||||
"Image_ID\t3436\t-\tWeight\t0.05368202357324355\n",
|
||||
"Image_ID\t7936\t-\tWeight\t0.053276991496894154\n",
|
||||
"Image_ID\t2708\t-\tWeight\t0.048527019795007204\n",
|
||||
"Image_ID\t3764\t-\tWeight\t0.04835537239641643\n",
|
||||
"Image_ID\t7928\t-\tWeight\t0.047998989024259496\n",
|
||||
"Image_ID\t5684\t-\tWeight\t0.04723047448150771\n",
|
||||
"Image_ID\t5126\t-\tWeight\t0.04720498270016634\n",
|
||||
"Image_ID\t7654\t-\tWeight\t0.07046701663277787\n",
|
||||
"Image_ID\t2804\t-\tWeight\t0.059682344110995336\n",
|
||||
"Image_ID\t2710\t-\tWeight\t0.05919911159809061\n",
|
||||
"Image_ID\t3436\t-\tWeight\t0.05368202357324448\n",
|
||||
"Image_ID\t7936\t-\tWeight\t0.05327699149689366\n",
|
||||
"Image_ID\t2708\t-\tWeight\t0.04852701979500758\n",
|
||||
"Image_ID\t3764\t-\tWeight\t0.04835537239641772\n",
|
||||
"Image_ID\t7928\t-\tWeight\t0.04799898902425922\n",
|
||||
"Image_ID\t5684\t-\tWeight\t0.04723047448150721\n",
|
||||
"Image_ID\t5126\t-\tWeight\t0.04720498270016626\n",
|
||||
"Latent semantic no. 3\n",
|
||||
"Image_ID\t6356\t-\tWeight\t0.0754447261688377\n",
|
||||
"Image_ID\t6480\t-\tWeight\t0.06540890240964665\n",
|
||||
"Image_ID\t4756\t-\tWeight\t0.06075370676621832\n",
|
||||
"Image_ID\t8656\t-\tWeight\t0.060505116069252685\n",
|
||||
"Image_ID\t6050\t-\tWeight\t0.058111632773274836\n",
|
||||
"Image_ID\t6324\t-\tWeight\t0.056492568599917435\n",
|
||||
"Image_ID\t8138\t-\tWeight\t0.0557967464751822\n",
|
||||
"Image_ID\t3460\t-\tWeight\t0.05508818833516222\n",
|
||||
"Image_ID\t200\t-\tWeight\t0.05459477384213874\n",
|
||||
"Image_ID\t7220\t-\tWeight\t0.05376222500332758\n",
|
||||
"Image_ID\t8654\t-\tWeight\t0.08668332932816088\n",
|
||||
"Image_ID\t8618\t-\tWeight\t0.08568859853566119\n",
|
||||
"Image_ID\t8658\t-\tWeight\t0.0777605087520117\n",
|
||||
"Image_ID\t3306\t-\tWeight\t0.0745220591779124\n",
|
||||
"Image_ID\t8620\t-\tWeight\t0.07351843281590886\n",
|
||||
"Image_ID\t8638\t-\tWeight\t0.06948884666766826\n",
|
||||
"Image_ID\t6754\t-\tWeight\t0.06896434951935482\n",
|
||||
"Image_ID\t8676\t-\tWeight\t0.06623938393792103\n",
|
||||
"Image_ID\t4650\t-\tWeight\t0.06566930583744507\n",
|
||||
"Image_ID\t8636\t-\tWeight\t0.06499098805246775\n",
|
||||
"Latent semantic no. 4\n",
|
||||
"Image_ID\t7370\t-\tWeight\t0.05281026462493995\n",
|
||||
"Image_ID\t6528\t-\tWeight\t0.05252803707219332\n",
|
||||
"Image_ID\t8056\t-\tWeight\t0.05175019567880743\n",
|
||||
"Image_ID\t2958\t-\tWeight\t0.05123118911737749\n",
|
||||
"Image_ID\t4614\t-\tWeight\t0.05061302210733273\n",
|
||||
"Image_ID\t8292\t-\tWeight\t0.05000577057549489\n",
|
||||
"Image_ID\t7888\t-\tWeight\t0.04905059301012787\n",
|
||||
"Image_ID\t6540\t-\tWeight\t0.048139958875035395\n",
|
||||
"Image_ID\t6064\t-\tWeight\t0.04605896293857696\n",
|
||||
"Image_ID\t2974\t-\tWeight\t0.04488429099909397\n",
|
||||
"Image_ID\t7370\t-\tWeight\t0.05281026462494081\n",
|
||||
"Image_ID\t6528\t-\tWeight\t0.05252803707219361\n",
|
||||
"Image_ID\t8056\t-\tWeight\t0.0517501956788071\n",
|
||||
"Image_ID\t2958\t-\tWeight\t0.051231189117377514\n",
|
||||
"Image_ID\t4614\t-\tWeight\t0.05061302210733084\n",
|
||||
"Image_ID\t8292\t-\tWeight\t0.05000577057549516\n",
|
||||
"Image_ID\t7888\t-\tWeight\t0.04905059301012733\n",
|
||||
"Image_ID\t6540\t-\tWeight\t0.048139958875035006\n",
|
||||
"Image_ID\t6064\t-\tWeight\t0.04605896293857509\n",
|
||||
"Image_ID\t2974\t-\tWeight\t0.04488429099909442\n",
|
||||
"Latent semantic no. 5\n",
|
||||
"Image_ID\t8570\t-\tWeight\t0.08379938013632145\n",
|
||||
"Image_ID\t7784\t-\tWeight\t0.0723847258804912\n",
|
||||
"Image_ID\t4152\t-\tWeight\t0.060769224719766333\n",
|
||||
"Image_ID\t5114\t-\tWeight\t0.053872121517690504\n",
|
||||
"Image_ID\t7774\t-\tWeight\t0.05324887247523992\n",
|
||||
"Image_ID\t8614\t-\tWeight\t0.05319742868629013\n",
|
||||
"Image_ID\t3072\t-\tWeight\t0.05083994521792821\n",
|
||||
"Image_ID\t7798\t-\tWeight\t0.05059807413594892\n",
|
||||
"Image_ID\t5118\t-\tWeight\t0.05022770477320976\n",
|
||||
"Image_ID\t7040\t-\tWeight\t0.04996996742218053\n",
|
||||
"Image_ID\t8570\t-\tWeight\t0.08379938013632153\n",
|
||||
"Image_ID\t7784\t-\tWeight\t0.07238472588049127\n",
|
||||
"Image_ID\t4152\t-\tWeight\t0.06076922471976642\n",
|
||||
"Image_ID\t5114\t-\tWeight\t0.05387212151769057\n",
|
||||
"Image_ID\t7774\t-\tWeight\t0.05324887247524\n",
|
||||
"Image_ID\t8614\t-\tWeight\t0.05319742868629018\n",
|
||||
"Image_ID\t3072\t-\tWeight\t0.05083994521792827\n",
|
||||
"Image_ID\t7798\t-\tWeight\t0.050598074135949\n",
|
||||
"Image_ID\t5118\t-\tWeight\t0.05022770477320978\n",
|
||||
"Image_ID\t7040\t-\tWeight\t0.04996996742218058\n",
|
||||
"Latent semantic no. 6\n",
|
||||
"Image_ID\t8570\t-\tWeight\t0.07082421149695754\n",
|
||||
"Image_ID\t7774\t-\tWeight\t0.06546594547486781\n",
|
||||
"Image_ID\t4152\t-\tWeight\t0.06440870014673936\n",
|
||||
"Image_ID\t8570\t-\tWeight\t0.07082421149695753\n",
|
||||
"Image_ID\t7774\t-\tWeight\t0.06546594547486784\n",
|
||||
"Image_ID\t4152\t-\tWeight\t0.06440870014673937\n",
|
||||
"Image_ID\t5118\t-\tWeight\t0.06264436903974217\n",
|
||||
"Image_ID\t7784\t-\tWeight\t0.06203552824772956\n",
|
||||
"Image_ID\t7798\t-\tWeight\t0.05899354962287134\n",
|
||||
"Image_ID\t7896\t-\tWeight\t0.05648444493570963\n",
|
||||
"Image_ID\t7784\t-\tWeight\t0.06203552824772957\n",
|
||||
"Image_ID\t7798\t-\tWeight\t0.05899354962287138\n",
|
||||
"Image_ID\t7896\t-\tWeight\t0.056484444935709706\n",
|
||||
"Image_ID\t7766\t-\tWeight\t0.056063042928801675\n",
|
||||
"Image_ID\t7792\t-\tWeight\t0.055578803018497686\n",
|
||||
"Image_ID\t7834\t-\tWeight\t0.055567509183302555\n",
|
||||
"Image_ID\t7792\t-\tWeight\t0.05557880301849769\n",
|
||||
"Image_ID\t7834\t-\tWeight\t0.05556750918330256\n",
|
||||
"Latent semantic no. 7\n",
|
||||
"Image_ID\t7912\t-\tWeight\t0.06634864556518678\n",
|
||||
"Image_ID\t5534\t-\tWeight\t0.05913926717735747\n",
|
||||
"Image_ID\t5550\t-\tWeight\t0.049468125695492526\n",
|
||||
"Image_ID\t2106\t-\tWeight\t0.048274676516220805\n",
|
||||
"Image_ID\t7804\t-\tWeight\t0.04822832951751611\n",
|
||||
"Image_ID\t6198\t-\tWeight\t0.04795521082538372\n",
|
||||
"Image_ID\t6728\t-\tWeight\t0.04729135404469566\n",
|
||||
"Image_ID\t5588\t-\tWeight\t0.04715637083533252\n",
|
||||
"Image_ID\t7276\t-\tWeight\t0.04637482601331893\n",
|
||||
"Image_ID\t6730\t-\tWeight\t0.045930617636659\n",
|
||||
"Image_ID\t1140\t-\tWeight\t0.05317423066517462\n",
|
||||
"Image_ID\t5510\t-\tWeight\t0.052651188836683724\n",
|
||||
"Image_ID\t5282\t-\tWeight\t0.05122146559887229\n",
|
||||
"Image_ID\t1260\t-\tWeight\t0.050478632782130786\n",
|
||||
"Image_ID\t1692\t-\tWeight\t0.05043911725770527\n",
|
||||
"Image_ID\t8656\t-\tWeight\t0.04943228673655803\n",
|
||||
"Image_ID\t1242\t-\tWeight\t0.04886689682608001\n",
|
||||
"Image_ID\t7844\t-\tWeight\t0.048768495445578465\n",
|
||||
"Image_ID\t5100\t-\tWeight\t0.04867702517715619\n",
|
||||
"Image_ID\t5300\t-\tWeight\t0.048353062438932816\n",
|
||||
"Latent semantic no. 8\n",
|
||||
"Image_ID\t1798\t-\tWeight\t0.04586412291217343\n",
|
||||
"Image_ID\t1802\t-\tWeight\t0.044772142290101236\n",
|
||||
"Image_ID\t1806\t-\tWeight\t0.044448676280621977\n",
|
||||
"Image_ID\t1202\t-\tWeight\t0.043679466488681935\n",
|
||||
"Image_ID\t1786\t-\tWeight\t0.04351371229636818\n",
|
||||
"Image_ID\t1784\t-\tWeight\t0.04346765741634348\n",
|
||||
"Image_ID\t1790\t-\tWeight\t0.04288750664761761\n",
|
||||
"Image_ID\t1642\t-\tWeight\t0.041863484069841805\n",
|
||||
"Image_ID\t1788\t-\tWeight\t0.04089406629514228\n",
|
||||
"Image_ID\t1796\t-\tWeight\t0.04068815222347919\n",
|
||||
"Image_ID\t1798\t-\tWeight\t0.0458641229121734\n",
|
||||
"Image_ID\t1802\t-\tWeight\t0.044772142290101194\n",
|
||||
"Image_ID\t1806\t-\tWeight\t0.044448676280621935\n",
|
||||
"Image_ID\t1202\t-\tWeight\t0.043679466488681894\n",
|
||||
"Image_ID\t1786\t-\tWeight\t0.043513712296368134\n",
|
||||
"Image_ID\t1784\t-\tWeight\t0.043467657416343425\n",
|
||||
"Image_ID\t1790\t-\tWeight\t0.04288750664761759\n",
|
||||
"Image_ID\t1642\t-\tWeight\t0.041863484069841764\n",
|
||||
"Image_ID\t1788\t-\tWeight\t0.04089406629514224\n",
|
||||
"Image_ID\t1796\t-\tWeight\t0.04068815222347914\n",
|
||||
"Latent semantic no. 9\n",
|
||||
"Image_ID\t8582\t-\tWeight\t0.02577153311253718\n",
|
||||
"Image_ID\t8612\t-\tWeight\t0.025608143819276445\n",
|
||||
"Image_ID\t7290\t-\tWeight\t0.025578071187110543\n",
|
||||
"Image_ID\t7298\t-\tWeight\t0.025350467801040884\n",
|
||||
"Image_ID\t7302\t-\tWeight\t0.02531661140938117\n",
|
||||
"Image_ID\t7318\t-\tWeight\t0.025212779767014252\n",
|
||||
"Image_ID\t8580\t-\tWeight\t0.025201323062899284\n",
|
||||
"Image_ID\t6392\t-\tWeight\t0.02517086205642468\n",
|
||||
"Image_ID\t2738\t-\tWeight\t0.025106516897995135\n",
|
||||
"Image_ID\t6420\t-\tWeight\t0.02510499876667641\n"
|
||||
"Image_ID\t8616\t-\tWeight\t-0.001110683188398373\n",
|
||||
"Image_ID\t5234\t-\tWeight\t-0.001470742377963864\n",
|
||||
"Image_ID\t3838\t-\tWeight\t-0.0018268938101953923\n",
|
||||
"Image_ID\t7428\t-\tWeight\t-0.001978912864613778\n",
|
||||
"Image_ID\t4664\t-\tWeight\t-0.0020551982165007863\n",
|
||||
"Image_ID\t2754\t-\tWeight\t-0.002091620047637018\n",
|
||||
"Image_ID\t2806\t-\tWeight\t-0.0021702921217260757\n",
|
||||
"Image_ID\t3820\t-\tWeight\t-0.002247214027498397\n",
|
||||
"Image_ID\t3786\t-\tWeight\t-0.002360567100195792\n",
|
||||
"Image_ID\t4928\t-\tWeight\t-0.002395118791388935\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -159,7 +159,7 @@
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"extract_latent_semantics(\n",
|
||||
"extract_latent_semantics_from_feature_model(\n",
|
||||
" fd_collection,\n",
|
||||
" k,\n",
|
||||
" selected_feature_model,\n",
|
||||
|
||||
@ -14,16 +14,6 @@
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from utils import *\n",
|
||||
"warnings.filterwarnings('ignore')\n",
|
||||
@ -32,7 +22,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -41,124 +31,124 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Applying svd on the given similarity matrix to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n",
|
||||
"Applying svd on the given similarity matrix to get 10 latent semantics (showing only top 10 label-weight pairs for each latent semantic)...\n",
|
||||
"Latent semantic no. 0\n",
|
||||
"Image_ID\t200\t-\tWeight\t0.0\n",
|
||||
"Image_ID\t198\t-\tWeight\t-0.004684806351746236\n",
|
||||
"Image_ID\t196\t-\tWeight\t-0.007271577414375871\n",
|
||||
"Image_ID\t194\t-\tWeight\t-0.011073051177514079\n",
|
||||
"Image_ID\t192\t-\tWeight\t-0.011680371639188197\n",
|
||||
"Image_ID\t188\t-\tWeight\t-0.014876024947438421\n",
|
||||
"Image_ID\t186\t-\tWeight\t-0.017327189984007427\n",
|
||||
"Image_ID\t190\t-\tWeight\t-0.021143262428570023\n",
|
||||
"Image_ID\t182\t-\tWeight\t-0.026835375354998945\n",
|
||||
"Image_ID\t180\t-\tWeight\t-0.030539133156424272\n",
|
||||
"label\t28\t-\tWeight\t0.2583354411312026\n",
|
||||
"label\t29\t-\tWeight\t0.2301362547676974\n",
|
||||
"label\t33\t-\tWeight\t0.2129183683279978\n",
|
||||
"label\t9\t-\tWeight\t0.17625685452423093\n",
|
||||
"label\t95\t-\tWeight\t0.16277551497836534\n",
|
||||
"label\t47\t-\tWeight\t0.1424860388015467\n",
|
||||
"label\t39\t-\tWeight\t0.1349747704005884\n",
|
||||
"label\t30\t-\tWeight\t0.13251434767496492\n",
|
||||
"label\t52\t-\tWeight\t0.12669069496270755\n",
|
||||
"label\t8\t-\tWeight\t0.1257730807471899\n",
|
||||
"Latent semantic no. 1\n",
|
||||
"Image_ID\t130\t-\tWeight\t0.21209688019072415\n",
|
||||
"Image_ID\t138\t-\tWeight\t0.20392427070510372\n",
|
||||
"Image_ID\t120\t-\tWeight\t0.1528415927574225\n",
|
||||
"Image_ID\t132\t-\tWeight\t0.14995762877608315\n",
|
||||
"Image_ID\t160\t-\tWeight\t0.1488052541453248\n",
|
||||
"Image_ID\t136\t-\tWeight\t0.14309946283137032\n",
|
||||
"Image_ID\t164\t-\tWeight\t0.1374261619484733\n",
|
||||
"Image_ID\t140\t-\tWeight\t0.13528239495542024\n",
|
||||
"Image_ID\t128\t-\tWeight\t0.12811923299406092\n",
|
||||
"Image_ID\t152\t-\tWeight\t0.12752116772697258\n",
|
||||
"label\t96\t-\tWeight\t0.2666765976054894\n",
|
||||
"label\t97\t-\tWeight\t0.19087869496500426\n",
|
||||
"label\t25\t-\tWeight\t0.17776094778851348\n",
|
||||
"label\t3\t-\tWeight\t0.1759798805642099\n",
|
||||
"label\t98\t-\tWeight\t0.16951497899752574\n",
|
||||
"label\t22\t-\tWeight\t0.1667032655640346\n",
|
||||
"label\t24\t-\tWeight\t0.16034180060184824\n",
|
||||
"label\t19\t-\tWeight\t0.15345532912389587\n",
|
||||
"label\t52\t-\tWeight\t0.13271640119612757\n",
|
||||
"label\t29\t-\tWeight\t0.12856388746021633\n",
|
||||
"Latent semantic no. 2\n",
|
||||
"Image_ID\t4\t-\tWeight\t0.2518749001016952\n",
|
||||
"Image_ID\t8\t-\tWeight\t0.24177133880298157\n",
|
||||
"Image_ID\t58\t-\tWeight\t0.1467873881626323\n",
|
||||
"Image_ID\t0\t-\tWeight\t0.1384139791414865\n",
|
||||
"Image_ID\t56\t-\tWeight\t0.11818058158618501\n",
|
||||
"Image_ID\t20\t-\tWeight\t0.1102967668802325\n",
|
||||
"Image_ID\t84\t-\tWeight\t0.1044376029159064\n",
|
||||
"Image_ID\t18\t-\tWeight\t0.10262843674760519\n",
|
||||
"Image_ID\t138\t-\tWeight\t0.10181762652349924\n",
|
||||
"Image_ID\t70\t-\tWeight\t0.10127861659022899\n",
|
||||
"label\t46\t-\tWeight\t0.21813474254675366\n",
|
||||
"label\t79\t-\tWeight\t0.19091788352587957\n",
|
||||
"label\t55\t-\tWeight\t0.1871080482210247\n",
|
||||
"label\t56\t-\tWeight\t0.18322792605578184\n",
|
||||
"label\t78\t-\tWeight\t0.17506936966351683\n",
|
||||
"label\t98\t-\tWeight\t0.1733164832137484\n",
|
||||
"label\t22\t-\tWeight\t0.17114312653027375\n",
|
||||
"label\t38\t-\tWeight\t0.16928636840289424\n",
|
||||
"label\t45\t-\tWeight\t0.1567042877228484\n",
|
||||
"label\t4\t-\tWeight\t0.15108693899889344\n",
|
||||
"Latent semantic no. 3\n",
|
||||
"Image_ID\t84\t-\tWeight\t0.16299489544466675\n",
|
||||
"Image_ID\t94\t-\tWeight\t0.155336350677209\n",
|
||||
"Image_ID\t70\t-\tWeight\t0.14011002627071287\n",
|
||||
"Image_ID\t102\t-\tWeight\t0.13701247594788535\n",
|
||||
"Image_ID\t88\t-\tWeight\t0.1320753872066342\n",
|
||||
"Image_ID\t82\t-\tWeight\t0.1320716816148611\n",
|
||||
"Image_ID\t86\t-\tWeight\t0.12902969925360877\n",
|
||||
"Image_ID\t72\t-\tWeight\t0.12610296358207826\n",
|
||||
"Image_ID\t92\t-\tWeight\t0.12596461453701044\n",
|
||||
"Image_ID\t66\t-\tWeight\t0.12532841063277217\n",
|
||||
"label\t96\t-\tWeight\t0.2736613529052896\n",
|
||||
"label\t98\t-\tWeight\t0.218185914155306\n",
|
||||
"label\t22\t-\tWeight\t0.1963451355822489\n",
|
||||
"label\t3\t-\tWeight\t0.17627732148468614\n",
|
||||
"label\t39\t-\tWeight\t0.1728992502839298\n",
|
||||
"label\t52\t-\tWeight\t0.15597562436756945\n",
|
||||
"label\t51\t-\tWeight\t0.1291470561734402\n",
|
||||
"label\t30\t-\tWeight\t0.12453129554714541\n",
|
||||
"label\t18\t-\tWeight\t0.1236867360720947\n",
|
||||
"label\t38\t-\tWeight\t0.12184856229773917\n",
|
||||
"Latent semantic no. 4\n",
|
||||
"Image_ID\t176\t-\tWeight\t0.17418620419170064\n",
|
||||
"Image_ID\t184\t-\tWeight\t0.16284491366511475\n",
|
||||
"Image_ID\t178\t-\tWeight\t0.15835141260945226\n",
|
||||
"Image_ID\t182\t-\tWeight\t0.1563230190106094\n",
|
||||
"Image_ID\t180\t-\tWeight\t0.14992527858819726\n",
|
||||
"Image_ID\t170\t-\tWeight\t0.1461798073190985\n",
|
||||
"Image_ID\t174\t-\tWeight\t0.13541698801645058\n",
|
||||
"Image_ID\t166\t-\tWeight\t0.12423630035289784\n",
|
||||
"Image_ID\t172\t-\tWeight\t0.1234361443074221\n",
|
||||
"Image_ID\t52\t-\tWeight\t0.12074682250121946\n",
|
||||
"label\t6\t-\tWeight\t0.23875690719216863\n",
|
||||
"label\t67\t-\tWeight\t0.21007869938490106\n",
|
||||
"label\t63\t-\tWeight\t0.18822840034389135\n",
|
||||
"label\t14\t-\tWeight\t0.18738002200878218\n",
|
||||
"label\t87\t-\tWeight\t0.17508576062247283\n",
|
||||
"label\t23\t-\tWeight\t0.167492867766091\n",
|
||||
"label\t15\t-\tWeight\t0.15522709562173342\n",
|
||||
"label\t61\t-\tWeight\t0.13244353806854162\n",
|
||||
"label\t45\t-\tWeight\t0.12833204093005665\n",
|
||||
"label\t68\t-\tWeight\t0.12622315521729294\n",
|
||||
"Latent semantic no. 5\n",
|
||||
"Image_ID\t184\t-\tWeight\t0.25060450796637307\n",
|
||||
"Image_ID\t96\t-\tWeight\t0.19653319773940384\n",
|
||||
"Image_ID\t4\t-\tWeight\t0.1927615510140044\n",
|
||||
"Image_ID\t190\t-\tWeight\t0.1823467475920773\n",
|
||||
"Image_ID\t104\t-\tWeight\t0.17232402315708764\n",
|
||||
"Image_ID\t176\t-\tWeight\t0.15944267571419668\n",
|
||||
"Image_ID\t2\t-\tWeight\t0.15830010074390483\n",
|
||||
"Image_ID\t180\t-\tWeight\t0.15710086389623582\n",
|
||||
"Image_ID\t86\t-\tWeight\t0.1531972222034532\n",
|
||||
"Image_ID\t178\t-\tWeight\t0.14864580852650564\n",
|
||||
"label\t30\t-\tWeight\t0.17385975982344382\n",
|
||||
"label\t25\t-\tWeight\t0.14655711054814133\n",
|
||||
"label\t39\t-\tWeight\t0.13307896633493813\n",
|
||||
"label\t68\t-\tWeight\t0.12851498788897622\n",
|
||||
"label\t24\t-\tWeight\t0.12828250585375986\n",
|
||||
"label\t0\t-\tWeight\t0.12500243174429157\n",
|
||||
"label\t1\t-\tWeight\t0.12371257574727512\n",
|
||||
"label\t77\t-\tWeight\t0.12370279647800499\n",
|
||||
"label\t89\t-\tWeight\t0.12233344688386875\n",
|
||||
"label\t83\t-\tWeight\t0.11445596984835589\n",
|
||||
"Latent semantic no. 6\n",
|
||||
"Image_ID\t160\t-\tWeight\t0.2664558477429268\n",
|
||||
"Image_ID\t86\t-\tWeight\t0.22964178511691158\n",
|
||||
"Image_ID\t4\t-\tWeight\t0.2027946708731003\n",
|
||||
"Image_ID\t8\t-\tWeight\t0.17594388183949075\n",
|
||||
"Image_ID\t96\t-\tWeight\t0.15932731178540344\n",
|
||||
"Image_ID\t150\t-\tWeight\t0.1557669882841681\n",
|
||||
"Image_ID\t42\t-\tWeight\t0.15015687757605228\n",
|
||||
"Image_ID\t70\t-\tWeight\t0.14221366935133106\n",
|
||||
"Image_ID\t166\t-\tWeight\t0.13822990110337333\n",
|
||||
"Image_ID\t170\t-\tWeight\t0.136006921209686\n",
|
||||
"label\t17\t-\tWeight\t0.2335282879255542\n",
|
||||
"label\t48\t-\tWeight\t0.19418795795666355\n",
|
||||
"label\t21\t-\tWeight\t0.19013440200231033\n",
|
||||
"label\t85\t-\tWeight\t0.17503295059460947\n",
|
||||
"label\t11\t-\tWeight\t0.14933372636956993\n",
|
||||
"label\t1\t-\tWeight\t0.1384254243377172\n",
|
||||
"label\t0\t-\tWeight\t0.13078647401074162\n",
|
||||
"label\t57\t-\tWeight\t0.11374248801163754\n",
|
||||
"label\t10\t-\tWeight\t0.10468223841103744\n",
|
||||
"label\t99\t-\tWeight\t0.10191451131216464\n",
|
||||
"Latent semantic no. 7\n",
|
||||
"Image_ID\t0\t-\tWeight\t0.18579423291522054\n",
|
||||
"Image_ID\t160\t-\tWeight\t0.15838043091994455\n",
|
||||
"Image_ID\t12\t-\tWeight\t0.1569899414230264\n",
|
||||
"Image_ID\t16\t-\tWeight\t0.15348073631252238\n",
|
||||
"Image_ID\t20\t-\tWeight\t0.14749435830520785\n",
|
||||
"Image_ID\t18\t-\tWeight\t0.14710442040625207\n",
|
||||
"Image_ID\t14\t-\tWeight\t0.14572307182896904\n",
|
||||
"Image_ID\t2\t-\tWeight\t0.135886756644037\n",
|
||||
"Image_ID\t158\t-\tWeight\t0.12716375063129493\n",
|
||||
"Image_ID\t154\t-\tWeight\t0.11653475862758583\n",
|
||||
"label\t82\t-\tWeight\t0.23372455436757703\n",
|
||||
"label\t95\t-\tWeight\t0.21795238756371887\n",
|
||||
"label\t60\t-\tWeight\t0.18080422229063045\n",
|
||||
"label\t16\t-\tWeight\t0.1806105172209771\n",
|
||||
"label\t27\t-\tWeight\t0.17365150902149876\n",
|
||||
"label\t59\t-\tWeight\t0.17250044548228938\n",
|
||||
"label\t26\t-\tWeight\t0.1661853291143862\n",
|
||||
"label\t13\t-\tWeight\t0.16331211225170805\n",
|
||||
"label\t34\t-\tWeight\t0.1523080193090529\n",
|
||||
"label\t67\t-\tWeight\t0.13577900574984025\n",
|
||||
"Latent semantic no. 8\n",
|
||||
"Image_ID\t128\t-\tWeight\t0.20162255290912043\n",
|
||||
"Image_ID\t64\t-\tWeight\t0.2013551710742827\n",
|
||||
"Image_ID\t76\t-\tWeight\t0.19200691322367733\n",
|
||||
"Image_ID\t68\t-\tWeight\t0.183262211696717\n",
|
||||
"Image_ID\t2\t-\tWeight\t0.17626949463475755\n",
|
||||
"Image_ID\t126\t-\tWeight\t0.17260073717551033\n",
|
||||
"Image_ID\t130\t-\tWeight\t0.16679745247386799\n",
|
||||
"Image_ID\t0\t-\tWeight\t0.15145696367688846\n",
|
||||
"Image_ID\t80\t-\tWeight\t0.13382645234168947\n",
|
||||
"Image_ID\t132\t-\tWeight\t0.12607547198838437\n",
|
||||
"label\t53\t-\tWeight\t0.2259481751468642\n",
|
||||
"label\t37\t-\tWeight\t0.21583443408756542\n",
|
||||
"label\t76\t-\tWeight\t0.20483376297311964\n",
|
||||
"label\t44\t-\tWeight\t0.1690198227623472\n",
|
||||
"label\t68\t-\tWeight\t0.1650723880318989\n",
|
||||
"label\t28\t-\tWeight\t0.15689929414378492\n",
|
||||
"label\t14\t-\tWeight\t0.1564371673909956\n",
|
||||
"label\t54\t-\tWeight\t0.1553627917623035\n",
|
||||
"label\t51\t-\tWeight\t0.14380435363337046\n",
|
||||
"label\t36\t-\tWeight\t0.13510425005259438\n",
|
||||
"Latent semantic no. 9\n",
|
||||
"Image_ID\t110\t-\tWeight\t0.2380313932091839\n",
|
||||
"Image_ID\t126\t-\tWeight\t0.22284705922022288\n",
|
||||
"Image_ID\t170\t-\tWeight\t0.20294066349000953\n",
|
||||
"Image_ID\t58\t-\tWeight\t0.19271846291888434\n",
|
||||
"Image_ID\t166\t-\tWeight\t0.16710379029940944\n",
|
||||
"Image_ID\t118\t-\tWeight\t0.16159034411481996\n",
|
||||
"Image_ID\t42\t-\tWeight\t0.1585043891315177\n",
|
||||
"Image_ID\t120\t-\tWeight\t0.15529190621970054\n",
|
||||
"Image_ID\t56\t-\tWeight\t0.1484578124120866\n",
|
||||
"Image_ID\t160\t-\tWeight\t0.13578707023661948\n"
|
||||
"label\t19\t-\tWeight\t0.11741024839079275\n",
|
||||
"label\t40\t-\tWeight\t0.11107319334138463\n",
|
||||
"label\t53\t-\tWeight\t0.11058750626248925\n",
|
||||
"label\t51\t-\tWeight\t0.10794606425819818\n",
|
||||
"label\t96\t-\tWeight\t0.10735468567860716\n",
|
||||
"label\t55\t-\tWeight\t0.10731282010915796\n",
|
||||
"label\t50\t-\tWeight\t0.10703093662670059\n",
|
||||
"label\t1\t-\tWeight\t0.10651036503732043\n",
|
||||
"label\t79\t-\tWeight\t0.10640855392103846\n",
|
||||
"label\t47\t-\tWeight\t0.10594110421348357\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -180,14 +170,13 @@
|
||||
"\n",
|
||||
"label_sim_matrix = find_label_label_similarity(fd_collection,selected_feature_model)\n",
|
||||
"\n",
|
||||
"extract_latent_semantics(\n",
|
||||
" fd_collection,\n",
|
||||
" k,\n",
|
||||
"extract_latent_semantics_from_sim_matrix(\n",
|
||||
" label_sim_matrix,\n",
|
||||
" selected_feature_model,\n",
|
||||
" \"label\",\n",
|
||||
" k,\n",
|
||||
" selected_dim_reduction_method,\n",
|
||||
" sim_matrix=label_sim_matrix,\n",
|
||||
" top_images=10,\n",
|
||||
" fn_prefix='label_sim-'\n",
|
||||
")\n"
|
||||
]
|
||||
},
|
||||
|
||||
78
Phase 2/task_6.ipynb
Normal file
78
Phase 2/task_6.ipynb
Normal file
@ -0,0 +1,78 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from utils import *\n",
|
||||
"warnings.filterwarnings('ignore')\n",
|
||||
"%matplotlib inline\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"selected_feature_model = valid_feature_models[\n",
|
||||
" str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"k = int(input(\"Enter value of k: \"))\n",
|
||||
"if k < 1:\n",
|
||||
" raise ValueError(\"k should be a positive integer\")\n",
|
||||
"\n",
|
||||
"selected_dim_reduction_method = str(\n",
|
||||
" input(\n",
|
||||
" \"Enter dimensionality reduction method - one of \"\n",
|
||||
" + str(list(valid_dim_reduction_methods.keys()))\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"image_sim_matrix = find_image_image_similarity(fd_collection,selected_feature_model)\n",
|
||||
"\n",
|
||||
"extract_latent_semantics_from_sim_matrix(\n",
|
||||
" image_sim_matrix,\n",
|
||||
" selected_feature_model,\n",
|
||||
" \"image\",\n",
|
||||
"\tk,\n",
|
||||
" selected_dim_reduction_method,\n",
|
||||
" top_images=10,\n",
|
||||
")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
238
Phase 2/utils.py
238
Phase 2/utils.py
@ -523,7 +523,9 @@ def calculate_label_representatives(fd_collection, label, feature_model):
|
||||
"""Calculate representative feature vector of a label as the mean of all feature vectors under a feature model"""
|
||||
|
||||
label_fds = [
|
||||
np.array(img_fds[feature_model]).flatten() # get the specific feature model's feature vector
|
||||
np.array(
|
||||
img_fds[feature_model]
|
||||
).flatten() # get the specific feature model's feature vector
|
||||
for img_fds in fd_collection.find(
|
||||
{"true_label": label}
|
||||
) # repeat for all images
|
||||
@ -569,7 +571,7 @@ def show_similar_images_for_label(
|
||||
|
||||
for cur_img in all_images:
|
||||
cur_img_id = cur_img["image_id"]
|
||||
cur_img_fd = np.array(cur_img[feature_model])
|
||||
cur_img_fd = np.array(cur_img[feature_model]).flatten()
|
||||
|
||||
cur_dist = distance_measure(
|
||||
cur_img_fd,
|
||||
@ -658,15 +660,13 @@ def show_similar_labels_for_image(
|
||||
|
||||
label_dict = {target_image_id: target_label}
|
||||
|
||||
target_image_fd = np.array(target_image[feature_model])
|
||||
|
||||
all_images = fd_collection.find({})
|
||||
for cur_img in all_images:
|
||||
cur_img_id = cur_img["image_id"]
|
||||
# skip target itself
|
||||
if cur_img_id == target_image_id:
|
||||
continue
|
||||
cur_img_fd = np.array(cur_img[feature_model])
|
||||
cur_img_fd = np.array(cur_img[feature_model]).flatten()
|
||||
cur_dist = distance_measure(
|
||||
cur_img_fd,
|
||||
target_image_fd,
|
||||
@ -698,11 +698,11 @@ def show_similar_labels_for_image(
|
||||
continue
|
||||
else:
|
||||
sample_image, sample_label = dataset[image_id]
|
||||
axs[idx-1].imshow(transforms.ToPILImage()(sample_image))
|
||||
axs[idx-1].set_title(
|
||||
axs[idx - 1].imshow(transforms.ToPILImage()(sample_image))
|
||||
axs[idx - 1].set_title(
|
||||
f"Label: {label_dict[image_id]}; Distance: {min_dists[image_id]}"
|
||||
)
|
||||
axs[idx-1].axis("off")
|
||||
axs[idx - 1].axis("off")
|
||||
|
||||
if save_plots:
|
||||
plt.savefig(
|
||||
@ -841,20 +841,16 @@ def svd(matrix, k):
|
||||
|
||||
return left_singular_vectors, np.diag(singular_values), right_singular_vectors.T
|
||||
|
||||
def extract_latent_semantics(
|
||||
def extract_latent_semantics_from_feature_model(
|
||||
fd_collection,
|
||||
k,
|
||||
feature_model,
|
||||
dim_reduction_method,
|
||||
sim_matrix=None,
|
||||
top_images=None,
|
||||
fn_prefix="",
|
||||
):
|
||||
"""
|
||||
Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs
|
||||
|
||||
Use `sim_matrix` to manually give similarity matrix instead of feature space
|
||||
|
||||
Leave `top_images` blank to display all imageID-weight pairs
|
||||
"""
|
||||
|
||||
@ -874,17 +870,9 @@ def extract_latent_semantics(
|
||||
if top_images is not None:
|
||||
top_img_str = f" (showing only top {top_images} image-weight pairs for each latent semantic)"
|
||||
|
||||
# if similarity matrix is provided
|
||||
if sim_matrix is not None:
|
||||
feature_vectors = sim_matrix
|
||||
print(
|
||||
"Applying {} on the given similarity matrix to get {} latent semantics{}...".format(
|
||||
dim_reduction_method, k, top_img_str
|
||||
feature_vectors = np.array(
|
||||
[np.array(img[feature_model]).flatten() for img in all_images]
|
||||
)
|
||||
)
|
||||
# else take feature space from database
|
||||
else:
|
||||
feature_vectors = np.array([np.array(img[feature_model]).flatten() for img in all_images])
|
||||
print(
|
||||
"Applying {} on the {} space to get {} latent semantics{}...".format(
|
||||
dim_reduction_method, feature_model, k, top_img_str
|
||||
@ -1011,12 +999,180 @@ def extract_latent_semantics(
|
||||
print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}")
|
||||
|
||||
with open(
|
||||
f"{fn_prefix}{feature_model}-{dim_reduction_method}-{k}-semantics.json",
|
||||
f"{feature_model}-{dim_reduction_method}-{k}-semantics.json",
|
||||
"w",
|
||||
encoding="utf-8",
|
||||
) as output_file:
|
||||
json.dump(all_latent_semantics, output_file, ensure_ascii=False)
|
||||
|
||||
def extract_latent_semantics_from_sim_matrix(
|
||||
sim_matrix,
|
||||
feature_model,
|
||||
sim_type,
|
||||
k,
|
||||
dim_reduction_method,
|
||||
top_images=None,
|
||||
):
|
||||
"""
|
||||
Extract latent semantics for a given similarity matrix for a given dim_reduction_method, and display the object-semantic weight pairs
|
||||
|
||||
Leave `top_images` blank to display all imageID-weight pairs
|
||||
"""
|
||||
|
||||
assert sim_type in ["image", "label"], "sim_type should be one of " + str(
|
||||
["image", "label"]
|
||||
)
|
||||
assert (
|
||||
feature_model in valid_feature_models.values()
|
||||
), "feature_model should be one of " + str(list(valid_feature_models.keys()))
|
||||
assert (
|
||||
dim_reduction_method in valid_dim_reduction_methods.keys()
|
||||
), "dim_reduction_method should be one of " + str(
|
||||
list(valid_dim_reduction_methods.keys())
|
||||
)
|
||||
assert len(sim_matrix) == len(sim_matrix[0]), "sim_matrix must be square matrix"
|
||||
|
||||
top_img_str = ""
|
||||
if top_images is not None:
|
||||
top_img_str = f" (showing only top {top_images} {sim_type}-weight pairs for each latent semantic)"
|
||||
|
||||
feature_vectors = sim_matrix
|
||||
feature_ids = list(range(len(sim_matrix)))
|
||||
|
||||
print(
|
||||
"Applying {} on the given similarity matrix to get {} latent semantics{}...".format(
|
||||
dim_reduction_method, k, top_img_str
|
||||
)
|
||||
)
|
||||
|
||||
displayed_latent_semantics = {}
|
||||
all_latent_semantics = {}
|
||||
|
||||
match valid_dim_reduction_methods[dim_reduction_method]:
|
||||
# singular value decomposition
|
||||
# sparse version of SVD to get only k singular values
|
||||
case 1:
|
||||
U, S, V_T = svds(feature_vectors, k=k)
|
||||
|
||||
all_latent_semantics = {
|
||||
"image-semantic": U.tolist(),
|
||||
"semantics-core": S.tolist(),
|
||||
"semantic-feature": V_T.tolist(),
|
||||
}
|
||||
|
||||
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||
displayed_latent_semantics = [
|
||||
sorted(
|
||||
list(zip(feature_ids, latent_semantic)),
|
||||
key=lambda x: x[1],
|
||||
reverse=True,
|
||||
)[:top_images]
|
||||
for latent_semantic in U.T
|
||||
]
|
||||
|
||||
# non-negative matrix factorization
|
||||
case 2:
|
||||
# NNMF requires non-negative input data
|
||||
# so shift the input by subtracting the smallest value
|
||||
min_value = np.min(feature_vectors)
|
||||
feature_vectors_shifted = feature_vectors - min_value
|
||||
|
||||
model = NMF(
|
||||
n_components=k,
|
||||
init="random",
|
||||
solver="cd",
|
||||
alpha_H=0.01,
|
||||
alpha_W=0.01,
|
||||
max_iter=10000,
|
||||
)
|
||||
model.fit(feature_vectors_shifted)
|
||||
|
||||
W = model.transform(feature_vectors_shifted)
|
||||
H = model.components_
|
||||
|
||||
all_latent_semantics = {
|
||||
"image-semantic": W.tolist(),
|
||||
"semantic-feature": H.tolist(),
|
||||
}
|
||||
|
||||
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||
displayed_latent_semantics = [
|
||||
sorted(
|
||||
list(zip(feature_ids, latent_semantic)),
|
||||
key=lambda x: x[1],
|
||||
reverse=True,
|
||||
)[:top_images]
|
||||
for latent_semantic in W.T
|
||||
]
|
||||
|
||||
# unsupervised LDA to extract topics (Latent Dirichlet Allocation)
|
||||
# Note: LDA takes a bit of time
|
||||
case 3:
|
||||
# LDA requires non-negative input data
|
||||
# so shift the input by subtracting the smallest value
|
||||
min_value = np.min(feature_vectors)
|
||||
feature_vectors_shifted = feature_vectors - min_value
|
||||
|
||||
model = LatentDirichletAllocation(
|
||||
n_components=k, learning_method="online", verbose=4
|
||||
)
|
||||
model.fit(feature_vectors_shifted)
|
||||
|
||||
# K (k x fd_dim) is the factor matrix for latent semantic-feature pairs
|
||||
K = model.components_
|
||||
# X (4339 x k) is the other factor matrix for image ID-latent semantic pairs
|
||||
X = model.transform(feature_vectors_shifted)
|
||||
|
||||
all_latent_semantics = {
|
||||
"image-semantic": X.tolist(),
|
||||
"semantic-feature": K.tolist(),
|
||||
}
|
||||
|
||||
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||
displayed_latent_semantics = [
|
||||
sorted(
|
||||
list(zip(feature_ids, latent_semantic)),
|
||||
key=lambda x: x[1],
|
||||
reverse=True,
|
||||
)[:top_images]
|
||||
for latent_semantic in X.T
|
||||
]
|
||||
|
||||
# k-means clustering to reduce to k clusters/dimensions
|
||||
case 4:
|
||||
model = KMeans(n_clusters=k, verbose=2).fit(feature_vectors)
|
||||
CC = model.cluster_centers_
|
||||
Y = model.transform(feature_vectors)
|
||||
|
||||
all_latent_semantics = {
|
||||
"image-semantic": Y.tolist(),
|
||||
"semantic-feature": list(CC.values()),
|
||||
}
|
||||
|
||||
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||
displayed_latent_semantics = [
|
||||
sorted(
|
||||
list(zip(feature_ids, latent_semantic)),
|
||||
key=lambda x: x[1],
|
||||
reverse=False,
|
||||
)[:top_images]
|
||||
for latent_semantic in Y.T
|
||||
]
|
||||
|
||||
for idx, latent_semantic in enumerate(displayed_latent_semantics):
|
||||
print(f"Latent semantic no. {idx}")
|
||||
for obj_id, weight in latent_semantic:
|
||||
print(f"{sim_type}\t{obj_id}\t-\tWeight\t{weight}")
|
||||
|
||||
# Finally also save sim_matrix
|
||||
all_latent_semantics["sim-matrix"] = sim_matrix.tolist()
|
||||
|
||||
with open(
|
||||
f"{sim_type}_sim-{feature_model}-{dim_reduction_method}-{k}-semantics.json",
|
||||
"w",
|
||||
encoding="utf-8",
|
||||
) as output_file:
|
||||
json.dump(all_latent_semantics, output_file, ensure_ascii=False)
|
||||
|
||||
def find_label_label_similarity(fd_collection, feature_model):
|
||||
"""
|
||||
@ -1039,10 +1195,38 @@ def find_label_label_similarity(fd_collection, feature_model):
|
||||
|
||||
label_sim_matrix = np.zeros((num_labels, num_labels))
|
||||
|
||||
# Calculate half and fill the other
|
||||
for i in range(num_labels):
|
||||
for j in range(i + 1, num_labels):
|
||||
# Note: lower the value, lower the distance => higher the similarity
|
||||
label_sim_matrix[i][j] = feature_distance_matches[feature_model](
|
||||
np.array(label_mean_vectors[i]), np.array(label_mean_vectors[j])
|
||||
)
|
||||
label_sim_matrix[i][j] = label_sim_matrix[j][i] = feature_distance_matches[
|
||||
feature_model
|
||||
](np.array(label_mean_vectors[i]), np.array(label_mean_vectors[j]))
|
||||
return label_sim_matrix
|
||||
|
||||
|
||||
def find_image_image_similarity(fd_collection, feature_model):
    """
    Build the symmetric image-image matrix of pairwise distances under the given feature model.

    Entries are distances, so lower values indicate higher similarities; the diagonal is left at zero.
    Rows/columns follow the order in which `fd_collection.find()` yields the images.
    """
    assert (
        feature_model in valid_feature_models.values()
    ), "feature_model should be one of " + str(list(valid_feature_models.keys()))

    # flatten every image's descriptor for the chosen feature model
    vectors = []
    for image_doc in fd_collection.find():
        vectors.append(np.array(image_doc[feature_model]).flatten())

    count = len(vectors)
    distances = np.zeros((count, count))

    # the measure is symmetric, so compute the upper triangle and mirror it
    for row in range(count):
        for col in range(row + 1, count):
            measure = feature_distance_matches[feature_model]
            pair_distance = measure(np.array(vectors[row]), np.array(vectors[col]))
            distances[row][col] = pair_distance
            distances[col][row] = pair_distance

    return distances
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user