mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 11:04:07 +00:00
refactored pranav's task 6 code, some more fixes and changes
This commit is contained in:
parent
27e180521d
commit
d73c768af4
@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -13,7 +13,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -22,7 +22,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -31,115 +31,115 @@
|
|||||||
"text": [
|
"text": [
|
||||||
"Applying svd on the cm_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n",
|
"Applying svd on the cm_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n",
|
||||||
"Latent semantic no. 0\n",
|
"Latent semantic no. 0\n",
|
||||||
"Image_ID\t7654\t-\tWeight\t0.08162189274964751\n",
|
"Image_ID\t7654\t-\tWeight\t0.0816218927496473\n",
|
||||||
"Image_ID\t8634\t-\tWeight\t0.06673589485778451\n",
|
"Image_ID\t8634\t-\tWeight\t0.0667358948577843\n",
|
||||||
"Image_ID\t5740\t-\tWeight\t0.060058821201972104\n",
|
"Image_ID\t5740\t-\tWeight\t0.06005882120197204\n",
|
||||||
"Image_ID\t6106\t-\tWeight\t0.05306661393931607\n",
|
"Image_ID\t6106\t-\tWeight\t0.0530666139393161\n",
|
||||||
"Image_ID\t5456\t-\tWeight\t0.05170171570330845\n",
|
"Image_ID\t5456\t-\tWeight\t0.051701715703308504\n",
|
||||||
"Image_ID\t7814\t-\tWeight\t0.04997978865116185\n",
|
"Image_ID\t7814\t-\tWeight\t0.04997978865116192\n",
|
||||||
"Image_ID\t6248\t-\tWeight\t0.04946683639815072\n",
|
"Image_ID\t6248\t-\tWeight\t0.04946683639815059\n",
|
||||||
"Image_ID\t5354\t-\tWeight\t0.04864381025793171\n",
|
"Image_ID\t5354\t-\tWeight\t0.04864381025793159\n",
|
||||||
"Image_ID\t6108\t-\tWeight\t0.04796763934338538\n",
|
"Image_ID\t6108\t-\tWeight\t0.0479676393433854\n",
|
||||||
"Image_ID\t5438\t-\tWeight\t0.047874747600689466\n",
|
"Image_ID\t5438\t-\tWeight\t0.04787474760068962\n",
|
||||||
"Latent semantic no. 1\n",
|
"Latent semantic no. 1\n",
|
||||||
"Image_ID\t8026\t-\tWeight\t0.06478360955460367\n",
|
"Image_ID\t7654\t-\tWeight\t0.05566187740909836\n",
|
||||||
"Image_ID\t6016\t-\tWeight\t0.0632709906607753\n",
|
"Image_ID\t7880\t-\tWeight\t0.05304265128270742\n",
|
||||||
"Image_ID\t3744\t-\tWeight\t0.05347414608321652\n",
|
"Image_ID\t5132\t-\tWeight\t0.052802620405367526\n",
|
||||||
"Image_ID\t3720\t-\tWeight\t0.0517124023583583\n",
|
"Image_ID\t4516\t-\tWeight\t0.05032667794065215\n",
|
||||||
"Image_ID\t7896\t-\tWeight\t0.049366978424645006\n",
|
"Image_ID\t3064\t-\tWeight\t0.04996389545581616\n",
|
||||||
"Image_ID\t6014\t-\tWeight\t0.047637173390389816\n",
|
"Image_ID\t7808\t-\tWeight\t0.04885211523705829\n",
|
||||||
"Image_ID\t6768\t-\tWeight\t0.04742408995375774\n",
|
"Image_ID\t8102\t-\tWeight\t0.04821048869059779\n",
|
||||||
"Image_ID\t4050\t-\tWeight\t0.0456343920101654\n",
|
"Image_ID\t5336\t-\tWeight\t0.047392911537133244\n",
|
||||||
"Image_ID\t6000\t-\tWeight\t0.04535273415975713\n",
|
"Image_ID\t3058\t-\tWeight\t0.04622961181395915\n",
|
||||||
"Image_ID\t6552\t-\tWeight\t0.04525300117499444\n",
|
"Image_ID\t7484\t-\tWeight\t0.04563242634411927\n",
|
||||||
"Latent semantic no. 2\n",
|
"Latent semantic no. 2\n",
|
||||||
"Image_ID\t7654\t-\tWeight\t0.0704670166327785\n",
|
"Image_ID\t7654\t-\tWeight\t0.07046701663277787\n",
|
||||||
"Image_ID\t2804\t-\tWeight\t0.059682344110996065\n",
|
"Image_ID\t2804\t-\tWeight\t0.059682344110995336\n",
|
||||||
"Image_ID\t2710\t-\tWeight\t0.059199111598090534\n",
|
"Image_ID\t2710\t-\tWeight\t0.05919911159809061\n",
|
||||||
"Image_ID\t3436\t-\tWeight\t0.05368202357324355\n",
|
"Image_ID\t3436\t-\tWeight\t0.05368202357324448\n",
|
||||||
"Image_ID\t7936\t-\tWeight\t0.053276991496894154\n",
|
"Image_ID\t7936\t-\tWeight\t0.05327699149689366\n",
|
||||||
"Image_ID\t2708\t-\tWeight\t0.048527019795007204\n",
|
"Image_ID\t2708\t-\tWeight\t0.04852701979500758\n",
|
||||||
"Image_ID\t3764\t-\tWeight\t0.04835537239641643\n",
|
"Image_ID\t3764\t-\tWeight\t0.04835537239641772\n",
|
||||||
"Image_ID\t7928\t-\tWeight\t0.047998989024259496\n",
|
"Image_ID\t7928\t-\tWeight\t0.04799898902425922\n",
|
||||||
"Image_ID\t5684\t-\tWeight\t0.04723047448150771\n",
|
"Image_ID\t5684\t-\tWeight\t0.04723047448150721\n",
|
||||||
"Image_ID\t5126\t-\tWeight\t0.04720498270016634\n",
|
"Image_ID\t5126\t-\tWeight\t0.04720498270016626\n",
|
||||||
"Latent semantic no. 3\n",
|
"Latent semantic no. 3\n",
|
||||||
"Image_ID\t6356\t-\tWeight\t0.0754447261688377\n",
|
"Image_ID\t8654\t-\tWeight\t0.08668332932816088\n",
|
||||||
"Image_ID\t6480\t-\tWeight\t0.06540890240964665\n",
|
"Image_ID\t8618\t-\tWeight\t0.08568859853566119\n",
|
||||||
"Image_ID\t4756\t-\tWeight\t0.06075370676621832\n",
|
"Image_ID\t8658\t-\tWeight\t0.0777605087520117\n",
|
||||||
"Image_ID\t8656\t-\tWeight\t0.060505116069252685\n",
|
"Image_ID\t3306\t-\tWeight\t0.0745220591779124\n",
|
||||||
"Image_ID\t6050\t-\tWeight\t0.058111632773274836\n",
|
"Image_ID\t8620\t-\tWeight\t0.07351843281590886\n",
|
||||||
"Image_ID\t6324\t-\tWeight\t0.056492568599917435\n",
|
"Image_ID\t8638\t-\tWeight\t0.06948884666766826\n",
|
||||||
"Image_ID\t8138\t-\tWeight\t0.0557967464751822\n",
|
"Image_ID\t6754\t-\tWeight\t0.06896434951935482\n",
|
||||||
"Image_ID\t3460\t-\tWeight\t0.05508818833516222\n",
|
"Image_ID\t8676\t-\tWeight\t0.06623938393792103\n",
|
||||||
"Image_ID\t200\t-\tWeight\t0.05459477384213874\n",
|
"Image_ID\t4650\t-\tWeight\t0.06566930583744507\n",
|
||||||
"Image_ID\t7220\t-\tWeight\t0.05376222500332758\n",
|
"Image_ID\t8636\t-\tWeight\t0.06499098805246775\n",
|
||||||
"Latent semantic no. 4\n",
|
"Latent semantic no. 4\n",
|
||||||
"Image_ID\t7370\t-\tWeight\t0.05281026462493995\n",
|
"Image_ID\t7370\t-\tWeight\t0.05281026462494081\n",
|
||||||
"Image_ID\t6528\t-\tWeight\t0.05252803707219332\n",
|
"Image_ID\t6528\t-\tWeight\t0.05252803707219361\n",
|
||||||
"Image_ID\t8056\t-\tWeight\t0.05175019567880743\n",
|
"Image_ID\t8056\t-\tWeight\t0.0517501956788071\n",
|
||||||
"Image_ID\t2958\t-\tWeight\t0.05123118911737749\n",
|
"Image_ID\t2958\t-\tWeight\t0.051231189117377514\n",
|
||||||
"Image_ID\t4614\t-\tWeight\t0.05061302210733273\n",
|
"Image_ID\t4614\t-\tWeight\t0.05061302210733084\n",
|
||||||
"Image_ID\t8292\t-\tWeight\t0.05000577057549489\n",
|
"Image_ID\t8292\t-\tWeight\t0.05000577057549516\n",
|
||||||
"Image_ID\t7888\t-\tWeight\t0.04905059301012787\n",
|
"Image_ID\t7888\t-\tWeight\t0.04905059301012733\n",
|
||||||
"Image_ID\t6540\t-\tWeight\t0.048139958875035395\n",
|
"Image_ID\t6540\t-\tWeight\t0.048139958875035006\n",
|
||||||
"Image_ID\t6064\t-\tWeight\t0.04605896293857696\n",
|
"Image_ID\t6064\t-\tWeight\t0.04605896293857509\n",
|
||||||
"Image_ID\t2974\t-\tWeight\t0.04488429099909397\n",
|
"Image_ID\t2974\t-\tWeight\t0.04488429099909442\n",
|
||||||
"Latent semantic no. 5\n",
|
"Latent semantic no. 5\n",
|
||||||
"Image_ID\t8570\t-\tWeight\t0.08379938013632145\n",
|
"Image_ID\t8570\t-\tWeight\t0.08379938013632153\n",
|
||||||
"Image_ID\t7784\t-\tWeight\t0.0723847258804912\n",
|
"Image_ID\t7784\t-\tWeight\t0.07238472588049127\n",
|
||||||
"Image_ID\t4152\t-\tWeight\t0.060769224719766333\n",
|
"Image_ID\t4152\t-\tWeight\t0.06076922471976642\n",
|
||||||
"Image_ID\t5114\t-\tWeight\t0.053872121517690504\n",
|
"Image_ID\t5114\t-\tWeight\t0.05387212151769057\n",
|
||||||
"Image_ID\t7774\t-\tWeight\t0.05324887247523992\n",
|
"Image_ID\t7774\t-\tWeight\t0.05324887247524\n",
|
||||||
"Image_ID\t8614\t-\tWeight\t0.05319742868629013\n",
|
"Image_ID\t8614\t-\tWeight\t0.05319742868629018\n",
|
||||||
"Image_ID\t3072\t-\tWeight\t0.05083994521792821\n",
|
"Image_ID\t3072\t-\tWeight\t0.05083994521792827\n",
|
||||||
"Image_ID\t7798\t-\tWeight\t0.05059807413594892\n",
|
"Image_ID\t7798\t-\tWeight\t0.050598074135949\n",
|
||||||
"Image_ID\t5118\t-\tWeight\t0.05022770477320976\n",
|
"Image_ID\t5118\t-\tWeight\t0.05022770477320978\n",
|
||||||
"Image_ID\t7040\t-\tWeight\t0.04996996742218053\n",
|
"Image_ID\t7040\t-\tWeight\t0.04996996742218058\n",
|
||||||
"Latent semantic no. 6\n",
|
"Latent semantic no. 6\n",
|
||||||
"Image_ID\t8570\t-\tWeight\t0.07082421149695754\n",
|
"Image_ID\t8570\t-\tWeight\t0.07082421149695753\n",
|
||||||
"Image_ID\t7774\t-\tWeight\t0.06546594547486781\n",
|
"Image_ID\t7774\t-\tWeight\t0.06546594547486784\n",
|
||||||
"Image_ID\t4152\t-\tWeight\t0.06440870014673936\n",
|
"Image_ID\t4152\t-\tWeight\t0.06440870014673937\n",
|
||||||
"Image_ID\t5118\t-\tWeight\t0.06264436903974217\n",
|
"Image_ID\t5118\t-\tWeight\t0.06264436903974217\n",
|
||||||
"Image_ID\t7784\t-\tWeight\t0.06203552824772956\n",
|
"Image_ID\t7784\t-\tWeight\t0.06203552824772957\n",
|
||||||
"Image_ID\t7798\t-\tWeight\t0.05899354962287134\n",
|
"Image_ID\t7798\t-\tWeight\t0.05899354962287138\n",
|
||||||
"Image_ID\t7896\t-\tWeight\t0.05648444493570963\n",
|
"Image_ID\t7896\t-\tWeight\t0.056484444935709706\n",
|
||||||
"Image_ID\t7766\t-\tWeight\t0.056063042928801675\n",
|
"Image_ID\t7766\t-\tWeight\t0.056063042928801675\n",
|
||||||
"Image_ID\t7792\t-\tWeight\t0.055578803018497686\n",
|
"Image_ID\t7792\t-\tWeight\t0.05557880301849769\n",
|
||||||
"Image_ID\t7834\t-\tWeight\t0.055567509183302555\n",
|
"Image_ID\t7834\t-\tWeight\t0.05556750918330256\n",
|
||||||
"Latent semantic no. 7\n",
|
"Latent semantic no. 7\n",
|
||||||
"Image_ID\t7912\t-\tWeight\t0.06634864556518678\n",
|
"Image_ID\t1140\t-\tWeight\t0.05317423066517462\n",
|
||||||
"Image_ID\t5534\t-\tWeight\t0.05913926717735747\n",
|
"Image_ID\t5510\t-\tWeight\t0.052651188836683724\n",
|
||||||
"Image_ID\t5550\t-\tWeight\t0.049468125695492526\n",
|
"Image_ID\t5282\t-\tWeight\t0.05122146559887229\n",
|
||||||
"Image_ID\t2106\t-\tWeight\t0.048274676516220805\n",
|
"Image_ID\t1260\t-\tWeight\t0.050478632782130786\n",
|
||||||
"Image_ID\t7804\t-\tWeight\t0.04822832951751611\n",
|
"Image_ID\t1692\t-\tWeight\t0.05043911725770527\n",
|
||||||
"Image_ID\t6198\t-\tWeight\t0.04795521082538372\n",
|
"Image_ID\t8656\t-\tWeight\t0.04943228673655803\n",
|
||||||
"Image_ID\t6728\t-\tWeight\t0.04729135404469566\n",
|
"Image_ID\t1242\t-\tWeight\t0.04886689682608001\n",
|
||||||
"Image_ID\t5588\t-\tWeight\t0.04715637083533252\n",
|
"Image_ID\t7844\t-\tWeight\t0.048768495445578465\n",
|
||||||
"Image_ID\t7276\t-\tWeight\t0.04637482601331893\n",
|
"Image_ID\t5100\t-\tWeight\t0.04867702517715619\n",
|
||||||
"Image_ID\t6730\t-\tWeight\t0.045930617636659\n",
|
"Image_ID\t5300\t-\tWeight\t0.048353062438932816\n",
|
||||||
"Latent semantic no. 8\n",
|
"Latent semantic no. 8\n",
|
||||||
"Image_ID\t1798\t-\tWeight\t0.04586412291217343\n",
|
"Image_ID\t1798\t-\tWeight\t0.0458641229121734\n",
|
||||||
"Image_ID\t1802\t-\tWeight\t0.044772142290101236\n",
|
"Image_ID\t1802\t-\tWeight\t0.044772142290101194\n",
|
||||||
"Image_ID\t1806\t-\tWeight\t0.044448676280621977\n",
|
"Image_ID\t1806\t-\tWeight\t0.044448676280621935\n",
|
||||||
"Image_ID\t1202\t-\tWeight\t0.043679466488681935\n",
|
"Image_ID\t1202\t-\tWeight\t0.043679466488681894\n",
|
||||||
"Image_ID\t1786\t-\tWeight\t0.04351371229636818\n",
|
"Image_ID\t1786\t-\tWeight\t0.043513712296368134\n",
|
||||||
"Image_ID\t1784\t-\tWeight\t0.04346765741634348\n",
|
"Image_ID\t1784\t-\tWeight\t0.043467657416343425\n",
|
||||||
"Image_ID\t1790\t-\tWeight\t0.04288750664761761\n",
|
"Image_ID\t1790\t-\tWeight\t0.04288750664761759\n",
|
||||||
"Image_ID\t1642\t-\tWeight\t0.041863484069841805\n",
|
"Image_ID\t1642\t-\tWeight\t0.041863484069841764\n",
|
||||||
"Image_ID\t1788\t-\tWeight\t0.04089406629514228\n",
|
"Image_ID\t1788\t-\tWeight\t0.04089406629514224\n",
|
||||||
"Image_ID\t1796\t-\tWeight\t0.04068815222347919\n",
|
"Image_ID\t1796\t-\tWeight\t0.04068815222347914\n",
|
||||||
"Latent semantic no. 9\n",
|
"Latent semantic no. 9\n",
|
||||||
"Image_ID\t8582\t-\tWeight\t0.02577153311253718\n",
|
"Image_ID\t8616\t-\tWeight\t-0.001110683188398373\n",
|
||||||
"Image_ID\t8612\t-\tWeight\t0.025608143819276445\n",
|
"Image_ID\t5234\t-\tWeight\t-0.001470742377963864\n",
|
||||||
"Image_ID\t7290\t-\tWeight\t0.025578071187110543\n",
|
"Image_ID\t3838\t-\tWeight\t-0.0018268938101953923\n",
|
||||||
"Image_ID\t7298\t-\tWeight\t0.025350467801040884\n",
|
"Image_ID\t7428\t-\tWeight\t-0.001978912864613778\n",
|
||||||
"Image_ID\t7302\t-\tWeight\t0.02531661140938117\n",
|
"Image_ID\t4664\t-\tWeight\t-0.0020551982165007863\n",
|
||||||
"Image_ID\t7318\t-\tWeight\t0.025212779767014252\n",
|
"Image_ID\t2754\t-\tWeight\t-0.002091620047637018\n",
|
||||||
"Image_ID\t8580\t-\tWeight\t0.025201323062899284\n",
|
"Image_ID\t2806\t-\tWeight\t-0.0021702921217260757\n",
|
||||||
"Image_ID\t6392\t-\tWeight\t0.02517086205642468\n",
|
"Image_ID\t3820\t-\tWeight\t-0.002247214027498397\n",
|
||||||
"Image_ID\t2738\t-\tWeight\t0.025106516897995135\n",
|
"Image_ID\t3786\t-\tWeight\t-0.002360567100195792\n",
|
||||||
"Image_ID\t6420\t-\tWeight\t0.02510499876667641\n"
|
"Image_ID\t4928\t-\tWeight\t-0.002395118791388935\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -159,7 +159,7 @@
|
|||||||
" )\n",
|
" )\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"extract_latent_semantics(\n",
|
"extract_latent_semantics_from_feature_model(\n",
|
||||||
" fd_collection,\n",
|
" fd_collection,\n",
|
||||||
" k,\n",
|
" k,\n",
|
||||||
" selected_feature_model,\n",
|
" selected_feature_model,\n",
|
||||||
|
|||||||
@ -5,16 +5,6 @@
|
|||||||
"execution_count": 1,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
|
||||||
"%load_ext autoreload\n",
|
|
||||||
"%autoreload 2"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
"source": [
|
||||||
"from utils import *\n",
|
"from utils import *\n",
|
||||||
"warnings.filterwarnings('ignore')\n",
|
"warnings.filterwarnings('ignore')\n",
|
||||||
@ -23,7 +13,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -32,124 +22,124 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Applying svd on the fc_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n",
|
"Applying svd on the given similarity matrix to get 10 latent semantics (showing only top 10 label-weight pairs for each latent semantic)...\n",
|
||||||
"Latent semantic no. 0\n",
|
"Latent semantic no. 0\n",
|
||||||
"Image_ID\t80\t-\tWeight\t0.2614097705550824\n",
|
"label\t28\t-\tWeight\t0.2583354411312026\n",
|
||||||
"Image_ID\t74\t-\tWeight\t0.255431983850539\n",
|
"label\t29\t-\tWeight\t0.2301362547676974\n",
|
||||||
"Image_ID\t72\t-\tWeight\t0.24329045773521019\n",
|
"label\t33\t-\tWeight\t0.2129183683279978\n",
|
||||||
"Image_ID\t76\t-\tWeight\t0.22867416408250565\n",
|
"label\t9\t-\tWeight\t0.17625685452423093\n",
|
||||||
"Image_ID\t38\t-\tWeight\t0.19933358228759127\n",
|
"label\t95\t-\tWeight\t0.16277551497836534\n",
|
||||||
"Image_ID\t70\t-\tWeight\t0.18697368408982706\n",
|
"label\t47\t-\tWeight\t0.1424860388015467\n",
|
||||||
"Image_ID\t78\t-\tWeight\t0.13796715203849405\n",
|
"label\t39\t-\tWeight\t0.1349747704005884\n",
|
||||||
"Image_ID\t130\t-\tWeight\t0.12802644225327572\n",
|
"label\t30\t-\tWeight\t0.13251434767496492\n",
|
||||||
"Image_ID\t128\t-\tWeight\t0.12766513481071043\n",
|
"label\t52\t-\tWeight\t0.12669069496270755\n",
|
||||||
"Image_ID\t116\t-\tWeight\t0.12432195172872901\n",
|
"label\t8\t-\tWeight\t0.1257730807471899\n",
|
||||||
"Latent semantic no. 1\n",
|
"Latent semantic no. 1\n",
|
||||||
"Image_ID\t42\t-\tWeight\t0.24451953308549035\n",
|
"label\t96\t-\tWeight\t0.2666765976054894\n",
|
||||||
"Image_ID\t104\t-\tWeight\t0.17513827022527176\n",
|
"label\t97\t-\tWeight\t0.19087869496500426\n",
|
||||||
"Image_ID\t2\t-\tWeight\t0.17502495949250704\n",
|
"label\t25\t-\tWeight\t0.17776094778851348\n",
|
||||||
"Image_ID\t0\t-\tWeight\t0.17209867451969002\n",
|
"label\t3\t-\tWeight\t0.1759798805642099\n",
|
||||||
"Image_ID\t170\t-\tWeight\t0.16656363902027468\n",
|
"label\t98\t-\tWeight\t0.16951497899752574\n",
|
||||||
"Image_ID\t96\t-\tWeight\t0.15318453472976815\n",
|
"label\t22\t-\tWeight\t0.1667032655640346\n",
|
||||||
"Image_ID\t40\t-\tWeight\t0.1432149719665029\n",
|
"label\t24\t-\tWeight\t0.16034180060184824\n",
|
||||||
"Image_ID\t44\t-\tWeight\t0.1429496131499582\n",
|
"label\t19\t-\tWeight\t0.15345532912389587\n",
|
||||||
"Image_ID\t160\t-\tWeight\t0.13479710738132986\n",
|
"label\t52\t-\tWeight\t0.13271640119612757\n",
|
||||||
"Image_ID\t6\t-\tWeight\t0.1264545662660414\n",
|
"label\t29\t-\tWeight\t0.12856388746021633\n",
|
||||||
"Latent semantic no. 2\n",
|
"Latent semantic no. 2\n",
|
||||||
"Image_ID\t86\t-\tWeight\t0.21244971577008848\n",
|
"label\t46\t-\tWeight\t0.21813474254675366\n",
|
||||||
"Image_ID\t96\t-\tWeight\t0.19744514449239337\n",
|
"label\t79\t-\tWeight\t0.19091788352587957\n",
|
||||||
"Image_ID\t90\t-\tWeight\t0.19463642108355275\n",
|
"label\t55\t-\tWeight\t0.1871080482210247\n",
|
||||||
"Image_ID\t32\t-\tWeight\t0.18145091969843855\n",
|
"label\t56\t-\tWeight\t0.18322792605578184\n",
|
||||||
"Image_ID\t42\t-\tWeight\t0.16316970985189788\n",
|
"label\t78\t-\tWeight\t0.17506936966351683\n",
|
||||||
"Image_ID\t26\t-\tWeight\t0.15711519451212017\n",
|
"label\t98\t-\tWeight\t0.1733164832137484\n",
|
||||||
"Image_ID\t184\t-\tWeight\t0.14991640994990046\n",
|
"label\t22\t-\tWeight\t0.17114312653027375\n",
|
||||||
"Image_ID\t134\t-\tWeight\t0.1462330756631442\n",
|
"label\t38\t-\tWeight\t0.16928636840289424\n",
|
||||||
"Image_ID\t40\t-\tWeight\t0.14437675159652016\n",
|
"label\t45\t-\tWeight\t0.1567042877228484\n",
|
||||||
"Image_ID\t182\t-\tWeight\t0.1383518461119224\n",
|
"label\t4\t-\tWeight\t0.15108693899889344\n",
|
||||||
"Latent semantic no. 3\n",
|
"Latent semantic no. 3\n",
|
||||||
"Image_ID\t90\t-\tWeight\t0.1720078267722524\n",
|
"label\t96\t-\tWeight\t0.2736613529052896\n",
|
||||||
"Image_ID\t156\t-\tWeight\t0.16000154385617743\n",
|
"label\t98\t-\tWeight\t0.218185914155306\n",
|
||||||
"Image_ID\t158\t-\tWeight\t0.1512646317732056\n",
|
"label\t22\t-\tWeight\t0.1963451355822489\n",
|
||||||
"Image_ID\t160\t-\tWeight\t0.14646801598350143\n",
|
"label\t3\t-\tWeight\t0.17627732148468614\n",
|
||||||
"Image_ID\t152\t-\tWeight\t0.1464352560589073\n",
|
"label\t39\t-\tWeight\t0.1728992502839298\n",
|
||||||
"Image_ID\t150\t-\tWeight\t0.14619374900432364\n",
|
"label\t52\t-\tWeight\t0.15597562436756945\n",
|
||||||
"Image_ID\t30\t-\tWeight\t0.14143498327111978\n",
|
"label\t51\t-\tWeight\t0.1291470561734402\n",
|
||||||
"Image_ID\t36\t-\tWeight\t0.14028252934190766\n",
|
"label\t30\t-\tWeight\t0.12453129554714541\n",
|
||||||
"Image_ID\t92\t-\tWeight\t0.14010606099568526\n",
|
"label\t18\t-\tWeight\t0.1236867360720947\n",
|
||||||
"Image_ID\t96\t-\tWeight\t0.12878454015856147\n",
|
"label\t38\t-\tWeight\t0.12184856229773917\n",
|
||||||
"Latent semantic no. 4\n",
|
"Latent semantic no. 4\n",
|
||||||
"Image_ID\t0\t-\tWeight\t0.1851068625752792\n",
|
"label\t6\t-\tWeight\t0.23875690719216863\n",
|
||||||
"Image_ID\t68\t-\tWeight\t0.18233577289211206\n",
|
"label\t67\t-\tWeight\t0.21007869938490106\n",
|
||||||
"Image_ID\t70\t-\tWeight\t0.17658848660973384\n",
|
"label\t63\t-\tWeight\t0.18822840034389135\n",
|
||||||
"Image_ID\t2\t-\tWeight\t0.1740864069632969\n",
|
"label\t14\t-\tWeight\t0.18738002200878218\n",
|
||||||
"Image_ID\t64\t-\tWeight\t0.1652208125636303\n",
|
"label\t87\t-\tWeight\t0.17508576062247283\n",
|
||||||
"Image_ID\t144\t-\tWeight\t0.1473307832877541\n",
|
"label\t23\t-\tWeight\t0.167492867766091\n",
|
||||||
"Image_ID\t140\t-\tWeight\t0.13555748295430797\n",
|
"label\t15\t-\tWeight\t0.15522709562173342\n",
|
||||||
"Image_ID\t142\t-\tWeight\t0.12823249250147356\n",
|
"label\t61\t-\tWeight\t0.13244353806854162\n",
|
||||||
"Image_ID\t86\t-\tWeight\t0.12718092599165637\n",
|
"label\t45\t-\tWeight\t0.12833204093005665\n",
|
||||||
"Image_ID\t76\t-\tWeight\t0.1252879989162334\n",
|
"label\t68\t-\tWeight\t0.12622315521729294\n",
|
||||||
"Latent semantic no. 5\n",
|
"Latent semantic no. 5\n",
|
||||||
"Image_ID\t38\t-\tWeight\t0.18831453133913492\n",
|
"label\t30\t-\tWeight\t0.17385975982344382\n",
|
||||||
"Image_ID\t44\t-\tWeight\t0.17741038115946053\n",
|
"label\t25\t-\tWeight\t0.14655711054814133\n",
|
||||||
"Image_ID\t42\t-\tWeight\t0.16444727858214978\n",
|
"label\t39\t-\tWeight\t0.13307896633493813\n",
|
||||||
"Image_ID\t130\t-\tWeight\t0.15436113645002744\n",
|
"label\t68\t-\tWeight\t0.12851498788897622\n",
|
||||||
"Image_ID\t40\t-\tWeight\t0.1536450181907607\n",
|
"label\t24\t-\tWeight\t0.12828250585375986\n",
|
||||||
"Image_ID\t132\t-\tWeight\t0.14964910372393345\n",
|
"label\t0\t-\tWeight\t0.12500243174429157\n",
|
||||||
"Image_ID\t46\t-\tWeight\t0.147369630386678\n",
|
"label\t1\t-\tWeight\t0.12371257574727512\n",
|
||||||
"Image_ID\t36\t-\tWeight\t0.14003912645014002\n",
|
"label\t77\t-\tWeight\t0.12370279647800499\n",
|
||||||
"Image_ID\t128\t-\tWeight\t0.13864439525825356\n",
|
"label\t89\t-\tWeight\t0.12233344688386875\n",
|
||||||
"Image_ID\t138\t-\tWeight\t0.13770732538821512\n",
|
"label\t83\t-\tWeight\t0.11445596984835589\n",
|
||||||
"Latent semantic no. 6\n",
|
"Latent semantic no. 6\n",
|
||||||
"Image_ID\t114\t-\tWeight\t0.15664448468019831\n",
|
"label\t17\t-\tWeight\t0.2335282879255542\n",
|
||||||
"Image_ID\t2\t-\tWeight\t0.15491061836983144\n",
|
"label\t48\t-\tWeight\t0.19418795795666355\n",
|
||||||
"Image_ID\t0\t-\tWeight\t0.1530303208538504\n",
|
"label\t21\t-\tWeight\t0.19013440200231033\n",
|
||||||
"Image_ID\t6\t-\tWeight\t0.15295162665264536\n",
|
"label\t85\t-\tWeight\t0.17503295059460947\n",
|
||||||
"Image_ID\t106\t-\tWeight\t0.14505207452002586\n",
|
"label\t11\t-\tWeight\t0.14933372636956993\n",
|
||||||
"Image_ID\t110\t-\tWeight\t0.14364619871330633\n",
|
"label\t1\t-\tWeight\t0.1384254243377172\n",
|
||||||
"Image_ID\t104\t-\tWeight\t0.14360445482307752\n",
|
"label\t0\t-\tWeight\t0.13078647401074162\n",
|
||||||
"Image_ID\t116\t-\tWeight\t0.14309751290704328\n",
|
"label\t57\t-\tWeight\t0.11374248801163754\n",
|
||||||
"Image_ID\t108\t-\tWeight\t0.14103122187663494\n",
|
"label\t10\t-\tWeight\t0.10468223841103744\n",
|
||||||
"Image_ID\t112\t-\tWeight\t0.13936814882577545\n",
|
"label\t99\t-\tWeight\t0.10191451131216464\n",
|
||||||
"Latent semantic no. 7\n",
|
"Latent semantic no. 7\n",
|
||||||
"Image_ID\t158\t-\tWeight\t0.15332739573127638\n",
|
"label\t82\t-\tWeight\t0.23372455436757703\n",
|
||||||
"Image_ID\t152\t-\tWeight\t0.15027095321242787\n",
|
"label\t95\t-\tWeight\t0.21795238756371887\n",
|
||||||
"Image_ID\t2\t-\tWeight\t0.148228537938103\n",
|
"label\t60\t-\tWeight\t0.18080422229063045\n",
|
||||||
"Image_ID\t0\t-\tWeight\t0.14693245027728857\n",
|
"label\t16\t-\tWeight\t0.1806105172209771\n",
|
||||||
"Image_ID\t156\t-\tWeight\t0.1439438847861891\n",
|
"label\t27\t-\tWeight\t0.17365150902149876\n",
|
||||||
"Image_ID\t8\t-\tWeight\t0.14356918947005834\n",
|
"label\t59\t-\tWeight\t0.17250044548228938\n",
|
||||||
"Image_ID\t10\t-\tWeight\t0.1431162549061445\n",
|
"label\t26\t-\tWeight\t0.1661853291143862\n",
|
||||||
"Image_ID\t6\t-\tWeight\t0.14277108702825383\n",
|
"label\t13\t-\tWeight\t0.16331211225170805\n",
|
||||||
"Image_ID\t150\t-\tWeight\t0.1424099571884803\n",
|
"label\t34\t-\tWeight\t0.1523080193090529\n",
|
||||||
"Image_ID\t164\t-\tWeight\t0.13731169848767164\n",
|
"label\t67\t-\tWeight\t0.13577900574984025\n",
|
||||||
"Latent semantic no. 8\n",
|
"Latent semantic no. 8\n",
|
||||||
"Image_ID\t136\t-\tWeight\t0.14826723874051348\n",
|
"label\t53\t-\tWeight\t0.2259481751468642\n",
|
||||||
"Image_ID\t142\t-\tWeight\t0.1444905135922577\n",
|
"label\t37\t-\tWeight\t0.21583443408756542\n",
|
||||||
"Image_ID\t116\t-\tWeight\t0.14310970423245634\n",
|
"label\t76\t-\tWeight\t0.20483376297311964\n",
|
||||||
"Image_ID\t132\t-\tWeight\t0.13967210710664973\n",
|
"label\t44\t-\tWeight\t0.1690198227623472\n",
|
||||||
"Image_ID\t152\t-\tWeight\t0.13699976834141417\n",
|
"label\t68\t-\tWeight\t0.1650723880318989\n",
|
||||||
"Image_ID\t114\t-\tWeight\t0.13649814331495427\n",
|
"label\t28\t-\tWeight\t0.15689929414378492\n",
|
||||||
"Image_ID\t138\t-\tWeight\t0.13624706512987708\n",
|
"label\t14\t-\tWeight\t0.1564371673909956\n",
|
||||||
"Image_ID\t106\t-\tWeight\t0.13620952950667425\n",
|
"label\t54\t-\tWeight\t0.1553627917623035\n",
|
||||||
"Image_ID\t110\t-\tWeight\t0.1346054901033104\n",
|
"label\t51\t-\tWeight\t0.14380435363337046\n",
|
||||||
"Image_ID\t144\t-\tWeight\t0.13436573258693213\n",
|
"label\t36\t-\tWeight\t0.13510425005259438\n",
|
||||||
"Latent semantic no. 9\n",
|
"Latent semantic no. 9\n",
|
||||||
"Image_ID\t38\t-\tWeight\t0.15911686596038474\n",
|
"label\t19\t-\tWeight\t0.11741024839079275\n",
|
||||||
"Image_ID\t2\t-\tWeight\t0.15207108925634513\n",
|
"label\t40\t-\tWeight\t0.11107319334138463\n",
|
||||||
"Image_ID\t0\t-\tWeight\t0.15116756158498235\n",
|
"label\t53\t-\tWeight\t0.11058750626248925\n",
|
||||||
"Image_ID\t6\t-\tWeight\t0.15009399187071035\n",
|
"label\t51\t-\tWeight\t0.10794606425819818\n",
|
||||||
"Image_ID\t10\t-\tWeight\t0.14437025978168486\n",
|
"label\t96\t-\tWeight\t0.10735468567860716\n",
|
||||||
"Image_ID\t4\t-\tWeight\t0.14315858315130434\n",
|
"label\t55\t-\tWeight\t0.10731282010915796\n",
|
||||||
"Image_ID\t34\t-\tWeight\t0.14296451776950192\n",
|
"label\t50\t-\tWeight\t0.10703093662670059\n",
|
||||||
"Image_ID\t22\t-\tWeight\t0.14272703151065388\n",
|
"label\t1\t-\tWeight\t0.10651036503732043\n",
|
||||||
"Image_ID\t24\t-\tWeight\t0.14254462871698045\n",
|
"label\t79\t-\tWeight\t0.10640855392103846\n",
|
||||||
"Image_ID\t20\t-\tWeight\t0.14096073579756538\n"
|
"label\t47\t-\tWeight\t0.10594110421348357\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -171,14 +161,13 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"label_sim_matrix = find_label_label_similarity(fd_collection,selected_feature_model)\n",
|
"label_sim_matrix = find_label_label_similarity(fd_collection,selected_feature_model)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"extract_latent_semantics(\n",
|
"extract_latent_semantics_from_sim_matrix(\n",
|
||||||
" fd_collection,\n",
|
" label_sim_matrix,\n",
|
||||||
" k,\n",
|
|
||||||
" selected_feature_model,\n",
|
" selected_feature_model,\n",
|
||||||
|
" \"label\",\n",
|
||||||
|
" k,\n",
|
||||||
" selected_dim_reduction_method,\n",
|
" selected_dim_reduction_method,\n",
|
||||||
" sim_matrix=label_sim_matrix,\n",
|
|
||||||
" top_images=10,\n",
|
" top_images=10,\n",
|
||||||
" fn_prefix='label_sim-'\n",
|
|
||||||
")\n"
|
")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
78
Phase 2/task_6.ipynb
Normal file
78
Phase 2/task_6.ipynb
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from utils import *\n",
|
||||||
|
"warnings.filterwarnings('ignore')\n",
|
||||||
|
"%matplotlib inline\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"selected_feature_model = valid_feature_models[\n",
|
||||||
|
" str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
|
||||||
|
"]\n",
|
||||||
|
"\n",
|
||||||
|
"k = int(input(\"Enter value of k: \"))\n",
|
||||||
|
"if k < 1:\n",
|
||||||
|
" raise ValueError(\"k should be a positive integer\")\n",
|
||||||
|
"\n",
|
||||||
|
"selected_dim_reduction_method = str(\n",
|
||||||
|
" input(\n",
|
||||||
|
" \"Enter dimensionality reduction method - one of \"\n",
|
||||||
|
" + str(list(valid_dim_reduction_methods.keys()))\n",
|
||||||
|
" )\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"image_sim_matrix = find_image_image_similarity(fd_collection,selected_feature_model)\n",
|
||||||
|
"\n",
|
||||||
|
"extract_latent_semantics_from_sim_matrix(\n",
|
||||||
|
" image_sim_matrix,\n",
|
||||||
|
" selected_feature_model,\n",
|
||||||
|
" \"image\",\n",
|
||||||
|
"\tk,\n",
|
||||||
|
" selected_dim_reduction_method,\n",
|
||||||
|
" top_images=10,\n",
|
||||||
|
")\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
234
Phase 2/utils.py
234
Phase 2/utils.py
@ -517,7 +517,9 @@ def calculate_label_representatives(fd_collection, label, feature_model):
|
|||||||
"""Calculate representative feature vector of a label as the mean of all feature vectors under a feature model"""
|
"""Calculate representative feature vector of a label as the mean of all feature vectors under a feature model"""
|
||||||
|
|
||||||
label_fds = [
|
label_fds = [
|
||||||
img_fds[feature_model] # get the specific feature model's feature vector
|
np.array(
|
||||||
|
img_fds[feature_model]
|
||||||
|
).flatten() # get the specific feature model's feature vector
|
||||||
for img_fds in fd_collection.find(
|
for img_fds in fd_collection.find(
|
||||||
{"true_label": label}
|
{"true_label": label}
|
||||||
) # repeat for all images
|
) # repeat for all images
|
||||||
@ -563,7 +565,7 @@ def show_similar_images_for_label(
|
|||||||
|
|
||||||
for cur_img in all_images:
|
for cur_img in all_images:
|
||||||
cur_img_id = cur_img["image_id"]
|
cur_img_id = cur_img["image_id"]
|
||||||
cur_img_fd = np.array(cur_img[feature_model])
|
cur_img_fd = np.array(cur_img[feature_model]).flatten()
|
||||||
|
|
||||||
cur_dist = distance_measure(
|
cur_dist = distance_measure(
|
||||||
cur_img_fd,
|
cur_img_fd,
|
||||||
@ -652,15 +654,13 @@ def show_similar_labels_for_image(
|
|||||||
|
|
||||||
label_dict = {target_image_id: target_label}
|
label_dict = {target_image_id: target_label}
|
||||||
|
|
||||||
target_image_fd = np.array(target_image[feature_model])
|
|
||||||
|
|
||||||
all_images = fd_collection.find({})
|
all_images = fd_collection.find({})
|
||||||
for cur_img in all_images:
|
for cur_img in all_images:
|
||||||
cur_img_id = cur_img["image_id"]
|
cur_img_id = cur_img["image_id"]
|
||||||
# skip target itself
|
# skip target itself
|
||||||
if cur_img_id == target_image_id:
|
if cur_img_id == target_image_id:
|
||||||
continue
|
continue
|
||||||
cur_img_fd = np.array(cur_img[feature_model])
|
cur_img_fd = np.array(cur_img[feature_model]).flatten()
|
||||||
cur_dist = distance_measure(
|
cur_dist = distance_measure(
|
||||||
cur_img_fd,
|
cur_img_fd,
|
||||||
target_image_fd,
|
target_image_fd,
|
||||||
@ -804,20 +804,16 @@ class KMeans:
|
|||||||
return Y
|
return Y
|
||||||
|
|
||||||
|
|
||||||
def extract_latent_semantics(
|
def extract_latent_semantics_from_feature_model(
|
||||||
fd_collection,
|
fd_collection,
|
||||||
k,
|
k,
|
||||||
feature_model,
|
feature_model,
|
||||||
dim_reduction_method,
|
dim_reduction_method,
|
||||||
sim_matrix=None,
|
|
||||||
top_images=None,
|
top_images=None,
|
||||||
fn_prefix="",
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs
|
Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs
|
||||||
|
|
||||||
Use `sim_matrix` to manually give similarity matrix instead of feature space
|
|
||||||
|
|
||||||
Leave `top_images` blank to display all imageID-weight pairs
|
Leave `top_images` blank to display all imageID-weight pairs
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -837,17 +833,9 @@ def extract_latent_semantics(
|
|||||||
if top_images is not None:
|
if top_images is not None:
|
||||||
top_img_str = f" (showing only top {top_images} image-weight pairs for each latent semantic)"
|
top_img_str = f" (showing only top {top_images} image-weight pairs for each latent semantic)"
|
||||||
|
|
||||||
# if similarity matrix is provided
|
feature_vectors = np.array(
|
||||||
if sim_matrix is not None:
|
[np.array(img[feature_model]).flatten() for img in all_images]
|
||||||
feature_vectors = sim_matrix
|
|
||||||
print(
|
|
||||||
"Applying {} on the given similarity matrix to get {} latent semantics{}...".format(
|
|
||||||
dim_reduction_method, k, top_img_str
|
|
||||||
)
|
)
|
||||||
)
|
|
||||||
# else take feature space from database
|
|
||||||
else:
|
|
||||||
feature_vectors = np.array([np.array(img[feature_model]).flatten() for img in all_images])
|
|
||||||
print(
|
print(
|
||||||
"Applying {} on the {} space to get {} latent semantics{}...".format(
|
"Applying {} on the {} space to get {} latent semantics{}...".format(
|
||||||
dim_reduction_method, feature_model, k, top_img_str
|
dim_reduction_method, feature_model, k, top_img_str
|
||||||
@ -974,7 +962,177 @@ def extract_latent_semantics(
|
|||||||
print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}")
|
print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}")
|
||||||
|
|
||||||
with open(
|
with open(
|
||||||
f"{fn_prefix}{feature_model}-{dim_reduction_method}-{k}-semantics.json",
|
f"{feature_model}-{dim_reduction_method}-{k}-semantics.json",
|
||||||
|
"w",
|
||||||
|
encoding="utf-8",
|
||||||
|
) as output_file:
|
||||||
|
json.dump(all_latent_semantics, output_file, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_latent_semantics_from_sim_matrix(
|
||||||
|
sim_matrix,
|
||||||
|
feature_model,
|
||||||
|
sim_type,
|
||||||
|
k,
|
||||||
|
dim_reduction_method,
|
||||||
|
top_images=None,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Extract latent semantics for a given similarity matrix for a given dim_reduction_method, and display the object-semantic weight pairs
|
||||||
|
|
||||||
|
Leave `top_images` blank to display all imageID-weight pairs
|
||||||
|
"""
|
||||||
|
|
||||||
|
assert sim_type in ["image", "label"], "sim_type should be one of " + str(
|
||||||
|
["image", "label"]
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
feature_model in valid_feature_models.values()
|
||||||
|
), "feature_model should be one of " + str(list(valid_feature_models.keys()))
|
||||||
|
assert (
|
||||||
|
dim_reduction_method in valid_dim_reduction_methods.keys()
|
||||||
|
), "dim_reduction_method should be one of " + str(
|
||||||
|
list(valid_dim_reduction_methods.keys())
|
||||||
|
)
|
||||||
|
assert len(sim_matrix) == len(sim_matrix[0]), "sim_matrix must be square matrix"
|
||||||
|
|
||||||
|
top_img_str = ""
|
||||||
|
if top_images is not None:
|
||||||
|
top_img_str = f" (showing only top {top_images} {sim_type}-weight pairs for each latent semantic)"
|
||||||
|
|
||||||
|
feature_vectors = sim_matrix
|
||||||
|
feature_ids = list(range(len(sim_matrix)))
|
||||||
|
|
||||||
|
print(
|
||||||
|
"Applying {} on the given similarity matrix to get {} latent semantics{}...".format(
|
||||||
|
dim_reduction_method, k, top_img_str
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
displayed_latent_semantics = {}
|
||||||
|
all_latent_semantics = {}
|
||||||
|
|
||||||
|
match valid_dim_reduction_methods[dim_reduction_method]:
|
||||||
|
# singular value decomposition
|
||||||
|
# sparse version of SVD to get only k singular values
|
||||||
|
case 1:
|
||||||
|
U, S, V_T = svds(feature_vectors, k=k)
|
||||||
|
|
||||||
|
all_latent_semantics = {
|
||||||
|
"image-semantic": U.tolist(),
|
||||||
|
"semantics-core": S.tolist(),
|
||||||
|
"semantic-feature": V_T.tolist(),
|
||||||
|
}
|
||||||
|
|
||||||
|
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||||
|
displayed_latent_semantics = [
|
||||||
|
sorted(
|
||||||
|
list(zip(feature_ids, latent_semantic)),
|
||||||
|
key=lambda x: x[1],
|
||||||
|
reverse=True,
|
||||||
|
)[:top_images]
|
||||||
|
for latent_semantic in U.T
|
||||||
|
]
|
||||||
|
|
||||||
|
# non-negative matrix factorization
|
||||||
|
case 2:
|
||||||
|
# NNMF requires non-negative input data
|
||||||
|
# so shift the input by subtracting the smallest value
|
||||||
|
min_value = np.min(feature_vectors)
|
||||||
|
feature_vectors_shifted = feature_vectors - min_value
|
||||||
|
|
||||||
|
model = NMF(
|
||||||
|
n_components=k,
|
||||||
|
init="random",
|
||||||
|
solver="cd",
|
||||||
|
alpha_H=0.01,
|
||||||
|
alpha_W=0.01,
|
||||||
|
max_iter=10000,
|
||||||
|
)
|
||||||
|
model.fit(feature_vectors_shifted)
|
||||||
|
|
||||||
|
W = model.transform(feature_vectors_shifted)
|
||||||
|
H = model.components_
|
||||||
|
|
||||||
|
all_latent_semantics = {
|
||||||
|
"image-semantic": W.tolist(),
|
||||||
|
"semantic-feature": H.tolist(),
|
||||||
|
}
|
||||||
|
|
||||||
|
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||||
|
displayed_latent_semantics = [
|
||||||
|
sorted(
|
||||||
|
list(zip(feature_ids, latent_semantic)),
|
||||||
|
key=lambda x: x[1],
|
||||||
|
reverse=True,
|
||||||
|
)[:top_images]
|
||||||
|
for latent_semantic in W.T
|
||||||
|
]
|
||||||
|
|
||||||
|
# unsupervised LDA to extract topics (Latent Dirichlet Allocation)
|
||||||
|
# Note: LDA takes a bit of time
|
||||||
|
case 3:
|
||||||
|
# LDA requires non-negative input data
|
||||||
|
# so shift the input by subtracting the smallest value
|
||||||
|
min_value = np.min(feature_vectors)
|
||||||
|
feature_vectors_shifted = feature_vectors - min_value
|
||||||
|
|
||||||
|
model = LatentDirichletAllocation(
|
||||||
|
n_components=k, learning_method="online", verbose=4
|
||||||
|
)
|
||||||
|
model.fit(feature_vectors_shifted)
|
||||||
|
|
||||||
|
# K (k x fd_dim) is the factor matrix for latent semantic-feature pairs
|
||||||
|
K = model.components_
|
||||||
|
# X (4339 x k) is the other factor matrix for image ID-latent semantic pairs
|
||||||
|
X = model.transform(feature_vectors_shifted)
|
||||||
|
|
||||||
|
all_latent_semantics = {
|
||||||
|
"image-semantic": X.tolist(),
|
||||||
|
"semantic-feature": K.tolist(),
|
||||||
|
}
|
||||||
|
|
||||||
|
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||||
|
displayed_latent_semantics = [
|
||||||
|
sorted(
|
||||||
|
list(zip(feature_ids, latent_semantic)),
|
||||||
|
key=lambda x: x[1],
|
||||||
|
reverse=True,
|
||||||
|
)[:top_images]
|
||||||
|
for latent_semantic in X.T
|
||||||
|
]
|
||||||
|
|
||||||
|
# k-means clustering to reduce to k clusters/dimensions
|
||||||
|
case 4:
|
||||||
|
model = KMeans(n_clusters=k, verbose=2).fit(feature_vectors)
|
||||||
|
CC = model.cluster_centers_
|
||||||
|
Y = model.transform(feature_vectors)
|
||||||
|
|
||||||
|
all_latent_semantics = {
|
||||||
|
"image-semantic": Y.tolist(),
|
||||||
|
"semantic-feature": list(CC.values()),
|
||||||
|
}
|
||||||
|
|
||||||
|
# for each latent semantic, sort imageID-weight pairs by weights in descending order
|
||||||
|
displayed_latent_semantics = [
|
||||||
|
sorted(
|
||||||
|
list(zip(feature_ids, latent_semantic)),
|
||||||
|
key=lambda x: x[1],
|
||||||
|
reverse=False,
|
||||||
|
)[:top_images]
|
||||||
|
for latent_semantic in Y.T
|
||||||
|
]
|
||||||
|
|
||||||
|
for idx, latent_semantic in enumerate(displayed_latent_semantics):
|
||||||
|
print(f"Latent semantic no. {idx}")
|
||||||
|
for obj_id, weight in latent_semantic:
|
||||||
|
print(f"{sim_type}\t{obj_id}\t-\tWeight\t{weight}")
|
||||||
|
|
||||||
|
# Finally also save sim_matrix
|
||||||
|
all_latent_semantics["sim-matrix"] = sim_matrix.tolist()
|
||||||
|
|
||||||
|
with open(
|
||||||
|
f"{sim_type}_sim-{feature_model}-{dim_reduction_method}-{k}-semantics.json",
|
||||||
"w",
|
"w",
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
) as output_file:
|
) as output_file:
|
||||||
@ -1002,10 +1160,38 @@ def find_label_label_similarity(fd_collection, feature_model):
|
|||||||
|
|
||||||
label_sim_matrix = np.zeros((num_labels, num_labels))
|
label_sim_matrix = np.zeros((num_labels, num_labels))
|
||||||
|
|
||||||
|
# Calculate half and fill the other
|
||||||
for i in range(num_labels):
|
for i in range(num_labels):
|
||||||
for j in range(i + 1, num_labels):
|
for j in range(i + 1, num_labels):
|
||||||
# Note: lower the value, lower the distance => higher the similarity
|
# Note: lower the value, lower the distance => higher the similarity
|
||||||
label_sim_matrix[i][j] = feature_distance_matches[feature_model](
|
label_sim_matrix[i][j] = label_sim_matrix[j][i] = feature_distance_matches[
|
||||||
np.array(label_mean_vectors[i]), np.array(label_mean_vectors[j])
|
feature_model
|
||||||
)
|
](np.array(label_mean_vectors[i]), np.array(label_mean_vectors[j]))
|
||||||
return label_sim_matrix
|
return label_sim_matrix
|
||||||
|
|
||||||
|
|
||||||
|
def find_image_image_similarity(fd_collection, feature_model):
    """
    Build the pairwise image-image distance matrix for one feature model.

    Despite the name, the entries are the values returned by the feature model's
    distance measure: lower values indicate higher similarities.

    Args:
        fd_collection: collection of image feature descriptors; must provide a
            `find()` method yielding one mapping per image, indexable by
            `feature_model` (presumably a MongoDB collection - confirm against callers)
        feature_model: key of the feature space to compare images in; must be one
            of the values of `valid_feature_models`

    Returns:
        Symmetric (num_images x num_images) numpy array with a zero diagonal, with
        rows/columns in the order the images are returned by `find()`
    """
    assert (
        feature_model in valid_feature_models.values()
    ), "feature_model should be one of " + str(list(valid_feature_models.keys()))

    # flatten so that multi-dimensional descriptors are compared as plain vectors
    feature_vectors = [
        np.array(img_fds[feature_model]).flatten()
        for img_fds in fd_collection.find()  # repeat for all images
    ]
    num_images = len(feature_vectors)
    image_sim_matrix = np.zeros((num_images, num_images))

    # look up the distance measure once instead of once per image pair
    distance_measure = feature_distance_matches[feature_model]

    # Calculate the upper triangle and mirror it into the lower one;
    # the diagonal is left at 0
    for i in range(num_images):
        for j in range(i + 1, num_images):
            # Note: lower the value, lower the distance => higher the similarity
            # The vectors are already flat numpy arrays, so they are passed as-is
            # rather than being copied again for every pair
            image_sim_matrix[i][j] = image_sim_matrix[j][i] = distance_measure(
                feature_vectors[i], feature_vectors[j]
            )
    return image_sim_matrix
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user