mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 07:54:07 +00:00
207 lines
7.8 KiB
Plaintext
207 lines
7.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Setup cell: imports, warning filter and inline plotting for the cells below.\n",
"import warnings\n",
"\n",
"# NOTE(review): the star import is kept because later cells use names it provides\n",
"# (getCollection, valid_feature_models, valid_dim_reduction_methods, ...).\n",
"# Prefer explicit imports once the full list of required names is confirmed.\n",
"from utils import *\n",
"\n",
"# `warnings` is imported explicitly above so this line no longer depends on\n",
"# `utils` happening to re-export the module through its star import.\n",
"warnings.filterwarnings('ignore')\n",
"%matplotlib inline"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
"# Collection handle that the latent-semantics cell passes to find_label_label_similarity.\n",
"# NOTE(review): arguments are presumably (database name, collection name) - confirm in utils.\n",
"fd_collection = getCollection(\n",
"    \"team_5_mwdb_phase_2\",\n",
"    \"fd_collection\",\n",
")\n"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Applying kmeans on the given similarity matrix to get 10 latent semantics (showing only top 10 label-weight pairs for each latent semantic)...\n",
|
|
"Initialized centroids\n",
|
|
"Iteration 6 - Converged\n",
|
|
"Note: for K-Means we display distances, in ascending order\n",
|
|
"Latent semantic no. 0\n",
|
|
"label\t92\t-\tDistance\t3.230292112512146\n",
|
|
"label\t4\t-\tDistance\t3.5335656340201087\n",
|
|
"label\t2\t-\tDistance\t4.905027845590568\n",
|
|
"label\t69\t-\tDistance\t4.993399423965622\n",
|
|
"label\t65\t-\tDistance\t6.275170101152081\n",
|
|
"label\t21\t-\tDistance\t6.792963383606834\n",
|
|
"label\t95\t-\tDistance\t9.460863854781731\n",
|
|
"label\t60\t-\tDistance\t10.659440914917885\n",
|
|
"label\t82\t-\tDistance\t14.23961431596092\n",
|
|
"label\t51\t-\tDistance\t14.308250416010853\n",
|
|
"Latent semantic no. 1\n",
|
|
"label\t47\t-\tDistance\t1.7917105751649582\n",
|
|
"label\t42\t-\tDistance\t1.8293437639389183\n",
|
|
"label\t35\t-\tDistance\t2.47989550940933\n",
|
|
"label\t29\t-\tDistance\t2.4870731532031694\n",
|
|
"label\t33\t-\tDistance\t3.0078415187323975\n",
|
|
"label\t49\t-\tDistance\t3.1694527370940753\n",
|
|
"label\t54\t-\tDistance\t3.1764161450515775\n",
|
|
"label\t28\t-\tDistance\t3.520891544025031\n",
|
|
"label\t19\t-\tDistance\t3.752147129401601\n",
|
|
"label\t82\t-\tDistance\t3.9820650145644705\n",
|
|
"Latent semantic no. 2\n",
|
|
"label\t91\t-\tDistance\t2.7653145272024493\n",
|
|
"label\t14\t-\tDistance\t3.829858168383929\n",
|
|
"label\t93\t-\tDistance\t4.108580770102051\n",
|
|
"label\t48\t-\tDistance\t4.25643528657963\n",
|
|
"label\t85\t-\tDistance\t4.308356278561495\n",
|
|
"label\t17\t-\tDistance\t4.72066235395654\n",
|
|
"label\t52\t-\tDistance\t4.733719921198274\n",
|
|
"label\t43\t-\tDistance\t5.593133775346241\n",
|
|
"label\t75\t-\tDistance\t6.35213810417939\n",
|
|
"label\t83\t-\tDistance\t6.365421291009637\n",
|
|
"Latent semantic no. 3\n",
|
|
"label\t63\t-\tDistance\t3.0750924250527425\n",
|
|
"label\t98\t-\tDistance\t3.256907164618595\n",
|
|
"label\t59\t-\tDistance\t3.36740335111714\n",
|
|
"label\t32\t-\tDistance\t3.4369727667587036\n",
|
|
"label\t84\t-\tDistance\t4.042695694344645\n",
|
|
"label\t79\t-\tDistance\t4.051227266452548\n",
|
|
"label\t94\t-\tDistance\t4.535286748567164\n",
|
|
"label\t75\t-\tDistance\t4.567193344282598\n",
|
|
"label\t11\t-\tDistance\t4.856460310962189\n",
|
|
"label\t55\t-\tDistance\t5.036016117772108\n",
|
|
"Latent semantic no. 4\n",
|
|
"label\t80\t-\tDistance\t4.403201299886196\n",
|
|
"label\t99\t-\tDistance\t4.731021526243766\n",
|
|
"label\t3\t-\tDistance\t4.807090489912411\n",
|
|
"label\t48\t-\tDistance\t8.911953449338059\n",
|
|
"label\t85\t-\tDistance\t9.334554754293974\n",
|
|
"label\t52\t-\tDistance\t11.390353342613288\n",
|
|
"label\t43\t-\tDistance\t12.033766054009595\n",
|
|
"label\t91\t-\tDistance\t12.446673116679838\n",
|
|
"label\t14\t-\tDistance\t12.717196488491759\n",
|
|
"label\t83\t-\tDistance\t13.5754060440636\n",
|
|
"Latent semantic no. 5\n",
|
|
"label\t77\t-\tDistance\t2.144778050426236\n",
|
|
"label\t45\t-\tDistance\t2.3391902699042175\n",
|
|
"label\t73\t-\tDistance\t2.5586280095180554\n",
|
|
"label\t22\t-\tDistance\t2.833603911721891\n",
|
|
"label\t57\t-\tDistance\t2.9256965790964955\n",
|
|
"label\t50\t-\tDistance\t3.216841848641699\n",
|
|
"label\t74\t-\tDistance\t3.2964675276683377\n",
|
|
"label\t38\t-\tDistance\t3.3501016749777297\n",
|
|
"label\t72\t-\tDistance\t3.461208008080578\n",
|
|
"label\t34\t-\tDistance\t3.8970766980234073\n",
|
|
"Latent semantic no. 6\n",
|
|
"label\t78\t-\tDistance\t1.772794735295686\n",
|
|
"label\t6\t-\tDistance\t1.9243189269571448\n",
|
|
"label\t67\t-\tDistance\t2.0159218514234905\n",
|
|
"label\t23\t-\tDistance\t2.0402136200750687\n",
|
|
"label\t7\t-\tDistance\t2.1597363741525943\n",
|
|
"label\t15\t-\tDistance\t2.2890961861911463\n",
|
|
"label\t86\t-\tDistance\t2.418355035843437\n",
|
|
"label\t39\t-\tDistance\t2.431493894783776\n",
|
|
"label\t20\t-\tDistance\t2.4339361855736694\n",
|
|
"label\t61\t-\tDistance\t2.4663666328704577\n",
|
|
"Latent semantic no. 7\n",
|
|
"label\t36\t-\tDistance\t2.148560462001178\n",
|
|
"label\t10\t-\tDistance\t2.336732460490279\n",
|
|
"label\t76\t-\tDistance\t2.410558517560451\n",
|
|
"label\t9\t-\tDistance\t2.4853810228702433\n",
|
|
"label\t44\t-\tDistance\t2.822322732248757\n",
|
|
"label\t16\t-\tDistance\t2.8525379488476954\n",
|
|
"label\t19\t-\tDistance\t2.887333058828606\n",
|
|
"label\t41\t-\tDistance\t3.2609266747980072\n",
|
|
"label\t0\t-\tDistance\t3.4462772872176073\n",
|
|
"label\t8\t-\tDistance\t3.4492972662700305\n",
|
|
"Latent semantic no. 8\n",
|
|
"label\t60\t-\tDistance\t3.2878466679861047\n",
|
|
"label\t66\t-\tDistance\t3.8429959542446595\n",
|
|
"label\t95\t-\tDistance\t4.407501055402251\n",
|
|
"label\t51\t-\tDistance\t4.675169110980285\n",
|
|
"label\t82\t-\tDistance\t4.930711123344968\n",
|
|
"label\t1\t-\tDistance\t5.746326956457264\n",
|
|
"label\t42\t-\tDistance\t5.932080034810729\n",
|
|
"label\t29\t-\tDistance\t5.934164464898548\n",
|
|
"label\t47\t-\tDistance\t6.3479330191887025\n",
|
|
"label\t35\t-\tDistance\t6.422013021100036\n",
|
|
"Latent semantic no. 9\n",
|
|
"label\t83\t-\tDistance\t5.036696108166727\n",
|
|
"label\t100\t-\tDistance\t5.163440732380748\n",
|
|
"label\t43\t-\tDistance\t5.447889420797845\n",
|
|
"label\t88\t-\tDistance\t6.470159759945887\n",
|
|
"label\t64\t-\tDistance\t6.8077571085247355\n",
|
|
"label\t17\t-\tDistance\t7.350448996699054\n",
|
|
"label\t55\t-\tDistance\t7.555979165305925\n",
|
|
"label\t11\t-\tDistance\t7.84770773092541\n",
|
|
"label\t91\t-\tDistance\t7.869761874577601\n",
|
|
"label\t75\t-\tDistance\t7.997112142085329\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
"# Interactive cell: ask for a feature model, k and a dimensionality reduction method,\n",
"# build the label-label similarity matrix and extract k latent semantics from it.\n",
"# All three inputs are validated before any similarity computation is started.\n",
"\n",
"# Feature model: the entered key is resolved through valid_feature_models.\n",
"# An unknown key raises a descriptive ValueError instead of a bare KeyError.\n",
"feature_model_key = input(\n",
"    \"Enter feature model - one of \" + str(list(valid_feature_models.keys()))\n",
").strip()\n",
"if feature_model_key not in valid_feature_models:\n",
"    raise ValueError(\n",
"        \"feature model should be one of \" + str(list(valid_feature_models.keys()))\n",
"    )\n",
"selected_feature_model = valid_feature_models[feature_model_key]\n",
"\n",
"# Number of latent semantics; int() raises ValueError on non-numeric input.\n",
"k = int(input(\"Enter value of k: \"))\n",
"if k < 1:\n",
"    raise ValueError(\"k should be a positive integer\")\n",
"\n",
"# Dimensionality reduction method: passed on as the entered key (not the mapped value),\n",
"# exactly as before, but now rejected early when it is not a known key.\n",
"selected_dim_reduction_method = input(\n",
"    \"Enter dimensionality reduction method - one of \"\n",
"    + str(list(valid_dim_reduction_methods.keys()))\n",
").strip()\n",
"if selected_dim_reduction_method not in valid_dim_reduction_methods:\n",
"    raise ValueError(\n",
"        \"dimensionality reduction method should be one of \"\n",
"        + str(list(valid_dim_reduction_methods.keys()))\n",
"    )\n",
"\n",
"label_sim_matrix = find_label_label_similarity(fd_collection, selected_feature_model)\n",
"\n",
"# top_images=10 matches the stored output, which shows the top 10 label-weight pairs\n",
"# for each latent semantic.\n",
"extract_latent_semantics_from_sim_matrix(\n",
"    label_sim_matrix,\n",
"    selected_feature_model,\n",
"    \"label\",\n",
"    k,\n",
"    selected_dim_reduction_method,\n",
"    top_images=10,\n",
")\n"
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|