mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 12:04:07 +00:00
206 lines
7.8 KiB
Plaintext
206 lines
7.8 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Import `warnings` explicitly instead of relying on the wildcard import\n",
"# below to provide it; the wildcard import stays last so any name that\n",
"# `utils` exports keeps the binding it had before.\n",
"import warnings\n",
"\n",
"from utils import *\n",
"\n",
"# Suppress every warning for the rest of the kernel session.\n",
"warnings.filterwarnings('ignore')\n",
"%matplotlib inline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Collection handle that is later passed to find_label_label_similarity.\n",
"# NOTE(review): presumably holds the stored feature descriptors - confirm in utils.\n",
"fd_collection = getCollection(\n",
"    \"team_5_mwdb_phase_2\",\n",
"    \"fd_collection\",\n",
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Applying kmeans on the given similarity matrix to get 10 latent semantics (showing only top 10 label-weight pairs for each latent semantic)...\n",
|
|
"Initialized centroids\n",
|
|
"Note: for K-Means we display distances, in ascending order\n",
|
|
"Latent semantic no. 0\n",
|
|
"label\t38\t-\tDistance\t2.0070855260713345\n",
|
|
"label\t71\t-\tDistance\t2.224974820168396\n",
|
|
"label\t94\t-\tDistance\t2.341857909278956\n",
|
|
"label\t45\t-\tDistance\t2.99059339026617\n",
|
|
"label\t34\t-\tDistance\t3.2010802901998034\n",
|
|
"label\t57\t-\tDistance\t3.248469772417219\n",
|
|
"label\t77\t-\tDistance\t3.5731245496083677\n",
|
|
"label\t84\t-\tDistance\t4.026791789923078\n",
|
|
"label\t50\t-\tDistance\t4.144496651017247\n",
|
|
"label\t74\t-\tDistance\t4.614517493407895\n",
|
|
"Latent semantic no. 1\n",
|
|
"label\t92\t-\tDistance\t3.230292112512146\n",
|
|
"label\t4\t-\tDistance\t3.5335656340201087\n",
|
|
"label\t2\t-\tDistance\t4.905027845590568\n",
|
|
"label\t69\t-\tDistance\t4.993399423965622\n",
|
|
"label\t65\t-\tDistance\t6.275170101152081\n",
|
|
"label\t21\t-\tDistance\t6.792963383606834\n",
|
|
"label\t95\t-\tDistance\t9.460863854781731\n",
|
|
"label\t60\t-\tDistance\t10.659440914917885\n",
|
|
"label\t82\t-\tDistance\t14.23961431596092\n",
|
|
"label\t51\t-\tDistance\t14.308250416010853\n",
|
|
"Latent semantic no. 2\n",
|
|
"label\t98\t-\tDistance\t4.084187568594383\n",
|
|
"label\t75\t-\tDistance\t4.208154727653996\n",
|
|
"label\t59\t-\tDistance\t4.267012427049042\n",
|
|
"label\t11\t-\tDistance\t4.3719751047928685\n",
|
|
"label\t63\t-\tDistance\t4.389793026579887\n",
|
|
"label\t64\t-\tDistance\t4.534510062334466\n",
|
|
"label\t32\t-\tDistance\t4.596340579479344\n",
|
|
"label\t79\t-\tDistance\t4.97413168034284\n",
|
|
"label\t55\t-\tDistance\t5.180445076965457\n",
|
|
"label\t84\t-\tDistance\t5.321702524477488\n",
|
|
"Latent semantic no. 3\n",
|
|
"label\t73\t-\tDistance\t2.2337776135986673\n",
|
|
"label\t77\t-\tDistance\t2.446394227315699\n",
|
|
"label\t22\t-\tDistance\t2.8266085928002305\n",
|
|
"label\t96\t-\tDistance\t2.951528289863372\n",
|
|
"label\t72\t-\tDistance\t3.0039788225292554\n",
|
|
"label\t45\t-\tDistance\t3.109522101340006\n",
|
|
"label\t74\t-\tDistance\t3.519269143632249\n",
|
|
"label\t57\t-\tDistance\t3.589490130921498\n",
|
|
"label\t50\t-\tDistance\t3.6391055564874\n",
|
|
"label\t18\t-\tDistance\t4.109290572000071\n",
|
|
"Latent semantic no. 4\n",
|
|
"label\t78\t-\tDistance\t1.8064076815500691\n",
|
|
"label\t6\t-\tDistance\t1.960264623688121\n",
|
|
"label\t7\t-\tDistance\t2.1426433652644246\n",
|
|
"label\t61\t-\tDistance\t2.211884975823563\n",
|
|
"label\t67\t-\tDistance\t2.2819452598845484\n",
|
|
"label\t20\t-\tDistance\t2.3104854768313308\n",
|
|
"label\t62\t-\tDistance\t2.4074452247279643\n",
|
|
"label\t23\t-\tDistance\t2.4123612373578465\n",
|
|
"label\t27\t-\tDistance\t2.5964084026274183\n",
|
|
"label\t26\t-\tDistance\t2.6484422926018762\n",
|
|
"Latent semantic no. 5\n",
|
|
"label\t1\t-\tDistance\t0.0\n",
|
|
"label\t66\t-\tDistance\t6.283884339485376\n",
|
|
"label\t0\t-\tDistance\t7.134186839941345\n",
|
|
"label\t68\t-\tDistance\t7.6014631552864165\n",
|
|
"label\t42\t-\tDistance\t7.699614640935743\n",
|
|
"label\t90\t-\tDistance\t7.745628525155249\n",
|
|
"label\t35\t-\tDistance\t7.888542444783939\n",
|
|
"label\t89\t-\tDistance\t8.296957431371565\n",
|
|
"label\t19\t-\tDistance\t8.389232149750157\n",
|
|
"label\t70\t-\tDistance\t8.416181651996403\n",
|
|
"Latent semantic no. 6\n",
|
|
"label\t58\t-\tDistance\t1.4491641647189777\n",
|
|
"label\t37\t-\tDistance\t1.5439214839372046\n",
|
|
"label\t24\t-\tDistance\t1.5452615348627594\n",
|
|
"label\t8\t-\tDistance\t1.7715264047899464\n",
|
|
"label\t25\t-\tDistance\t1.86516161648985\n",
|
|
"label\t87\t-\tDistance\t2.077478215700691\n",
|
|
"label\t15\t-\tDistance\t2.225120843217057\n",
|
|
"label\t16\t-\tDistance\t2.267782774837321\n",
|
|
"label\t39\t-\tDistance\t2.395932754700218\n",
|
|
"label\t36\t-\tDistance\t2.6959359474526083\n",
|
|
"Latent semantic no. 7\n",
|
|
"label\t44\t-\tDistance\t2.2303295853566074\n",
|
|
"label\t19\t-\tDistance\t2.233360628309259\n",
|
|
"label\t76\t-\tDistance\t2.2873290684053234\n",
|
|
"label\t49\t-\tDistance\t2.4188703660528543\n",
|
|
"label\t9\t-\tDistance\t2.4470406114525685\n",
|
|
"label\t54\t-\tDistance\t2.4930648472372385\n",
|
|
"label\t10\t-\tDistance\t2.5342743763741615\n",
|
|
"label\t33\t-\tDistance\t2.6761306270075935\n",
|
|
"label\t28\t-\tDistance\t2.76245021657484\n",
|
|
"label\t36\t-\tDistance\t2.8111765962163813\n",
|
|
"Latent semantic no. 8\n",
|
|
"label\t48\t-\tDistance\t3.1737547288361596\n",
|
|
"label\t85\t-\tDistance\t4.021946100259249\n",
|
|
"label\t91\t-\tDistance\t5.424003509894085\n",
|
|
"label\t52\t-\tDistance\t5.537608967567619\n",
|
|
"label\t43\t-\tDistance\t5.53813149000202\n",
|
|
"label\t80\t-\tDistance\t5.6425678719484225\n",
|
|
"label\t14\t-\tDistance\t5.87213105210823\n",
|
|
"label\t83\t-\tDistance\t7.001763109529681\n",
|
|
"label\t93\t-\tDistance\t7.563499549838895\n",
|
|
"label\t3\t-\tDistance\t8.036164382755217\n",
|
|
"Latent semantic no. 9\n",
|
|
"label\t51\t-\tDistance\t1.871618718249688\n",
|
|
"label\t82\t-\tDistance\t2.0311106628896\n",
|
|
"label\t29\t-\tDistance\t2.6509535626831533\n",
|
|
"label\t42\t-\tDistance\t3.083958153652753\n",
|
|
"label\t47\t-\tDistance\t3.198020753679005\n",
|
|
"label\t66\t-\tDistance\t3.5690360028582857\n",
|
|
"label\t35\t-\tDistance\t4.033659067172662\n",
|
|
"label\t60\t-\tDistance\t4.979629225985197\n",
|
|
"label\t33\t-\tDistance\t6.016698032150541\n",
|
|
"label\t95\t-\tDistance\t6.119613727077633\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Interactive inputs: feature model, number of latent semantics (k) and the\n",
"# dimensionality reduction method. Each one is validated up front so a typo\n",
"# fails with a clear message before any similarity computation starts.\n",
"feature_model_key = input(\n",
"    \"Enter feature model - one of \" + str(list(valid_feature_models.keys()))\n",
").strip()\n",
"if feature_model_key not in valid_feature_models:\n",
"    raise ValueError(\n",
"        \"feature model should be one of \" + str(list(valid_feature_models.keys()))\n",
"    )\n",
"selected_feature_model = valid_feature_models[feature_model_key]\n",
"\n",
"k = int(input(\"Enter value of k: \"))\n",
"if k < 1:\n",
"    raise ValueError(\"k should be a positive integer\")\n",
"\n",
"selected_dim_reduction_method = input(\n",
"    \"Enter dimensionality reduction method - one of \"\n",
"    + str(list(valid_dim_reduction_methods.keys()))\n",
").strip()\n",
"# The prompt advertises these keys, so reject anything else here instead of\n",
"# handing an unknown method name to the extraction routine.\n",
"if selected_dim_reduction_method not in valid_dim_reduction_methods:\n",
"    raise ValueError(\n",
"        \"dimensionality reduction method should be one of \"\n",
"        + str(list(valid_dim_reduction_methods.keys()))\n",
"    )\n",
"\n",
"# Label-label similarity matrix for the chosen feature model.\n",
"label_sim_matrix = find_label_label_similarity(fd_collection, selected_feature_model)\n",
"\n",
"# Extract k latent semantics from the similarity matrix; only the top 10\n",
"# label-weight pairs of each latent semantic are printed.\n",
"extract_latent_semantics_from_sim_matrix(\n",
"    label_sim_matrix,\n",
"    selected_feature_model,\n",
"    \"label\",\n",
"    k,\n",
"    selected_dim_reduction_method,\n",
"    top_images=10,\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.4"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|