mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 11:04:07 +00:00
optimized approach for pagerank
This commit is contained in:
parent
992ea2db24
commit
75030857b8
@ -22,7 +22,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 16,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -104,6 +104,7 @@
|
|||||||
" self, label, m, damping_factor=0.85, max_iter=1000, tol=1e-6\n",
|
" self, label, m, damping_factor=0.85, max_iter=1000, tol=1e-6\n",
|
||||||
" ):\n",
|
" ):\n",
|
||||||
" import time\n",
|
" import time\n",
|
||||||
|
"\n",
|
||||||
" if self.similarity_graph is None:\n",
|
" if self.similarity_graph is None:\n",
|
||||||
" raise ValueError(\n",
|
" raise ValueError(\n",
|
||||||
" \"Similarity graph not created. Call create_similarity_graph() first.\"\n",
|
" \"Similarity graph not created. Call create_similarity_graph() first.\"\n",
|
||||||
@ -112,6 +113,7 @@
|
|||||||
" label_indices = [\n",
|
" label_indices = [\n",
|
||||||
" img[\"image_id\"] for img in self.fd_collection.find({\"true_label\": label})\n",
|
" img[\"image_id\"] for img in self.fd_collection.find({\"true_label\": label})\n",
|
||||||
" ] # IDs of images with the given label\n",
|
" ] # IDs of images with the given label\n",
|
||||||
|
" num_label_nodes = len(label_indices)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" pr_scores = np.ones(NUM_IMAGES) / NUM_IMAGES # Initialize PageRank scores\n",
|
" pr_scores = np.ones(NUM_IMAGES) / NUM_IMAGES # Initialize PageRank scores\n",
|
||||||
" if self.verbose:\n",
|
" if self.verbose:\n",
|
||||||
@ -119,22 +121,21 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" for _iter in range(max_iter):\n",
|
" for _iter in range(max_iter):\n",
|
||||||
" prev_scores = np.copy(pr_scores)\n",
|
" prev_scores = np.copy(pr_scores)\n",
|
||||||
" # for every node,\n",
|
|
||||||
" for i in range(NUM_IMAGES):\n",
|
|
||||||
" tic = time.time()\n",
|
|
||||||
" # add sum of connected nodes' PR scores\n",
|
|
||||||
" pr_scores[i] = damping_factor * sum(\n",
|
|
||||||
" pr_scores[j]\n",
|
|
||||||
" for j in range(NUM_IMAGES)\n",
|
|
||||||
" if (i * 2, j * 2) in self.similarity_graph\n",
|
|
||||||
" # and add the prob for random teleport *if node in given label*\n",
|
|
||||||
" ) + (1 - damping_factor) * (1 if i * 2 in label_indices else 0) / len(\n",
|
|
||||||
" label_indices\n",
|
|
||||||
" )\n",
|
|
||||||
" toc = time.time()\n",
|
|
||||||
" print(toc-tic)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" pr_scores /= sum(pr_scores) # Normalize\n",
|
" # for every edge\n",
|
||||||
|
" for (i, j) in self.similarity_graph:\n",
|
||||||
|
" # add neighbor's score\n",
|
||||||
|
" pr_scores[int(i / 2)] += pr_scores[int(j / 2)]\n",
|
||||||
|
" # damping\n",
|
||||||
|
" pr_scores *= damping_factor\n",
|
||||||
|
"\n",
|
||||||
|
" # extra teleport prob for label nodes only\n",
|
||||||
|
" for label_node in label_indices:\n",
|
||||||
|
" pr_scores[label_node] += 1 - damping_factor\n",
|
||||||
|
" pr_scores[label_node] /= num_label_nodes\n",
|
||||||
|
"\n",
|
||||||
|
" # normalize\n",
|
||||||
|
" pr_scores /= sum(pr_scores)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # check for convergence\n",
|
" # check for convergence\n",
|
||||||
" conv_tol = np.sum(np.abs(prev_scores - pr_scores))\n",
|
" conv_tol = np.sum(np.abs(prev_scores - pr_scores))\n",
|
||||||
@ -154,7 +155,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -248,7 +249,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": 17,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -257,20 +258,35 @@
|
|||||||
"text": [
|
"text": [
|
||||||
"Using image-image similarity matrix from semantic data\n",
|
"Using image-image similarity matrix from semantic data\n",
|
||||||
"Similarity graph created\n",
|
"Similarity graph created\n",
|
||||||
"Initialized pagerank scores\n"
|
"Initialized pagerank scores\n",
|
||||||
]
|
"Iter 0, conv_tol=1.719665978050747\n",
|
||||||
},
|
"Iter 1, conv_tol=1.2881607697596094\n",
|
||||||
{
|
"Iter 2, conv_tol=0.2665287566432058\n",
|
||||||
"ename": "KeyboardInterrupt",
|
"Iter 3, conv_tol=0.059527897230663145\n",
|
||||||
"evalue": "",
|
"Iter 4, conv_tol=0.0347000929210205\n",
|
||||||
"output_type": "error",
|
"Iter 5, conv_tol=0.02202852715618498\n",
|
||||||
"traceback": [
|
"Iter 6, conv_tol=0.013882145533702685\n",
|
||||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
"Iter 7, conv_tol=0.008684261384663692\n",
|
||||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
"Iter 8, conv_tol=0.005407038899165463\n",
|
||||||
"\u001b[1;32mc:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Phase 2\\task_11.ipynb Cell 5\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m img_graph \u001b[39m=\u001b[39m ImageGraph(fd_collection, \u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m img_graph\u001b[39m.\u001b[39mcreate_similarity_graph(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m n,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m selected_feature_model,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m data,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m selected_dim_reduction_method\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m )\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=12'>13</a>\u001b[0m imgs \u001b[39m=\u001b[39m img_graph\u001b[39m.\u001b[39;49mpersonalized_pagerank(l, m)\n",
|
"Iter 9, conv_tol=0.0033566552382892495\n",
|
||||||
"\u001b[1;32mc:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Phase 2\\task_11.ipynb Cell 5\u001b[0m line \u001b[0;36m9\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=92'>93</a>\u001b[0m \u001b[39m# for every node,\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=93'>94</a>\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(NUM_IMAGES):\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=94'>95</a>\u001b[0m \u001b[39m# add sum of connected nodes' PR scores\u001b[39;00m\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=95'>96</a>\u001b[0m pr_scores[i] \u001b[39m=\u001b[39m damping_factor \u001b[39m*\u001b[39m \u001b[39msum\u001b[39;49m(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=96'>97</a>\u001b[0m pr_scores[j]\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=97'>98</a>\u001b[0m \u001b[39mfor\u001b[39;49;00m j \u001b[39min\u001b[39;49;00m \u001b[39mrange\u001b[39;49m(NUM_IMAGES)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=98'>99</a>\u001b[0m \u001b[39mif\u001b[39;49;00m (i \u001b[39m*\u001b[39;49m \u001b[39m2\u001b[39;49m, j \u001b[39m*\u001b[39;49m \u001b[39m2\u001b[39;49m) \u001b[39min\u001b[39;49;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msimilarity_graph\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=99'>100</a>\u001b[0m \u001b[39m# and add the prob for random teleport *if node in given label*\u001b[39;49;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=100'>101</a>\u001b[0m ) \u001b[39m+\u001b[39m (\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m damping_factor) \u001b[39m*\u001b[39m (\u001b[39m1\u001b[39m \u001b[39mif\u001b[39;00m i \u001b[39m*\u001b[39m \u001b[39m2\u001b[39m \u001b[39min\u001b[39;00m label_indices \u001b[39melse\u001b[39;00m \u001b[39m0\u001b[39m) \u001b[39m/\u001b[39m \u001b[39mlen\u001b[39m(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=101'>102</a>\u001b[0m label_indices\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=102'>103</a>\u001b[0m )\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=103'>104</a>\u001b[0m pr_scores \u001b[39m/\u001b[39m\u001b[39m=\u001b[39m \u001b[39msum\u001b[39m(pr_scores) \u001b[39m# Normalize\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=105'>106</a>\u001b[0m \u001b[39m# check for convergence\u001b[39;00m\n",
|
"Iter 10, conv_tol=0.002079980308951357\n",
|
||||||
"\u001b[1;32mc:\\Kaushik\\ASU\\CSE 515 - Multimedia and Web Databases\\Project\\Phase 2\\task_11.ipynb Cell 5\u001b[0m line \u001b[0;36m9\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=92'>93</a>\u001b[0m \u001b[39m# for every node,\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=93'>94</a>\u001b[0m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(NUM_IMAGES):\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=94'>95</a>\u001b[0m \u001b[39m# add sum of connected nodes' PR scores\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=95'>96</a>\u001b[0m pr_scores[i] \u001b[39m=\u001b[39m damping_factor \u001b[39m*\u001b[39m \u001b[39msum\u001b[39m(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=96'>97</a>\u001b[0m pr_scores[j]\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=97'>98</a>\u001b[0m \u001b[39mfor\u001b[39;00m j \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(NUM_IMAGES)\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=98'>99</a>\u001b[0m \u001b[39mif\u001b[39;00m (i \u001b[39m*\u001b[39;49m \u001b[39m2\u001b[39;49m, j \u001b[39m*\u001b[39;49m \u001b[39m2\u001b[39;49m) \u001b[39min\u001b[39;49;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msimilarity_graph\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=99'>100</a>\u001b[0m \u001b[39m# and add the prob for random teleport *if node in given label*\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=100'>101</a>\u001b[0m ) \u001b[39m+\u001b[39m (\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m damping_factor) \u001b[39m*\u001b[39m (\u001b[39m1\u001b[39m \u001b[39mif\u001b[39;00m i \u001b[39m*\u001b[39m \u001b[39m2\u001b[39m \u001b[39min\u001b[39;00m label_indices \u001b[39melse\u001b[39;00m \u001b[39m0\u001b[39m) \u001b[39m/\u001b[39m \u001b[39mlen\u001b[39m(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=101'>102</a>\u001b[0m label_indices\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=102'>103</a>\u001b[0m )\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=103'>104</a>\u001b[0m pr_scores \u001b[39m/\u001b[39m\u001b[39m=\u001b[39m \u001b[39msum\u001b[39m(pr_scores) \u001b[39m# Normalize\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Kaushik/ASU/CSE%20515%20-%20Multimedia%20and%20Web%20Databases/Project/Phase%202/task_11.ipynb#W5sZmlsZQ%3D%3D?line=105'>106</a>\u001b[0m \u001b[39m# check for convergence\u001b[39;00m\n",
|
"Iter 11, conv_tol=0.0012874164560444917\n",
|
||||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
"Iter 12, conv_tol=0.0007962948214944976\n",
|
||||||
|
"Iter 13, conv_tol=0.0004923115832688122\n",
|
||||||
|
"Iter 14, conv_tol=0.00030429131691503663\n",
|
||||||
|
"Iter 15, conv_tol=0.0001880472373585323\n",
|
||||||
|
"Iter 16, conv_tol=0.00011619830480688171\n",
|
||||||
|
"Iter 17, conv_tol=7.179680034073339e-05\n",
|
||||||
|
"Iter 18, conv_tol=4.4360187507886776e-05\n",
|
||||||
|
"Iter 19, conv_tol=2.740760859338375e-05\n",
|
||||||
|
"Iter 20, conv_tol=1.6933331813984318e-05\n",
|
||||||
|
"Iter 21, conv_tol=1.0461878708516084e-05\n",
|
||||||
|
"Iter 22, conv_tol=6.463599865544653e-06\n",
|
||||||
|
"Iter 23, conv_tol=3.993353111534267e-06\n",
|
||||||
|
"Iter 24, conv_tol=2.4671753579536407e-06\n",
|
||||||
|
"Iter 25, conv_tol=1.524269424726632e-06\n",
|
||||||
|
"Iter 26, conv_tol=9.417228262050036e-07\n",
|
||||||
|
"Converged\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -290,6 +306,27 @@
|
|||||||
" imgs = img_graph.personalized_pagerank(l, m)\n"
|
" imgs = img_graph.personalized_pagerank(l, m)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"array([8464, 8432, 8354, 7988, 8312, 8674, 8628, 8586, 8208, 7990],\n",
|
||||||
|
" dtype=int64)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"imgs"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user