mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 09:34:07 +00:00
193 lines
8.5 KiB
Plaintext
193 lines
8.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "ModuleNotFoundError",
|
|
"evalue": "No module named 'task0a'",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 1\u001b[0m line \u001b[0;36m4\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W0sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmath\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W0sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmatplotlib\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpyplot\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mplt\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W0sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtask0a\u001b[39;00m \u001b[39mimport\u001b[39;00m \u001b[39m*\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/e%3A/Fall%2023/CSE%20515%20-%20Multimedia%20and%20web%20databases/CSE515_MWDB_Project/Phase%202/task1.ipynb#W0sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mscipy\u001b[39;00m\n",
|
|
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'task0a'"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from pymongo import MongoClient\n",
|
|
"import math\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from task0a import *\n",
|
|
"import scipy"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Single client for the local MongoDB instance\n",
  "client = MongoClient(host=\"localhost\", port=27017)\n",
  "\n",
  "# Select the database\n",
  "db = client.Multimedia_Web_DBs\n",
  "\n",
  "# NOTE(review): loadDataset is presumably provided by the star import from task0a - confirm\n",
  "caltechDataset = loadDataset()\n",
  "\n",
  "# Fetch all documents from the collection once and order them by \"_id\"\n",
  "feature_descriptors = sorted(\n",
  "    db.Caltech101_Feature_Descriptors.find({}),\n",
  "    key=lambda doc: doc[\"_id\"],\n",
  ")\n",
  "\n",
  "num_labels = 101"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 3,
 "metadata": {},
 "outputs": [],
 "source": [
  "def calculate_label_means(l, feature_model):\n",
  "    \"\"\"Return the element-wise mean of one label's feature vectors.\n",
  "\n",
  "    Only documents of the global ``feature_descriptors`` list whose ``label``\n",
  "    equals ``l`` and whose ``_id`` is even are averaged.\n",
  "\n",
  "    Args:\n",
  "        l: Label value compared against each document's ``label`` field.\n",
  "        feature_model: Key of the descriptor to average (e.g. ``\"hog\"``).\n",
  "\n",
  "    Returns:\n",
  "        A list of floats, one mean per vector component.\n",
  "\n",
  "    Raises:\n",
  "        ValueError: If no even-id document carries the requested label.\n",
  "    \"\"\"\n",
  "    label_vectors = [\n",
  "        doc[feature_model]\n",
  "        for doc in feature_descriptors\n",
  "        if doc[\"label\"] == l and doc[\"_id\"] % 2 == 0\n",
  "    ]\n",
  "\n",
  "    # Fail here with a clear message instead of returning an empty mean that\n",
  "    # only breaks later inside the distance computation.\n",
  "    if not label_vectors:\n",
  "        raise ValueError(f\"No even-id feature descriptors found for label {l!r}\")\n",
  "\n",
  "    count = len(label_vectors)\n",
  "    # zip(*...) walks the vectors component by component\n",
  "    return [sum(component) / count for component in zip(*label_vectors)]"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 4,
 "metadata": {},
 "outputs": [],
 "source": [
  "def findKRelevantImages(mean_vector, feature_model, l):\n",
  "    \"\"\"Rank every even-id image by closeness to a label's mean vector.\n",
  "\n",
  "    The measure depends on the feature model:\n",
  "\n",
  "    * ``color_moments`` - Euclidean distance (smaller is better, ascending sort)\n",
  "    * ``hog`` - cosine similarity (larger is better, descending sort)\n",
  "    * ``layer3`` / ``avgpool`` / ``fc`` - Pearson correlation (descending sort)\n",
  "\n",
  "    Args:\n",
  "        mean_vector: Reference vector, e.g. from ``calculate_label_means``.\n",
  "        feature_model: Descriptor key to compare against.\n",
  "        l: Query label. Not used by the ranking; kept so existing calls work.\n",
  "\n",
  "    Returns:\n",
  "        The full ranking, best match first, as a list of\n",
  "        ``{\"_id\": ..., \"similarity\": ...}`` dicts. The caller picks the top k.\n",
  "        For ``color_moments`` the ``similarity`` value is a distance.\n",
  "\n",
  "    Raises:\n",
  "        ValueError: If ``feature_model`` is not one of the supported models.\n",
  "    \"\"\"\n",
  "    # Imported here so the function does not rely on names leaking in through\n",
  "    # a star import or on scipy's lazy submodule loading.\n",
  "    import numpy as np\n",
  "    import scipy.stats\n",
  "\n",
  "    match feature_model:\n",
  "        case \"color_moments\":\n",
  "            score = lambda vector: math.dist(mean_vector, vector)\n",
  "            best_first = False\n",
  "        case \"hog\":\n",
  "            # The norm of the reference vector is the same for every image\n",
  "            mean_norm = np.linalg.norm(mean_vector)\n",
  "            score = lambda vector: np.dot(mean_vector, vector) / (mean_norm * np.linalg.norm(vector))\n",
  "            best_first = True\n",
  "        case \"layer3\" | \"avgpool\" | \"fc\":\n",
  "            score = lambda vector: scipy.stats.pearsonr(mean_vector, vector).statistic\n",
  "            best_first = True\n",
  "        case _:\n",
  "            raise ValueError(f\"Unsupported feature model: {feature_model!r}\")\n",
  "\n",
  "    similarities = [\n",
  "        {\"_id\": doc[\"_id\"], \"similarity\": score(doc[feature_model])}\n",
  "        for doc in feature_descriptors\n",
  "        if doc[\"_id\"] % 2 == 0\n",
  "    ]\n",
  "    similarities.sort(key=lambda entry: entry[\"similarity\"], reverse=best_first)\n",
  "\n",
  "    return similarities\n"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 5,
 "metadata": {},
 "outputs": [],
 "source": [
  "def main():\n",
  "    \"\"\"Ask for a label, k and a feature model, then print and plot the top matches.\n",
  "\n",
  "    Raises:\n",
  "        ValueError: If k is smaller than 1, the menu choice is out of range,\n",
  "            or any of the three inputs is not an integer.\n",
  "    \"\"\"\n",
  "    # Descriptor key and menu text live side by side so the number shown to\n",
  "    # the user always selects the model that is printed next to it.\n",
  "    feature_menu = [\n",
  "        (\"color_moments\", \"Color moments\"),\n",
  "        (\"hog\", \"HOG\"),\n",
  "        (\"layer3\", \"Resnet50 Layer 3\"),\n",
  "        (\"avgpool\", \"Resnet50 Avgpool layer\"),\n",
  "        (\"fc\", \"Resnet50 FC layer\"),\n",
  "    ]\n",
  "\n",
  "    # User input for the query label and the number of results\n",
  "    l = int(input(\"Enter query label: \"))\n",
  "    k = int(input(\"Enter k: \"))\n",
  "    if k < 1:\n",
  "        raise ValueError(\"k must be a positive integer\")\n",
  "\n",
  "    # User input for the feature model to compare with\n",
  "    for number, (_, description) in enumerate(feature_menu, start=1):\n",
  "        print(f\"{number}: {description}\")\n",
  "    choice = int(input(\"Select the feature model: \"))\n",
  "    # Reject 0 and negatives explicitly; they would otherwise index from the end\n",
  "    if not 1 <= choice <= len(feature_menu):\n",
  "        raise ValueError(f\"Feature model must be between 1 and {len(feature_menu)}\")\n",
  "    feature_model = feature_menu[choice - 1][0]\n",
  "\n",
  "    mean_vector = calculate_label_means(l, feature_model)\n",
  "\n",
  "    similar_images = findKRelevantImages(mean_vector, feature_model, l)\n",
  "\n",
  "    # Slicing copes with k being larger than the number of ranked images\n",
  "    top_matches = similar_images[:k]\n",
  "    for entry in top_matches:\n",
  "        print(entry)\n",
  "\n",
  "    if not top_matches:\n",
  "        return\n",
  "\n",
  "    # squeeze=False keeps axes two-dimensional even when a single image is shown\n",
  "    fig, axes = plt.subplots(1, len(top_matches), figsize=(15, 5), squeeze=False)\n",
  "\n",
  "    for ax, entry in zip(axes[0], top_matches):\n",
  "        # NOTE(review): element [1] is assumed to be a channel-first image tensor - confirm against loadDataset\n",
  "        ax.imshow(caltechDataset[entry[\"_id\"]][1].permute(1, 2, 0))\n",
  "        ax.set_title(f'id: {entry[\"_id\"]}')\n",
  "\n",
  "    # Show the figure with all the images\n",
  "    plt.show()"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 6,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Entry point: start the interactive label query\n",
  "if __name__ == \"__main__\":\n",
  "    main()"
 ]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.4"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|