From 3abcdd97e92a27b618ac710e3b6bfea8a459d881 Mon Sep 17 00:00:00 2001 From: pranavbrkr Date: Wed, 4 Oct 2023 15:25:54 -0700 Subject: [PATCH] task1 init --- Phase 2/task1.ipynb | 192 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 Phase 2/task1.ipynb diff --git a/Phase 2/task1.ipynb b/Phase 2/task1.ipynb new file mode 100644 index 0000000..1928c98 --- /dev/null +++ b/Phase 2/task1.ipynb @@ -0,0 +1,192 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'task0a'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 1\u001b[0m line \u001b[0;36m4\n\u001b[0;32m 2\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmath\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmatplotlib\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpyplot\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mplt\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtask0a\u001b[39;00m \u001b[39mimport\u001b[39;00m \u001b[39m*\u001b[39m\n\u001b[0;32m 5\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mscipy\u001b[39;00m\n", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'task0a'" + ] + } + ], + "source": [ + "from pymongo import MongoClient\n", + "import math\n", + "import matplotlib.pyplot as plt\n", + "from task0a import *\n", + "import scipy" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'loadDataset' is not defined", + "output_type": "error", + "traceback": [ + 
# Notebook set-up: open the MongoDB connection and load everything the later
# cells read as globals (`db`, `caltechDataset`, `feature_descriptors`).

# A single client is enough; host/port are spelled out so the target server is
# obvious to the reader.
client = MongoClient(host="localhost", port=27017)

# Select the database
db = client.Multimedia_Web_DBs

# NOTE(review): `loadDataset` is not defined in this notebook; it is expected
# to arrive through `from task0a import *` -- confirm task0a is importable.
caltechDataset = loadDataset()

# Fetch every feature-descriptor document once and order them by ascending
# "_id". `sorted` consumes the cursor directly, so no intermediate list is
# needed.
feature_descriptors = sorted(
    db.Caltech101_Feature_Descriptors.find({}),
    key=lambda doc: doc["_id"],
)

num_labels = 101


def calculate_label_means(l, feature_model):
    """Return the component-wise mean feature vector of one label.

    Only documents in the global ``feature_descriptors`` whose ``"label"``
    equals *l* and whose ``"_id"`` is even contribute to the mean.

    Args:
        l: Label value compared against each document's ``"label"`` field.
        feature_model: Key of the feature vector to average in each document
            (for example ``"color_moments"`` or ``"hog"``).

    Returns:
        A list holding the mean of every vector component. The list is empty
        when no document matches the label.
    """
    # NOTE(review): assumes each stored vector is a flat sequence of numbers;
    # a nested vector would make `sum(col)` fail -- confirm against the data.
    label_vectors = [
        doc[feature_model]
        for doc in feature_descriptors
        if doc["label"] == l and doc["_id"] % 2 == 0
    ]

    # zip(*...) walks the vectors column by column, i.e. one component at a time.
    return [sum(column) / len(column) for column in zip(*label_vectors)]
# Menu shown to the user, in display order: (document key, menu title).
# Keeping key and title side by side guarantees that the number the user
# types selects the model named next to it.
_FEATURE_MENU = (
    ("color_moments", "Color moments"),
    ("hog", "HOG"),
    ("avgpool", "Resnet50 Avgpool layer"),
    ("layer3", "Resnet50 Layer 3"),
    ("fc", "Resnet50 FC layer"),
)


def _similarity_scorer(feature_model, mean_vector):
    """Return ``(score, descending)`` for *feature_model*, or ``None`` if unknown.

    ``score(vector)`` compares *vector* with *mean_vector*; ``descending``
    tells whether larger scores mean "more similar".
    """
    if feature_model == "color_moments":
        # Euclidean distance: the smallest value is the best match.
        return (lambda vector: math.dist(mean_vector, vector)), False

    if feature_model == "hog":
        # Imported here because the notebook never imports numpy by name.
        import numpy as np

        # The norm of the query vector is the same for every candidate.
        mean_norm = np.linalg.norm(mean_vector)
        return (
            lambda vector: np.dot(mean_vector, vector)
            / (mean_norm * np.linalg.norm(vector))
        ), True

    if feature_model in ("layer3", "avgpool", "fc"):
        from scipy.stats import pearsonr

        return (lambda vector: pearsonr(mean_vector, vector).statistic), True

    return None


def findKRelevantImages(mean_vector, feature_model, l, descriptors=None):
    """Rank the even-``_id`` images by similarity to *mean_vector*.

    The measure depends on *feature_model*: Euclidean distance for
    ``"color_moments"`` (ascending), cosine similarity for ``"hog"``
    (descending) and Pearson correlation for ``"layer3"``, ``"avgpool"`` and
    ``"fc"`` (descending).

    Args:
        mean_vector: Query vector, normally a label's mean feature vector.
        feature_model: Key of the feature vector to read from each document.
        l: Query label. Accepted for call compatibility; the ranking does not
            use it.
        descriptors: Optional iterable of descriptor documents (mappings with
            ``"_id"`` and the *feature_model* key). Defaults to the global
            ``feature_descriptors``.

    Returns:
        A list of ``{"_id": ..., "similarity": ...}`` dicts ordered from most
        to least similar. Despite the name it contains *all* candidates; the
        caller takes the first k. An unrecognised *feature_model* yields an
        empty list.
    """
    if descriptors is None:
        descriptors = feature_descriptors

    candidates = [
        (doc["_id"], doc[feature_model])
        for doc in descriptors
        if doc["_id"] % 2 == 0
    ]

    scorer = _similarity_scorer(feature_model, mean_vector)
    if scorer is None:
        return []
    score, descending = scorer

    ranked = [
        {"_id": image_id, "similarity": score(vector)}
        for image_id, vector in candidates
    ]
    ranked.sort(key=lambda entry: entry["similarity"], reverse=descending)
    return ranked


def main():
    """Prompt for a label, k and a feature model, then show the k best images.

    Prints the k top-ranked entries and plots the corresponding images side
    by side. Invalid input (k < 1, a menu choice outside the list, or a label
    without any feature vectors) is reported and ends the run early.
    """
    # User input for the query label and the number of results
    l = int(input("Enter query label: "))
    k = int(input("Enter k: "))
    if k < 1:
        print("k must be at least 1.")
        return

    # User input for feature model to use
    for number, (_, title) in enumerate(_FEATURE_MENU, start=1):
        print(f"{number}: {title}")
    choice = int(input("Select the feature model: "))
    # Reject 0 and negatives explicitly: a negative index would silently pick
    # a model from the end of the menu.
    if not 1 <= choice <= len(_FEATURE_MENU):
        print(f"Feature model must be between 1 and {len(_FEATURE_MENU)}.")
        return
    feature_model = _FEATURE_MENU[choice - 1][0]

    mean_vector = calculate_label_means(l, feature_model)
    if not mean_vector:
        print(f"No feature vectors found for label {l}.")
        return

    similar_images = findKRelevantImages(mean_vector, feature_model, l)

    # Slicing caps k at the number of ranked images.
    top_matches = similar_images[:k]
    for entry in top_matches:
        print(entry)
    if not top_matches:
        return

    # squeeze=False always yields a 2-D array of axes, so a single result
    # (k == 1) is handled like any other count.
    _, axes = plt.subplots(1, len(top_matches), figsize=(15, 5), squeeze=False)

    for axis, entry in zip(axes[0], top_matches):
        # NOTE(review): presumably element 1 of a dataset item is a
        # channel-first image tensor, hence permute(1, 2, 0) -- confirm
        # against loadDataset().
        axis.imshow(caltechDataset[entry["_id"]][1].permute(1, 2, 0))
        axis.set_title(f'id: {entry["_id"]}')

    # Show the figure with all the images
    plt.show()


if __name__ == "__main__":
    main()