diff --git a/Phase 2/task1.ipynb b/Phase 2/task1.ipynb
new file mode 100644
index 0000000..90c6840
--- /dev/null
+++ b/Phase 2/task1.ipynb
@@ -0,0 +1,218 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ModuleNotFoundError",
+ "evalue": "No module named 'task0a'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 1\u001b[0m line \u001b[0;36m4\n\u001b[0;32m 2\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmath\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmatplotlib\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpyplot\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mplt\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtask0a\u001b[39;00m \u001b[39mimport\u001b[39;00m \u001b[39m*\u001b[39m\n\u001b[0;32m 5\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mscipy\u001b[39;00m\n",
+ "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'task0a'"
+ ]
+ }
+ ],
+ "source": [
+ "from pymongo import MongoClient\n",
+ "import math\n",
+ "import matplotlib.pyplot as plt\n",
+ "# This was imported for the loadDataset function in the cell below\n",
+ "from task0a import *\n",
+ "import scipy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'loadDataset' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 2\u001b[0m line \u001b[0;36m7\n\u001b[0;32m 4\u001b[0m \u001b[39m# Select the database\u001b[39;00m\n\u001b[0;32m 5\u001b[0m db \u001b[39m=\u001b[39m client\u001b[39m.\u001b[39mMultimedia_Web_DBs\n\u001b[1;32m----> 7\u001b[0m caltechDataset \u001b[39m=\u001b[39m loadDataset()\n\u001b[0;32m 9\u001b[0m \u001b[39m# Fetch all documents from the collection and then sort them by \"_id\"\u001b[39;00m\n\u001b[0;32m 10\u001b[0m feature_descriptors \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(db\u001b[39m.\u001b[39mCaltech101_Feature_Descriptors\u001b[39m.\u001b[39mfind({}))\n",
+ "\u001b[1;31mNameError\u001b[0m: name 'loadDataset' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "client = MongoClient()\n",
+ "client = MongoClient(host=\"localhost\", port=27017)\n",
+ "\n",
+ "# Select the database\n",
+ "db = client.Multimedia_Web_DBs\n",
+ "\n",
+ "# This function was the part of task 1 in my project directory. \n",
+ "# caltechDataset is in format (_id, image_pixels, label)\n",
+ "caltechDataset = loadDataset()\n",
+ "\n",
+ "# Fetch all documents from the collection and then sort them by \"_id\"\n",
+ "feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n",
+ "feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n",
+ "\n",
+ "num_labels = 101"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def calculate_label_means(l, feature_model):\n",
+ " \n",
+ " # Just picking the feature vector for that particular label from even _id rows in the dataset\n",
+ " label_vectors = [x[feature_model] for x in feature_descriptors if x[\"label\"] == l and x[\"_id\"] % 2 == 0]\n",
+ " \n",
+ " label_mean_vector = [sum(col)/len(col) for col in zip(*label_vectors)]\n",
+ " return label_mean_vector"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def findKRelevantImages(mean_vector, feature_model, l):\n",
+ "\n",
+ " # Same as in above function, but took ids as well.\n",
+ " # Redundant step.\n",
+ " label_vectors = [(x[\"_id\"], x[feature_model]) for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
+ "\n",
+ " n = len(label_vectors)\n",
+ "\n",
+ " similarities = []\n",
+ "\n",
+ " # Use the appropriate similarity based on feature model selected by the user\n",
+ " match feature_model:\n",
+ "\n",
+ " case \"color_moments\":\n",
+ "\n",
+ " for i in range(n):\n",
+ " similarities.append({\"_id\": label_vectors[i][0], \"similarity\": math.dist(mean_vector, label_vectors[i][1])})\n",
+ " similarities = sorted(similarities, key=lambda x: x[\"similarity\"], reverse=False)\n",
+ "\n",
+ " case \"hog\":\n",
+ "\n",
+ " for i in range(n):\n",
+ " similarities.append({\"_id\": label_vectors[i][0], \"similarity\": (np.dot(mean_vector, label_vectors[i][1]) / (np.linalg.norm(mean_vector) * np.linalg.norm(label_vectors[i][1])))})\n",
+ " similarities = sorted(similarities, key=lambda x: x[\"similarity\"], reverse=True)\n",
+ " \n",
+ " case \"layer3\" | \"avgpool\" | \"fc\":\n",
+ "\n",
+ " for i in range(n):\n",
+ " similarities.append({\"_id\": label_vectors[i][0], \"similarity\": scipy.stats.pearsonr(mean_vector, label_vectors[i][1]).statistic})\n",
+ " similarities = sorted(similarities, key=lambda x: x[\"similarity\"], reverse=True)\n",
+ " \n",
+ " return similarities\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main():\n",
+ "\n",
+ " # Load dataset\n",
+ "\n",
+ " # User input for Image ID\n",
+ " l = int(input(\"Enter query label: \"))\n",
+ " k = int(input(\"Enter k: \"))\n",
+ "\n",
+ " features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n",
+ "\n",
+ " # User input for feature model to extract\n",
+ " print(\"1: Color moments\")\n",
+ " print(\"2: HOG\")\n",
+ " print(\"3: Resnet50 Avgpool layer\")\n",
+ " print(\"4: Resnet50 Layer 3\")\n",
+ " print(\"5: Resnet50 FC layer\")\n",
+ " feature_model = features[int(input(\"Select the feature model: \")) - 1]\n",
+ "\n",
+ " mean_vector = calculate_label_means(l, feature_model)\n",
+ "\n",
+ " similar_images = findKRelevantImages(mean_vector, feature_model, l)\n",
+ "\n",
+ " for i in range(k):\n",
+ " print(similar_images[i])\n",
+ "\n",
+ " # Show the \"k relevant images\"\n",
+ " fig, axes = plt.subplots(1, k, figsize=(15, 5))\n",
+ "\n",
+ " for i in range(k):\n",
+ " # caltechDataset[similar_images[i][\"_id\"]][1] because\n",
+ " # similar_images[i][\"_id\"] will provide me the image id\n",
+ " # [1] will be image pixel values since caltechDataset is in format (id, pixels, label)\n",
+ " axes[i].imshow(caltechDataset[similar_images[i][\"_id\"]][1].permute(1, 2, 0))\n",
+ " axes[i].set_title(f'id: {similar_images[i][\"_id\"]}')\n",
+ "\n",
+ " # Show the figure with all the images\n",
+ " plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "Interrupted by user",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 6\u001b[0m line \u001b[0;36m2\n\u001b[0;32m 1\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m__name__\u001b[39m \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m__main__\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m----> 2\u001b[0m main()\n",
+ "\u001b[1;32me:\\Fall 23\\CSE 515 - Multimedia and web databases\\CSE515_MWDB_Project\\Phase 2\\task1.ipynb Cell 6\u001b[0m line \u001b[0;36m6\n\u001b[0;32m 1\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mmain\u001b[39m():\n\u001b[0;32m 2\u001b[0m \n\u001b[0;32m 3\u001b[0m \u001b[39m# Load dataset\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \n\u001b[0;32m 5\u001b[0m \u001b[39m# User input for Image ID\u001b[39;00m\n\u001b[1;32m----> 6\u001b[0m l \u001b[39m=\u001b[39m \u001b[39mint\u001b[39m(\u001b[39minput\u001b[39;49m(\u001b[39m\"\u001b[39;49m\u001b[39mEnter query label: \u001b[39;49m\u001b[39m\"\u001b[39;49m))\n\u001b[0;32m 7\u001b[0m k \u001b[39m=\u001b[39m \u001b[39mint\u001b[39m(\u001b[39minput\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mEnter k: \u001b[39m\u001b[39m\"\u001b[39m))\n\u001b[0;32m 9\u001b[0m features \u001b[39m=\u001b[39m [\u001b[39m'\u001b[39m\u001b[39mcolor_moments\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mhog\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mlayer3\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mavgpool\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mfc\u001b[39m\u001b[39m'\u001b[39m]\n",
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py:1202\u001b[0m, in \u001b[0;36mKernel.raw_input\u001b[1;34m(self, prompt)\u001b[0m\n\u001b[0;32m 1200\u001b[0m msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mraw_input was called, but this frontend does not support input requests.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1201\u001b[0m \u001b[39mraise\u001b[39;00m StdinNotImplementedError(msg)\n\u001b[1;32m-> 1202\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_input_request(\n\u001b[0;32m 1203\u001b[0m \u001b[39mstr\u001b[39;49m(prompt),\n\u001b[0;32m 1204\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_parent_ident[\u001b[39m\"\u001b[39;49m\u001b[39mshell\u001b[39;49m\u001b[39m\"\u001b[39;49m],\n\u001b[0;32m 1205\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_parent(\u001b[39m\"\u001b[39;49m\u001b[39mshell\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m 1206\u001b[0m password\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[0;32m 1207\u001b[0m )\n",
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\ipykernel\\kernelbase.py:1245\u001b[0m, in \u001b[0;36mKernel._input_request\u001b[1;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[0;32m 1242\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyboardInterrupt\u001b[39;00m:\n\u001b[0;32m 1243\u001b[0m \u001b[39m# re-raise KeyboardInterrupt, to truncate traceback\u001b[39;00m\n\u001b[0;32m 1244\u001b[0m msg \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mInterrupted by user\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m-> 1245\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyboardInterrupt\u001b[39;00m(msg) \u001b[39mfrom\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 1246\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[0;32m 1247\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlog\u001b[39m.\u001b[39mwarning(\u001b[39m\"\u001b[39m\u001b[39mInvalid Message:\u001b[39m\u001b[39m\"\u001b[39m, exc_info\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n",
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m: Interrupted by user"
+ ]
+ }
+ ],
+ "source": [
+ "if __name__ == \"__main__\":\n",
+ " main()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}