"""Task 1: nearest-label classification in a k-dimensional SVD latent space.

The label representatives are decomposed with SVD (see
``utils.extract_latent_semantics_from_feature_model``).  Every odd-numbered
image is then projected into the same latent space and assigned the label
whose representative is closest (Euclidean distance).  Per-label precision,
recall and F1 score plus the overall accuracy are printed.
"""

from collections import Counter

import numpy as np


def project_into_latent_space(feature_vector, V, S):
    """Project one raw feature vector into the latent comparison space.

    Args:
        feature_vector: feature descriptor of an image (any shape; it is
            flattened before use).
        V: (n_features, k) matrix, the transpose of the stored
            ``"semantic-feature"`` factor.
        S: (k, k) core matrix.

    Returns:
        1-D array of length k.

    NOTE(review): the extra multiplication by ``S`` mirrors the convention of
    ``utils.svd``, whose right factor is additionally divided by the singular
    values; with that factor ``q @ V @ S`` lives in the same space as
    ``U @ S``.  Revisit this if ``utils.svd`` is ever changed to return an
    orthonormal right factor.
    """
    flat = np.asarray(feature_vector).flatten()
    return np.matmul(np.matmul(flat, V), S)


def nearest_label(comparison_vector, comparison_feature_space):
    """Return the row index (label) of the representative closest to the query.

    Ties are resolved in favour of the smallest label, which is what a stable
    ascending sort over the per-label distances yields as well.
    """
    deltas = np.asarray(comparison_feature_space) - np.asarray(comparison_vector)
    distances = np.linalg.norm(deltas, axis=1)
    return int(np.argmin(distances))


def safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is 0."""
    return numerator / denominator if denominator else 0.0


def label_metrics(tp, fp, fn):
    """Compute ``(precision, recall, f1_score)`` from confusion counts.

    Precision/recall are reported as 0.0 when they are undefined (label never
    predicted / label without test images) instead of raising
    ``ZeroDivisionError``.  ``f1_score`` is ``None`` when precision and recall
    are both zero.
    """
    precision = safe_ratio(tp, tp + fp)
    recall = safe_ratio(tp, tp + fn)
    if precision + recall == 0:
        f1_score = None
    else:
        f1_score = 2 * precision * recall / (precision + recall)
    return precision, recall, f1_score


def main():
    """Run the interactive classification experiment and print the metrics."""
    # Imported here (and by name) so that importing this module does not
    # require the project package and no wildcard import hides dependencies.
    from utils import (
        NUM_IMAGES,
        NUM_LABELS,
        extract_latent_semantics_from_feature_model,
        getCollection,
    )

    fd_collection = getCollection("team_5_mwdb_phase_2", "fd_collection")
    selected_feature_model = "fc_fd"

    k = int(input("Enter value of k: "))
    if k < 1:
        raise ValueError("k should be a positive integer")

    latent_semantics = extract_latent_semantics_from_feature_model(
        fd_collection,
        k,
        selected_feature_model,
    )

    U = np.array(latent_semantics["image-semantic"])
    S = np.array(latent_semantics["semantics-core"])
    if S.ndim == 1:
        # The core may be stored as a vector of singular values.
        S = np.diag(S)
    V = np.transpose(np.array(latent_semantics["semantic-feature"]))
    comparison_feature_space = np.matmul(U, S)

    true_positives = Counter()
    false_negatives = Counter()
    false_positives = Counter()
    true_negatives = {}

    evaluated = 0
    for index in range(NUM_IMAGES):
        image_id = index * 2 + 1  # odd-numbered images form the test set
        image = fd_collection.find_one({"image_id": image_id})
        if image is None:
            print(f"Image {image_id} not found in the collection; skipping")
            continue
        evaluated += 1

        comparison_vector = project_into_latent_space(
            image[selected_feature_model], V, S
        )
        predicted = nearest_label(comparison_vector, comparison_feature_space)
        actual = image["true_label"]

        if predicted == actual:
            true_positives[actual] += 1
        else:
            false_negatives[actual] += 1
            false_positives[predicted] += 1

    for label in range(NUM_LABELS):
        tp = true_positives[label]
        fp = false_positives[label]
        fn = false_negatives[label]
        true_negatives[label] = evaluated - (tp + fn + fp)

        precision, recall, f1_score = label_metrics(tp, fp, fn)
        print(f"\nPrecision for Label {label}: {precision}")
        print(f"Recall for Label {label}: {recall}")
        print(f"F1 Score for Label {label}: {f1_score}")

    accuracy = safe_ratio(sum(true_positives.values()), evaluated)
    print(f"Overall Accuracy: {accuracy}")


if __name__ == "__main__":
    main()
# Number of distinct labels and number of test (odd-numbered) images.
NUM_LABELS = 101
NUM_IMAGES = 4338


def extract_latent_semantics_from_feature_model(
    fd_collection,
    k,
    feature_model,
):
    """Extract k latent semantics from the label representatives with SVD.

    One representative vector per label (``0 .. NUM_LABELS - 1``) is computed
    with ``calculate_label_representatives``; the resulting
    (NUM_LABELS, n_features) matrix is decomposed with ``svd``.

    Args:
        fd_collection: collection object exposing ``find(query)``.
        k: number of latent semantics to keep.
        feature_model: key of the feature descriptor inside each document.

    Returns:
        dict with three entries, each a list of real-valued rows:
        ``"image-semantic"`` (label x k), ``"semantics-core"`` (k x k
        diagonal) and ``"semantic-feature"`` (k x n_features).

    Raises:
        ValueError: if some label has no feature vectors, since the label
            matrix could not be built as a rectangular array.
    """
    representatives = [
        np.asarray(
            calculate_label_representatives(fd_collection, label, feature_model),
            dtype=float,
        ).flatten()
        for label in range(NUM_LABELS)
    ]

    empty_labels = [
        label for label, vector in enumerate(representatives) if vector.size == 0
    ]
    if empty_labels:
        raise ValueError(
            "No feature vectors found for labels: {}".format(empty_labels)
        )

    label_features = np.array(representatives)

    print(
        "Applying {} on the {} space to get {} latent semantics.".format(
            "svd", feature_model, k
        )
    )

    U, S, V_T = svd(label_features, k=k)

    return {
        "image-semantic": list(np.real(U)),
        "semantics-core": list(np.real(S)),
        "semantic-feature": list(np.real(V_T)),
    }


def calculate_label_representatives(fd_collection, label, feature_model):
    """Return the mean feature vector of a label under ``feature_model``.

    Only even-numbered images are used; the odd-numbered ones are the images
    being classified, so they must not leak into the representatives.

    Args:
        fd_collection: collection object exposing ``find(query)``.
        label: value matched against each document's ``true_label``.
        feature_model: key of the feature descriptor inside each document.

    Returns:
        list of floats (the per-dimension mean), or an empty list when no
        document matches.
    """
    # `$mod` is a field-level operator: it has to be applied to `image_id`,
    # it is not valid as a top-level key of the query document.
    matching_images = fd_collection.find(
        {"true_label": label, "image_id": {"$mod": [2, 0]}}
    )
    label_fds = [
        np.asarray(img_fds[feature_model], dtype=float).flatten()
        for img_fds in matching_images
    ]
    if not label_fds:
        return []

    # Mean across images for each feature dimension.
    return np.mean(label_fds, axis=0).tolist()


def svd(matrix, k):
    """Truncated SVD-style factorisation via the eigen-decomposition of A^T A.

    Args:
        matrix: 2-D array-like of shape (n_rows, n_features).
        k: number of leading components to keep (at most n_features are
            returned).

    Returns:
        ``(U, S, V_T)`` where ``U`` is (n_rows, k) with the left singular
        vectors, ``S`` is the (k, k) diagonal matrix of singular values and
        ``V_T`` is (k, n_features).

        NOTE(review): as in the original implementation, the rows of ``V_T``
        are the eigenvectors *divided by the singular values*, i.e. they are
        not unit length.  Consequently ``U @ S @ S @ V_T`` reconstructs the
        rank-k approximation and ``matrix @ V_T.T @ S == U @ S``; existing
        callers project queries with exactly that expression, so the scaling
        is kept.  Change caller and callee together if an orthonormal right
        factor is wanted.

    Raises:
        ValueError: if ``matrix`` is not 2-D or ``k`` is not positive.
    """
    matrix = np.asarray(matrix, dtype=float)
    if matrix.ndim != 2:
        raise ValueError("matrix should be 2-dimensional")
    if k < 1:
        raise ValueError("k should be a positive integer")

    # A^T A is symmetric positive semi-definite, so `eigh` applies: it returns
    # real eigenvalues/eigenvectors (no spurious complex parts as with `eig`).
    gram_matrix = np.dot(matrix.T, matrix)
    eigenvalues, eigenvectors = np.linalg.eigh(gram_matrix)

    # Keep the k largest eigen-pairs *before* normalising, so directions in
    # the null space never get divided by (near-)zero singular values.
    top = np.argsort(eigenvalues)[::-1][:k]
    eigenvalues = np.clip(eigenvalues[top], 0.0, None)  # round-off negatives
    eigenvectors = eigenvectors[:, top]

    singular_values = np.sqrt(eigenvalues)

    # Components that are numerically zero are left unscaled (divisor 1).
    tolerance = (
        eigenvalues.max(initial=0.0) * max(matrix.shape) * np.finfo(float).eps
    )
    divisors = np.where(eigenvalues > tolerance, singular_values, 1.0)

    left_singular_vectors = np.dot(matrix, eigenvectors) / divisors
    right_singular_vectors = eigenvectors / divisors

    return left_singular_vectors, np.diag(singular_values), right_singular_vectors.T