{ "cells": [ { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from utils import *\n", "warnings.filterwarnings('ignore')\n", "%matplotlib inline\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n", "all_images = fd_collection.find()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "selected_latent_space = valid_latent_spaces[\n", " str(input(\"Enter latent space - one of \" + str(list(valid_latent_spaces.keys()))))\n", "]\n", "\n", "selected_feature_model = valid_feature_models[\n", " str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n", "]\n", "\n", "k = int(input(\"Enter value of k (no. of latent semantics): \"))\n", "if k < 1:\n", " raise ValueError(\"k should be a positive integer\")\n", "\n", "k_2 = int(input(\"Enter value of k_2 (no. of similar images): \"))\n", "if k_2 < 1:\n", " raise ValueError(\"k_2 should be a positive integer\")\n", "\n", "if selected_latent_space != \"cp\":\n", " selected_dim_reduction_method = str(\n", " input(\n", " \"Enter dimensionality reduction method - one of \"\n", " + str(list(valid_dim_reduction_methods.keys()))\n", " )\n", " )\n", "\n", "label = int(input(\"Enter label: \"))\n", "if label < 0 and label > 100:\n", " raise ValueError(\"label should be between 0 and 100\")\n", "\n", "label_rep = calculate_label_representatives(\n", " fd_collection, label, selected_feature_model\n", ")\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cm_fd-svd-10-semantics.json loaded\n" ] } ], "source": [ "# Loading latent semantics\n", "match selected_latent_space:\n", " # LS1\n", " case \"\":\n", " file_prefix = f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}\"\n", " file_name = file_prefix + \"-semantics.json\"\n", " model_name = file_prefix + \"-model.joblib\"\n", " if os.path.exists(file_name):\n", " data = json.load(open(file_name))\n", " print(file_name + \" loaded\")\n", " else:\n", " raise Exception(file_name + \" does not exist\")\n", " # LDA model\n", " if selected_dim_reduction_method == \"lda\":\n", " if os.path.exists(model_name):\n", " data_model = load(model_name)\n", " print(model_name + \" loaded\")\n", " else:\n", " raise Exception(model_name + \" does not exist\")\n", " # LS2\n", " case \"cp\":\n", " file_name = f\"{selected_feature_model}-cp-{k}-semantics.json\"\n", " if os.path.exists(file_name):\n", " data = json.load(open(file_name))\n", " print(file_name + \" loaded\")\n", " else:\n", " raise Exception(file_name + \" does not exist\")\n", " # LS3, LS4\n", " case _:\n", " file_name = f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"\n", " if os.path.exists(file_name):\n", " data = json.load(open(file_name))\n", " print(file_name + \" loaded\")\n", " else:\n", " raise Exception(file_name + \" does not exist\")\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "def extract_similarities_ls1_ls4(latent_space, 
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_similarities_ls1_ls4(latent_space, dim_reduction, data, label, label_rep):\n",
    "    # Uses the globals all_images, data_model, NUM_IMAGES and k_2 defined above\n",
    "\n",
    "    match dim_reduction:\n",
    "\n",
    "        case \"svd\":\n",
    "            U = np.array(data[\"image-semantic\"])\n",
    "            S = np.array(data[\"semantics-core\"])\n",
    "            if len(S.shape) == 1:\n",
    "                S = np.diag(S)\n",
    "            V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
    "\n",
    "            if latent_space == \"image_sim\":\n",
    "                # get the label rep from the image-similarity space itself,\n",
    "                # i.e. average the semantics of the label's own images\n",
    "                label_vectors = []\n",
    "                length = len(U)\n",
    "                for i in range(length):\n",
    "                    if all_images[i][\"true_label\"] == label:\n",
    "                        label_vectors.append(U[i])\n",
    "                label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
    "                comparison_vector = np.matmul(label_rep, S)\n",
    "            else:\n",
    "                # use the feature-space label rep, projected via V\n",
    "                comparison_vector = np.matmul(np.matmul(label_rep, V), S)\n",
    "\n",
    "            comparison_feature_space = np.matmul(U, S)\n",
    "\n",
    "        case \"nmf\":\n",
    "            H = np.array(data[\"semantic-feature\"])\n",
    "            comparison_feature_space = W = np.array(data[\"image-semantic\"])\n",
    "            if latent_space == \"image_sim\":\n",
    "                label_vectors = []\n",
    "                length = len(W)\n",
    "                for i in range(length):\n",
    "                    if all_images[i][\"true_label\"] == label:\n",
    "                        label_vectors.append(W[i])\n",
    "                label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
    "                comparison_vector = label_rep\n",
    "            else:\n",
    "                # NMF needs non-negative input, so shift before projecting onto the fixed H\n",
    "                min_value = np.min(label_rep)\n",
    "                feature_vectors_shifted = np.asarray(label_rep) - min_value\n",
    "                comparison_vector = nmf(feature_vectors_shifted, H, update_H=False)\n",
    "\n",
    "        case \"kmeans\":\n",
    "            comparison_vector = []\n",
    "            comparison_feature_space = np.array(data[\"image-semantic\"])\n",
    "            S = np.array(data[\"semantic-feature\"])\n",
    "\n",
    "            if latent_space == \"image_sim\":\n",
    "                sim_matrix = np.array(data[\"sim-matrix\"])\n",
    "                label_vectors = []\n",
    "                length = len(sim_matrix)\n",
    "                for i in range(length):\n",
    "                    if all_images[i][\"true_label\"] == label:\n",
    "                        label_vectors.append(sim_matrix[i])\n",
    "                label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
    "\n",
    "            # the k-means semantic of label_rep is its distance to each centroid\n",
    "            for centroid in S:\n",
    "                comparison_vector.append(math.dist(label_rep, centroid))\n",
    "\n",
    "        case \"lda\":\n",
    "            comparison_feature_space = np.array(data[\"image-semantic\"])\n",
    "            if latent_space == \"image_sim\":\n",
    "                label_vectors = []\n",
    "                length = len(comparison_feature_space)\n",
    "                for i in range(length):\n",
    "                    if all_images[i][\"true_label\"] == label:\n",
    "                        label_vectors.append(comparison_feature_space[i])\n",
    "                label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
    "                comparison_vector = label_rep\n",
    "            else:\n",
    "                # LDA also needs non-negative input, so shift before transforming\n",
    "                min_value = np.min(label_rep)\n",
    "                feature_vectors_shifted = np.asarray(label_rep) - min_value\n",
    "                comparison_vector = data_model.transform(\n",
    "                    feature_vectors_shifted.flatten().reshape(1, -1)\n",
    "                ).flatten()\n",
    "\n",
    "    # Rank every image of a different label by distance to the comparison vector\n",
    "    distances = []\n",
    "    for i in range(NUM_IMAGES):\n",
    "        if all_images[i][\"true_label\"] != label:\n",
    "            distances.append(\n",
    "                {\n",
    "                    \"image_id\": i,\n",
    "                    \"label\": all_images[i][\"true_label\"],\n",
    "                    \"distance\": math.dist(\n",
    "                        comparison_vector, comparison_feature_space[i]\n",
    "                    ),\n",
    "                }\n",
    "            )\n",
    "\n",
    "    distances = sorted(distances, key=lambda x: x[\"distance\"])\n",
    "\n",
    "    # Keep only the closest image per label until k_2 distinct labels are found\n",
    "    similar_labels = []\n",
    "    unique_labels = set()\n",
    "\n",
    "    for img in distances:\n",
    "        if img[\"label\"] not in unique_labels:\n",
    "            similar_labels.append(img)\n",
    "            unique_labels.add(img[\"label\"])\n",
    "\n",
    "            if len(similar_labels) == k_2:\n",
    "                break\n",
    "\n",
    "    for x in similar_labels:\n",
    "        print(x)\n"
   ]
  },
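  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The function above ranks every other-label image by Euclidean distance to the comparison vector and keeps only the closest image per label until `k_2` distinct labels are collected (the LS2/LS3 functions below rank label vectors directly). The next cell is a self-contained sketch of that per-label deduplicated ranking on made-up toy vectors; `k_2_demo` and the data are hypothetical and for illustration only."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch of the ranking step used by the extraction functions, on toy data.\n",
    "import math\n",
    "\n",
    "comparison_vector = [0.2, 0.9]\n",
    "rows = [[0.1, 1.0], [0.3, 0.8], [2.0, 2.0], [0.0, 0.0]]  # toy latent vectors\n",
    "labels = [3, 3, 7, 1]                                    # toy true labels\n",
    "k_2_demo = 2                                             # hypothetical k_2\n",
    "\n",
    "distances = sorted(\n",
    "    (\n",
    "        {\"image_id\": i, \"label\": labels[i], \"distance\": math.dist(comparison_vector, rows[i])}\n",
    "        for i in range(len(rows))\n",
    "    ),\n",
    "    key=lambda x: x[\"distance\"],\n",
    ")\n",
    "\n",
    "# Keep only the closest image per label until k_2_demo distinct labels are found\n",
    "similar_labels, seen = [], set()\n",
    "for img in distances:\n",
    "    if img[\"label\"] not in seen:\n",
    "        similar_labels.append(img)\n",
    "        seen.add(img[\"label\"])\n",
    "        if len(similar_labels) == k_2_demo:\n",
    "            break\n",
    "\n",
    "for x in similar_labels:\n",
    "    print(x)\n"
   ]
  },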
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_similarities_ls2(data, label):\n",
    "    # Uses the global k_2 defined above\n",
    "\n",
    "    LS = np.array(data[\"label-semantic\"])\n",
    "    S = np.array(data[\"semantics-core\"])\n",
    "\n",
    "    if len(S.shape) == 1:\n",
    "        S = np.diag(S)\n",
    "\n",
    "    comparison_feature_space = np.matmul(LS, S)\n",
    "    comparison_vector = comparison_feature_space[label]\n",
    "\n",
    "    distances = []\n",
    "\n",
    "    n = len(comparison_feature_space)\n",
    "    for i in range(n):\n",
    "        if i != label:\n",
    "            distances.append(\n",
    "                {\n",
    "                    \"label\": i,\n",
    "                    \"distance\": math.dist(\n",
    "                        comparison_vector, comparison_feature_space[i]\n",
    "                    ),\n",
    "                }\n",
    "            )\n",
    "\n",
    "    distances = sorted(distances, key=lambda x: x[\"distance\"])[:k_2]\n",
    "\n",
    "    for x in distances:\n",
    "        print(x)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_similarities_ls3(dim_reduction, data, label):\n",
    "    # Uses the global k_2 defined above\n",
    "\n",
    "    if dim_reduction == \"svd\":\n",
    "        U = np.array(data[\"image-semantic\"])\n",
    "        S = np.array(data[\"semantics-core\"])\n",
    "        if len(S.shape) == 1:\n",
    "            S = np.diag(S)\n",
    "\n",
    "        comparison_feature_space = np.matmul(U, S)\n",
    "    else:\n",
    "        comparison_feature_space = np.array(data[\"image-semantic\"])\n",
    "\n",
    "    comparison_vector = comparison_feature_space[label]\n",
    "\n",
    "    n = len(comparison_feature_space)\n",
    "    distances = []\n",
    "    for i in range(n):\n",
    "        if i != label:\n",
    "            distances.append(\n",
    "                {\n",
    "                    \"label\": i,\n",
    "                    \"distance\": math.dist(\n",
    "                        comparison_vector, comparison_feature_space[i]\n",
    "                    ),\n",
    "                }\n",
    "            )\n",
    "\n",
    "    distances = sorted(distances, key=lambda x: x[\"distance\"])[:k_2]\n",
    "\n",
    "    for x in distances:\n",
    "        print(x)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'image_id': 88, 'label': 0, 'distance': 1.0674257256118014}\n",
      "{'image_id': 3495, 'label': 74, 'distance': 1.2947824352796302}\n",
      "{'image_id': 3548, 'label': 76, 'distance': 1.3839125472415652}\n",
      "{'image_id': 2306, 'label': 35, 'distance': 1.4136775151406638}\n",
      "{'image_id': 2271, 'label': 34, 'distance': 1.560355392987607}\n",
      "{'image_id': 2097, 'label': 28, 'distance': 1.6213029580319027}\n",
      "{'image_id': 2444, 'label': 39, 'distance': 1.6252904256132055}\n",
      "{'image_id': 1656, 'label': 15, 'distance': 1.6283345060828458}\n",
      "{'image_id': 3223, 'label': 63, 'distance': 1.6574252628682995}\n",
      "{'image_id': 3717, 'label': 82, 'distance': 1.6796825272768603}\n"
     ]
    }
   ],
   "source": [
    "match selected_latent_space:\n",
    "\n",
    "    # LS1 and LS4 both compare in the image-semantic space\n",
    "    case \"\" | \"image_sim\":\n",
    "        extract_similarities_ls1_ls4(\n",
    "            selected_latent_space, selected_dim_reduction_method, data, label, label_rep\n",
    "        )\n",
    "\n",
    "    # LS3\n",
    "    case \"label_sim\":\n",
    "        extract_similarities_ls3(selected_dim_reduction_method, data, label)\n",
    "\n",
    "    # LS2\n",
    "    case \"cp\":\n",
    "        extract_similarities_ls2(data, label)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}