{ "cells": [ { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "import json\n", "import os\n", "import numpy as np\n", "from utils import *\n", "import math\n", "import heapq\n", "import random" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n", "all_images = fd_collection.find()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "label_sim-cm_fd-lda-10-model.joblib loaded\n" ] } ], "source": [ "selected_latent_space = valid_latent_spaces[\n", " str(input(\"Enter latent space - one of \" + str(list(valid_latent_spaces.keys()))))\n", "]\n", "\n", "selected_feature_model = valid_feature_models[\n", " str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n", "]\n", "\n", "k = int(input(\"Enter value of k: \"))\n", "if k < 1:\n", " raise ValueError(\"k should be a positive integer\")\n", "\n", "selected_dim_reduction_method = str(\n", " input(\n", " \"Enter dimensionality reduction method - one of \"\n", " + str(list(valid_dim_reduction_methods.keys()))\n", " )\n", ")\n", "\n", "image_id = int(input(\"Enter image ID: \"))\n", "if image_id < 0 and image_id > 8676 and image_id % 2 != 0:\n", " raise ValueError(\"image id should be even number between 0 and 8676\")\n", "\n", "img_label = all_images[int(image_id / 2)][\"true_label\"]\n", "\n", "knum = int(input(\"Enter value of knum: \"))\n", "if knum < 1:\n", " raise ValueError(\"knum should be a positive integer\")\n", "\n", "match selected_latent_space:\n", " case \"\":\n", " if selected_dim_reduction_method == \"lda\":\n", " if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\") and os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n", " if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\"):\n", " model = load(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\")\n", " data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib and json loaded\")\n", " else:\n", " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib does not exist\")\n", " else:\n", " if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n", " data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n", " else:\n", " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n", " case \"cp\":\n", " if os.path.exists(f\"{selected_feature_model}-cp-{k}-semantics.json\"):\n", " data = json.load(open(f\"{selected_feature_model}-cp-{k}-semantics.json\"))\n", " print(f\"{selected_feature_model}-cp-{k}-semantics.json loaded\")\n", " else:\n", " 
print(f\"{selected_feature_model}-cp-{k}-semantics.json does not exist\")\n", " case _:\n", " if selected_dim_reduction_method == \"lda\":\n", " if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\") and os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n", " model = load(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\")\n", " data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib loaded\")\n", " else:\n", " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib does not exist\")\n", " else:\n", " if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}img-{k}-semantics.json\"):\n", " data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n", " else:\n", " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "def extract_similarities_ls1_ls4(latent_space, dim_reduction, selected_feature_model, data, image_id):\n", "\n", " image_fd = np.array(all_images[int(image_id / 2)][selected_feature_model]).flatten()\n", "\n", " match dim_reduction:\n", "\n", " case 'svd':\n", " U = np.array(data[\"image-semantic\"])\n", " S = np.array(data[\"semantics-core\"])\n", " if len(S.shape) == 1:\n", " S = np.diag(S)\n", " V = np.transpose(np.array(data[\"semantic-feature\"]))\n", " \n", " comparison_feature_space = np.matmul(U, S)\n", "\n", " if latent_space == \"image_sim\":\n", " comparison_vector = comparison_feature_space[int(image_id / 2)]\n", " else:\n", " comparison_vector = np.matmul(np.matmul(image_fd, V), S)\n", " \n", " case \"nmf\":\n", " H = np.array(data['semantic-feature'])\n", " comparison_feature_space = np.array(data['image-semantic'])\n", "\n", " if latent_space == \"image_sim\":\n", " comparison_vector = comparison_feature_space[int(image_id / 2)]\n", " else:\n", " comparison_vector = np.matmul(image_fd, np.transpose(H))\n", "\n", " case \"kmeans\":\n", " comparison_vector = []\n", " comparison_feature_space = np.array(data[\"image-semantic\"])\n", " S = np.array(data[\"semantic-feature\"])\n", "\n", " for centroid in S:\n", " if latent_space == \"image_sim\":\n", " sim_matrix = np.array(data[\"sim-matrix\"])\n", " comparison_vector.append(math.dist(sim_matrix[int(image_id / 2)], centroid))\n", " else:\n", " comparison_vector.append(math.dist(image_fd, centroid))\n", " \n", " case \"lda\":\n", " comparison_feature_space = np.array(data[\"image-semantic\"])\n", " if latent_space == \"image_sim\":\n", " comparison_vector = comparison_feature_space[int(image_id / 2)]\n", " else:\n", " fd = np.array(all_images[int(image_id / 2)][selected_feature_model])\n", " min_value = np.min(fd)\n", " feature_vectors_shifted = fd - min_value\n", " comparison_vector = model.transform(feature_vectors_shifted.flatten().reshape(1, -1)).flatten()\n", " 
 "    print(comparison_feature_space.shape)\n",
 "    print(comparison_vector.shape)\n",
 "\n",
 "    n = len(comparison_feature_space)\n",
 "\n",
 "    # Even image IDs map to cursor index id / 2, so index i corresponds to image i * 2.\n",
 "    distances = []\n",
 "    for i in range(n):\n",
 "        if (i * 2) != image_id:\n",
 "            distances.append({\"image_id\": i * 2, \"label\": all_images[i][\"true_label\"], \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
 "\n",
 "    distances = sorted(distances, key=lambda x: x[\"distance\"])\n",
 "\n",
 "    # Keep only the closest image seen for each label until knum labels are found.\n",
 "    similar_labels = []\n",
 "    unique_labels = set()\n",
 "\n",
 "    for img in distances:\n",
 "        if img[\"label\"] not in unique_labels:\n",
 "            similar_labels.append(img)\n",
 "            unique_labels.add(img[\"label\"])\n",
 "\n",
 "            if len(similar_labels) == knum:\n",
 "                break\n",
 "\n",
 "    for x in similar_labels:\n",
 "        print(x)" ] },
 { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [
 "def extract_similarities_ls2(data, image_id):\n",
 "    \"\"\"Print the knum closest distinct labels to image_id in the CP latent space.\"\"\"\n",
 "\n",
 "    IS = np.array(data[\"image-semantic\"])\n",
 "    S = np.array(data[\"semantics-core\"])\n",
 "\n",
 "    if len(S.shape) == 1:\n",
 "        S = np.diag(S)\n",
 "\n",
 "    comparison_feature_space = np.matmul(IS, S)\n",
 "    comparison_vector = comparison_feature_space[int(image_id / 2)]\n",
 "\n",
 "    distances = []\n",
 "\n",
 "    n = len(comparison_feature_space)\n",
 "    for i in range(n):\n",
 "        if (i * 2) != image_id:\n",
 "            distances.append({\"image_id\": i * 2, \"label\": all_images[i][\"true_label\"], \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
 "\n",
 "    distances = sorted(distances, key=lambda x: x[\"distance\"])\n",
 "\n",
 "    # Keep the closest image per label, skipping the query image's own label.\n",
 "    similar_labels = []\n",
 "    unique_labels = set()\n",
 "\n",
 "    for img in distances:\n",
 "        if img[\"label\"] not in unique_labels and img[\"label\"] != img_label:\n",
 "            similar_labels.append(img)\n",
 "            unique_labels.add(img[\"label\"])\n",
 "\n",
 "            if len(similar_labels) == knum:\n",
 "                break\n",
 "\n",
 "    for x in similar_labels:\n",
 "        print(x)" ] },
 { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [
 "def extract_similarities_ls3(dim_reduction, data, image_id):\n",
 "    \"\"\"Print the knum labels closest to image_id's own label in the label latent space.\"\"\"\n",
 "\n",
 "    img_label = all_images[int(image_id / 2)][\"true_label\"]\n",
 "\n",
 "    match dim_reduction:\n",
 "\n",
 "        case \"svd\":\n",
 "            U = np.array(data[\"image-semantic\"])\n",
 "            S = np.array(data[\"semantics-core\"])\n",
 "            if len(S.shape) == 1:\n",
 "                S = np.diag(S)\n",
 "\n",
 "            comparison_feature_space = np.matmul(U, S)\n",
 "\n",
 "        case \"nmf\" | \"kmeans\" | \"lda\":\n",
 "            comparison_feature_space = np.array(data[\"image-semantic\"])\n",
 "\n",
 "    # Rows of the label latent space are indexed by label, so the query row is img_label.\n",
 "    comparison_vector = comparison_feature_space[img_label]\n",
 "\n",
 "    n = len(comparison_feature_space)\n",
 "    distances = []\n",
 "    for i in range(n):\n",
 "        if i != img_label:\n",
 "            distances.append({\"label\": i, \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
 "\n",
 "    distances = sorted(distances, key=lambda x: x[\"distance\"])[:knum]\n",
 "\n",
 "    for img in distances:\n",
 "        print(img)" ] },
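 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Illustration only: every extractor above ranks candidates by Euclidean distance in\n",
 "# the latent space and keeps the closest few. The same top-k step is sketched here on\n",
 "# made-up data with heapq.nsmallest (heapq is already imported above), which avoids\n",
 "# sorting the full candidate list; the toy_* names are hypothetical.\n",
 "import heapq\n",
 "import math\n",
 "\n",
 "import numpy as np\n",
 "\n",
 "rng = np.random.default_rng(0)\n",
 "toy_space = rng.random((8, 3))           # 8 images in a 3-dimensional latent space\n",
 "toy_labels = [0, 0, 1, 1, 2, 2, 3, 3]    # two images per label\n",
 "query = toy_space[0]                     # pretend image 0 (ID 0) is the query\n",
 "\n",
 "toy_distances = [\n",
 "    {\"image_id\": i * 2, \"label\": toy_labels[i], \"distance\": math.dist(query, toy_space[i])}\n",
 "    for i in range(len(toy_space))\n",
 "    if i != 0\n",
 "]\n",
 "\n",
 "# Three closest candidates without a full sort; the unique-label filtering would then\n",
 "# proceed exactly as in the functions above.\n",
 "for entry in heapq.nsmallest(3, toy_distances, key=lambda x: x[\"distance\"]):\n",
 "    print(entry)" ] },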
 { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
 "{'label': 19, 'distance': 6.355424425784916e-06}\n",
 "{'label': 33, 'distance': 1.2740918489579356e-05}\n",
 "{'label': 54, 'distance': 6.85116641512525e-05}\n",
 "{'label': 49, 'distance': 8.314856743319156e-05}\n",
 "{'label': 28, 'distance': 0.00016883047586624644}\n",
 "{'label': 36, 'distance': 0.0002122873813686887}\n",
 "{'label': 35, 'distance': 0.00023350326417372972}\n",
 "{'label': 44, 'distance': 0.0002406295461878455}\n",
 "{'label': 9, 'distance': 0.00024234313765943418}\n",
 "{'label': 10, 'distance': 0.0002640071831902495}\n"
 ] } ], "source": [
 "match selected_latent_space:\n",
 "\n",
 "    case \"\" | \"image_sim\":\n",
 "        extract_similarities_ls1_ls4(selected_latent_space, selected_dim_reduction_method, selected_feature_model, data, image_id)\n",
 "\n",
 "    case \"label_sim\":\n",
 "        extract_similarities_ls3(selected_dim_reduction_method, data, image_id)\n",
 "\n",
 "    case \"cp\":\n",
 "        extract_similarities_ls2(data, image_id)\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }
 ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } },
 "nbformat": 4,
 "nbformat_minor": 2
}