refactored tasks 7-10

Kaushik Narayan R 2023-10-14 19:57:57 -07:00
parent 1f3b56b2e1
commit 91f782a485
5 changed files with 1076 additions and 904 deletions


@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -21,21 +21,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import json\n",
"import os\n",
"import numpy as np\n",
"from utils import *\n", "from utils import *\n",
"import math\n", "warnings.filterwarnings('ignore')\n",
"import heapq" "%matplotlib inline\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -45,17 +42,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"cm_fd-cp-10-semantics.json loaded\n"
]
}
],
"source": [ "source": [
"selected_latent_space = valid_latent_spaces[\n", "selected_latent_space = valid_latent_spaces[\n",
" str(input(\"Enter latent space - one of \" + str(list(valid_latent_spaces.keys()))))\n", " str(input(\"Enter latent space - one of \" + str(list(valid_latent_spaces.keys()))))\n",
@ -65,11 +54,15 @@
" str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n", " str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
"]\n", "]\n",
"\n", "\n",
"k = int(input(\"Enter value of k: \"))\n", "k = int(input(\"Enter value of k (no. of latent semantics): \"))\n",
"if k < 1:\n", "if k < 1:\n",
" raise ValueError(\"k should be a positive integer\")\n", " raise ValueError(\"k should be a positive integer\")\n",
"\n", "\n",
"if selected_latent_space != 'cp':\n", "k_2 = int(input(\"Enter value of k_2 (no. of similar images): \"))\n",
"if k_2 < 1:\n",
" raise ValueError(\"k_2 should be a positive integer\")\n",
"\n",
"if selected_latent_space != \"cp\":\n",
" selected_dim_reduction_method = str(\n", " selected_dim_reduction_method = str(\n",
" input(\n", " input(\n",
" \"Enter dimensionality reduction method - one of \"\n", " \"Enter dimensionality reduction method - one of \"\n",
@ -79,38 +72,67 @@
"\n", "\n",
"label = int(input(\"Enter label: \"))\n", "label = int(input(\"Enter label: \"))\n",
"if label < 0 and label > 100:\n", "if label < 0 and label > 100:\n",
" raise ValueError(\"k should be between 0 and 100\")\n", " raise ValueError(\"label should be between 0 and 100\")\n",
"\n", "\n",
"knum = int(input(\"Enter value of knum: \"))\n", "label_rep = calculate_label_representatives(\n",
"if knum < 1:\n", " fd_collection, label, selected_feature_model\n",
" raise ValueError(\"knum should be a positive integer\")\n", ")\n"
"\n",
"label_rep = calculate_label_representatives(fd_collection, label, selected_feature_model)\n",
"\n",
"match selected_latent_space:\n",
" case \"\":\n",
" if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
" data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n",
" else:\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n",
" case \"cp\":\n",
" if os.path.exists(f\"{selected_feature_model}-cp-{k}-semantics.json\"):\n",
" data = json.load(open(f\"{selected_feature_model}-cp-{k}-semantics.json\"))\n",
" print(f\"{selected_feature_model}-cp-{k}-semantics.json loaded\")\n",
" else: \n",
" print(f\"{selected_feature_model}-cp-{k}-semantics.json does not exist\")\n",
" case _:\n",
" if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
" data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n",
" else:\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cm_fd-svd-10-semantics.json loaded\n"
]
}
],
"source": [
"# Loading latent semantics\n",
"match selected_latent_space:\n",
" # LS1\n",
" case \"\":\n",
" file_prefix = f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}\"\n",
" file_name = file_prefix + \"-semantics.json\"\n",
" model_name = file_prefix + \"-model.joblib\"\n",
" if os.path.exists(file_name):\n",
" data = json.load(open(file_name))\n",
" print(file_name + \" loaded\")\n",
" else:\n",
" raise Exception(file_name + \" does not exist\")\n",
" # LDA model\n",
" if selected_dim_reduction_method == \"lda\":\n",
" if os.path.exists(model_name):\n",
" data_model = load(model_name)\n",
" print(model_name + \" loaded\")\n",
" else:\n",
" raise Exception(model_name + \" does not exist\")\n",
" # LS2\n",
" case \"cp\":\n",
" file_name = f\"{selected_feature_model}-cp-{k}-semantics.json\"\n",
" if os.path.exists(file_name):\n",
" data = json.load(open(file_name))\n",
" print(file_name + \" loaded\")\n",
" else:\n",
" raise Exception(file_name + \" does not exist\")\n",
" # LS3, LS4\n",
" case _:\n",
" file_name = f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"\n",
" if os.path.exists(file_name):\n",
" data = json.load(open(file_name))\n",
" print(file_name + \" loaded\")\n",
" else:\n",
" raise Exception(file_name + \" does not exist\")\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -118,7 +140,7 @@
"\n", "\n",
" match dim_reduction:\n", " match dim_reduction:\n",
"\n", "\n",
" case 'svd':\n", " case \"svd\":\n",
" U = np.array(data[\"image-semantic\"])\n", " U = np.array(data[\"image-semantic\"])\n",
" S = np.array(data[\"semantics-core\"])\n", " S = np.array(data[\"semantics-core\"])\n",
" if len(S.shape) == 1:\n", " if len(S.shape) == 1:\n",
@ -132,19 +154,15 @@
" if all_images[i][\"true_label\"] == label:\n", " if all_images[i][\"true_label\"] == label:\n",
" label_vectors.append(U[i])\n", " label_vectors.append(U[i])\n",
" label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n", " label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
" \n",
" comparison_feature_space = np.matmul(U, S)\n",
"\n",
" if latent_space == \"image_sim\":\n",
" print(np.array(label_rep).shape)\n",
" print(np.array(S).shape)\n",
" comparison_vector = np.matmul(label_rep, S)\n", " comparison_vector = np.matmul(label_rep, S)\n",
" else:\n", " else:\n",
" comparison_vector = np.matmul(np.matmul(label_rep, V), S)\n", " comparison_vector = np.matmul(np.matmul(label_rep, V), S)\n",
" \n", "\n",
" comparison_feature_space = np.matmul(U, S)\n",
"\n",
" case \"nmf\":\n", " case \"nmf\":\n",
" H = np.array(data['semantic-feature'])\n", " H = np.array(data[\"semantic-feature\"])\n",
" comparison_feature_space = W = np.array(data['image-semantic'])\n", " comparison_feature_space = W = np.array(data[\"image-semantic\"])\n",
" if latent_space == \"image_sim\":\n", " if latent_space == \"image_sim\":\n",
" label_vectors = []\n", " label_vectors = []\n",
" length = len(W)\n", " length = len(W)\n",
@ -152,52 +170,67 @@
" if all_images[i][\"true_label\"] == label:\n", " if all_images[i][\"true_label\"] == label:\n",
" label_vectors.append(W[i])\n", " label_vectors.append(W[i])\n",
" label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n", " label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
"\n",
" if latent_space == \"image_sim\":\n",
" comparison_vector = label_rep\n", " comparison_vector = label_rep\n",
" else:\n", " else:\n",
" comparison_vector = np.matmul(label_rep, np.transpose(H))\n", " min_value = np.min(label_rep)\n",
" feature_vectors_shifted = label_rep - min_value\n",
" comparison_vector = nmf(feature_vectors_shifted, H, update_H=False)\n",
"\n", "\n",
" case \"lda\":\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n",
" label_vectors = []\n",
" length = len(comparison_feature_space)\n",
" for i in range(length):\n",
" if all_images[i][\"true_label\"] == label:\n",
" label_vectors.append(comparison_feature_space[i])\n",
" comparison_vector = [sum(col) / len(col) for col in zip(*label_vectors)] \n",
" \n",
" case \"kmeans\":\n", " case \"kmeans\":\n",
" comparison_vector = []\n", " comparison_vector = []\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n", " comparison_feature_space = np.array(data[\"image-semantic\"])\n",
" S = np.array(data[\"semantic-feature\"])\n", " S = np.array(data[\"semantic-feature\"])\n",
"\n", "\n",
" if latent_space == \"image_sim\":\n",
" sim_matrix = np.array(data[\"sim-matrix\"])\n",
" label_vectors = []\n",
" length = len(sim_matrix)\n",
" for i in range(length):\n",
" if all_images[i][\"true_label\"] == label:\n",
" label_vectors.append(sim_matrix[i])\n",
" label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
"\n",
" # get label_rep's kmeans semantic\n",
" for centroid in S:\n", " for centroid in S:\n",
" comparison_vector.append(math.dist(label_rep, centroid))\n", " comparison_vector.append(math.dist(label_rep, centroid))\n",
"\n", "\n",
" n = len(comparison_feature_space)\n", " case \"lda\":\n",
"\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n",
" if latent_space == \"image_sim\":\n",
" label_vectors = []\n",
" length = len(comparison_feature_space)\n",
" for i in range(length):\n",
" if all_images[i][\"true_label\"] == label:\n",
" label_vectors.append(comparison_feature_space[i])\n",
" label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
" comparison_vector = label_rep\n",
" else:\n",
" min_value = np.min(label_rep)\n",
" feature_vectors_shifted = label_rep - min_value\n",
" comparison_vector = data_model.transform(\n",
" feature_vectors_shifted.flatten().reshape(1, -1)\n",
" ).flatten()\n",
"\n", "\n",
" distances = []\n", " distances = []\n",
" for i in range(n):\n", " for i in range(NUM_IMAGES):\n",
" distances.append({\"image_id\": i, \"label\": all_images[i][\"true_label\"], \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n", " distances.append(\n",
" {\n",
" \"image_id\": i,\n",
" \"label\": all_images[i][\"true_label\"],\n",
" \"distance\": math.dist(comparison_vector, comparison_feature_space[i]),\n",
" }\n",
" )\n",
"\n", "\n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)\n", " distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:k_2]\n",
"\n", "\n",
" similar_images = []\n", " for x in distances:\n",
"\n", " print(x)\n"
" for img in distances:\n",
" similar_images.append(img)\n",
" if len(similar_images) == knum:\n",
" break\n",
"\n",
"\n",
" for x in similar_images:\n",
" print(x)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -210,91 +243,97 @@
" if len(S.shape) == 1:\n", " if len(S.shape) == 1:\n",
" S = np.diag(S)\n", " S = np.diag(S)\n",
"\n", "\n",
" label_rep = calculate_label_representatives(fd_collection, label, selected_feature_model)\n",
" comparison_feature_space = np.matmul(label_rep, LS_f)\n", " comparison_feature_space = np.matmul(label_rep, LS_f)\n",
" comparison_vector = np.matmul(comparison_feature_space, S)\n", " comparison_vector = np.matmul(comparison_feature_space, S)\n",
"\n", "\n",
" comparison_image_space = np.matmul(LS_i, S)\n", " comparison_image_space = np.matmul(LS_i, S)\n",
" distances = []\n", " distances = []\n",
"\n", "\n",
" n = len(comparison_image_space)\n", " for i in range(NUM_IMAGES):\n",
" for i in range(n):\n", " distances.append(\n",
" distances.append({\"image\": i, \"distance\": math.dist(comparison_vector, comparison_image_space[i])})\n", " {\n",
" \n", " \"image\": i,\n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:knum]\n", " \"distance\": math.dist(comparison_vector, comparison_image_space[i]),\n",
" }\n",
" )\n",
"\n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:k_2]\n",
"\n", "\n",
" for x in distances:\n", " for x in distances:\n",
" print(x)" " print(x)\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def extract_similarities_ls3(dim_reduction, data, label):\n", "def extract_similarities_ls3(dim_reduction, data, label):\n",
"\n", "\n",
" match dim_reduction:\n", " if dim_reduction == \"svd\":\n",
"\n",
" case 'svd':\n",
" U = np.array(data[\"image-semantic\"])\n", " U = np.array(data[\"image-semantic\"])\n",
" S = np.array(data[\"semantics-core\"])\n", " S = np.array(data[\"semantics-core\"])\n",
" V = np.transpose(np.array(data[\"semantic-feature\"]))\n", " V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
"\n", "\n",
" comparison_feature_space = np.matmul(U, S)\n", " comparison_feature_space = np.matmul(U, S)\n",
" comparison_vector = comparison_feature_space[label]\n", " else:\n",
" \n",
" case \"nmf\":\n",
" comparison_feature_space = np.array(data['image-semantic'])\n",
" comparison_vector = comparison_feature_space[label]\n",
"\n",
" case \"lda\":\n",
" comparison_feature_space = np.array(data['image-semantic'])\n",
" comparison_vector = comparison_feature_space[label]\n",
"\n",
" case \"kmeans\":\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n", " comparison_feature_space = np.array(data[\"image-semantic\"])\n",
"\n",
" comparison_vector = comparison_feature_space[label]\n", " comparison_vector = comparison_feature_space[label]\n",
"\n", "\n",
" n = len(comparison_feature_space)\n",
" distances = []\n", " distances = []\n",
" for i in range(n):\n", " for i in range(NUM_LABELS):\n",
" if i != label:\n", " if i != label:\n",
" distances.append({\"label\": i, \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n", " distances.append(\n",
" {\n",
" \"label\": i,\n",
" \"distance\": math.dist(\n",
" comparison_vector, comparison_feature_space[i]\n",
" ),\n",
" }\n",
" )\n",
" \n",
" most_similar_label = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[0]\n",
" print(f\"Most similar label is {most_similar_label}\")\n",
"\n", "\n",
" label_distance = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:1]\n",
"\n",
" dataset = loadDataset(Caltech101)\n",
" similar_images = []\n", " similar_images = []\n",
" for i in range(len(dataset)):\n", " for i in range(len(dataset)):\n",
" _, l = dataset[i]\n", " _, l = dataset[i]\n",
" if l == label:\n", " if l == label:\n",
" similar_images.append(i)\n", " similar_images.append(i)\n",
"\n", "\n",
" similar_images = random.sample(similar_images, knum)\n", " similar_images = random.sample(similar_images, k_2)\n",
" images_distances = []\n", " images_distances = []\n",
" for i in similar_images:\n", " for i in similar_images:\n",
" images_distances.append({\"image_id\": i,\"distance\": label_distance[0][\"distance\"]})\n", " images_distances.append(\n",
" \n", " {\"image_id\": i, \"distance\": most_similar_label[\"distance\"]}\n",
" )\n",
"\n",
" for x in images_distances:\n", " for x in images_distances:\n",
" print(x)" " print(x)\n",
" \n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"{'image': 823, 'distance': 4006.335159603778}\n", "{'image_id': 499, 'label': 2, 'distance': 0.5891843615223927}\n",
"{'image': 809, 'distance': 4006.3942621209867}\n", "{'image_id': 449, 'label': 2, 'distance': 0.6183329800988425}\n",
"{'image': 806, 'distance': 4006.421689986329}\n", "{'image_id': 462, 'label': 2, 'distance': 0.7954630378173778}\n",
"{'image': 832, 'distance': 4006.422683206996}\n", "{'image_id': 512, 'label': 2, 'distance': 0.8431996693479317}\n",
"{'image': 830, 'distance': 4006.44733072835}\n" "{'image_id': 506, 'label': 2, 'distance': 0.8541263603745314}\n",
"{'image_id': 438, 'label': 2, 'distance': 0.9166483319951415}\n",
"{'image_id': 491, 'label': 2, 'distance': 0.9340236427529084}\n",
"{'image_id': 527, 'label': 2, 'distance': 0.9349318595824383}\n",
"{'image_id': 441, 'label': 2, 'distance': 0.9351164972683086}\n",
"{'image_id': 490, 'label': 2, 'distance': 0.9440402757056761}\n"
] ]
} }
], ],
@ -332,7 +371,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.6" "version": "3.10.5"
} }
}, },
"nbformat": 4, "nbformat": 4,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 71, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -21,21 +21,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 72, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import json\n",
"import os\n",
"import numpy as np\n",
"from utils import *\n", "from utils import *\n",
"import math\n", "warnings.filterwarnings('ignore')\n",
"import heapq" "%matplotlib inline\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 73, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -45,17 +42,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 74, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"image_sim-cm_fd-lda-10-model.joblib loaded\n"
]
}
],
"source": [ "source": [
"selected_latent_space = valid_latent_spaces[\n", "selected_latent_space = valid_latent_spaces[\n",
" str(input(\"Enter latent space - one of \" + str(list(valid_latent_spaces.keys()))))\n", " str(input(\"Enter latent space - one of \" + str(list(valid_latent_spaces.keys()))))\n",
@ -65,68 +54,85 @@
" str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n", " str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
"]\n", "]\n",
"\n", "\n",
"k = int(input(\"Enter value of k: \"))\n", "k = int(input(\"Enter value of k (no. of latent semantics): \"))\n",
"if k < 1:\n", "if k < 1:\n",
" raise ValueError(\"k should be a positive integer\")\n", " raise ValueError(\"k should be a positive integer\")\n",
"\n", "\n",
"selected_dim_reduction_method = str(\n", "k_2 = int(input(\"Enter value of k_2 (no. of similar images): \"))\n",
"if k_2 < 1:\n",
" raise ValueError(\"k_2 should be a positive integer\")\n",
"\n",
"if selected_latent_space != \"cp\":\n",
" selected_dim_reduction_method = str(\n",
" input(\n", " input(\n",
" \"Enter dimensionality reduction method - one of \"\n", " \"Enter dimensionality reduction method - one of \"\n",
" + str(list(valid_dim_reduction_methods.keys()))\n", " + str(list(valid_dim_reduction_methods.keys()))\n",
" )\n", " )\n",
")\n", " )\n",
"\n", "\n",
"label = int(input(\"Enter label: \"))\n", "label = int(input(\"Enter label: \"))\n",
"if label < 0 and label > 100:\n", "if label < 0 and label > 100:\n",
" raise ValueError(\"label should be between 0 and 100\")\n", " raise ValueError(\"label should be between 0 and 100\")\n",
"\n", "\n",
"knum = int(input(\"Enter value of knum: \"))\n", "label_rep = calculate_label_representatives(\n",
"if knum < 1:\n", " fd_collection, label, selected_feature_model\n",
" raise ValueError(\"knum should be a positive integer\")\n", ")\n"
"\n",
"label_rep = calculate_label_representatives(fd_collection, label, selected_feature_model)\n",
"\n",
"match selected_latent_space:\n",
" case \"\":\n",
" if selected_dim_reduction_method == \"lda\":\n",
" if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\") and os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
" if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\"):\n",
" model = load(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\")\n",
" data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib and json loaded\")\n",
" else:\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib does not exist\")\n",
" else:\n",
" if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
" data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n",
" else:\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n",
" case \"cp\":\n",
" if os.path.exists(f\"{selected_feature_model}-cp-{k}-semantics.json\"):\n",
" data = json.load(open(f\"{selected_feature_model}-cp-{k}-semantics.json\"))\n",
" print(f\"{selected_feature_model}-cp-{k}-semantics.json loaded\")\n",
" else:\n",
" print(f\"{selected_feature_model}-cp-{k}-semantics.json does not exist\")\n",
" case _:\n",
" if selected_dim_reduction_method == \"lda\":\n",
" if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\") and os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
" model = load(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\")\n",
" data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib loaded\")\n",
" else:\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib does not exist\")\n",
" else:\n",
" if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
" data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n",
" else:\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 75, "execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cm_fd-svd-10-semantics.json loaded\n"
]
}
],
"source": [
"# Loading latent semantics\n",
"match selected_latent_space:\n",
" # LS1\n",
" case \"\":\n",
" file_prefix = f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}\"\n",
" file_name = file_prefix + \"-semantics.json\"\n",
" model_name = file_prefix + \"-model.joblib\"\n",
" if os.path.exists(file_name):\n",
" data = json.load(open(file_name))\n",
" print(file_name + \" loaded\")\n",
" else:\n",
" raise Exception(file_name + \" does not exist\")\n",
" # LDA model\n",
" if selected_dim_reduction_method == \"lda\":\n",
" if os.path.exists(model_name):\n",
" data_model = load(model_name)\n",
" print(model_name + \" loaded\")\n",
" else:\n",
" raise Exception(model_name + \" does not exist\")\n",
" # LS2\n",
" case \"cp\":\n",
" file_name = f\"{selected_feature_model}-cp-{k}-semantics.json\"\n",
" if os.path.exists(file_name):\n",
" data = json.load(open(file_name))\n",
" print(file_name + \" loaded\")\n",
" else:\n",
" raise Exception(file_name + \" does not exist\")\n",
" # LS3, LS4\n",
" case _:\n",
" file_name = f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"\n",
" if os.path.exists(file_name):\n",
" data = json.load(open(file_name))\n",
" print(file_name + \" loaded\")\n",
" else:\n",
" raise Exception(file_name + \" does not exist\")\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -134,7 +140,7 @@
"\n", "\n",
" match dim_reduction:\n", " match dim_reduction:\n",
"\n", "\n",
" case 'svd':\n", " case \"svd\":\n",
" U = np.array(data[\"image-semantic\"])\n", " U = np.array(data[\"image-semantic\"])\n",
" S = np.array(data[\"semantics-core\"])\n", " S = np.array(data[\"semantics-core\"])\n",
" if len(S.shape) == 1:\n", " if len(S.shape) == 1:\n",
@ -144,21 +150,22 @@
" if latent_space == \"image_sim\":\n", " if latent_space == \"image_sim\":\n",
" label_vectors = []\n", " label_vectors = []\n",
" length = len(U)\n", " length = len(U)\n",
" # get label rep from img sim matrix itself\n",
" # i.e get label's images' semantics and take rep from those\n",
" for i in range(length):\n", " for i in range(length):\n",
" if all_images[i][\"true_label\"] == label:\n", " if all_images[i][\"true_label\"] == label:\n",
" label_vectors.append(U[i])\n", " label_vectors.append(U[i])\n",
" label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n", " label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
" \n",
" comparison_feature_space = np.matmul(U, S)\n",
"\n",
" if latent_space == \"image_sim\":\n",
" comparison_vector = np.matmul(label_rep, S)\n", " comparison_vector = np.matmul(label_rep, S)\n",
" else:\n", " else:\n",
" # use label rep from feature space\n",
" comparison_vector = np.matmul(np.matmul(label_rep, V), S)\n", " comparison_vector = np.matmul(np.matmul(label_rep, V), S)\n",
" \n", "\n",
" comparison_feature_space = np.matmul(U, S)\n",
"\n",
" case \"nmf\":\n", " case \"nmf\":\n",
" H = np.array(data['semantic-feature'])\n", " H = np.array(data[\"semantic-feature\"])\n",
" comparison_feature_space = W = np.array(data['image-semantic'])\n", " comparison_feature_space = W = np.array(data[\"image-semantic\"])\n",
" if latent_space == \"image_sim\":\n", " if latent_space == \"image_sim\":\n",
" label_vectors = []\n", " label_vectors = []\n",
" length = len(W)\n", " length = len(W)\n",
@ -166,11 +173,11 @@
" if all_images[i][\"true_label\"] == label:\n", " if all_images[i][\"true_label\"] == label:\n",
" label_vectors.append(W[i])\n", " label_vectors.append(W[i])\n",
" label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n", " label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
"\n",
" if latent_space == \"image_sim\":\n",
" comparison_vector = label_rep\n", " comparison_vector = label_rep\n",
" else:\n", " else:\n",
" comparison_vector = np.matmul(label_rep, np.transpose(H))\n", " min_value = np.min(label_rep)\n",
" feature_vectors_shifted = label_rep - min_value\n",
" comparison_vector = nmf(feature_vectors_shifted, H, update_H=False)\n",
"\n", "\n",
" case \"kmeans\":\n", " case \"kmeans\":\n",
" comparison_vector = []\n", " comparison_vector = []\n",
@ -186,24 +193,40 @@
" label_vectors.append(sim_matrix[i])\n", " label_vectors.append(sim_matrix[i])\n",
" label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n", " label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
"\n", "\n",
" # get label_rep's kmeans semantic\n",
" for centroid in S:\n", " for centroid in S:\n",
" comparison_vector.append(math.dist(label_rep, centroid))\n", " comparison_vector.append(math.dist(label_rep, centroid))\n",
"\n", "\n",
" case \"lda\":\n", " case \"lda\":\n",
"\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n", " comparison_feature_space = np.array(data[\"image-semantic\"])\n",
" if latent_space == \"image_sim\":\n",
" label_vectors = []\n", " label_vectors = []\n",
" length = len(comparison_feature_space)\n", " length = len(comparison_feature_space)\n",
" for i in range(length):\n", " for i in range(length):\n",
" if all_images[i][\"true_label\"] == label:\n", " if all_images[i][\"true_label\"] == label:\n",
" label_vectors.append(comparison_feature_space[i])\n", " label_vectors.append(comparison_feature_space[i])\n",
" comparison_vector = [sum(col) / len(col) for col in zip(*label_vectors)] \n", " label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
"\n", " comparison_vector = label_rep\n",
" n = len(comparison_feature_space)\n", " else:\n",
" min_value = np.min(label_rep)\n",
" feature_vectors_shifted = label_rep - min_value\n",
" comparison_vector = data_model.transform(\n",
" feature_vectors_shifted.flatten().reshape(1, -1)\n",
" ).flatten()\n",
"\n", "\n",
" distances = []\n", " distances = []\n",
" for i in range(n):\n", " for i in range(NUM_IMAGES):\n",
" if all_images[i][\"true_label\"] != label:\n", " if all_images[i][\"true_label\"] != label:\n",
" distances.append({\"image_id\": i, \"label\": all_images[i][\"true_label\"], \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n", " distances.append(\n",
" {\n",
" \"image_id\": i,\n",
" \"label\": all_images[i][\"true_label\"],\n",
" \"distance\": math.dist(\n",
" comparison_vector, comparison_feature_space[i]\n",
" ),\n",
" }\n",
" )\n",
"\n", "\n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)\n", " distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)\n",
"\n", "\n",
@ -211,21 +234,20 @@
" unique_labels = set()\n", " unique_labels = set()\n",
"\n", "\n",
" for img in distances:\n", " for img in distances:\n",
" if img['label'] not in unique_labels:\n", " if img[\"label\"] not in unique_labels:\n",
" similar_labels.append(img)\n", " similar_labels.append(img)\n",
" unique_labels.add(img[\"label\"])\n", " unique_labels.add(img[\"label\"])\n",
"\n", "\n",
" if len(similar_labels) == knum:\n", " if len(similar_labels) == k_2:\n",
" break\n", " break\n",
"\n", "\n",
"\n",
" for x in similar_labels:\n", " for x in similar_labels:\n",
" print(x)" " print(x)\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 76, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -245,76 +267,78 @@
" n = len(comparison_feature_space)\n", " n = len(comparison_feature_space)\n",
" for i in range(n):\n", " for i in range(n):\n",
" if i != label:\n", " if i != label:\n",
" distances.append({\"label\": i, \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n", " distances.append(\n",
" \n", " {\n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:knum]\n", " \"label\": i,\n",
" \"distance\": math.dist(\n",
" comparison_vector, comparison_feature_space[i]\n",
" ),\n",
" }\n",
" )\n",
"\n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:k_2]\n",
"\n", "\n",
" for x in distances:\n", " for x in distances:\n",
" print(x)" " print(x)\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def extract_similarities_ls3(dim_reduction, data, label):\n", "def extract_similarities_ls3(dim_reduction, data, label):\n",
"\n", "\n",
" match dim_reduction:\n", " if dim_reduction == \"svd\":\n",
"\n",
" case 'svd':\n",
" U = np.array(data[\"image-semantic\"])\n", " U = np.array(data[\"image-semantic\"])\n",
" S = np.array(data[\"semantics-core\"])\n", " S = np.array(data[\"semantics-core\"])\n",
" V = np.transpose(np.array(data[\"semantic-feature\"]))\n", " V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
"\n", "\n",
" comparison_feature_space = np.matmul(U, S)\n", " comparison_feature_space = np.matmul(U, S)\n",
" comparison_vector = comparison_feature_space[label]\n", " else:\n",
" \n",
" case \"nmf\":\n",
" comparison_feature_space = np.array(data['image-semantic'])\n",
" comparison_vector = comparison_feature_space[label]\n",
"\n",
" case \"kmeans\":\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n", " comparison_feature_space = np.array(data[\"image-semantic\"])\n",
"\n",
" comparison_vector = comparison_feature_space[label]\n", " comparison_vector = comparison_feature_space[label]\n",
"\n", "\n",
" case \"lda\":\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n",
" comparison_vector = comparison_feature_space[label] \n",
"\n",
"\n",
" n = len(comparison_feature_space)\n", " n = len(comparison_feature_space)\n",
" distances = []\n", " distances = []\n",
" for i in range(n):\n", " for i in range(n):\n",
" if i != label:\n", " if i != label:\n",
" distances.append({\"label\": i, \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n", " distances.append(\n",
" {\n",
" \"label\": i,\n",
" \"distance\": math.dist(\n",
" comparison_vector, comparison_feature_space[i]\n",
" ),\n",
" }\n",
" )\n",
"\n", "\n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:knum]\n", " distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:k_2]\n",
"\n", "\n",
" for x in distances:\n", " for x in distances:\n",
" print(x)" " print(x)\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"{'image_id': 2641, 'label': 46, 'distance': 0.013618215122607105}\n", "{'image_id': 88, 'label': 0, 'distance': 1.0674257256118014}\n",
"{'image_id': 1686, 'label': 16, 'distance': 0.015215365128880378}\n", "{'image_id': 3495, 'label': 74, 'distance': 1.2947824352796302}\n",
"{'image_id': 2310, 'label': 35, 'distance': 0.015383486193179943}\n", "{'image_id': 3548, 'label': 76, 'distance': 1.3839125472415652}\n",
"{'image_id': 3781, 'label': 84, 'distance': 0.01541886635507712}\n", "{'image_id': 2306, 'label': 35, 'distance': 1.4136775151406638}\n",
"{'image_id': 1483, 'label': 11, 'distance': 0.015474891099448796}\n", "{'image_id': 2271, 'label': 34, 'distance': 1.560355392987607}\n",
"{'image_id': 2719, 'label': 48, 'distance': 0.01960489858697963}\n", "{'image_id': 2097, 'label': 28, 'distance': 1.6213029580319027}\n",
"{'image_id': 3787, 'label': 85, 'distance': 0.02006387165132467}\n", "{'image_id': 2444, 'label': 39, 'distance': 1.6252904256132055}\n",
"{'image_id': 3877, 'label': 87, 'distance': 0.02050382578938892}\n", "{'image_id': 1656, 'label': 15, 'distance': 1.6283345060828458}\n",
"{'image_id': 3719, 'label': 82, 'distance': 0.02293235381986182}\n", "{'image_id': 3223, 'label': 63, 'distance': 1.6574252628682995}\n",
"{'image_id': 3403, 'label': 70, 'distance': 0.024912695992711693}\n" "{'image_id': 3717, 'label': 82, 'distance': 1.6796825272768603}\n"
] ]
} }
], ],
@ -358,7 +382,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.4" "version": "3.10.5"
} }
}, },
"nbformat": 4, "nbformat": 4,


@ -21,6 +21,7 @@ import tensorly as tl
 # OS and env
 import json
+import os
 from os import getenv
 from dotenv import load_dotenv
 import warnings
@ -354,6 +355,12 @@ def pearson_distance_measure(img_1_fd, img_2_fd):
     # such that lower distance implies more similarity
     return 0.5 * (1 - pearsonr(img_1_fd_reshaped, img_2_fd_reshaped).statistic)
+
+
+def kl_divergence_measure(p, q):
+    # Avoid division by zero
+    epsilon = 1e-10
+    return np.sum(p * np.log((p + epsilon) / (q + epsilon)))
 
 valid_feature_models = {
     "cm": "cm_fd",