{
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import numpy as np\n",
"from utils import *\n",
"import math\n",
"import heapq\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n",
"all_images = fd_collection.find()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"label_sim-cm_fd-lda-10-model.joblib loaded\n"
]
}
],
"source": [
"selected_latent_space = valid_latent_spaces[\n",
" str(input(\"Enter latent space - one of \" + str(list(valid_latent_spaces.keys()))))\n",
"]\n",
"\n",
"selected_feature_model = valid_feature_models[\n",
" str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
"]\n",
"\n",
"k = int(input(\"Enter value of k: \"))\n",
"if k < 1:\n",
" raise ValueError(\"k should be a positive integer\")\n",
"\n",
"selected_dim_reduction_method = str(\n",
" input(\n",
" \"Enter dimensionality reduction method - one of \"\n",
" + str(list(valid_dim_reduction_methods.keys()))\n",
" )\n",
")\n",
"\n",
"image_id = int(input(\"Enter image ID: \"))\n",
"if image_id < 0 and image_id > 8676 and image_id % 2 != 0:\n",
" raise ValueError(\"image id should be even number between 0 and 8676\")\n",
"\n",
"img_label = all_images[int(image_id / 2)][\"true_label\"]\n",
"\n",
"knum = int(input(\"Enter value of knum: \"))\n",
"if knum < 1:\n",
" raise ValueError(\"knum should be a positive integer\")\n",
"\n",
"match selected_latent_space:\n",
" case \"\":\n",
" if selected_dim_reduction_method == \"lda\":\n",
" if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\") and os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
" if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\"):\n",
" model = load(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\")\n",
" data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib and json loaded\")\n",
" else:\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib does not exist\")\n",
" else:\n",
" if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
" data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n",
" else:\n",
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n",
" case \"cp\":\n",
" if os.path.exists(f\"{selected_feature_model}-cp-{k}-semantics.json\"):\n",
" data = json.load(open(f\"{selected_feature_model}-cp-{k}-semantics.json\"))\n",
" print(f\"{selected_feature_model}-cp-{k}-semantics.json loaded\")\n",
" else:\n",
" print(f\"{selected_feature_model}-cp-{k}-semantics.json does not exist\")\n",
" case _:\n",
" if selected_dim_reduction_method == \"lda\":\n",
" if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\") and os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
" model = load(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\")\n",
" data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib loaded\")\n",
" else:\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib does not exist\")\n",
" else:\n",
" if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}img-{k}-semantics.json\"):\n",
" data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n",
" else:\n",
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"def extract_similarities_ls1_ls4(latent_space, dim_reduction, selected_feature_model, data, image_id):\n",
"\n",
" image_fd = np.array(all_images[int(image_id / 2)][selected_feature_model]).flatten()\n",
"\n",
" match dim_reduction:\n",
"\n",
" case 'svd':\n",
" U = np.array(data[\"image-semantic\"])\n",
" S = np.array(data[\"semantics-core\"])\n",
" if len(S.shape) == 1:\n",
" S = np.diag(S)\n",
" V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
" \n",
" comparison_feature_space = np.matmul(U, S)\n",
"\n",
" if latent_space == \"image_sim\":\n",
" comparison_vector = comparison_feature_space[int(image_id / 2)]\n",
" else:\n",
" comparison_vector = np.matmul(np.matmul(image_fd, V), S)\n",
" \n",
" case \"nmf\":\n",
" H = np.array(data['semantic-feature'])\n",
" comparison_feature_space = np.array(data['image-semantic'])\n",
"\n",
" if latent_space == \"image_sim\":\n",
" comparison_vector = comparison_feature_space[int(image_id / 2)]\n",
" else:\n",
" comparison_vector = np.matmul(image_fd, np.transpose(H))\n",
"\n",
" case \"kmeans\":\n",
" comparison_vector = []\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n",
" S = np.array(data[\"semantic-feature\"])\n",
"\n",
" for centroid in S:\n",
" if latent_space == \"image_sim\":\n",
" sim_matrix = np.array(data[\"sim-matrix\"])\n",
" comparison_vector.append(math.dist(sim_matrix[int(image_id / 2)], centroid))\n",
" else:\n",
" comparison_vector.append(math.dist(image_fd, centroid))\n",
" \n",
" case \"lda\":\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n",
" if latent_space == \"image_sim\":\n",
" comparison_vector = comparison_feature_space[int(image_id / 2)]\n",
" else:\n",
" fd = np.array(all_images[int(image_id / 2)][selected_feature_model])\n",
" min_value = np.min(fd)\n",
" feature_vectors_shifted = fd - min_value\n",
" comparison_vector = model.transform(feature_vectors_shifted.flatten().reshape(1, -1)).flatten()\n",
" print(comparison_feature_space.shape)\n",
" print(comparison_vector.shape)\n",
"\n",
" n = len(comparison_feature_space)\n",
"\n",
" distances = []\n",
" for i in range(n):\n",
" if (i * 2) != image_id:\n",
" distances.append({\"image_id\": i, \"label\": all_images[i][\"true_label\"], \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
"\n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)\n",
"\n",
" similar_labels = []\n",
" unique_labels = set()\n",
"\n",
" for img in distances:\n",
" if img['label'] not in unique_labels:\n",
" similar_labels.append(img)\n",
" unique_labels.add(img[\"label\"])\n",
"\n",
" if len(similar_labels) == knum:\n",
" break\n",
"\n",
"\n",
" for x in similar_labels:\n",
" print(x)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"def extract_similarities_ls2(data, image_id):\n",
"\n",
" IS = np.array(data[\"image-semantic\"])\n",
" S = np.array(data[\"semantics-core\"])\n",
"\n",
" if len(S.shape) == 1:\n",
" S = np.diag(S)\n",
"\n",
" comparison_feature_space = np.matmul(IS, S)\n",
" comparison_vector = comparison_feature_space[int(image_id / 2)]\n",
"\n",
" distances = []\n",
"\n",
" n = len(comparison_feature_space)\n",
" for i in range(n):\n",
" if i != (image_id / 2):\n",
" distances.append({\"image_id\": i * 2, \"label\": all_images[i][\"true_label\"], \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
" \n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)\n",
"\n",
" similar_labels = []\n",
" unique_labels = set()\n",
"\n",
" for img in distances:\n",
" if img[\"label\"] not in unique_labels and img[\"label\"] != img_label:\n",
" similar_labels.append(img)\n",
" unique_labels.add(img[\"label\"])\n",
"\n",
" if len(similar_labels) == knum:\n",
" break\n",
"\n",
"\n",
" for x in similar_labels:\n",
" print(x)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"def extract_similarities_ls3(dim_reduction, data, image_id):\n",
"\n",
" img_label = all_images[int(image_id / 2)][\"true_label\"]\n",
"\n",
" match dim_reduction:\n",
"\n",
" case 'svd':\n",
" U = np.array(data[\"image-semantic\"])\n",
" S = np.array(data[\"semantics-core\"])\n",
" V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
"\n",
" comparison_feature_space = np.matmul(U, S)\n",
" comparison_vector = comparison_feature_space[img_label]\n",
" \n",
" case \"nmf\":\n",
" comparison_feature_space = np.array(data['image-semantic'])\n",
" comparison_vector = comparison_feature_space[img_label]\n",
"\n",
" case \"kmeans\":\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n",
" comparison_vector = comparison_feature_space[img_label]\n",
"\n",
" case \"lda\":\n",
" comparison_feature_space = np.array(data[\"image-semantic\"])\n",
" comparison_vector = comparison_feature_space[img_label]\n",
"\n",
" n = len(comparison_feature_space)\n",
" distance = float('inf')\n",
" most_similar_label = img_label\n",
" distances = []\n",
" for i in range(n):\n",
" if i != img_label:\n",
" distances.append({\"label\": i, \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
"\n",
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:knum]\n",
"\n",
" for img in distances:\n",
" print(img)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'label': 19, 'distance': 6.355424425784916e-06}\n",
"{'label': 33, 'distance': 1.2740918489579356e-05}\n",
"{'label': 54, 'distance': 6.85116641512525e-05}\n",
"{'label': 49, 'distance': 8.314856743319156e-05}\n",
"{'label': 28, 'distance': 0.00016883047586624644}\n",
"{'label': 36, 'distance': 0.0002122873813686887}\n",
"{'label': 35, 'distance': 0.00023350326417372972}\n",
"{'label': 44, 'distance': 0.0002406295461878455}\n",
"{'label': 9, 'distance': 0.00024234313765943418}\n",
"{'label': 10, 'distance': 0.0002640071831902495}\n"
]
}
],
"source": [
"match selected_latent_space:\n",
"\n",
" case \"\" | \"image_sim\":\n",
" \n",
" extract_similarities_ls1_ls4(selected_latent_space, selected_dim_reduction_method, selected_feature_model, data, image_id)\n",
"\n",
" case \"label_sim\":\n",
"\n",
" extract_similarities_ls3(selected_dim_reduction_method, data, image_id)\n",
"\n",
" case \"cp\":\n",
"\n",
" extract_similarities_ls2(data, image_id)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}