mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 07:44:07 +00:00
task10
This commit is contained in:
parent
3ca1614746
commit
c652a6606e
290
Phase 2/task_10.ipynb
Normal file
290
Phase 2/task_10.ipynb
Normal file
@ -0,0 +1,290 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The autoreload extension is already loaded. To reload it, use:\n",
|
||||
" %reload_ext autoreload\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import os\n",
|
||||
"import numpy as np\n",
|
||||
"from utils import *\n",
|
||||
"import math\n",
|
||||
"import heapq"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n",
|
||||
"all_images = fd_collection.find()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"label_sim-cm_fd-kmeans-10-semantics.json loaded\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"selected_latent_space = valid_latent_spaces[\n",
|
||||
" str(input(\"Enter latent space - one of \" + str(list(valid_latent_spaces.keys()))))\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"selected_feature_model = valid_feature_models[\n",
|
||||
" str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"k = int(input(\"Enter value of k: \"))\n",
|
||||
"if k < 1:\n",
|
||||
" raise ValueError(\"k should be a positive integer\")\n",
|
||||
"\n",
|
||||
"selected_dim_reduction_method = str(\n",
|
||||
" input(\n",
|
||||
" \"Enter dimensionality reduction method - one of \"\n",
|
||||
" + str(list(valid_dim_reduction_methods.keys()))\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"label = int(input(\"Enter label: \"))\n",
|
||||
"if label < 0 and label > 100:\n",
|
||||
" raise ValueError(\"k should be between 0 and 100\")\n",
|
||||
"\n",
|
||||
"knum = int(input(\"Enter value of knum: \"))\n",
|
||||
"if knum < 1:\n",
|
||||
" raise ValueError(\"knum should be a positive integer\")\n",
|
||||
"\n",
|
||||
"label_rep = calculate_label_representatives(fd_collection, label, selected_feature_model)\n",
|
||||
"\n",
|
||||
"match selected_latent_space:\n",
|
||||
" case \"\":\n",
|
||||
" if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
|
||||
" data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
|
||||
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n",
|
||||
" else:\n",
|
||||
" print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n",
|
||||
" case \"cp\":\n",
|
||||
" if os.path.exists(f\"{selected_feature_model}-cp-{k}-semantics.json\"):\n",
|
||||
" data = json.load(open(f\"{selected_feature_model}-cp-{k}-semantics.json\"))\n",
|
||||
" print(f\"{selected_feature_model}-cp-{k}-semantics.json loaded\")\n",
|
||||
" else: \n",
|
||||
" print(f\"{selected_feature_model}-cp-{k}-semantics.json does not exist\")\n",
|
||||
" case _:\n",
|
||||
" if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n",
|
||||
" data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n",
|
||||
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n",
|
||||
" else:\n",
|
||||
" print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def extract_similarities_ls1_ls4(latent_space, dim_reduction, data, label, label_rep):\n",
|
||||
"\n",
|
||||
" match dim_reduction:\n",
|
||||
"\n",
|
||||
" case 'svd':\n",
|
||||
" U = np.array(data[\"image-semantic\"])\n",
|
||||
" S = np.array(data[\"semantics-core\"])\n",
|
||||
" if len(S.shape) == 1:\n",
|
||||
" S = np.diag(S)\n",
|
||||
" V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
|
||||
"\n",
|
||||
" if latent_space == \"image_sim\":\n",
|
||||
" label_vectors = []\n",
|
||||
" length = len(U)\n",
|
||||
" for i in range(length):\n",
|
||||
" if all_images[i][\"true_label\"] == label:\n",
|
||||
" label_vectors.append(U[i])\n",
|
||||
" label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
|
||||
" \n",
|
||||
" comparison_feature_space = np.matmul(U, S)\n",
|
||||
"\n",
|
||||
" if latent_space == \"image_sim\":\n",
|
||||
" print(np.array(label_rep).shape)\n",
|
||||
" print(np.array(S).shape)\n",
|
||||
" comparison_vector = np.matmul(label_rep, S)\n",
|
||||
" else:\n",
|
||||
" comparison_vector = np.matmul(np.matmul(label_rep, V), S)\n",
|
||||
" \n",
|
||||
" case \"nmf\":\n",
|
||||
" H = np.array(data['semantic-feature'])\n",
|
||||
" comparison_feature_space = W = np.array(data['image-semantic'])\n",
|
||||
" if latent_space == \"image_sim\":\n",
|
||||
" label_vectors = []\n",
|
||||
" length = len(W)\n",
|
||||
" for i in range(length):\n",
|
||||
" if all_images[i][\"true_label\"] == label:\n",
|
||||
" label_vectors.append(W[i])\n",
|
||||
" label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n",
|
||||
"\n",
|
||||
" if latent_space == \"image_sim\":\n",
|
||||
" comparison_vector = label_rep\n",
|
||||
" else:\n",
|
||||
" comparison_vector = np.matmul(label_rep, np.transpose(H))\n",
|
||||
"\n",
|
||||
" case \"kmeans\":\n",
|
||||
" comparison_vector = []\n",
|
||||
" comparison_feature_space = np.array(data[\"image-semantic\"])\n",
|
||||
" S = np.array(data[\"semantic-feature\"])\n",
|
||||
"\n",
|
||||
" for centroid in S:\n",
|
||||
" comparison_vector.append(math.dist(label_rep, centroid))\n",
|
||||
"\n",
|
||||
" n = len(comparison_feature_space)\n",
|
||||
"\n",
|
||||
" distances = []\n",
|
||||
" for i in range(n):\n",
|
||||
" distances.append({\"image_id\": i, \"label\": all_images[i][\"true_label\"], \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
|
||||
"\n",
|
||||
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)\n",
|
||||
"\n",
|
||||
" similar_images = []\n",
|
||||
"\n",
|
||||
" for img in distances:\n",
|
||||
" similar_images.append(img)\n",
|
||||
" if len(similar_images) == knum:\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" for x in similar_images:\n",
|
||||
" print(x)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def extract_similarities_ls3(dim_reduction, data, label):\n",
|
||||
"\n",
|
||||
" match dim_reduction:\n",
|
||||
"\n",
|
||||
" case 'svd':\n",
|
||||
" U = np.array(data[\"image-semantic\"])\n",
|
||||
" S = np.array(data[\"semantics-core\"])\n",
|
||||
" V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
|
||||
"\n",
|
||||
" comparison_feature_space = np.matmul(U, S)\n",
|
||||
" comparison_vector = comparison_feature_space[label]\n",
|
||||
" \n",
|
||||
" case \"nmf\":\n",
|
||||
" comparison_feature_space = np.array(data['image-semantic'])\n",
|
||||
" comparison_vector = comparison_feature_space[label]\n",
|
||||
"\n",
|
||||
" case \"kmeans\":\n",
|
||||
" comparison_feature_space = np.array(data[\"image-semantic\"])\n",
|
||||
" comparison_vector = comparison_feature_space[label]\n",
|
||||
"\n",
|
||||
" n = len(comparison_feature_space)\n",
|
||||
" distances = []\n",
|
||||
" for i in range(n):\n",
|
||||
" if i != label:\n",
|
||||
" distances.append({\"label\": i, \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
|
||||
"\n",
|
||||
" label_distance = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:1]\n",
|
||||
"\n",
|
||||
" dataset = loadDataset(Caltech101)\n",
|
||||
" similar_images = []\n",
|
||||
" for i in range(len(dataset)):\n",
|
||||
" _, l = dataset[i]\n",
|
||||
" if l == label:\n",
|
||||
" similar_images.append(i)\n",
|
||||
"\n",
|
||||
" similar_images = random.sample(similar_images, knum)\n",
|
||||
" images_distances = []\n",
|
||||
" for i in similar_images:\n",
|
||||
" images_distances.append({\"image_id\": i,\"distance\": label_distance[0][\"distance\"]})\n",
|
||||
" \n",
|
||||
" for x in images_distances:\n",
|
||||
" print(x)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'image_id': 2309, 'distance': 4.117664288663269}\n",
|
||||
"{'image_id': 1930, 'distance': 4.117664288663269}\n",
|
||||
"{'image_id': 1940, 'distance': 4.117664288663269}\n",
|
||||
"{'image_id': 1929, 'distance': 4.117664288663269}\n",
|
||||
"{'image_id': 2250, 'distance': 4.117664288663269}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"match selected_latent_space:\n",
|
||||
"\n",
|
||||
" case \"\" | \"image_sim\":\n",
|
||||
" \n",
|
||||
" extract_similarities_ls1_ls4(selected_latent_space, selected_dim_reduction_method, data, label, label_rep)\n",
|
||||
"\n",
|
||||
" case \"label_sim\":\n",
|
||||
"\n",
|
||||
" extract_similarities_ls3(selected_dim_reduction_method, data, label)\n",
|
||||
" "
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user