mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 11:04:07 +00:00
ls1 and ls3 svd and nmf
This commit is contained in:
parent
a0d7b500b3
commit
6e012173f0
@ -2,9 +2,18 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 62,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"The autoreload extension is already loaded. To reload it, use:\n",
|
||||||
|
" %reload_ext autoreload\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"%load_ext autoreload\n",
|
"%load_ext autoreload\n",
|
||||||
"%autoreload 2"
|
"%autoreload 2"
|
||||||
@ -12,18 +21,31 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 63,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"from utils import *"
|
"import numpy as np\n",
|
||||||
|
"from utils import *\n",
|
||||||
|
"import math\n",
|
||||||
|
"import heapq"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 64,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n",
|
||||||
|
"all_images = fd_collection.find()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 65,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -50,6 +72,11 @@
|
|||||||
"if label < 0 and label > 100:\n",
|
"if label < 0 and label > 100:\n",
|
||||||
" raise ValueError(\"k should be between 0 and 100\")\n",
|
" raise ValueError(\"k should be between 0 and 100\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"knum = int(input(\"Enter value of knum: \"))\n",
|
||||||
|
"if knum < 1:\n",
|
||||||
|
" raise ValueError(\"knum should be a positive integer\")\n",
|
||||||
|
"\n",
|
||||||
|
"label_rep = calculate_label_representatives(fd_collection, label, selected_feature_model)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"match selected_latent_space:\n",
|
"match selected_latent_space:\n",
|
||||||
" case \"\":\n",
|
" case \"\":\n",
|
||||||
@ -72,34 +99,169 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 66,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": [
|
||||||
|
"def extract_similarities_ls1(dim_reduction, data, label, label_rep):\n",
|
||||||
|
"\n",
|
||||||
|
" match dim_reduction:\n",
|
||||||
|
"\n",
|
||||||
|
" case 'svd':\n",
|
||||||
|
" U = np.array(data[\"image-semantic\"])\n",
|
||||||
|
" S = np.array(data[\"semantics-core\"])\n",
|
||||||
|
" V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
|
||||||
|
"\n",
|
||||||
|
" comparison_feature_space = np.matmul(U, S)\n",
|
||||||
|
" comparison_vector = np.matmul(np.matmul(label_rep, V), S)\n",
|
||||||
|
" \n",
|
||||||
|
" case \"nmf\":\n",
|
||||||
|
" H = np.array(data['semantic-feature'])\n",
|
||||||
|
" comparison_feature_space = np.array(data['image-semantic'])\n",
|
||||||
|
" comparison_vector = np.matmul(label_rep, np.transpose(H))\n",
|
||||||
|
"\n",
|
||||||
|
" print(comparison_feature_space.shape)\n",
|
||||||
|
" n = len(comparison_feature_space)\n",
|
||||||
|
" \n",
|
||||||
|
" distances = []\n",
|
||||||
|
" for i in range(n):\n",
|
||||||
|
" if i != label:\n",
|
||||||
|
" distances.append({\"image_id\": i, \"label\": all_images[i][\"true_label\"],\"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
|
||||||
|
"\n",
|
||||||
|
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)\n",
|
||||||
|
"\n",
|
||||||
|
" similar_labels = []\n",
|
||||||
|
" unique_labels = set()\n",
|
||||||
|
"\n",
|
||||||
|
" for img in distances:\n",
|
||||||
|
" if img['label'] not in unique_labels:\n",
|
||||||
|
" similar_labels.append(img)\n",
|
||||||
|
" unique_labels.add(img[\"label\"])\n",
|
||||||
|
"\n",
|
||||||
|
" if len(similar_labels) == knum:\n",
|
||||||
|
" break\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" for x in similar_labels:\n",
|
||||||
|
" print(x)"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": 67,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def extract_similarities_ls3(dim_reduction, data, label):\n",
|
||||||
|
"\n",
|
||||||
|
" match dim_reduction:\n",
|
||||||
|
"\n",
|
||||||
|
" case 'svd':\n",
|
||||||
|
" U = np.array(data[\"image-semantic\"])\n",
|
||||||
|
" S = np.array(data[\"semantics-core\"])\n",
|
||||||
|
" V = np.transpose(np.array(data[\"semantic-feature\"]))\n",
|
||||||
|
"\n",
|
||||||
|
" comparison_feature_space = np.matmul(U, S)\n",
|
||||||
|
" comparison_vector = comparison_feature_space[label]\n",
|
||||||
|
" \n",
|
||||||
|
" case \"nmf\":\n",
|
||||||
|
" comparison_feature_space = np.array(data['image-semantic'])\n",
|
||||||
|
" comparison_vector = comparison_feature_space[label]\n",
|
||||||
|
"\n",
|
||||||
|
" n = len(comparison_feature_space)\n",
|
||||||
|
" distances = []\n",
|
||||||
|
" for i in range(n):\n",
|
||||||
|
" if i != label:\n",
|
||||||
|
" distances.append({\"label\": i, \"distance\": math.dist(comparison_vector, comparison_feature_space[i])})\n",
|
||||||
|
"\n",
|
||||||
|
" distances = sorted(distances, key=lambda x: x[\"distance\"], reverse=False)[:knum]\n",
|
||||||
|
"\n",
|
||||||
|
" for x in distances:\n",
|
||||||
|
" print(x)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 68,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"(101, 10)\n",
|
"{'label': 4, 'distance': 0.9931105104385977}\n",
|
||||||
"(10, 10)\n",
|
"{'label': 92, 'distance': 1.1209182190288185}\n",
|
||||||
"(10, 101)\n"
|
"{'label': 65, 'distance': 1.2107732156271573}\n",
|
||||||
|
"{'label': 21, 'distance': 1.5053484881391492}\n",
|
||||||
|
"{'label': 2, 'distance': 1.698430977110922}\n",
|
||||||
|
"{'label': 100, 'distance': 1.8636096001573115}\n",
|
||||||
|
"{'label': 95, 'distance': 2.003755992104511}\n",
|
||||||
|
"{'label': 11, 'distance': 2.069066281581252}\n",
|
||||||
|
"{'label': 60, 'distance': 2.070894540798742}\n",
|
||||||
|
"{'label': 88, 'distance': 2.0925931256031}\n",
|
||||||
|
"{'label': 43, 'distance': 2.1056747598887218}\n",
|
||||||
|
"{'label': 33, 'distance': 2.165431005806523}\n",
|
||||||
|
"{'label': 90, 'distance': 2.174626607979455}\n",
|
||||||
|
"{'label': 83, 'distance': 2.188609736988739}\n",
|
||||||
|
"{'label': 68, 'distance': 2.209562202827548}\n",
|
||||||
|
"{'label': 59, 'distance': 2.27130902508622}\n",
|
||||||
|
"{'label': 35, 'distance': 2.276916489521396}\n",
|
||||||
|
"{'label': 70, 'distance': 2.283111150497479}\n",
|
||||||
|
"{'label': 53, 'distance': 2.2871296343421075}\n",
|
||||||
|
"{'label': 42, 'distance': 2.2943393449254192}\n",
|
||||||
|
"{'label': 1, 'distance': 2.299515307388396}\n",
|
||||||
|
"{'label': 89, 'distance': 2.300444335700286}\n",
|
||||||
|
"{'label': 64, 'distance': 2.3105619552648906}\n",
|
||||||
|
"{'label': 47, 'distance': 2.3258018764464126}\n",
|
||||||
|
"{'label': 28, 'distance': 2.33793138436563}\n",
|
||||||
|
"{'label': 91, 'distance': 2.348432279582375}\n",
|
||||||
|
"{'label': 66, 'distance': 2.378823252101462}\n",
|
||||||
|
"{'label': 52, 'distance': 2.3845656934663344}\n",
|
||||||
|
"{'label': 17, 'distance': 2.3851103284430946}\n",
|
||||||
|
"{'label': 29, 'distance': 2.392106657184808}\n",
|
||||||
|
"{'label': 46, 'distance': 2.4059349825734024}\n",
|
||||||
|
"{'label': 98, 'distance': 2.425981349727766}\n",
|
||||||
|
"{'label': 12, 'distance': 2.4320238781945878}\n",
|
||||||
|
"{'label': 5, 'distance': 2.433658250868235}\n",
|
||||||
|
"{'label': 72, 'distance': 2.4438014606638965}\n",
|
||||||
|
"{'label': 96, 'distance': 2.446857205149324}\n",
|
||||||
|
"{'label': 18, 'distance': 2.4473786634019508}\n",
|
||||||
|
"{'label': 0, 'distance': 2.4482053195868017}\n",
|
||||||
|
"{'label': 49, 'distance': 2.451590137889849}\n",
|
||||||
|
"{'label': 14, 'distance': 2.4717097207497414}\n",
|
||||||
|
"{'label': 85, 'distance': 2.473715190942228}\n",
|
||||||
|
"{'label': 19, 'distance': 2.4754273396104534}\n",
|
||||||
|
"{'label': 51, 'distance': 2.4810475345400316}\n",
|
||||||
|
"{'label': 75, 'distance': 2.4850838216864224}\n",
|
||||||
|
"{'label': 93, 'distance': 2.4867224184341175}\n",
|
||||||
|
"{'label': 44, 'distance': 2.498509815319209}\n",
|
||||||
|
"{'label': 82, 'distance': 2.501339416798757}\n",
|
||||||
|
"{'label': 54, 'distance': 2.506342353975533}\n",
|
||||||
|
"{'label': 9, 'distance': 2.5065630929096394}\n",
|
||||||
|
"{'label': 41, 'distance': 2.51345667730748}\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"match selected_latent_space:\n",
|
"match selected_latent_space:\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" case \"\":\n",
|
||||||
|
" \n",
|
||||||
|
" extract_similarities_ls1(selected_dim_reduction_method, data, label, label_rep)\n",
|
||||||
|
"\n",
|
||||||
" case \"label_sim\":\n",
|
" case \"label_sim\":\n",
|
||||||
"\n",
|
"\n",
|
||||||
" extract_simila\n"
|
" extract_similarities_ls3(selected_dim_reduction_method, data, label)\n",
|
||||||
|
" "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
|||||||
@ -841,6 +841,25 @@ def svd(matrix, k):
|
|||||||
|
|
||||||
return left_singular_vectors, np.diag(singular_values), right_singular_vectors.T
|
return left_singular_vectors, np.diag(singular_values), right_singular_vectors.T
|
||||||
|
|
||||||
|
def nmf(matrix, k, num_iterations=100, eps=1e-10):
    """Factor a non-negative matrix as ``matrix ~= W @ H``.

    Uses alternating multiplicative updates of ``H`` and ``W`` starting from
    random non-negative factors drawn with ``np.random.rand`` (so results
    follow NumPy's global random state).

    Args:
        matrix: 2-D array-like with no negative entries.
        k: Number of latent components.
        num_iterations: Number of update sweeps to run.
        eps: Small constant added to each denominator so an all-zero row or
            column produces 0 instead of 0/0 = NaN.

    Returns:
        Tuple ``(W, H)`` with shapes ``(rows, k)`` and ``(k, cols)``.

    Raises:
        ValueError: If ``matrix`` is not 2-D or contains negative values.
    """
    matrix = np.asarray(matrix, dtype=float)
    if matrix.ndim != 2:
        raise ValueError("nmf expects a 2-D matrix")
    if np.any(matrix < 0):
        raise ValueError("nmf expects a matrix with no negative values")

    d1, d2 = matrix.shape
    # Initialize W and H with random non-negative values.
    W = np.random.rand(d1, k)
    H = np.random.rand(k, d2)

    for _ in range(num_iterations):
        # Update H, then W using the freshly updated H.
        H *= np.dot(W.T, matrix) / (np.dot(np.dot(W.T, W), H) + eps)
        W *= np.dot(matrix, H.T) / (np.dot(W, np.dot(H, H.T)) + eps)

    return W, H
|
||||||
|
|
||||||
def extract_latent_semantics_from_feature_model(
|
def extract_latent_semantics_from_feature_model(
|
||||||
fd_collection,
|
fd_collection,
|
||||||
k,
|
k,
|
||||||
@ -1087,8 +1106,7 @@ def extract_latent_semantics_from_sim_matrix(
|
|||||||
)
|
)
|
||||||
model.fit(feature_vectors_shifted)
|
model.fit(feature_vectors_shifted)
|
||||||
|
|
||||||
W = model.transform(feature_vectors_shifted)
|
W, H = nmf(feature_vectors_shifted, k = k)
|
||||||
H = model.components_
|
|
||||||
|
|
||||||
all_latent_semantics = {
|
all_latent_semantics = {
|
||||||
"image-semantic": W.tolist(),
|
"image-semantic": W.tolist(),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user