Refactored Pranav's code for task 5

This commit is contained in:
Kaushik Narayan R 2023-10-11 16:56:23 -07:00
parent 9e05228e94
commit 6e21bc168a
3 changed files with 274 additions and 154 deletions

View File

@ -1,142 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from pymongo import MongoClient\n",
"from task0a import *\n",
"import scipy\n",
"import numpy as np\n",
"from sklearn.decomposition import NMF\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.cluster import KMeans\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client = MongoClient()\n",
"client = MongoClient(host=\"localhost\", port=27017)\n",
"\n",
"# Select the database\n",
"db = client.Multimedia_Web_DBs\n",
"\n",
"# Fetch all documents from the collection and then sort them by \"_id\"\n",
"feature_descriptors = list(db.Caltech101_Feature_Descriptors.find({}))\n",
"feature_descriptors = sorted(list(db.Caltech101_Feature_Descriptors.find({})), key=lambda x: x[\"_id\"], reverse=False)\n",
"\n",
"num_labels = 101"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def extractKLatentSemantics(k, feature_model, dim_reduction):\n",
"\n",
" feature_vectors = [x[feature_model] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
" feature_labels = [x[\"label\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
" feature_ids = [x[\"_id\"] for x in feature_descriptors if x[\"_id\"] % 2 == 0]\n",
"\n",
" filename = ''\n",
"\n",
"\n",
" match dim_reduction:\n",
"\n",
" case 1:\n",
" filename = f'{feature_model}-svd-semantics.json'\n",
" U, S, Vh = scipy.sparse.linalg.svds(np.array(feature_vectors), k=k)\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
"\n",
" case 2:\n",
" filename = f'{feature_model}-nnmf-semantics.json'\n",
" model = NMF(n_components = k, init = 'random', solver = 'cd', alpha_H = 0.01, alpha_W = 0.01, max_iter = 10000)\n",
" min_value = np.min(feature_vectors)\n",
" feature_vectors_shifted = feature_vectors - min_value\n",
" U = model.fit_transform(np.array(feature_vectors_shifted))\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
"\n",
" case 3:\n",
" filename = f'{feature_model}-lda-semantics.json'\n",
" U = LinearDiscriminantAnalysis(n_components = k).fit_transform(feature_vectors, feature_labels)\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
"\n",
" case 4:\n",
" filename = f'{feature_model}-kmeans-semantics.json'\n",
" kmeans = KMeans(n_clusters = k)\n",
" kmeans.fit(feature_vectors)\n",
" U = kmeans.transform(feature_vectors)\n",
" k_latent_semantics = sorted(list(zip(feature_ids, U.tolist())), key = lambda x: x[1][0], reverse = True)\n",
" \n",
" k_latent_semantics = [{\"_id\": item[0], \"semantics\": item[1]} for item in k_latent_semantics]\n",
" with open(filename, 'w', encoding='utf-8') as f:\n",
" json.dump(k_latent_semantics, f, ensure_ascii = False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def main():\n",
"\n",
" # Load dataset\n",
"\n",
" # User input for Image ID\n",
" k = int(input(\"Enter k: \"))\n",
"\n",
" features = ['color_moments', 'hog', 'layer3', 'avgpool', 'fc']\n",
"\n",
" # User input for feature model to extract\n",
" print(\"\\n1: Color moments\")\n",
" print(\"2: HOG\")\n",
" print(\"3: Resnet50 Avgpool layer\")\n",
" print(\"4: Resnet50 Layer 3\")\n",
" print(\"5: Resnet50 FC layer\")\n",
" feature_model = features[int(input(\"Select the feature model: \")) - 1]\n",
"\n",
" print(\"\\n1. SVD\")\n",
" print(\"2. NNMF\")\n",
" print(\"3. LDA\")\n",
" print(\"4. k-means\")\n",
" dim_reduction = int(input(\"Select the dimensionality reduction technique: \"))\n",
"\n",
" extractKLatentSemantics(k, feature_model, dim_reduction)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if __name__ == \"__main__\":\n",
" main()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

214
Phase 2/task_5.ipynb Normal file
View File

@ -0,0 +1,214 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from utils import *\n",
"warnings.filterwarnings('ignore')\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Applying svd on the fc_fd space to get 10 latent semantics (showing only top 10 image-weight pairs for each latent semantic)...\n",
"Latent semantic no. 0\n",
"Image_ID\t80\t-\tWeight\t0.2614097705550824\n",
"Image_ID\t74\t-\tWeight\t0.255431983850539\n",
"Image_ID\t72\t-\tWeight\t0.24329045773521019\n",
"Image_ID\t76\t-\tWeight\t0.22867416408250565\n",
"Image_ID\t38\t-\tWeight\t0.19933358228759127\n",
"Image_ID\t70\t-\tWeight\t0.18697368408982706\n",
"Image_ID\t78\t-\tWeight\t0.13796715203849405\n",
"Image_ID\t130\t-\tWeight\t0.12802644225327572\n",
"Image_ID\t128\t-\tWeight\t0.12766513481071043\n",
"Image_ID\t116\t-\tWeight\t0.12432195172872901\n",
"Latent semantic no. 1\n",
"Image_ID\t42\t-\tWeight\t0.24451953308549035\n",
"Image_ID\t104\t-\tWeight\t0.17513827022527176\n",
"Image_ID\t2\t-\tWeight\t0.17502495949250704\n",
"Image_ID\t0\t-\tWeight\t0.17209867451969002\n",
"Image_ID\t170\t-\tWeight\t0.16656363902027468\n",
"Image_ID\t96\t-\tWeight\t0.15318453472976815\n",
"Image_ID\t40\t-\tWeight\t0.1432149719665029\n",
"Image_ID\t44\t-\tWeight\t0.1429496131499582\n",
"Image_ID\t160\t-\tWeight\t0.13479710738132986\n",
"Image_ID\t6\t-\tWeight\t0.1264545662660414\n",
"Latent semantic no. 2\n",
"Image_ID\t86\t-\tWeight\t0.21244971577008848\n",
"Image_ID\t96\t-\tWeight\t0.19744514449239337\n",
"Image_ID\t90\t-\tWeight\t0.19463642108355275\n",
"Image_ID\t32\t-\tWeight\t0.18145091969843855\n",
"Image_ID\t42\t-\tWeight\t0.16316970985189788\n",
"Image_ID\t26\t-\tWeight\t0.15711519451212017\n",
"Image_ID\t184\t-\tWeight\t0.14991640994990046\n",
"Image_ID\t134\t-\tWeight\t0.1462330756631442\n",
"Image_ID\t40\t-\tWeight\t0.14437675159652016\n",
"Image_ID\t182\t-\tWeight\t0.1383518461119224\n",
"Latent semantic no. 3\n",
"Image_ID\t90\t-\tWeight\t0.1720078267722524\n",
"Image_ID\t156\t-\tWeight\t0.16000154385617743\n",
"Image_ID\t158\t-\tWeight\t0.1512646317732056\n",
"Image_ID\t160\t-\tWeight\t0.14646801598350143\n",
"Image_ID\t152\t-\tWeight\t0.1464352560589073\n",
"Image_ID\t150\t-\tWeight\t0.14619374900432364\n",
"Image_ID\t30\t-\tWeight\t0.14143498327111978\n",
"Image_ID\t36\t-\tWeight\t0.14028252934190766\n",
"Image_ID\t92\t-\tWeight\t0.14010606099568526\n",
"Image_ID\t96\t-\tWeight\t0.12878454015856147\n",
"Latent semantic no. 4\n",
"Image_ID\t0\t-\tWeight\t0.1851068625752792\n",
"Image_ID\t68\t-\tWeight\t0.18233577289211206\n",
"Image_ID\t70\t-\tWeight\t0.17658848660973384\n",
"Image_ID\t2\t-\tWeight\t0.1740864069632969\n",
"Image_ID\t64\t-\tWeight\t0.1652208125636303\n",
"Image_ID\t144\t-\tWeight\t0.1473307832877541\n",
"Image_ID\t140\t-\tWeight\t0.13555748295430797\n",
"Image_ID\t142\t-\tWeight\t0.12823249250147356\n",
"Image_ID\t86\t-\tWeight\t0.12718092599165637\n",
"Image_ID\t76\t-\tWeight\t0.1252879989162334\n",
"Latent semantic no. 5\n",
"Image_ID\t38\t-\tWeight\t0.18831453133913492\n",
"Image_ID\t44\t-\tWeight\t0.17741038115946053\n",
"Image_ID\t42\t-\tWeight\t0.16444727858214978\n",
"Image_ID\t130\t-\tWeight\t0.15436113645002744\n",
"Image_ID\t40\t-\tWeight\t0.1536450181907607\n",
"Image_ID\t132\t-\tWeight\t0.14964910372393345\n",
"Image_ID\t46\t-\tWeight\t0.147369630386678\n",
"Image_ID\t36\t-\tWeight\t0.14003912645014002\n",
"Image_ID\t128\t-\tWeight\t0.13864439525825356\n",
"Image_ID\t138\t-\tWeight\t0.13770732538821512\n",
"Latent semantic no. 6\n",
"Image_ID\t114\t-\tWeight\t0.15664448468019831\n",
"Image_ID\t2\t-\tWeight\t0.15491061836983144\n",
"Image_ID\t0\t-\tWeight\t0.1530303208538504\n",
"Image_ID\t6\t-\tWeight\t0.15295162665264536\n",
"Image_ID\t106\t-\tWeight\t0.14505207452002586\n",
"Image_ID\t110\t-\tWeight\t0.14364619871330633\n",
"Image_ID\t104\t-\tWeight\t0.14360445482307752\n",
"Image_ID\t116\t-\tWeight\t0.14309751290704328\n",
"Image_ID\t108\t-\tWeight\t0.14103122187663494\n",
"Image_ID\t112\t-\tWeight\t0.13936814882577545\n",
"Latent semantic no. 7\n",
"Image_ID\t158\t-\tWeight\t0.15332739573127638\n",
"Image_ID\t152\t-\tWeight\t0.15027095321242787\n",
"Image_ID\t2\t-\tWeight\t0.148228537938103\n",
"Image_ID\t0\t-\tWeight\t0.14693245027728857\n",
"Image_ID\t156\t-\tWeight\t0.1439438847861891\n",
"Image_ID\t8\t-\tWeight\t0.14356918947005834\n",
"Image_ID\t10\t-\tWeight\t0.1431162549061445\n",
"Image_ID\t6\t-\tWeight\t0.14277108702825383\n",
"Image_ID\t150\t-\tWeight\t0.1424099571884803\n",
"Image_ID\t164\t-\tWeight\t0.13731169848767164\n",
"Latent semantic no. 8\n",
"Image_ID\t136\t-\tWeight\t0.14826723874051348\n",
"Image_ID\t142\t-\tWeight\t0.1444905135922577\n",
"Image_ID\t116\t-\tWeight\t0.14310970423245634\n",
"Image_ID\t132\t-\tWeight\t0.13967210710664973\n",
"Image_ID\t152\t-\tWeight\t0.13699976834141417\n",
"Image_ID\t114\t-\tWeight\t0.13649814331495427\n",
"Image_ID\t138\t-\tWeight\t0.13624706512987708\n",
"Image_ID\t106\t-\tWeight\t0.13620952950667425\n",
"Image_ID\t110\t-\tWeight\t0.1346054901033104\n",
"Image_ID\t144\t-\tWeight\t0.13436573258693213\n",
"Latent semantic no. 9\n",
"Image_ID\t38\t-\tWeight\t0.15911686596038474\n",
"Image_ID\t2\t-\tWeight\t0.15207108925634513\n",
"Image_ID\t0\t-\tWeight\t0.15116756158498235\n",
"Image_ID\t6\t-\tWeight\t0.15009399187071035\n",
"Image_ID\t10\t-\tWeight\t0.14437025978168486\n",
"Image_ID\t4\t-\tWeight\t0.14315858315130434\n",
"Image_ID\t34\t-\tWeight\t0.14296451776950192\n",
"Image_ID\t22\t-\tWeight\t0.14272703151065388\n",
"Image_ID\t24\t-\tWeight\t0.14254462871698045\n",
"Image_ID\t20\t-\tWeight\t0.14096073579756538\n"
]
}
],
"source": [
"selected_feature_model = valid_feature_models[\n",
" str(input(\"Enter feature model - one of \" + str(list(valid_feature_models.keys()))))\n",
"]\n",
"\n",
"k = int(input(\"Enter value of k: \"))\n",
"if k < 1:\n",
" raise ValueError(\"k should be a positive integer\")\n",
"\n",
"selected_dim_reduction_method = str(\n",
" input(\n",
" \"Enter dimensionality reduction method - one of \"\n",
" + str(list(valid_dim_reduction_methods.keys()))\n",
" )\n",
")\n",
"\n",
"label_sim_matrix = find_label_label_similarity(fd_collection,selected_feature_model)\n",
"\n",
"extract_latent_semantics(\n",
" fd_collection,\n",
" k,\n",
" selected_feature_model,\n",
" selected_dim_reduction_method,\n",
" sim_matrix=label_sim_matrix,\n",
" top_images=10,\n",
" fn_prefix='label_sim-'\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -641,9 +641,9 @@ class KMeans:
for c in self.cluster_centers_: for c in self.cluster_centers_:
prev_centroid = prev_centroids[c] prev_centroid = prev_centroids[c]
current_centroid = self.cluster_centers_[c] current_centroid = self.cluster_centers_[c]
convergence_tol = np.sum(abs( convergence_tol = np.sum(
(prev_centroid - current_centroid) / prev_centroid * 100.0 abs((prev_centroid - current_centroid) / prev_centroid * 100.0)
)) )
if convergence_tol > self.tol: if convergence_tol > self.tol:
optimized = False optimized = False
if self.verbose > 0: if self.verbose > 0:
@ -676,11 +676,19 @@ class KMeans:
def extract_latent_semantics( def extract_latent_semantics(
fd_collection, k, feature_model, dim_reduction_method, top_images=None fd_collection,
k,
feature_model,
dim_reduction_method,
sim_matrix=None,
top_images=None,
fn_prefix="",
): ):
""" """
Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs Extract latent semantics for entire collection at once for a given feature_model and dim_reduction_method, and display the imageID-semantic weight pairs
Use `sim_matrix` to manually give similarity matrix instead of feature space
Leave `top_images` blank to display all imageID-weight pairs Leave `top_images` blank to display all imageID-weight pairs
""" """
@ -694,18 +702,28 @@ def extract_latent_semantics(
) )
all_images = list(fd_collection.find()) all_images = list(fd_collection.find())
feature_vectors = np.array([img[feature_model] for img in all_images])
feature_labels = [img["true_label"] for img in all_images]
feature_ids = [img["image_id"] for img in all_images] feature_ids = [img["image_id"] for img in all_images]
top_img_str = "" top_img_str = ""
if top_images is not None: if top_images is not None:
top_img_str = f" (showing only top {top_images} image-weight pairs for each latent semantic)" top_img_str = f" (showing only top {top_images} image-weight pairs for each latent semantic)"
print(
"Applying {} on the {} space to get {} latent semantics{}...".format( # if similarity matrix is provided
dim_reduction_method, feature_model, k, top_img_str if sim_matrix is not None:
feature_vectors = sim_matrix
print(
"Applying {} on the {} space to get {} latent semantics{}...".format(
dim_reduction_method, feature_model, k, top_img_str
)
)
# else take feature space from database
else:
feature_vectors = np.array([img[feature_model] for img in all_images])
print(
"Applying {} on the given similarity matrix to get {} latent semantics{}...".format(
dim_reduction_method, k, top_img_str
)
) )
)
displayed_latent_semantics = {} displayed_latent_semantics = {}
all_latent_semantics = {} all_latent_semantics = {}
@ -827,8 +845,38 @@ def extract_latent_semantics(
print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}") print(f"Image_ID\t{image_id}\t-\tWeight\t{weight}")
with open( with open(
f"{feature_model}-{dim_reduction_method}-{k}-semantics.json", f"{fn_prefix}{feature_model}-{dim_reduction_method}-{k}-semantics.json",
"w", "w",
encoding="utf-8", encoding="utf-8",
) as output_file: ) as output_file:
json.dump(all_latent_semantics, output_file, ensure_ascii=False) json.dump(all_latent_semantics, output_file, ensure_ascii=False)
def find_label_label_similarity(fd_collection, feature_model, num_labels=101):
    """
    Calculate pairwise similarity between labels. Lower values indicate higher similarities

    For every label, a representative vector is obtained through
    `calculate_label_representatives`, and each pair of representatives is compared with
    the distance measure registered for `feature_model` in `feature_distance_matches`.

    Args:
        fd_collection: collection handle passed through to `calculate_label_representatives`
        feature_model: feature space to use; must be one of `valid_feature_models.values()`
        num_labels: number of labels to compare; labels are taken as `range(num_labels)`
            (defaults to 101, the value previously hard-coded here)

    Returns:
        `(num_labels, num_labels)` numpy array of label-label distances, with zeros
        on the diagonal

    Raises:
        AssertionError: if `feature_model` is not a valid feature model
    """

    assert (
        feature_model in valid_feature_models.values()
    ), "feature_model should be one of " + str(list(valid_feature_models.keys()))

    # get representative vectors for each label, converted to arrays once up front
    # instead of on every pairwise comparison
    label_mean_vectors = [
        np.array(calculate_label_representatives(fd_collection, label, feature_model))
        for label in range(num_labels)
    ]

    distance_measure = feature_distance_matches[feature_model]

    label_sim_matrix = np.zeros((num_labels, num_labels))
    for i in range(num_labels):
        for j in range(i + 1, num_labels):
            # Note: lower the value, lower the distance => higher the similarity
            distance = distance_measure(label_mean_vectors[i], label_mean_vectors[j])
            label_sim_matrix[i][j] = distance
            # Mirror into the lower triangle. Previously it was left at zero, which made
            # label j look identical to every label i < j.
            # NOTE(review): assumes the distance measure is symmetric - confirm for every
            # entry in feature_distance_matches
            label_sim_matrix[j][i] = distance

    return label_sim_matrix