diff --git a/Phase 2/task_9.ipynb b/Phase 2/task_9.ipynb index fe438e2..b501307 100644 --- a/Phase 2/task_9.ipynb +++ b/Phase 2/task_9.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 207, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 208, + "execution_count": 72, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 209, + "execution_count": 73, "metadata": {}, "outputs": [], "source": [ @@ -45,14 +45,14 @@ }, { "cell_type": "code", - "execution_count": 210, + "execution_count": 74, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "cm_fd-cp-10-semantics.json loaded\n" + "image_sim-cm_fd-lda-10-model.joblib loaded\n" ] } ], @@ -88,28 +88,45 @@ "\n", "match selected_latent_space:\n", " case \"\":\n", - " if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n", - " data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", - " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n", - " else:\n", - " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n", + " if selected_dim_reduction_method == \"lda\":\n", + " if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\") and os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n", + " if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\"):\n", + " model = load(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\")\n", + " data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", + " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib and json loaded\")\n", + " else:\n", + " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib does not exist\")\n", + " else:\n", + " if os.path.exists(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n", + " data = json.load(open(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", + " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n", + " else:\n", + " print(f\"{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n", " case \"cp\":\n", " if os.path.exists(f\"{selected_feature_model}-cp-{k}-semantics.json\"):\n", " data = json.load(open(f\"{selected_feature_model}-cp-{k}-semantics.json\"))\n", " print(f\"{selected_feature_model}-cp-{k}-semantics.json loaded\")\n", - " else: \n", + " else:\n", " print(f\"{selected_feature_model}-cp-{k}-semantics.json does not exist\")\n", " case _:\n", - " if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n", - " data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", - " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n", + " if selected_dim_reduction_method == \"lda\":\n", + " if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\") and os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n", + " model = load(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib\")\n", + " data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", + " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib loaded\")\n", + " else:\n", + " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-model.joblib does not exist\")\n", " else:\n", - " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")\n" + " if os.path.exists(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"):\n", + " data = json.load(open(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json\"))\n", + " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json loaded\")\n", + " else:\n", + " print(f\"{selected_latent_space}-{selected_feature_model}-{selected_dim_reduction_method}-{k}-semantics.json does not exist\")" ] }, { "cell_type": "code", - "execution_count": 211, + "execution_count": 75, "metadata": {}, "outputs": [], "source": [ @@ -169,10 +186,18 @@ " label_vectors.append(sim_matrix[i])\n", " label_rep = [sum(col) / len(col) for col in zip(*label_vectors)]\n", "\n", - "\n", " for centroid in S:\n", " comparison_vector.append(math.dist(label_rep, centroid))\n", "\n", + " case \"lda\":\n", + " comparison_feature_space = np.array(data[\"image-semantic\"])\n", + " label_vectors = []\n", + " length = len(comparison_feature_space)\n", + " for i in range(length):\n", + " if all_images[i][\"true_label\"] == label:\n", + " label_vectors.append(comparison_feature_space[i])\n", + " comparison_vector = [sum(col) / len(col) for col in zip(*label_vectors)] \n", + "\n", " n = len(comparison_feature_space)\n", "\n", " distances = []\n", @@ -200,7 +225,7 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": 76, "metadata": {}, "outputs": [], "source": [ @@ -230,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 234, + "execution_count": 77, "metadata": {}, "outputs": [], "source": [ @@ -254,6 +279,11 @@ " comparison_feature_space = np.array(data[\"image-semantic\"])\n", " comparison_vector = comparison_feature_space[label]\n", "\n", + " case \"lda\":\n", + " comparison_feature_space = np.array(data[\"image-semantic\"])\n", + " comparison_vector = comparison_feature_space[label] \n", + "\n", + "\n", " n = len(comparison_feature_space)\n", " distances = []\n", " for i in range(n):\n", @@ -268,23 +298,23 @@ }, { "cell_type": "code", - "execution_count": 235, + "execution_count": 78, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'label': 2, 'distance': 0.9999999999999999}\n", - "{'label': 4, 'distance': 0.9999999999999999}\n", - "{'label': 6, 'distance': 0.9999999999999999}\n", - "{'label': 7, 'distance': 0.9999999999999999}\n", - "{'label': 8, 'distance': 0.9999999999999999}\n", - "{'label': 9, 'distance': 0.9999999999999999}\n", - "{'label': 10, 'distance': 0.9999999999999999}\n", - "{'label': 11, 'distance': 0.9999999999999999}\n", - "{'label': 13, 'distance': 0.9999999999999999}\n", - "{'label': 14, 'distance': 0.9999999999999999}\n" + "{'image_id': 2641, 'label': 46, 'distance': 0.013618215122607105}\n", + "{'image_id': 1686, 'label': 16, 'distance': 0.015215365128880378}\n", + "{'image_id': 2310, 'label': 35, 'distance': 0.015383486193179943}\n", + "{'image_id': 3781, 'label': 84, 'distance': 0.01541886635507712}\n", + "{'image_id': 1483, 'label': 11, 'distance': 0.015474891099448796}\n", + "{'image_id': 2719, 'label': 48, 'distance': 0.01960489858697963}\n", + "{'image_id': 3787, 'label': 85, 'distance': 0.02006387165132467}\n", + "{'image_id': 3877, 'label': 87, 'distance': 0.02050382578938892}\n", + "{'image_id': 3719, 'label': 82, 'distance': 0.02293235381986182}\n", + "{'image_id': 3403, 'label': 70, 'distance': 0.024912695992711693}\n" ] } ], @@ -303,6 +333,13 @@ "\n", " extract_similarities_ls2(data, label)\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/Phase 2/utils.py b/Phase 2/utils.py index 5153773..18508b6 100644 --- a/Phase 2/utils.py +++ b/Phase 2/utils.py @@ -1117,7 +1117,7 @@ def extract_latent_semantics_from_sim_matrix( dump( model, - f"{sim_type}-{feature_model}-{dim_reduction_method}-{k}-model.joblib", + f"{sim_type}_sim-{feature_model}-{dim_reduction_method}-{k}-model.joblib", ) # for each latent semantic, sort object-weight pairs by weights in descending order