mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 07:54:07 +00:00
1171 lines
40 KiB
Plaintext
1171 lines
40 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from task2_utils import *\n",
|
|
"warnings.filterwarnings('ignore')\n",
|
|
"# interactive plot\n",
|
|
"%matplotlib widget"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"def calculate_image_similarity(data, distance_measure):\n",
"    \"\"\"Build the symmetric pairwise matrix of distance_measure over the rows of data.\n",
"\n",
"    Each unordered pair is evaluated once and mirrored; the diagonal stays zero.\n",
"    \"\"\"\n",
"    count = data.shape[0]\n",
"    pairwise = np.zeros((count, count))\n",
"    # Visit the strict upper triangle in row-major order\n",
"    for row, col in zip(*np.triu_indices(count, k=1)):\n",
"        value = distance_measure(np.array(data[row]), np.array(data[col]))\n",
"        pairwise[row][col] = value\n",
"        pairwise[col][row] = value\n",
"    return pairwise"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"def mds_projection(data_sim_matrix, n_components=2):\n",
"    \"\"\"Project items into n_components dimensions from their pairwise distances.\n",
"\n",
"    Double-centres the squared distance matrix and returns the eigenvectors that\n",
"    belong to the largest eigenvalues, one column per component.\n",
"    \"\"\"\n",
"    size = data_sim_matrix.shape[0]\n",
"    # Centering matrix: identity minus the constant 1/n matrix\n",
"    centering = np.eye(size) - np.ones((size, size)) / size\n",
"    # B = -1/2 * C * D^2 * C\n",
"    squared = data_sim_matrix**2\n",
"    gram = -0.5 * centering @ squared @ centering\n",
"    eigvals, eigvecs = np.linalg.eigh(gram)\n",
"\n",
"    # Order eigenvectors from largest to smallest eigenvalue, keep the leading ones\n",
"    order = np.argsort(eigvals)[::-1]\n",
"    return eigvecs[:, order][:, :n_components]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"def avgandmin_knn_distance(data_sim_matrix, k):\n",
"    \"\"\"Return the (mean, minimum) over all rows of each row's k-th smallest entry.\n",
"\n",
"    Used to understand the k-NN distance distribution when choosing an epsilon range.\n",
"    \"\"\"\n",
"    # After an ascending sort of every row, column k-1 holds its k-th smallest value\n",
"    ascending_rows = np.sort(data_sim_matrix, axis=1)\n",
"    kth_column = ascending_rows[:, k - 1]\n",
"\n",
"    return np.mean(kth_column), np.min(kth_column)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"def display_cluster_stats(clusters):\n",
"    \"\"\"Print the size of every cluster label, the cluster total and the noise total.\n",
"\n",
"    Label -1 marks noise and is not counted as a cluster.\n",
"    \"\"\"\n",
"    unique_labels, label_sizes = np.unique(clusters, return_counts=True)\n",
"    sizes_by_label = dict(zip(unique_labels, label_sizes))\n",
"    real_clusters = sum(1 for cluster_label in sizes_by_label if cluster_label != -1)\n",
"\n",
"    print(\"Clusters:\", sizes_by_label)\n",
"    print(\"No. of clusters:\", real_clusters)\n",
"    noise_total = sizes_by_label.get(-1)\n",
"    if noise_total is None:\n",
"        print(\"No noise points\")\n",
"    else:\n",
"        print(\"Noise points:\", noise_total)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"class DBSCAN:\n",
"    \"\"\"DBSCAN over a pairwise distance matrix, with MDS-based cluster plots.\n",
"\n",
"    Values stored in clusters: 0 = unclassified, -1 = noise, positive = cluster id.\n",
"    \"\"\"\n",
"\n",
"    def __init__(\n",
"        self, label, data, distance_measure, eps, min_samples, data_sim_matrix=None\n",
"    ):\n",
"        \"\"\"Store the clustering parameters.\n",
"\n",
"        label: identifier used in the saved plot file names\n",
"        data: array with one row per image (only its shape and rows are used)\n",
"        distance_measure: callable(a, b) used when distances must be computed\n",
"        eps: neighborhood radius; neighbors have distance strictly below eps\n",
"        min_samples: minimum neighborhood size for a core point\n",
"        data_sim_matrix: optional precomputed pairwise distance matrix\n",
"        \"\"\"\n",
"        self.label = label\n",
"        self.eps = eps\n",
"        self.min_samples = min_samples\n",
"\n",
"        self.data = data\n",
"        self.distance_measure = distance_measure\n",
"        self.num_images = data.shape[0]\n",
"\n",
"        # An all-zero matrix doubles as the \"not computed yet\" marker checked in dbscan()\n",
"        self.image_sim_matrix = np.zeros((self.num_images, self.num_images))\n",
"        if data_sim_matrix is not None:\n",
"            self.image_sim_matrix = data_sim_matrix\n",
"\n",
"        self.clusters = np.zeros(self.num_images)  # 0 represents unclassified points\n",
"        self.core_points = []\n",
"\n",
"    @staticmethod\n",
"    def _legend_label(cluster_label):\n",
"        \"\"\"Legend text for a cluster id (-1 is the noise bucket).\"\"\"\n",
"        return f\"Cluster {int(cluster_label)}\" if cluster_label != -1 else \"Noise points\"\n",
"\n",
"    def dbscan(self):\n",
"        \"\"\"Run DBSCAN and return the per-point cluster ids (-1 for noise).\"\"\"\n",
"        # Distances not provided/calculated already: compute them AND keep the result.\n",
"        # Discarding it would leave every distance at zero, so for any positive eps\n",
"        # each point would see all points as neighbors.\n",
"        if not np.any(self.image_sim_matrix):\n",
"            self.image_sim_matrix = calculate_image_similarity(\n",
"                self.data, self.distance_measure\n",
"            )\n",
"\n",
"        cluster_id = 0\n",
"        for i in range(self.num_images):\n",
"            if self.clusters[i] != 0:\n",
"                continue  # Skip already classified points\n",
"\n",
"            neighbors = self.region_query(i)\n",
"            if len(neighbors) < self.min_samples:\n",
"                self.clusters[i] = -1  # Mark point as noise\n",
"            else:\n",
"                cluster_id += 1  # New cluster identified\n",
"                self.clusters[i] = cluster_id\n",
"                self.grow_cluster(neighbors, cluster_id)\n",
"\n",
"        return self.clusters\n",
"\n",
"    def region_query(self, center):\n",
"        \"\"\"Return the indices whose distance to center is strictly below eps.\"\"\"\n",
"        distances = self.image_sim_matrix[center]\n",
"        return [i for i, dist in enumerate(distances) if dist < self.eps]\n",
"\n",
"    def grow_cluster(self, neighbors, cluster_id):\n",
"        \"\"\"Assign every point reachable from the seed neighbors to cluster_id.\n",
"\n",
"        neighbors is used as a work queue and is extended in place.\n",
"        \"\"\"\n",
"        i = 0\n",
"        # check neighbors for connected components\n",
"        while i < len(neighbors):\n",
"            neighbor = neighbors[i]\n",
"\n",
"            if self.clusters[neighbor] == -1:\n",
"                self.clusters[neighbor] = cluster_id  # Change noise to border point\n",
"            elif self.clusters[neighbor] == 0:\n",
"                self.clusters[neighbor] = cluster_id\n",
"                new_neighbors = self.region_query(neighbor)\n",
"                # Only a core point passes its own neighborhood on for expansion\n",
"                if len(new_neighbors) >= self.min_samples:\n",
"                    neighbors.extend(new_neighbors)\n",
"            i += 1\n",
"\n",
"    def get_core_points(self, label_img_ids):\n",
"        \"\"\"Find core points (after clustering only!) and store their image ids.\n",
"\n",
"        label_img_ids[i] is the id recorded for point i. The list is rebuilt on\n",
"        every call, so calling this twice does not duplicate entries.\n",
"        \"\"\"\n",
"        self.core_points = [\n",
"            label_img_ids[i]\n",
"            for i in range(self.num_images)\n",
"            # Noise points are skipped; the rest need a full-size neighborhood\n",
"            if self.clusters[i] != -1\n",
"            and len(self.region_query(i)) >= self.min_samples\n",
"        ]\n",
"\n",
"    def mds_scatter_clusters(self):\n",
"        \"\"\"Visualize clusters as point clouds in 2-D space and save the figure.\"\"\"\n",
"        # Perform MDS projection\n",
"        mds_components = mds_projection(self.image_sim_matrix)\n",
"\n",
"        # Plot clusters; np.unique gives a stable, sorted legend order\n",
"        plt.figure(figsize=(8, 6))\n",
"        for label in np.unique(self.clusters):\n",
"            cluster_points = mds_components[self.clusters == label]\n",
"            plt.scatter(\n",
"                cluster_points[:, 0],\n",
"                cluster_points[:, 1],\n",
"                label=self._legend_label(label),\n",
"            )\n",
"\n",
"        plt.title(\"DBSCAN clusters projected onto 2-D MDS space\")\n",
"        plt.xlabel(\"MDS component 1\")\n",
"        plt.ylabel(\"MDS component 2\")\n",
"        plt.legend()\n",
"        plt.savefig(f\"Plots/DBSCAN_MDS_Label_{self.label}.png\")\n",
"        plt.show()\n",
"\n",
"    def group_image_clusters(self, image_data):\n",
"        \"\"\"Scatter the clusters in 2-D MDS space with one thumbnail per cluster centroid.\n",
"\n",
"        image_data[i] must offer .resize((width, height)), e.g. a PIL image; the\n",
"        first member of each cluster supplies its thumbnail. Clicking a thumbnail\n",
"        raises it above the points, clicking elsewhere lowers it again.\n",
"        \"\"\"\n",
"        # Perform MDS projection\n",
"        mds_components = mds_projection(self.image_sim_matrix)\n",
"        # Scaling up to fit images inside\n",
"        mds_components = mds_components * 10000\n",
"\n",
"        min_x_mds = np.min(mds_components[:, 0])\n",
"        min_y_mds = np.min(mds_components[:, 1])\n",
"        max_x_mds = np.max(mds_components[:, 0])\n",
"        max_y_mds = np.max(mds_components[:, 1])\n",
"\n",
"        # Each thumbnail spans a tenth of the plotted range along each axis\n",
"        img_width = (max_x_mds - min_x_mds) / 10\n",
"        img_height = (max_y_mds - min_y_mds) / 10\n",
"\n",
"        # Plot clusters; np.unique gives a stable, sorted legend order\n",
"        plt.figure(figsize=(8, 6))\n",
"        for label in np.unique(self.clusters):\n",
"            cluster_indices = np.where(self.clusters == label)[0]\n",
"            cluster_points = mds_components[cluster_indices]\n",
"            plt.scatter(\n",
"                cluster_points[:, 0],\n",
"                cluster_points[:, 1],\n",
"                label=self._legend_label(label),\n",
"                zorder=1,\n",
"            )\n",
"\n",
"            # Display image thumbnails at cluster centroids\n",
"            cluster_center = np.mean(cluster_points, axis=0)\n",
"            thumbnail_data = image_data[cluster_indices[0]].resize(\n",
"                (int(np.ceil(img_width)), int(np.ceil(img_height)))\n",
"            )\n",
"            im = plt.imshow(\n",
"                thumbnail_data,\n",
"                extent=(\n",
"                    cluster_center[0] - 0.5 * img_width,\n",
"                    cluster_center[0] + 0.5 * img_width,\n",
"                    cluster_center[1] - 0.5 * img_height,\n",
"                    cluster_center[1] + 0.5 * img_height,\n",
"                ),\n",
"                interpolation=\"nearest\",\n",
"                cmap=plt.cm.gray_r,\n",
"                zorder=0,\n",
"            )\n",
"\n",
"            # Image border\n",
"            x1, x2, y1, y2 = im.get_extent()\n",
"            (im_border,) = plt.plot(\n",
"                [x1, x2, x2, x1, x1],\n",
"                [y1, y1, y2, y2, y1],\n",
"                \"-\",\n",
"                linewidth=2,\n",
"                solid_capstyle=\"butt\",\n",
"                zorder=0,\n",
"            )\n",
"\n",
"            # Click to bring to front; defaults bind this iteration's artists\n",
"            def region_click(event, region_area=im, region_border=im_border):\n",
"                if region_area.contains(event)[0]:\n",
"                    region_border.set_zorder(2)\n",
"                    region_area.set_zorder(2)\n",
"                else:\n",
"                    region_border.set_zorder(0)\n",
"                    region_area.set_zorder(0)\n",
"\n",
"            im.figure.canvas.mpl_connect(\"button_press_event\", region_click)\n",
"\n",
"        plt.title(\"2-D MDS space with image thumbnails at centroids\")\n",
"        plt.xlabel(\"MDS component 1\")\n",
"        plt.ylabel(\"MDS component 2\")\n",
"        ax = plt.gca()\n",
"        ax.margins(0.05)\n",
"        ax.set_aspect(0.75 / ax.get_data_ratio())\n",
"        plt.legend()\n",
"        plt.savefig(f\"Plots/DBSCAN_MDS_Label_{self.label}_with_images.png\")\n",
"        plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Desired number of clusters per label\n",
"selected_c = 5\n",
"\n",
"# Feature model to cluster on; any key of valid_feature_models can be used here\n",
"# (an input() prompt listing those keys is an alternative to hard-coding it)\n",
"selected_feature_model = valid_feature_models[\"avgpool\"]\n",
"\n",
"# Distance measure matched to the chosen feature model\n",
"# (euclidean_distance_measure is the alternative tried previously)\n",
"selected_distance_measure = feature_distance_matches[selected_feature_model]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Clustering label 0 ...\n",
|
|
"Clustering label 1 ...\n",
|
|
"Clustering label 2 ...\n",
|
|
"Clustering label 3 ...\n",
|
|
"Clustering label 4 ...\n",
|
|
"Clustering label 5 ...\n",
|
|
"Clustering label 6 ...\n",
|
|
"Clustering label 7 ...\n",
|
|
"Clustering label 8 ...\n",
|
|
"Clustering label 9 ...\n",
|
|
"Clustering label 10 ...\n",
|
|
"Clustering label 11 ...\n",
|
|
"Clustering label 12 ...\n",
|
|
"Clustering label 13 ...\n",
|
|
"Clustering label 14 ...\n",
|
|
"Clustering label 15 ...\n",
|
|
"Clustering label 16 ...\n",
|
|
"Clustering label 17 ...\n",
|
|
"Clustering label 18 ...\n",
|
|
"Clustering label 19 ...\n",
|
|
"Clustering label 20 ...\n",
|
|
"Clustering label 21 ...\n",
|
|
"Clustering label 22 ...\n",
|
|
"Clustering label 23 ...\n",
|
|
"Clustering label 24 ...\n",
|
|
"Clustering label 25 ...\n",
|
|
"Clustering label 26 ...\n",
|
|
"Clustering label 27 ...\n",
|
|
"Clustering label 28 ...\n",
|
|
"Clustering label 29 ...\n",
|
|
"Clustering label 30 ...\n",
|
|
"Clustering label 31 ...\n",
|
|
"Clustering label 32 ...\n",
|
|
"Clustering label 33 ...\n",
|
|
"Clustering label 34 ...\n",
|
|
"Clustering label 35 ...\n",
|
|
"Clustering label 36 ...\n",
|
|
"Clustering label 37 ...\n",
|
|
"Clustering label 38 ...\n",
|
|
"Clustering label 39 ...\n",
|
|
"Clustering label 40 ...\n",
|
|
"Clustering label 41 ...\n",
|
|
"Clustering label 42 ...\n",
|
|
"Clustering label 43 ...\n",
|
|
"Clustering label 44 ...\n",
|
|
"Clustering label 45 ...\n",
|
|
"Clustering label 46 ...\n",
|
|
"Clustering label 47 ...\n",
|
|
"Clustering label 48 ...\n",
|
|
"Clustering label 49 ...\n",
|
|
"Clustering label 50 ...\n",
|
|
"Clustering label 51 ...\n",
|
|
"Clustering label 52 ...\n",
|
|
"Clustering label 53 ...\n",
|
|
"Clustering label 54 ...\n",
|
|
"Clustering label 55 ...\n",
|
|
"Clustering label 56 ...\n",
|
|
"Clustering label 57 ...\n",
|
|
"Clustering label 58 ...\n",
|
|
"Clustering label 59 ...\n",
|
|
"Clustering label 60 ...\n",
|
|
"Clustering label 61 ...\n",
|
|
"Clustering label 62 ...\n",
|
|
"Clustering label 63 ...\n",
|
|
"Clustering label 64 ...\n",
|
|
"Clustering label 65 ...\n",
|
|
"Clustering label 66 ...\n",
|
|
"Clustering label 67 ...\n",
|
|
"Clustering label 68 ...\n",
|
|
"Clustering label 69 ...\n",
|
|
"Clustering label 70 ...\n",
|
|
"Clustering label 71 ...\n",
|
|
"Clustering label 72 ...\n",
|
|
"Clustering label 73 ...\n",
|
|
"Clustering label 74 ...\n",
|
|
"Clustering label 75 ...\n",
|
|
"Clustering label 76 ...\n",
|
|
"Clustering label 77 ...\n",
|
|
"Clustering label 78 ...\n",
|
|
"Clustering label 79 ...\n",
|
|
"Clustering label 80 ...\n",
|
|
"Clustering label 81 ...\n",
|
|
"Clustering label 82 ...\n",
|
|
"Clustering label 83 ...\n",
|
|
"Clustering label 84 ...\n",
|
|
"Clustering label 85 ...\n",
|
|
"Clustering label 86 ...\n",
|
|
"Clustering label 87 ...\n",
|
|
"Clustering label 88 ...\n",
|
|
"Clustering label 89 ...\n",
|
|
"Clustering label 90 ...\n",
|
|
"Clustering label 91 ...\n",
|
|
"Clustering label 92 ...\n",
|
|
"Clustering label 93 ...\n",
|
|
"Clustering label 94 ...\n",
|
|
"Clustering label 95 ...\n",
|
|
"Clustering label 96 ...\n",
|
|
"Clustering label 97 ...\n",
|
|
"Clustering label 98 ...\n",
|
|
"Clustering label 99 ...\n",
|
|
"Clustering label 100 ...\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# For every label: grid-search DBSCAN parameters and keep the clustering whose\n",
"# cluster count is closest to selected_c, breaking ties by fewest noise points.\n",
"best_models = []\n",
"for label in range(NUM_LABELS):\n",
"    print(\"Clustering label\", label, \"...\")\n",
"\n",
"    # Query the label's documents once so image ids and feature rows come from\n",
"    # the same result list and therefore stay aligned index by index\n",
"    label_docs = list(fd_collection.find({\"true_label\": label}))\n",
"    label_img_ids = [label_doc[\"image_id\"] for label_doc in label_docs]\n",
"\n",
"    # get label's images in PIL format (input for group_image_clusters when visualizing)\n",
"    label_imgs = []\n",
"    for img_id in label_img_ids:\n",
"        img, true_label = dataset[img_id]\n",
"        label_imgs.append(transforms.ToPILImage()(img))\n",
"\n",
"    # get image features, one flattened row per image\n",
"    label_fds = np.array(\n",
"        [\n",
"            np.array(label_doc[selected_feature_model]).flatten()\n",
"            for label_doc in label_docs\n",
"        ]\n",
"    )\n",
"\n",
"    # (cluster ids, eps, min_samples) of the best clustering found so far\n",
"    label_dbscan_results = (np.zeros(label_fds.shape[0]), 0, 0)\n",
"    label_min_noise = label_fds.shape[0]\n",
"    label_min_cluster_diff = np.inf\n",
"\n",
"    # Pairwise distances are computed once and shared by every DBSCAN run below\n",
"    label_img_sim_matrix = calculate_image_similarity(\n",
"        label_fds, selected_distance_measure\n",
"    )\n",
"\n",
"    # decrementally try min_samples, starting from twice the desired no. of clusters\n",
"    for cur_min_samples in range(2 * selected_c, 1, -1):\n",
"        # find range of epsilon to try, by checking all from mean to min knn distance\n",
"        # k is current min_samples\n",
"        max_eps, min_eps = avgandmin_knn_distance(label_img_sim_matrix, cur_min_samples)\n",
"\n",
"        # try epsilon values\n",
"        for cur_eps in np.linspace(min_eps, max_eps, num=100):\n",
"            label_dbscan = DBSCAN(\n",
"                label,\n",
"                label_fds,\n",
"                selected_distance_measure,\n",
"                cur_eps,\n",
"                cur_min_samples,\n",
"                label_img_sim_matrix,\n",
"            )\n",
"\n",
"            clusters = label_dbscan.dbscan()\n",
"\n",
"            cluster_counts = np.unique(clusters, return_counts=True)\n",
"            cluster_counts_dict = dict(zip(*cluster_counts))\n",
"\n",
"            # -1 is the noise label; it is absent when every point was clustered\n",
"            noise_pts = cluster_counts_dict.get(-1, 0)\n",
"            cluster_diff = abs(len(cluster_counts_dict.keys() - {-1}) - selected_c)\n",
"\n",
"            # store only most desirable clustering: as close as possible to c clusters, and then minimum noise\n",
"            if cluster_diff < label_min_cluster_diff or (\n",
"                cluster_diff == label_min_cluster_diff and noise_pts <= label_min_noise\n",
"            ):\n",
"                label_dbscan_results = (clusters, cur_eps, cur_min_samples)\n",
"                label_min_noise = noise_pts\n",
"                label_min_cluster_diff = cluster_diff\n",
"\n",
"    # Rebuild a model carrying the winning parameters and its cluster assignment\n",
"    best_label_dbscan = DBSCAN(\n",
"        label,\n",
"        label_fds,\n",
"        selected_distance_measure,\n",
"        label_dbscan_results[1],\n",
"        label_dbscan_results[2],\n",
"        label_img_sim_matrix,\n",
"    )\n",
"    best_label_dbscan.clusters = label_dbscan_results[0]\n",
"    best_label_dbscan.get_core_points(label_img_ids)\n",
"\n",
"    # store best clustering\n",
"    best_models.append(best_label_dbscan)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Visualization:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Label: 0\n",
|
|
"Clusters: {-1.0: 94, 1.0: 15, 2.0: 94, 3.0: 3, 4.0: 4, 5.0: 8}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 94\n",
|
|
"Core points: 103\n",
|
|
"Label: 1\n",
|
|
"Clusters: {-1.0: 32, 1.0: 154, 2.0: 11, 3.0: 4, 4.0: 8, 5.0: 8}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 32\n",
|
|
"Core points: 106\n",
|
|
"Label: 2\n",
|
|
"Clusters: {-1.0: 60, 1.0: 4, 2.0: 4, 3.0: 8, 4.0: 4, 5.0: 20}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 60\n",
|
|
"Core points: 35\n",
|
|
"Label: 3\n",
|
|
"Clusters: {-1.0: 162, 1.0: 218, 2.0: 3, 3.0: 3, 4.0: 9, 5.0: 4}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 162\n",
|
|
"Core points: 199\n",
|
|
"Label: 4\n",
|
|
"Clusters: {-1.0: 16, 1.0: 9, 2.0: 3}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 16\n",
|
|
"Core points: 12\n",
|
|
"Label: 5\n",
|
|
"Clusters: {-1.0: 166, 1.0: 222, 2.0: 2, 3.0: 2, 4.0: 5, 5.0: 3}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 166\n",
|
|
"Core points: 234\n",
|
|
"Label: 6\n",
|
|
"Clusters: {-1.0: 10, 1.0: 7, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 10\n",
|
|
"Core points: 11\n",
|
|
"Label: 7\n",
|
|
"Clusters: {-1.0: 15, 1.0: 4, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 15\n",
|
|
"Core points: 6\n",
|
|
"Label: 8\n",
|
|
"Clusters: {-1.0: 12, 1.0: 9, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 12\n",
|
|
"Core points: 11\n",
|
|
"Label: 9\n",
|
|
"Clusters: {-1.0: 14, 1.0: 10, 2.0: 3}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 14\n",
|
|
"Core points: 3\n",
|
|
"Label: 10\n",
|
|
"Clusters: {-1.0: 15, 1.0: 4, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 15\n",
|
|
"Core points: 8\n",
|
|
"Label: 11\n",
|
|
"Clusters: {-1.0: 6, 1.0: 9, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 6\n",
|
|
"Core points: 11\n",
|
|
"Label: 12\n",
|
|
"Clusters: {-1.0: 36, 1.0: 16, 2.0: 3, 3.0: 2, 4.0: 5, 5.0: 2}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 36\n",
|
|
"Core points: 28\n",
|
|
"Label: 13\n",
|
|
"Clusters: {-1.0: 27, 1.0: 14, 2.0: 2, 3.0: 2, 4.0: 2, 5.0: 2}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 27\n",
|
|
"Core points: 22\n",
|
|
"Label: 14\n",
|
|
"Clusters: {-1.0: 13, 1.0: 3, 2.0: 3, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 13\n",
|
|
"Core points: 8\n",
|
|
"Label: 15\n",
|
|
"Clusters: {-1.0: 24, 1.0: 13, 2.0: 3, 3.0: 3}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 24\n",
|
|
"Core points: 19\n",
|
|
"Label: 16\n",
|
|
"Clusters: {-1.0: 36, 1.0: 4, 2.0: 2, 3.0: 3}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 36\n",
|
|
"Core points: 9\n",
|
|
"Label: 17\n",
|
|
"Clusters: {-1.0: 10, 1.0: 11, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 10\n",
|
|
"Core points: 15\n",
|
|
"Label: 18\n",
|
|
"Clusters: {-1.0: 15, 1.0: 5, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 15\n",
|
|
"Core points: 7\n",
|
|
"Label: 19\n",
|
|
"Clusters: {-1.0: 23, 1.0: 30, 2.0: 2, 3.0: 2, 4.0: 2, 5.0: 2}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 23\n",
|
|
"Core points: 38\n",
|
|
"Label: 20\n",
|
|
"Clusters: {-1.0: 17, 1.0: 3, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 17\n",
|
|
"Core points: 7\n",
|
|
"Label: 21\n",
|
|
"Clusters: {-1.0: 13, 1.0: 13, 2.0: 3}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 13\n",
|
|
"Core points: 3\n",
|
|
"Label: 22\n",
|
|
"Clusters: {-1.0: 18, 1.0: 9, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 18\n",
|
|
"Core points: 13\n",
|
|
"Label: 23\n",
|
|
"Clusters: {-1.0: 28, 1.0: 22, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 28\n",
|
|
"Core points: 26\n",
|
|
"Label: 24\n",
|
|
"Clusters: {-1.0: 9, 1.0: 11, 2.0: 3}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 9\n",
|
|
"Core points: 10\n",
|
|
"Label: 25\n",
|
|
"Clusters: {-1.0: 17, 1.0: 16, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 17\n",
|
|
"Core points: 18\n",
|
|
"Label: 26\n",
|
|
"Clusters: {-1.0: 15, 1.0: 17, 2.0: 4}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 15\n",
|
|
"Core points: 7\n",
|
|
"Label: 27\n",
|
|
"Clusters: {-1.0: 21, 1.0: 2, 2.0: 10, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 21\n",
|
|
"Core points: 14\n",
|
|
"Label: 28\n",
|
|
"Clusters: {-1.0: 14, 1.0: 7, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 14\n",
|
|
"Core points: 11\n",
|
|
"Label: 29\n",
|
|
"Clusters: {-1.0: 11, 1.0: 2, 2.0: 8, 3.0: 3, 4.0: 2}\n",
|
|
"No. of clusters: 4\n",
|
|
"Noise points: 11\n",
|
|
"Core points: 15\n",
|
|
"Label: 30\n",
|
|
"Clusters: {-1.0: 16, 1.0: 2, 2.0: 2, 3.0: 5, 4.0: 3}\n",
|
|
"No. of clusters: 4\n",
|
|
"Noise points: 16\n",
|
|
"Core points: 12\n",
|
|
"Label: 31\n",
|
|
"Clusters: {-1.0: 23, 1.0: 4, 2.0: 2, 3.0: 3, 4.0: 2}\n",
|
|
"No. of clusters: 4\n",
|
|
"Noise points: 23\n",
|
|
"Core points: 11\n",
|
|
"Label: 32\n",
|
|
"Clusters: {-1.0: 14, 1.0: 10, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 14\n",
|
|
"Core points: 12\n",
|
|
"Label: 33\n",
|
|
"Clusters: {-1.0: 16, 1.0: 5, 2.0: 2, 3.0: 5, 4.0: 2, 5.0: 2}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 16\n",
|
|
"Core points: 16\n",
|
|
"Label: 34\n",
|
|
"Clusters: {-1.0: 20, 1.0: 2, 2.0: 2, 3.0: 8, 4.0: 2}\n",
|
|
"No. of clusters: 4\n",
|
|
"Noise points: 20\n",
|
|
"Core points: 14\n",
|
|
"Label: 35\n",
|
|
"Clusters: {-1.0: 19, 1.0: 7, 2.0: 6, 3.0: 2, 4.0: 2, 5.0: 2}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 19\n",
|
|
"Core points: 19\n",
|
|
"Label: 36\n",
|
|
"Clusters: {-1.0: 20, 1.0: 5, 2.0: 3, 3.0: 4}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 20\n",
|
|
"Core points: 3\n",
|
|
"Label: 37\n",
|
|
"Clusters: {-1.0: 18, 1.0: 3, 2.0: 3, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 18\n",
|
|
"Core points: 8\n",
|
|
"Label: 38\n",
|
|
"Clusters: {-1.0: 27, 1.0: 2, 2.0: 3}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 27\n",
|
|
"Core points: 5\n",
|
|
"Label: 39\n",
|
|
"Clusters: {-1.0: 23, 1.0: 15, 2.0: 3, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 23\n",
|
|
"Core points: 20\n",
|
|
"Label: 40\n",
|
|
"Clusters: {-1.0: 22, 1.0: 4, 2.0: 2, 3.0: 5}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 22\n",
|
|
"Core points: 11\n",
|
|
"Label: 41\n",
|
|
"Clusters: {-1.0: 29, 1.0: 2, 2.0: 3}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 29\n",
|
|
"Core points: 5\n",
|
|
"Label: 42\n",
|
|
"Clusters: {-1.0: 5, 1.0: 17}\n",
|
|
"No. of clusters: 1\n",
|
|
"Noise points: 5\n",
|
|
"Core points: 13\n",
|
|
"Label: 43\n",
|
|
"Clusters: {-1.0: 10, 1.0: 5, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 10\n",
|
|
"Core points: 7\n",
|
|
"Label: 44\n",
|
|
"Clusters: {-1.0: 4, 1.0: 13}\n",
|
|
"No. of clusters: 1\n",
|
|
"Noise points: 4\n",
|
|
"Core points: 11\n",
|
|
"Label: 45\n",
|
|
"Clusters: {-1.0: 19, 1.0: 2, 2.0: 2, 3.0: 3}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 19\n",
|
|
"Core points: 7\n",
|
|
"Label: 46\n",
|
|
"Clusters: {-1.0: 20, 1.0: 24, 2.0: 3, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 20\n",
|
|
"Core points: 29\n",
|
|
"Label: 47\n",
|
|
"Clusters: {-1.0: 21, 1.0: 21, 2.0: 2, 3.0: 2, 4.0: 2, 5.0: 2}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 21\n",
|
|
"Core points: 29\n",
|
|
"Label: 48\n",
|
|
"Clusters: {-1.0: 9, 1.0: 8, 2.0: 4}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 9\n",
|
|
"Core points: 3\n",
|
|
"Label: 49\n",
|
|
"Clusters: {-1.0: 17, 1.0: 3, 2.0: 2, 3.0: 5}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 17\n",
|
|
"Core points: 10\n",
|
|
"Label: 50\n",
|
|
"Clusters: {-1.0: 22, 1.0: 20, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 22\n",
|
|
"Core points: 22\n",
|
|
"Label: 51\n",
|
|
"Clusters: {-1.0: 19, 1.0: 2, 2.0: 2, 3.0: 5, 4.0: 9, 5.0: 3}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 19\n",
|
|
"Core points: 21\n",
|
|
"Label: 52\n",
|
|
"Clusters: {-1.0: 8, 1.0: 6, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 8\n",
|
|
"Core points: 8\n",
|
|
"Label: 53\n",
|
|
"Clusters: {-1.0: 13, 1.0: 17, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 13\n",
|
|
"Core points: 19\n",
|
|
"Label: 54\n",
|
|
"Clusters: {-1.0: 26, 1.0: 2, 2.0: 13, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 26\n",
|
|
"Core points: 17\n",
|
|
"Label: 55\n",
|
|
"Clusters: {-1.0: 30, 1.0: 2, 2.0: 19, 3.0: 4, 4.0: 2}\n",
|
|
"No. of clusters: 4\n",
|
|
"Noise points: 30\n",
|
|
"Core points: 27\n",
|
|
"Label: 56\n",
|
|
"Clusters: {-1.0: 6, 1.0: 24}\n",
|
|
"No. of clusters: 1\n",
|
|
"Noise points: 6\n",
|
|
"Core points: 18\n",
|
|
"Label: 57\n",
|
|
"Clusters: {-1.0: 32, 1.0: 6, 2.0: 3}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 32\n",
|
|
"Core points: 9\n",
|
|
"Label: 58\n",
|
|
"Clusters: {-1.0: 21, 1.0: 14, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 21\n",
|
|
"Core points: 18\n",
|
|
"Label: 59\n",
|
|
"Clusters: {-1.0: 12, 1.0: 4, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 12\n",
|
|
"Core points: 8\n",
|
|
"Label: 60\n",
|
|
"Clusters: {-1.0: 26, 1.0: 3, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 26\n",
|
|
"Core points: 7\n",
|
|
"Label: 61\n",
|
|
"Clusters: {-1.0: 10, 1.0: 7, 2.0: 3, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 10\n",
|
|
"Core points: 12\n",
|
|
"Label: 62\n",
|
|
"Clusters: {-1.0: 4, 1.0: 4, 2.0: 12}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 4\n",
|
|
"Core points: 12\n",
|
|
"Label: 63\n",
|
|
"Clusters: {-1.0: 27, 1.0: 2, 2.0: 8, 3.0: 3, 4.0: 3}\n",
|
|
"No. of clusters: 4\n",
|
|
"Noise points: 27\n",
|
|
"Core points: 16\n",
|
|
"Label: 64\n",
|
|
"Clusters: {-1.0: 1, 1.0: 15}\n",
|
|
"No. of clusters: 1\n",
|
|
"Noise points: 1\n",
|
|
"Core points: 9\n",
|
|
"Label: 65\n",
|
|
"Clusters: {-1.0: 24, 1.0: 5, 2.0: 2, 3.0: 3, 4.0: 2, 5.0: 2}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 24\n",
|
|
"Core points: 14\n",
|
|
"Label: 66\n",
|
|
"Clusters: {-1.0: 14, 1.0: 6, 2.0: 6, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 14\n",
|
|
"Core points: 14\n",
|
|
"Label: 67\n",
|
|
"Clusters: {-1.0: 9, 1.0: 3, 2.0: 2, 3.0: 3}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 9\n",
|
|
"Core points: 8\n",
|
|
"Label: 68\n",
|
|
"Clusters: {-1.0: 4, 1.0: 16}\n",
|
|
"No. of clusters: 1\n",
|
|
"Noise points: 4\n",
|
|
"Core points: 12\n",
|
|
"Label: 69\n",
|
|
"Clusters: {-1.0: 9, 1.0: 2, 2.0: 3, 3.0: 5, 4.0: 4}\n",
|
|
"No. of clusters: 4\n",
|
|
"Noise points: 9\n",
|
|
"Core points: 14\n",
|
|
"Label: 70\n",
|
|
"Clusters: {-1.0: 3, 1.0: 16}\n",
|
|
"No. of clusters: 1\n",
|
|
"Noise points: 3\n",
|
|
"Core points: 11\n",
|
|
"Label: 71\n",
|
|
"Clusters: {-1.0: 19, 1.0: 2, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 19\n",
|
|
"Core points: 4\n",
|
|
"Label: 72\n",
|
|
"Clusters: {-1.0: 10, 1.0: 2, 2.0: 12, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 10\n",
|
|
"Core points: 16\n",
|
|
"Label: 73\n",
|
|
"Clusters: {-1.0: 5, 1.0: 7, 2.0: 3, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 5\n",
|
|
"Core points: 12\n",
|
|
"Label: 74\n",
|
|
"Clusters: {-1.0: 8, 1.0: 13, 2.0: 3, 3.0: 5}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 8\n",
|
|
"Core points: 16\n",
|
|
"Label: 75\n",
|
|
"Clusters: {-1.0: 34, 1.0: 3, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 34\n",
|
|
"Core points: 7\n",
|
|
"Label: 76\n",
|
|
"Clusters: {-1.0: 22, 1.0: 2, 2.0: 2, 3.0: 3}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 22\n",
|
|
"Core points: 7\n",
|
|
"Label: 77\n",
|
|
"Clusters: {-1.0: 14, 1.0: 9, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 14\n",
|
|
"Core points: 11\n",
|
|
"Label: 78\n",
|
|
"Clusters: {-1.0: 5, 1.0: 15}\n",
|
|
"No. of clusters: 1\n",
|
|
"Noise points: 5\n",
|
|
"Core points: 11\n",
|
|
"Label: 79\n",
|
|
"Clusters: {-1.0: 17, 1.0: 8, 2.0: 3, 3.0: 3}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 17\n",
|
|
"Core points: 14\n",
|
|
"Label: 80\n",
|
|
"Clusters: {-1.0: 10, 1.0: 8, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 10\n",
|
|
"Core points: 10\n",
|
|
"Label: 81\n",
|
|
"Clusters: {-1.0: 16, 1.0: 22, 2.0: 4}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 16\n",
|
|
"Core points: 22\n",
|
|
"Label: 82\n",
|
|
"Clusters: {-1.0: 14, 1.0: 7, 2.0: 3, 3.0: 2, 4.0: 2}\n",
|
|
"No. of clusters: 4\n",
|
|
"Noise points: 14\n",
|
|
"Core points: 14\n",
|
|
"Label: 83\n",
|
|
"Clusters: {-1.0: 10, 1.0: 2, 2.0: 2, 3.0: 4}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 10\n",
|
|
"Core points: 8\n",
|
|
"Label: 84\n",
|
|
"Clusters: {-1.0: 19, 1.0: 7, 2.0: 2, 3.0: 2, 4.0: 2}\n",
|
|
"No. of clusters: 4\n",
|
|
"Noise points: 19\n",
|
|
"Core points: 13\n",
|
|
"Label: 85\n",
|
|
"Clusters: {-1.0: 6, 1.0: 11, 2.0: 5}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 6\n",
|
|
"Core points: 14\n",
|
|
"Label: 86\n",
|
|
"Clusters: {-1.0: 25, 1.0: 9, 2.0: 2, 3.0: 2, 4.0: 2, 5.0: 3}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 25\n",
|
|
"Core points: 18\n",
|
|
"Label: 87\n",
|
|
"Clusters: {-1.0: 18, 1.0: 7, 2.0: 2, 3.0: 3}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 18\n",
|
|
"Core points: 12\n",
|
|
"Label: 88\n",
|
|
"Clusters: {-1.0: 21, 1.0: 7, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 21\n",
|
|
"Core points: 11\n",
|
|
"Label: 89\n",
|
|
"Clusters: {-1.0: 12, 1.0: 3, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 12\n",
|
|
"Core points: 5\n",
|
|
"Label: 90\n",
|
|
"Clusters: {-1.0: 25, 1.0: 3, 2.0: 7, 3.0: 2, 4.0: 2, 5.0: 4}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 25\n",
|
|
"Core points: 18\n",
|
|
"Label: 91\n",
|
|
"Clusters: {-1.0: 12, 1.0: 10, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 12\n",
|
|
"Core points: 12\n",
|
|
"Label: 92\n",
|
|
"Clusters: {-1.0: 28, 1.0: 11, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 28\n",
|
|
"Core points: 15\n",
|
|
"Label: 93\n",
|
|
"Clusters: {-1.0: 29, 1.0: 6, 2.0: 3}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 29\n",
|
|
"Core points: 9\n",
|
|
"Label: 94\n",
|
|
"Clusters: {-1.0: 65, 1.0: 45, 2.0: 3, 3.0: 2, 4.0: 2, 5.0: 2}\n",
|
|
"No. of clusters: 5\n",
|
|
"Noise points: 65\n",
|
|
"Core points: 54\n",
|
|
"Label: 95\n",
|
|
"Clusters: {-1.0: 12, 1.0: 2, 2.0: 2, 3.0: 3}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 12\n",
|
|
"Core points: 7\n",
|
|
"Label: 96\n",
|
|
"Clusters: {-1.0: 22, 1.0: 5, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 22\n",
|
|
"Core points: 7\n",
|
|
"Label: 97\n",
|
|
"Clusters: {-1.0: 13, 1.0: 2, 2.0: 2}\n",
|
|
"No. of clusters: 2\n",
|
|
"Noise points: 13\n",
|
|
"Core points: 4\n",
|
|
"Label: 98\n",
|
|
"Clusters: {-1.0: 15, 1.0: 6, 2.0: 5, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 15\n",
|
|
"Core points: 13\n",
|
|
"Label: 99\n",
|
|
"Clusters: {-1.0: 12, 1.0: 4, 2.0: 2, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 12\n",
|
|
"Core points: 8\n",
|
|
"Label: 100\n",
|
|
"Clusters: {-1.0: 13, 1.0: 12, 2.0: 3, 3.0: 2}\n",
|
|
"No. of clusters: 3\n",
|
|
"Noise points: 13\n",
|
|
"Core points: 17\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"for best_model in best_models:\n",
|
|
" label_imgs = []\n",
|
|
" label_img_ids = [\n",
|
|
" label_img[\"image_id\"]\n",
|
|
" for label_img in fd_collection.find({\"true_label\": best_model.label})\n",
|
|
" ]\n",
|
|
" for img_id in label_img_ids:\n",
|
|
" img, true_label = dataset[img_id]\n",
|
|
" label_imgs.append(transforms.ToPILImage()(img))\n",
|
|
" # Interpretation\n",
|
|
" print(\"Label:\", best_model.label)\n",
|
|
" # print(\"Epsilon:\", best_model.eps, \"\\tMinPts:\", best_model.min_samples)\n",
|
|
" display_cluster_stats(best_model.clusters)\n",
|
|
" print(\"Core points:\", len(best_model.core_points))\n",
|
|
" # MDS point cloud\n",
|
|
" best_model.mds_scatter_clusters()\n",
|
|
"    # Image thumbnail overlay\n",
|
|
" best_model.group_image_clusters(label_imgs)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"full_fd_collection = getCollection(\"knravish_mwdb_phase_1\", \"fd_collection\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Collect (image_id, true_label, feature vector) for every core point, for nearest-core-point label prediction\n",
|
|
"all_core_pts = []\n",
|
|
"for best_model in best_models:\n",
|
|
" all_core_pts.extend(best_model.core_points)\n",
|
|
"all_core_pts = [\n",
|
|
" (x[\"image_id\"], x[\"true_label\"], np.array(x[selected_feature_model]))\n",
|
|
" for x in full_fd_collection.find({\"image_id\": {\"$in\": all_core_pts}})\n",
|
|
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[(898, 2, 0.04471824055950302), (998, 2, 0.04670729369738935), (896, 2, 0.05814791090290394), (886, 2, 0.06548439873670464), (970, 2, 0.06915823780454072)]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# all odd images (full run); the active loop below is limited to a single image\n",
|
|
"# for img_id in range(1, 8676, 2):\n",
|
|
"for img_id in [881]:\n",
|
|
" img_fd = np.array(\n",
|
|
" full_fd_collection.find_one({\"image_id\": img_id})[selected_feature_model]\n",
|
|
" )\n",
|
|
" distances = []\n",
|
|
" for core_pt in all_core_pts:\n",
|
|
" distances.append(\n",
|
|
" (\n",
|
|
" core_pt[0],\n",
|
|
" core_pt[1],\n",
|
|
" selected_distance_measure(\n",
|
|
" core_pt[2],\n",
|
|
" img_fd,\n",
|
|
" ),\n",
|
|
" )\n",
|
|
" )\n",
|
|
" print(sorted(distances, key=lambda dist: dist[2])[:selected_c])\n",
|
|
" break"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|