{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils import *\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pca_inherent_dimensionality(data, threshold):\n",
    "    \"\"\"Count the principal components whose eigenvalue exceeds ``threshold``.\n",
    "\n",
    "    Each feature column is centred and scaled to unit (population) standard\n",
    "    deviation, the covariance matrix of the scaled features is formed with an\n",
    "    ``n_samples - 1`` denominator, and its eigenvalues are compared with\n",
    "    ``threshold``.\n",
    "\n",
    "    Args:\n",
    "        data: Array-like of feature vectors shaped ``(n_samples, n_features)``\n",
    "            or ``(n_samples, 1, n_features)``; trailing axes are flattened\n",
    "            into a single feature axis.\n",
    "        threshold: Only eigenvalues strictly greater than this are counted.\n",
    "\n",
    "    Returns:\n",
    "        int: Number of eigenvalues greater than ``threshold``.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``data`` holds fewer than two samples.\n",
    "    \"\"\"\n",
    "    samples = np.asarray(data, dtype=float)\n",
    "    if samples.ndim == 0 or samples.shape[0] < 2:\n",
    "        raise ValueError(\"pca_inherent_dimensionality needs at least two samples\")\n",
    "    n_samples = samples.shape[0]\n",
    "    # One row per sample; collapses the singleton axis of (n, 1, d) inputs.\n",
    "    samples = samples.reshape(n_samples, -1)\n",
    "\n",
    "    centered = samples - samples.mean(axis=0)\n",
    "    scale = centered.std(axis=0)\n",
    "    # Constant features have zero spread; dividing by 1 keeps them at zero\n",
    "    # instead of filling the covariance matrix with NaNs.\n",
    "    scale[scale == 0] = 1.0\n",
    "    standardized = centered / scale\n",
    "\n",
    "    # std() above uses ddof=0 while this uses n - 1, so the diagonal is\n",
    "    # n / (n - 1) rather than exactly 1; kept to preserve earlier results.\n",
    "    covariance = standardized.T @ standardized / (n_samples - 1)\n",
    "\n",
    "    # The matrix is symmetric, so eigvalsh returns real eigenvalues\n",
    "    # (np.linalg.eig may return complex values from round-off).\n",
    "    eigenvalues = np.linalg.eigvalsh(covariance)\n",
    "    return int(np.count_nonzero(eigenvalues > threshold))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inherent dimensionality associated with the even numbered images: 260\n"
     ]
    }
   ],
   "source": [
    "# Retrieve all feature spaces from the database\n",
    "data = [document[\"fc_fd\"] for document in fd_collection.find()]\n",
    "\n",
    "threshold = 0.5\n",
    "print(\"Inherent dimensionality associated with the even numbered images: \", pca_inherent_dimensionality(data, threshold))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}