mirror of
https://github.com/20kaushik02/CSE515_MWDB_Project.git
synced 2025-12-06 07:44:07 +00:00
125 lines
4.1 KiB
Plaintext
125 lines
4.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from utils import *\n",
|
|
"warnings.filterwarnings('ignore')\n",
|
|
"%matplotlib inline"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": 3,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Handle used by later cells to read the stored feature descriptors.\n",
  "# NOTE(review): the arguments are presumably (database name, collection name);\n",
  "# confirm against getCollection in utils.\n",
  "fd_collection = getCollection(\n",
  "    \"team_5_mwdb_phase_2\",\n",
  "    \"fd_collection\",\n",
  ")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 25,
 "metadata": {},
 "outputs": [],
 "source": [
  "def pca_inherent_dimensionality(data, threshold):\n",
  "    \"\"\"Count the principal components whose eigenvalue exceeds ``threshold``.\n",
  "\n",
  "    The features are standardized (zero mean, unit standard deviation per\n",
  "    column) and the eigenvalues of the resulting covariance matrix are\n",
  "    compared against ``threshold``.\n",
  "\n",
  "    Args:\n",
  "        data: Array-like of samples. The first axis indexes samples; any\n",
  "            remaining axes are flattened into one feature axis, so both\n",
  "            (n, d) and (n, 1, d) inputs are accepted.\n",
  "        threshold: Eigenvalue cut-off; only eigenvalues strictly greater\n",
  "            than this value are counted.\n",
  "\n",
  "    Returns:\n",
  "        int: Number of eigenvalues greater than ``threshold``. Returns 0\n",
  "        when fewer than two samples are supplied, because no covariance\n",
  "        can be estimated from them.\n",
  "    \"\"\"\n",
  "    samples = np.asarray(data, dtype=float)\n",
  "    if samples.ndim == 0 or samples.shape[0] < 2 or samples.size == 0:\n",
  "        return 0\n",
  "\n",
  "    # Flatten every sample into a single feature vector: (n, ...) -> (n, d).\n",
  "    samples = samples.reshape(samples.shape[0], -1)\n",
  "\n",
  "    centered = samples - samples.mean(axis=0)\n",
  "    spread = centered.std(axis=0)\n",
  "    # Constant features have zero spread; leave them at 0 instead of dividing\n",
  "    # by zero, which would fill the whole covariance matrix with NaN.\n",
  "    standardized = np.divide(\n",
  "        centered, spread, out=np.zeros_like(centered), where=spread > 0\n",
  "    )\n",
  "\n",
  "    # Sample covariance of the standardized data. It divides by n - 1 while\n",
  "    # the standardization uses the population deviation, which keeps the\n",
  "    # eigenvalue scale of the previous implementation.\n",
  "    covariance = standardized.T @ standardized / (samples.shape[0] - 1)\n",
  "\n",
  "    # The covariance matrix is symmetric, so eigvalsh applies: it returns real\n",
  "    # eigenvalues, whereas eig may hand back a complex-typed array.\n",
  "    eigenvalues = np.linalg.eigvalsh(covariance)\n",
  "    return int(np.count_nonzero(eigenvalues > threshold))"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": 26,
 "metadata": {},
 "outputs": [
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "Inherent dimensionality associated with the even numbered images: 260\n"
   ]
  }
 ],
 "source": [
  "# Eigenvalue cut-off handed to pca_inherent_dimensionality below.\n",
  "threshold = 0.5\n",
  "\n",
  "# Collect the fc_fd entry of every record returned by fd_collection.find().\n",
  "data = [record[\"fc_fd\"] for record in fd_collection.find()]\n",
  "\n",
  "print(\n",
  "    \"Inherent dimensionality associated with the even numbered images: \",\n",
  "    pca_inherent_dimensionality(data, threshold),\n",
  ")"
 ]
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.11"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|