Add files via upload

This commit is contained in:
MadhuraWani803 2023-11-25 14:10:58 -07:00 committed by GitHub
parent 40e5144d53
commit e293f98ee1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

124
Phase 3/phase3_task0a.ipynb Normal file
View File

@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from utils import *\n",
"warnings.filterwarnings('ignore')\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"def pca_inherent_dimensionality(data, threshold):\n",
"\n",
" # Calculate the mean of the data\n",
" mean = np.mean(data, axis=0)\n",
" # Center the data by subtracting the mean\n",
" centered_data = data - mean\n",
" # Normalize the data\n",
" normalized_data = centered_data / np.std(centered_data, axis=0)\n",
"\n",
" # Reshape the centered data to ensure compatible dimensions\n",
" if(len(normalized_data.shape)==3):\n",
" reshaped_normalized_data = normalized_data.reshape(normalized_data.shape[0], normalized_data.shape[2])\n",
" else:\n",
" reshaped_normalized_data=normalized_data\n",
"\n",
" # Calculate the covariance matrix\n",
" #covariance_matrix = np.dot(reshaped_normalized_data.T, reshaped_normalized_data)\n",
" covariance_matrix = np.dot(reshaped_normalized_data.T, reshaped_normalized_data) / (reshaped_normalized_data.shape[0] - 1) #to bring the values in the range of 0 to 1\n",
"\n",
" # Compute the eigenvalues and eigenvectors of the covariance matrix\n",
" eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)\n",
" # Sort the eigenvalues in descending order\n",
" #sorted_indices = np.argsort(eigenvalues)[::-1]\n",
" # Sort the eigenvectors accordingly\n",
" #sorted_eigenvectors = eigenvectors[:, sorted_indices]\n",
" #print(sorted_eigenvectors)\n",
" #print(sorted_indices)\n",
"\n",
" # Calculate the mean of each subarray- the sorted_eigenvectors are in the form of subarrays, while computing the inherent dimensionality, each value is compared with \n",
" #the threshold, hence mean of each subarray is computed and then it is compared with the threshold value (I am not sure if we can do this?)\n",
" #means = np.mean(sorted_eigenvectors, axis=1)\n",
" \n",
" # Determine the number of eigenvalues greater than the threshold\n",
" #inherent_dimensionality = np.sum(means>threshold)\n",
" #inherent_dimensionality = len(significant_eigenvalues)\n",
" significant_eigenvalues = eigenvalues[eigenvalues > threshold]\n",
" inherent_dimensionality = len(significant_eigenvalues)\n",
" #significant_eigenvalues = sorted_eigenvectors[sorted_indices][eigenvalues > threshold]\n",
"\n",
" return inherent_dimensionality"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Inherent dimensionality associated with the even numbered images: 260\n"
]
}
],
"source": [
"# Retrieve all feature spaces from the database\n",
"data = []\n",
"for document in fd_collection.find():\n",
" feature_space = document[\"fc_fd\"]\n",
" data.append(feature_space)\n",
"\n",
"threshold=0.5\n",
"print(\"Inherent dimensionality associated with the even numbered images: \", pca_inherent_dimensionality(data, threshold))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}