"""Estimate the inherent dimensionality of stored feature descriptors via PCA.

Script form of the code cells of the ``Phase 3/phase3_task0a.ipynb`` notebook.
The notebook's ``%matplotlib inline`` magic is omitted: it is IPython-only
syntax and nothing in this file plots.
"""

import warnings

import numpy as np


def pca_inherent_dimensionality(data, threshold):
    """Count the principal components whose eigenvalue exceeds *threshold*.

    Each feature (column) is standardized to zero mean and unit standard
    deviation, the covariance matrix of the standardized features is formed,
    and its eigenvalues are compared against ``threshold``.

    Args:
        data: Array-like of shape ``(n_samples, n_features)`` or
            ``(n_samples, 1, n_features)``; one row per sample.
        threshold: Eigenvalues strictly greater than this value are counted.

    Returns:
        int: Number of eigenvalues of the covariance matrix above
        ``threshold``.

    Raises:
        ValueError: If ``data`` cannot be viewed as a 2-D matrix with at least
            two samples (the covariance is normalized by ``n_samples - 1``).
    """
    samples = np.asarray(data, dtype=float)

    # Descriptors stored as (n_samples, 1, n_features) are flattened to 2-D.
    if samples.ndim == 3:
        samples = samples.reshape(samples.shape[0], samples.shape[2])
    if samples.ndim != 2 or samples.shape[0] < 2:
        raise ValueError(
            "data must be a 2-D matrix with at least two samples, "
            f"got shape {samples.shape}"
        )

    centered = samples - samples.mean(axis=0)
    spread = centered.std(axis=0)

    # A constant feature has zero spread; dividing by it would give 0/0 = NaN
    # and poison the whole covariance matrix. Such a feature carries no
    # variance, so zero it out and divide by 1 instead. Constant columns are
    # detected via max == min, which is exact even when the mean is not.
    constant = samples.max(axis=0) == samples.min(axis=0)
    centered[:, constant] = 0.0
    spread[constant] = 1.0
    standardized = centered / spread

    # Sample covariance of the standardized features (n - 1 normalization).
    n_samples = standardized.shape[0]
    covariance = standardized.T @ standardized / (n_samples - 1)

    # The covariance matrix is symmetric, so use the symmetric solver: it
    # returns real eigenvalues only and skips the unused eigenvectors, whereas
    # the general solver can return complex values with round-off imaginary
    # parts.
    eigenvalues = np.linalg.eigvalsh(covariance)

    return int(np.count_nonzero(eigenvalues > threshold))


def main():
    """Load every stored ``fc_fd`` descriptor and print its dimensionality."""
    # Project-local helper, imported here so that importing this module does
    # not require the project's ``utils`` module or a database connection.
    from utils import getCollection

    warnings.filterwarnings('ignore')

    # NOTE(review): presumably a MongoDB collection handle - confirm in utils.
    fd_collection = getCollection("team_5_mwdb_phase_2", "fd_collection")

    # Retrieve the "fc_fd" feature descriptor of every document.
    data = [document["fc_fd"] for document in fd_collection.find()]

    threshold = 0.5
    print("Inherent dimensionality associated with the even numbered images: ", pca_inherent_dimensionality(data, threshold))


if __name__ == "__main__":
    main()