Add files via upload

2026-01-25 06:14:04 +00:00 · 2023-11-25 14:10:58 -07:00
parent 40e5144d53
commit e293f98ee1
1 changed files with 124 additions and 0 deletions
--- a/3/phase3_task0a.ipynb
+++ b/3/phase3_task0a.ipynb
@@ -0,0 +1,124 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils import *\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "fd_collection = getCollection(\"team_5_mwdb_phase_2\", \"fd_collection\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pca_inherent_dimensionality(data, threshold):\n",
    "\n",
    "    # Calculate the mean of the data\n",
    "    mean = np.mean(data, axis=0)\n",
    "    # Center the data by subtracting the mean\n",
    "    centered_data = data - mean\n",
    "    # Normalize the data\n",
    "    normalized_data = centered_data / np.std(centered_data, axis=0)\n",
    "\n",
    "    # Reshape the centered data to ensure compatible dimensions\n",
    "    if(len(normalized_data.shape)==3):\n",
    "        reshaped_normalized_data = normalized_data.reshape(normalized_data.shape[0], normalized_data.shape[2])\n",
    "    else:\n",
    "        reshaped_normalized_data=normalized_data\n",
    "\n",
    "    # Calculate the covariance matrix\n",
    "    #covariance_matrix = np.dot(reshaped_normalized_data.T, reshaped_normalized_data)\n",
    "    covariance_matrix = np.dot(reshaped_normalized_data.T, reshaped_normalized_data) / (reshaped_normalized_data.shape[0] - 1) #to bring the values in the range of 0 to 1\n",
    "\n",
    "    # Compute the eigenvalues and eigenvectors of the covariance matrix\n",
    "    eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)\n",
    "    # Sort the eigenvalues in descending order\n",
    "    #sorted_indices = np.argsort(eigenvalues)[::-1]\n",
    "    # Sort the eigenvectors accordingly\n",
    "    #sorted_eigenvectors = eigenvectors[:, sorted_indices]\n",
    "    #print(sorted_eigenvectors)\n",
    "    #print(sorted_indices)\n",
    "\n",
    "    # Calculate the mean of each subarray- the sorted_eigenvectors are in the form of subarrays, while computing the inherent dimensionality, each value is compared with \n",
    "    #the threshold, hence mean of each subarray is computed and then it is compared with the threshold value (I am not sure if we can do this?)\n",
    "    #means = np.mean(sorted_eigenvectors, axis=1)\n",
    "    \n",
    "    # Determine the number of eigenvalues greater than the threshold\n",
    "    #inherent_dimensionality = np.sum(means>threshold)\n",
    "    #inherent_dimensionality = len(significant_eigenvalues)\n",
    "    significant_eigenvalues = eigenvalues[eigenvalues > threshold]\n",
    "    inherent_dimensionality = len(significant_eigenvalues)\n",
    "    #significant_eigenvalues = sorted_eigenvectors[sorted_indices][eigenvalues > threshold]\n",
    "\n",
    "    return inherent_dimensionality"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inherent dimensionality associated with the even numbered images:  260\n"
     ]
    }
   ],
   "source": [
    "# Retrieve all feature spaces from the database\n",
    "data = []\n",
    "for document in fd_collection.find():\n",
    "    feature_space = document[\"fc_fd\"]\n",
    "    data.append(feature_space)\n",
    "\n",
    "threshold=0.5\n",
    "print(\"Inherent dimensionality associated with the even numbered images: \", pca_inherent_dimensionality(data, threshold))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }