In [24]:
from utils import *
warnings.filterwarnings('ignore')
%matplotlib inline
from statistics import mode
import pickle
from sklearn.metrics import precision_recall_fscore_support
from sklearn.decomposition import PCA

In [25]:
# Collection handle that later cells query with .find() to fetch image records.
database_name = "team_5_mwdb_phase_2"
collection_name = "fd_collection"
fd_collection = getCollection(database_name, collection_name)

In [26]:
# Key read from every image record when the feature vectors are built.
selected_feature_model = "fc_fd"

# Ask which classifier to run. Surrounding whitespace is dropped so that a
# stray space does not turn a valid choice into a failed lookup.
classification_method = input(
    "Enter classification method - one of "
    + str(list(valid_classification_methods.keys()))
).strip()

# Fail fast: without this check an unknown name only surfaces as a bare
# KeyError in the classification cell, after all the data has been loaded.
if classification_method not in valid_classification_methods:
    raise ValueError(
        "Unknown classification method "
        + repr(classification_method)
        + "; expected one of "
        + str(list(valid_classification_methods.keys()))
    )

# `m` (number of neighbours) is only requested, and only defined, for "m-nn".
if classification_method == "m-nn":
    m = int(input("Enter value of m: "))
    if m < 1:
        raise ValueError("m should be a positive integer")

In [27]:
# Load every image record, ordered by image_id.
all_images = sorted(fd_collection.find(), key=lambda x: x["image_id"])

# One flattened feature vector per record, in the same order as all_images.
feature_vectors = [np.array(img[selected_feature_model]).flatten() for img in all_images]
total_len = len(feature_vectors)

# Parity of image_id decides the split: even ids are the labelled reference
# set, odd ids are the images to classify. The same mask is used for ids,
# labels and feature vectors so the three can never drift out of alignment
# (splitting the vectors by list position instead would silently misalign
# them whenever an image_id is missing from the collection).
is_even = [img["image_id"] % 2 == 0 for img in all_images]

# Fixed: this list previously selected the even ids.
odd_image_ids = [img["image_id"] for img, even in zip(all_images, is_even) if not even]

even_image_labels = [img["true_label"] for img, even in zip(all_images, is_even) if even]
odd_image_labels = [img["true_label"] for img, even in zip(all_images, is_even) if not even]

even_feature_vectors = np.array(
    [vec for vec, even in zip(feature_vectors, is_even) if even]
)
odd_feature_vectors = np.array(
    [vec for vec, even in zip(feature_vectors, is_even) if not even]
)

odd_len = odd_feature_vectors.shape[0]
even_len = even_feature_vectors.shape[0]

In [28]:
match valid_classification_methods[classification_method]:

    case 1:

        predictions = []

        for i, odd_vector in enumerate(odd_feature_vectors):

            pq = []

            for j, even_vector in enumerate(even_feature_vectors):
                
                distance = np.linalg.norm(odd_vector - even_vector)

                if len(pq) < m:
                    heapq.heappush(pq, (-distance, even_image_labels[j]))
                else:
                    heapq.heappushpop(pq, (-distance, even_image_labels[j]))
            
            labels = [label for dist, label in pq]

            
            pred = max(set(labels), key = labels.count)

            predictions.append(pred)

            print(f"Image ID: {i * 2 + 1} is similar to label {pred}")
    

    case 2:

        max_depth = 10
        reduced_dimensionality = 150

        pca = PCA(n_components = reduced_dimensionality)
        even_feature_vectors = pca.fit_transform(even_feature_vectors)
        odd_feature_vectors = pca.fit_transform(odd_feature_vectors)
        

        if os.path.exists(f'decision_tree_{max_depth}_{reduced_dimensionality}.pkl'):
            with open(f'decision_tree_{max_depth}_{reduced_dimensionality}.pkl', 'rb') as file:
                tree = pickle.load(file)
                print("Decision tree loaded")
        else:
            print("Creating the decision tree ...")
            tree = DecisionTree(max_depth = max_depth)
            tree.fit(even_feature_vectors, np.array(even_image_labels))
            print("Decision tree formed")
            with open(f'decision_tree_{max_depth}_{reduced_dimensionality}.pkl', 'wb') as file:
                pickle.dump(tree, file)

        predictions = tree.predict(odd_feature_vectors)

        pred_len = len(predictions)

        for i in range(pred_len):
            print(f"Image ID: {i * 2 + 1} is similar to label {predictions[i]}")


Image ID: 1 is similar to label 0
Image ID: 3 is similar to label 1
Image ID: 5 is similar to label 0
Image ID: 7 is similar to label 0
Image ID: 9 is similar to label 0
Image ID: 11 is similar to label 1
Image ID: 13 is similar to label 0
Image ID: 15 is similar to label 0
Image ID: 17 is similar to label 0
Image ID: 19 is similar to label 1
Image ID: 21 is similar to label 0
Image ID: 23 is similar to label 1
Image ID: 25 is similar to label 0
Image ID: 27 is similar to label 0
Image ID: 29 is similar to label 0
Image ID: 31 is similar to label 0
Image ID: 33 is similar to label 1
Image ID: 35 is similar to label 0
Image ID: 37 is similar to label 1
Image ID: 39 is similar to label 0
Image ID: 41 is similar to label 1
Image ID: 43 is similar to label 0
Image ID: 45 is similar to label 0
Image ID: 47 is similar to label 0
Image ID: 49 is similar to label 0
Image ID: 51 is similar to label 0
Image ID: 53 is similar to label 0
Image ID: 55 is similar to label 0
Image ID: 57 is similar t

In [29]:
# Number of classes passed to calculate_metrics and reported below.
metric_class_count = 101

# Score the predictions for the odd images against their true labels.
precision, recall, f1_score, accuracy = calculate_metrics(
    odd_image_labels,
    predictions,
    metric_class_count,
)

# One line per class, then the overall accuracy as a percentage.
class_idx = 0
while class_idx < metric_class_count:
    print(f"Class {class_idx}: Precision={precision[class_idx]}, Recall={recall[class_idx]}, F1-score={f1_score[class_idx]}")
    class_idx += 1

print(f"Accuracy: {accuracy * 100}%")

Class 0: Precision=0.9137931034482759, Recall=0.7327188940092166, F1-score=0.813299232736573
Class 1: Precision=0.7821011673151751, Recall=0.9220183486238532, F1-score=0.8463157894736842
Class 2: Precision=0.9615384615384616, Recall=1.0, F1-score=0.9803921568627451
Class 3: Precision=0.9708029197080292, Recall=1.0, F1-score=0.9851851851851852
Class 4: Precision=0.9310344827586207, Recall=1.0, F1-score=0.9642857142857143
Class 5: Precision=0.9255813953488372, Recall=0.995, F1-score=0.9590361445783133
Class 6: Precision=1.0, Recall=0.047619047619047616, F1-score=0.0909090909090909
Class 7: Precision=0.9, Recall=0.8571428571428571, F1-score=0.8780487804878048
Class 8: Precision=0.9565217391304348, Recall=0.9166666666666666, F1-score=0.9361702127659574
Class 9: Precision=0.9523809523809523, Recall=0.7407407407407407, F1-score=0.8333333333333334
Class 10: Precision=0.85, Recall=0.7391304347826086, F1-score=0.7906976744186046
Class 11: Precision=0.9375, Recall=0.9375, F1-score=0.9375
Class 1