In [13]:
from utils import *
warnings.filterwarnings('ignore')
%matplotlib inline
from statistics import mode
import pickle
from sklearn.metrics import precision_recall_fscore_support
from sklearn.decomposition import PCA

In [14]:
# Handle to the stored feature descriptors; later cells call .find() on it to
# read every image document.
_DB_NAME = "team_5_mwdb_phase_2"
_COLLECTION_NAME = "fd_collection"
fd_collection = getCollection(_DB_NAME, _COLLECTION_NAME)

In [15]:
# Name of the per-image field whose stored vector is used as the feature.
selected_feature_model = "fc_fd"

# Ask which classifier to run; the accepted names are the keys of
# valid_classification_methods. Surrounding whitespace is dropped so that an
# accidental trailing space does not make a valid name look unknown.
classification_method = input(
    "Enter classification method - one of "
    + str(list(valid_classification_methods.keys()))
).strip()

# Fail right here with a readable message rather than with a bare KeyError
# when the method is looked up in valid_classification_methods later on.
if classification_method not in valid_classification_methods:
    raise ValueError(
        f"Unknown classification method {classification_method!r}; expected one of "
        + str(list(valid_classification_methods.keys()))
    )

# The m-NN classifier additionally needs the neighbour count m.
if classification_method == "m-nn":
    m = int(input("Enter value of m: "))
    if m < 1:
        raise ValueError("m should be a positive integer")

In [16]:
# Read every image document and order them by image_id so that the even/odd
# partitions below come out in ascending id order.
all_images = sorted(fd_collection.find(), key=lambda img: img["image_id"])

# Parity of each image_id decides the partition. The same mask is used for ids,
# labels AND feature vectors, so row k of a feature matrix always belongs to
# entry k of the matching label list -- even when the ids have gaps or do not
# start at 0 (splitting vectors by list position would silently misalign them).
is_even_id = [img["image_id"] % 2 == 0 for img in all_images]

# Ids of the odd-numbered images (previously filtered with `== 0`, which
# collected the even ids under this name).
odd_image_ids = [
    img["image_id"] for img, even in zip(all_images, is_even_id) if not even
]

even_image_labels = [
    img["true_label"] for img, even in zip(all_images, is_even_id) if even
]
odd_image_labels = [
    img["true_label"] for img, even in zip(all_images, is_even_id) if not even
]

# One flat 1-D vector per image for the selected feature model.
feature_vectors = [np.array(img[selected_feature_model]).flatten() for img in all_images]

total_len = len(feature_vectors)

even_feature_vectors = np.array(
    [vec for vec, even in zip(feature_vectors, is_even_id) if even]
)
odd_feature_vectors = np.array(
    [vec for vec, even in zip(feature_vectors, is_even_id) if not even]
)

odd_len = odd_feature_vectors.shape[0]
even_len = even_feature_vectors.shape[0]

In [17]:
match valid_classification_methods[classification_method]:

    case 1:

        predictions = []

        for i, odd_vector in enumerate(odd_feature_vectors):

            pq = []

            for j, even_vector in enumerate(even_feature_vectors):
                
                distance = np.linalg.norm(odd_vector - even_vector)

                if len(pq) < m:
                    heapq.heappush(pq, (-distance, even_image_labels[j]))
                else:
                    heapq.heappushpop(pq, (-distance, even_image_labels[j]))
            
            labels = [label for dist, label in pq]

            
            pred = max(set(labels), key = labels.count)

            predictions.append(pred)

            print(f"Image ID: {i * 2 + 1} is similar to label {pred}")
    

    case 2:

        max_depth = 10

        pca = PCA(n_components = 150)
        even_feature_vectors = pca.fit_transform(even_feature_vectors)
        odd_feature_vectors = pca.fit_transform(odd_feature_vectors)

        if os.path.exists(f'decision_tree_{max_depth}.pkl'):
            with open(f'decision_tree_{max_depth}.pkl', 'rb') as file:
                tree = pickle.load(file)
                print("Decision tree loaded")
        else:
            print("Creating the decision tree ...")
            tree = DecisionTree(max_depth = max_depth)
            tree.fit(even_feature_vectors, np.array(even_image_labels))
            print("Decision tree formed")
            with open(f'decision_tree_{max_depth}.pkl', 'wb') as file:
                pickle.dump(tree, file)

        predictions = tree.predict(odd_feature_vectors)

        pred_len = len(predictions)

        for i in range(pred_len):
            print(f"Image ID: {i * 2 + 1} is similar to label {predictions[i]}")

    case 3:

        predictions = []

        # classifier = PPRClassifier()
        # classifier.fit(even_feature_vectors, even_image_labels)

        # predictions = classifier.predict(odd_feature_vectors)

        # for i, predicted_label in enumerate(predictions):
        #     print(f"Image ID: {i * 2 + 1}, Label: {predicted_label}")


Creating the decision tree ...
Decision tree formed
Image ID: 1 is similar to label 12
Image ID: 3 is similar to label 1
Image ID: 5 is similar to label 0
Image ID: 7 is similar to label 0
Image ID: 9 is similar to label 1
Image ID: 11 is similar to label 0
Image ID: 13 is similar to label 0
Image ID: 15 is similar to label 1
Image ID: 17 is similar to label 1
Image ID: 19 is similar to label 1
Image ID: 21 is similar to label 1
Image ID: 23 is similar to label 1
Image ID: 25 is similar to label 1
Image ID: 27 is similar to label 1
Image ID: 29 is similar to label 1
Image ID: 31 is similar to label 1
Image ID: 33 is similar to label 0
Image ID: 35 is similar to label 1
Image ID: 37 is similar to label 1
Image ID: 39 is similar to label 94
Image ID: 41 is similar to label 1
Image ID: 43 is similar to label 0
Image ID: 45 is similar to label 0
Image ID: 47 is similar to label 0
Image ID: 49 is similar to label 1
Image ID: 51 is similar to label 1
Image ID: 53 is similar to label 0
Image 

In [18]:
# Per-class precision / recall / F1 of the predictions against the true labels
# of the odd-id images, reported for class ids 0..100.
class_ids = range(101)
precision, recall, f1_score, _ = precision_recall_fscore_support(
    odd_image_labels,
    predictions,
    labels=class_ids,
)

for class_id, (class_precision, class_recall, class_f1) in enumerate(
    zip(precision, recall, f1_score)
):
    print(f"Class {class_id}: Precision={class_precision}, Recall={class_recall}, F1-score={class_f1}")

# Overall accuracy: share of odd-id images whose predicted label equals the
# true label, expressed as a percentage.
matching_labels = [
    actual
    for actual, predicted in zip(odd_image_labels, predictions)
    if actual == predicted
]
correct_predictions = len(matching_labels)
fraction_correct = correct_predictions / len(odd_image_labels)
accuracy = fraction_correct * 100.0
print(f"Accuracy: {accuracy:.2f}%")


Class 0: Precision=0.4808743169398907, Recall=0.4055299539170507, F1-score=0.44
Class 1: Precision=0.5176056338028169, Recall=0.6743119266055045, F1-score=0.5856573705179282
Class 2: Precision=0.0, Recall=0.0, F1-score=0.0
Class 3: Precision=0.0, Recall=0.0, F1-score=0.0
Class 4: Precision=0.0, Recall=0.0, F1-score=0.0
Class 5: Precision=0.975609756097561, Recall=0.9, F1-score=0.9362808842652797
Class 6: Precision=0.0, Recall=0.0, F1-score=0.0
Class 7: Precision=0.0, Recall=0.0, F1-score=0.0
Class 8: Precision=0.0, Recall=0.0, F1-score=0.0
Class 9: Precision=0.0, Recall=0.0, F1-score=0.0
Class 10: Precision=0.0, Recall=0.0, F1-score=0.0
Class 11: Precision=0.0, Recall=0.0, F1-score=0.0
Class 12: Precision=0.01874115983026874, Recall=0.828125, F1-score=0.03665283540802213
Class 13: Precision=0.0, Recall=0.0, F1-score=0.0
Class 14: Precision=0.0, Recall=0.0, F1-score=0.0
Class 15: Precision=0.0625, Recall=0.023809523809523808, F1-score=0.034482758620689655
Class 16: Precision=0.051948051