mirror of https://github.com/20kaushik02/CSE546_Cloud_Computing_Projects.git (synced 2025-12-06 06:24:07 +00:00)
done!
commit 90e7947768 (parent 11cbf121b6)
439
Project-2/Part-2/grader_script_p2_v2.py
Normal file
@@ -0,0 +1,439 @@
|
||||
__copyright__ = "Copyright 2024, VISA Lab"
|
||||
__license__ = "MIT"
|
||||
|
||||
import pdb
|
||||
import time
|
||||
import botocore
|
||||
import argparse
|
||||
import textwrap
|
||||
import boto3
|
||||
from boto3 import client as boto3_client
|
||||
from botocore.exceptions import ClientError
|
||||
from datetime import datetime,timezone,timedelta
|
||||
import re
|
||||
import os
|
||||
import shutil
|
||||
|
||||
class aws_grader():
|
||||
def __init__(self, access_key, secret_key, buckets, lambda_names, region, asu_id):
|
||||
|
||||
self.access_key = access_key
|
||||
self.secret_key = secret_key
|
||||
self.region = region
|
||||
self.s3 = boto3_client('s3', aws_access_key_id=self.access_key,
|
||||
aws_secret_access_key=self.secret_key, region_name=region)
|
||||
self.cloudwatch = boto3_client('cloudwatch', aws_access_key_id=self.access_key,
|
||||
aws_secret_access_key=self.secret_key, region_name=region)
|
||||
self.iam_session = boto3.Session(aws_access_key_id=self.access_key,
|
||||
aws_secret_access_key=self.secret_key)
|
||||
self.s3_resources = self.iam_session.resource('s3', region)
|
||||
self.lambda_function = boto3_client('lambda', aws_access_key_id=self.access_key,
|
||||
aws_secret_access_key=self.secret_key, region_name=region)
|
||||
self.in_bucket_name = buckets[0]
|
||||
self.out_bucket_name = buckets[2]
|
||||
self.buckets = buckets
|
||||
self.lambda_names = lambda_names
|
||||
self.test_result = {}
|
||||
self.end_to_end_latency = 0
|
||||
self.output_folder = f"outputs_{asu_id}"
|
||||
self.match = ["Trump", "Biden", "Bean", "Depp", "Diesel", "Floki", "Freeman", "Obama"]
|
||||
self.total_points = 0
|
||||
|
||||
def validate_lambda_exists_each(self, name, TC_num, num):
|
||||
TC_num_sub = TC_num + "_" + str(chr(97 + num))
|
||||
|
||||
try:
|
||||
response = self.lambda_function.get_function(
|
||||
FunctionName=name
|
||||
)
|
||||
print(f"Lambda function {name} HTTPStatusCode {response['ResponseMetadata']['HTTPStatusCode']}")
|
||||
self.test_result[TC_num_sub] = "PASS"
|
||||
except self.lambda_function.exceptions.ResourceNotFoundException as e:
|
||||
print(f"Error {e}")
|
||||
self.test_result[TC_num_sub] = "FAIL"
|
||||
print(f"Test status of {TC_num_sub} : {self.test_result[TC_num_sub]}")
|
||||
def validate_lambda_exists(self, TC_num):
|
||||
self.validate_lambda_exists_each("video-splitting",TC_num, num=0)
|
||||
self.validate_lambda_exists_each("face-recognition", TC_num, num=1)
|
||||
if self.test_result[TC_num + "_" + str(chr(97 + 0))] == "FAIL" or self.test_result[TC_num + "_" + str(chr(97 + 1))] == "FAIL":
|
||||
self.total_points -= 10
|
||||
|
||||
def validate_s3_subfolders_each(self, buckets, in_objects, TC_num):
|
||||
for num, bucket in enumerate(buckets[1:]):
|
||||
print(f"\nComparing buckets {buckets[0]} and {bucket} ...")
|
||||
TC_num_sub = TC_num+"_"+str(chr(97+num))
|
||||
self.test_result[TC_num_sub] = "PASS"
|
||||
for obj in in_objects['Contents']:
|
||||
folder_name = obj['Key'].rsplit('.', 1)[0]
|
||||
out_objects = self.s3.list_objects_v2(Bucket=bucket, Prefix=folder_name, Delimiter='/')
|
||||
if out_objects['KeyCount'] == 1 or out_objects['KeyCount'] == 11:
|
||||
folder_name = out_objects['CommonPrefixes'][0]['Prefix'].rsplit("/")[0]
|
||||
prefix_name = out_objects['Prefix']
|
||||
if folder_name == prefix_name:
|
||||
print(f"{prefix_name} matches with {folder_name}")
|
||||
else:
|
||||
prefix_name = out_objects['Prefix']
|
||||
self.test_result[TC_num_sub] = "FAIL"
|
||||
print(f"NO folder named {prefix_name}\nExiting this test case... ")
|
||||
# print(f"DEBUG :: {out_objects}")
|
||||
break
|
||||
print(f"Test status of {TC_num_sub} : {self.test_result[TC_num_sub]}")
|
||||
|
||||
def validate_s3_subfolders(self, TC_num):
|
||||
in_objects = self.s3.list_objects_v2(Bucket=self.in_bucket_name)
|
||||
if in_objects['KeyCount'] == 0:
    # record a FAIL first so the status lookup below cannot raise a KeyError
    self.test_result[TC_num] = "FAIL"
    print(f"Empty bucket {self.in_bucket_name}")
    print(f"Test status of {TC_num} : {self.test_result[TC_num]}")
    return
self.validate_s3_subfolders_each(self.buckets, in_objects, TC_num)
|
||||
|
||||
def check_non_empty_folders(self, bucket_num, TC_num):
|
||||
bucket = self.s3_resources.Bucket(self.buckets[bucket_num])
|
||||
TC_num_sub = TC_num + "_" + str(chr(96 + bucket_num))
|
||||
self.test_result[TC_num_sub] = "FAIL"
|
||||
try:
|
||||
objects = list(bucket.objects.all())
|
||||
print(f"{self.buckets[bucket_num]} contains {len(objects)} objects")
|
||||
if bucket_num==4:
|
||||
prefix_pattern = r"test_\d{2}/[oO]utput-\d{2}.txt"
|
||||
else:
|
||||
prefix_pattern = r"test_\d{2}/[oO]utput-\d{2}.(jpg|jpeg)"
|
||||
count = self.count_values_with_prefix(objects, prefix_pattern)
|
||||
# print(f"DEBUG :: Bucket {stage_1_bucket} has {count} objects that matches the pattern")
|
||||
if count >= 100:
|
||||
self.test_result[TC_num_sub] = "PASS"
|
||||
except ClientError:
|
||||
print(f"Couldn't get objects for bucket {bucket.name}")
|
||||
raise
|
||||
print(f"Test status of {TC_num_sub} : {self.test_result[TC_num_sub]}\n")
|
||||
|
||||
def count_values_with_prefix(self,objects, prefix_pattern):
|
||||
count = 0
|
||||
for o in objects:
|
||||
if re.match(prefix_pattern, o.key):
|
||||
# print(f"DEBUG :: Object key '{o.key}' follows the pattern '{prefix_pattern}'")
|
||||
count += 1
|
||||
else:
|
||||
print(f"Object key '{o.key}' does NOT follows the pattern '{prefix_pattern}'")
|
||||
return count
|
||||
|
||||
def validate_bucket_objects(self, TC_num, bucket_num):
|
||||
bucket = self.buckets[bucket_num]
|
||||
bucket_res = self.s3_resources.Bucket(self.buckets[bucket_num])
|
||||
|
||||
self.test_result[TC_num] = "FAIL"
|
||||
try:
|
||||
objects = list(bucket_res.objects.all())
|
||||
print(f"{self.buckets[bucket_num]} contains {len(objects)} objects")
|
||||
if bucket_num == 2:
|
||||
prefix_pattern = r"test_\d{2}.txt"
|
||||
else:
|
||||
prefix_pattern = r"test_\d{2}.(jpg|jpeg)"
|
||||
count = self.count_values_with_prefix(objects, prefix_pattern)
|
||||
# print(f"DEBUG :: Bucket {stage_1_bucket} has {count} objects that matches the pattern")
|
||||
if count >= 100:
|
||||
self.test_result[TC_num] = "PASS"
|
||||
self.total_points += 20
|
||||
else:
|
||||
missing = 100 - count
|
||||
self.total_points = self.total_points + 20 - min(missing, 20)
|
||||
except ClientError:
|
||||
print(f"Couldn't get objects for bucket {bucket.name}")
|
||||
raise
|
||||
print(f"Test status of {TC_num} : {self.test_result[TC_num]}\n")
|
||||
|
||||
def validate_s3_output_objects(self, TC_num):
|
||||
in_bucket = self.s3_resources.Bucket(self.buckets[0])
|
||||
try:
|
||||
in_objects = list(in_bucket.objects.all())
|
||||
print(f"{self.buckets[0]} contains {len(in_objects)} objects")
|
||||
|
||||
# Check if all the folders of stage-1, stage-2, stage-3, and output bucket are non-empty
|
||||
self.check_non_empty_folders(1,TC_num=TC_num)
|
||||
self.check_non_empty_folders(2,TC_num=TC_num)
|
||||
self.check_non_empty_folders(3,TC_num=TC_num)
|
||||
self.check_non_empty_folders(4,TC_num=TC_num)
|
||||
|
||||
except ClientError:
|
||||
print(f"Couldn't get objects for bucket {in_bucket.name}")
|
||||
raise
|
||||
else:
|
||||
return
|
||||
|
||||
# Make sure the workload generator has been run, and that it finished within 15 minutes
# of when the CloudWatch metrics are polled (the queries below only look back 15 minutes).
|
||||
def check_lambda_duration_each(self, functionName, TC_num, subcase, threshold):
|
||||
TC_num_sub = TC_num + "_" + str(chr(96 + subcase))
|
||||
|
||||
response = self.cloudwatch.get_metric_data(
|
||||
MetricDataQueries=[
|
||||
{
|
||||
'Id': 'testDuration',
|
||||
'MetricStat': {
|
||||
'Metric': {
|
||||
'Namespace': 'AWS/Lambda',
|
||||
'MetricName': 'Duration',
|
||||
"Dimensions": [
|
||||
{
|
||||
"Name": "FunctionName",
|
||||
"Value": functionName
|
||||
}
|
||||
]
|
||||
},
|
||||
'Period': 600,
|
||||
'Stat': 'Average'
|
||||
},
|
||||
'ReturnData': True,
|
||||
},
|
||||
],
|
||||
StartTime=datetime.now(timezone.utc) - timedelta(minutes=15),
EndTime=datetime.now(timezone.utc),
|
||||
ScanBy='TimestampAscending'
|
||||
)
|
||||
print(response['MetricDataResults'][0]['Values'])
|
||||
values = response['MetricDataResults'][0]['Values']
|
||||
if not values:
|
||||
self.test_result[TC_num_sub] = "FAIL"
|
||||
print(f"Test status of {TC_num_sub} : {self.test_result[TC_num_sub]}")
|
||||
return
|
||||
if max(values) > threshold:
|
||||
self.test_result[TC_num_sub] = "FAIL"
|
||||
else:
|
||||
self.test_result[TC_num_sub] = "PASS"
|
||||
print(f"Test status of {TC_num_sub} : {self.test_result[TC_num_sub]}")
|
||||
|
||||
def check_lambda_duration(self, TC_num):
|
||||
self.check_lambda_duration_each('video-splitting', TC_num, 1, threshold=2000)
|
||||
self.check_lambda_duration_each('face-recognition', TC_num, 2, threshold=10000)
|
||||
|
||||
def check_lambda_concurrency_each(self,functionName, TC_num,subcase, threshold):
|
||||
TC_num_sub = TC_num + "_" + str(chr(96 + subcase))
|
||||
|
||||
response = self.cloudwatch.get_metric_data(
|
||||
MetricDataQueries=[
|
||||
{
|
||||
'Id': 'testConcurrency',
|
||||
'MetricStat': {
|
||||
'Metric': {
|
||||
'Namespace': 'AWS/Lambda',
|
||||
'MetricName': 'ConcurrentExecutions',
|
||||
"Dimensions": [
|
||||
{
|
||||
"Name": "FunctionName",
|
||||
"Value": functionName
|
||||
}
|
||||
]
|
||||
},
|
||||
'Period': 600,
|
||||
'Stat': 'Maximum'
|
||||
},
|
||||
'ReturnData': True,
|
||||
},
|
||||
],
|
||||
StartTime=datetime.now(timezone.utc) - timedelta(minutes=15),
EndTime=datetime.now(timezone.utc),
|
||||
ScanBy='TimestampAscending'
|
||||
)
|
||||
print(response['MetricDataResults'][0]['Values'])
|
||||
values = response['MetricDataResults'][0]['Values']
|
||||
if not values:
|
||||
self.test_result[TC_num_sub] = "FAIL"
|
||||
print(f"Test status of {TC_num_sub} : {self.test_result[TC_num_sub]}")
|
||||
return
|
||||
if max(values) < threshold:
|
||||
self.test_result[TC_num_sub] = "FAIL"
|
||||
else:
|
||||
self.test_result[TC_num_sub] = "PASS"
|
||||
print(f"Test status of {TC_num_sub} : {self.test_result[TC_num_sub]}")
|
||||
|
||||
def check_lambda_concurrency(self,TC_num):
|
||||
self.check_lambda_concurrency_each('video-splitting', TC_num,1, threshold=3)
|
||||
self.check_lambda_concurrency_each('face-recognition', TC_num,2, threshold=3)
|
||||
|
||||
def check_bucket_exist(self, bucket):
|
||||
if not bucket:
|
||||
print(f"Bucket name is empty!")
|
||||
return False
|
||||
try:
|
||||
self.s3.head_bucket(Bucket=bucket)
|
||||
print(f"Bucket {bucket} Exists!")
|
||||
return True
|
||||
except botocore.exceptions.ClientError as e:
|
||||
# If a client error is thrown, then check that it was a 404 error.
|
||||
# If it was a 404 error, then the bucket does not exist.
|
||||
error_code = int(e.response['Error']['Code'])
|
||||
if error_code == 403:
|
||||
print("Private Bucket. Forbidden Access!")
|
||||
return True
|
||||
elif error_code == 404:
|
||||
print(f"Bucket {bucket} does Not Exist!")
|
||||
return False
|
||||
def empty_s3_bucket(self, bucket_name):
|
||||
bucket = self.s3_resources.Bucket(bucket_name)
|
||||
bucket.objects.all().delete()
|
||||
print(f"{bucket_name} S3 Bucket is now EMPTY !!")
|
||||
|
||||
def count_bucket_objects(self, bucket_name):
|
||||
bucket = self.s3_resources.Bucket(bucket_name)
|
||||
count = 0
|
||||
for index in bucket.objects.all():
|
||||
count += 1
|
||||
#print(f"{bucket_name} S3 Bucket has {count} objects !!")
|
||||
return count
|
||||
|
||||
def validate_s3_buckets_initial_each(self, bucket_num, TC_num):
|
||||
TC_num_sub = TC_num + "_" + str(chr(97 + bucket_num))
|
||||
isExist = self.check_bucket_exist(self.buckets[bucket_num])
|
||||
self.test_result[TC_num_sub] = "FAIL"
|
||||
if isExist:
|
||||
obj_count = self.count_bucket_objects(self.buckets[bucket_num])
|
||||
print(f"S3 Bucket:{self.buckets[bucket_num]} has {obj_count} object(s)")
|
||||
if obj_count == 0:
|
||||
self.test_result[TC_num_sub] = "PASS"
|
||||
else:
|
||||
self.test_result[TC_num_sub] = "FAIL"
|
||||
print(f"Test status of {TC_num_sub} : {self.test_result[TC_num_sub]}\n")
|
||||
|
||||
def validate_s3_buckets_initial(self, TC_num):
|
||||
print(" - Run this BEFORE the workload generator client starts. Press Ctrl^C to exit.")
|
||||
print(" - WARN: If there are objects in the S3 buckets; they will be deleted")
|
||||
print(" ---------------------------------------------------------")
|
||||
|
||||
for i in range(len(self.buckets)):
|
||||
self.validate_s3_buckets_initial_each(bucket_num=i, TC_num=TC_num)
|
||||
if self.test_result[TC_num + "_" + str(chr(97 + 0))] == "FAIL" or \
|
||||
self.test_result[TC_num + "_" + str(chr(97 + 1))] == "FAIL" or \
|
||||
self.test_result[TC_num + "_" + str(chr(97 + 2))] == "FAIL":
|
||||
self.total_points -= 10
|
||||
|
||||
|
||||
def download_from_s3(self, bucket, folder_name):
|
||||
objects = self.s3.list_objects_v2(Bucket=bucket, Prefix=folder_name)
|
||||
for obj in objects.get('Contents', []):
|
||||
# Get key (filename) of the object
|
||||
key = obj['Key']
|
||||
self.s3.download_file(bucket, key, f"/tmp/{key}")
|
||||
|
||||
def check_end_to_end(self, TC_num):
|
||||
print(" - Make sure workload generator is started. Press Ctrl^C to exit.")
|
||||
print(" ---------------------------------------------------------")
|
||||
print("Proceed? (y/n)")
|
||||
proceed = input()
|
||||
if proceed == "y":
|
||||
start_time = time.time()
|
||||
self.test_result[TC_num] = "FAIL"
|
||||
|
||||
out_bucket = self.s3_resources.Bucket(self.buckets[2])
|
||||
while len(list(out_bucket.objects.all())) <= 100 and (time.time() - start_time) <= 400:  # poll until 100 outputs arrive or the 400-second budget runs out
|
||||
objects = out_bucket.objects.all()
|
||||
print(f"Total objects in output bucket: {len(list(objects))}, current time elapsed: {time.time()-start_time}")
|
||||
if len(list(objects)) == 100:
|
||||
self.end_to_end_latency = time.time() - start_time
|
||||
print(f"End-to-end latency is: {self.end_to_end_latency}")
|
||||
break
|
||||
if(time.time() - start_time) < 400:
|
||||
self.test_result[TC_num] = "PASS"
|
||||
|
||||
if self.end_to_end_latency <= 300:
|
||||
self.total_points += 30
|
||||
elif 300 < self.end_to_end_latency <=400:
|
||||
self.total_points += 20
|
||||
elif self.end_to_end_latency > 400:
|
||||
objects = out_bucket.objects.all()
|
||||
missing = 100 - len(list(objects))
|
||||
self.total_points = self.total_points + 20 - min(missing, 20)
|
||||
print(f"Test status of {TC_num} : {self.test_result[TC_num]}\n")
|
||||
|
||||
|
||||
def check_correctness(self, TC_num):
|
||||
if os.path.exists(self.output_folder):
|
||||
shutil.rmtree(self.output_folder)
|
||||
if not os.path.exists(self.output_folder):
|
||||
os.makedirs(self.output_folder)
|
||||
out_bucket = self.s3_resources.Bucket(self.buckets[2])
|
||||
objects = out_bucket.objects.all()
|
||||
print(f"Downloading {len(list(objects))} objects from the {self.buckets[2]} bucket ...")
|
||||
for o in objects:
|
||||
# print(self.output_folder,out_bucket,o.key)
|
||||
self.s3.download_file(self.buckets[2], o.key, os.path.join(self.output_folder,o.key))
|
||||
match_count = 0
|
||||
for i,filename in enumerate(sorted(os.listdir(self.output_folder))):
|
||||
with open(os.path.join(self.output_folder, filename),"r") as f:
|
||||
line = f.read()
|
||||
prefix_pattern = r"test_\d{2}.txt"
|
||||
if not re.match(prefix_pattern, filename):
|
||||
self.test_result[TC_num] = "FAIL"
|
||||
print(f"Filename does not follow the required pattern {prefix_pattern}. Cannnot check correctness test.")
|
||||
return
|
||||
filenum = int(filename.split("_")[1].split(".")[0]) % len(self.match)
|
||||
if line.strip() == self.match[filenum]:
|
||||
match_count += 1
|
||||
else:
|
||||
print(f"{filename} content {line.strip()} did not match with {self.match[i % len(self.match)]}")
|
||||
missing = 100 - match_count
|
||||
print(f"Missing correct results = {missing}")
|
||||
if missing == 0:
|
||||
self.test_result[TC_num] = "PASS"
|
||||
else:
|
||||
self.test_result[TC_num] = "FAIL"
|
||||
print(f"Test status of {TC_num} : {self.test_result[TC_num]}\n")
|
||||
self.total_points = self.total_points + 30 - min(missing, 30)
|
||||
|
||||
def display_menu(self):
|
||||
print("\n")
|
||||
print("=============================================================================")
|
||||
print("======== Welcome to CSE546 Cloud Computing AWS Console ======================")
|
||||
print("=============================================================================")
|
||||
print(f"IAM ACCESS KEY ID: {self.access_key}")
|
||||
print(f"IAM SECRET ACCESS KEY: {self.secret_key}")
|
||||
print("=============================================================================")
|
||||
print("1 - Validate all Lambda functions")
|
||||
print("2 - Validate S3 Buckets names and initial states")
|
||||
print("3 - End-to-end pipeline testing")
|
||||
print("4 - Validate Stage-1 bucket objects")
|
||||
print("5 - Validate Output bucket objects")
|
||||
print("6 - Check the correctness of the results")
|
||||
print("0 - Exit")
|
||||
print("Enter a choice:")
|
||||
choice = input()
|
||||
return choice
|
||||
|
||||
def main(self):
|
||||
while True:
|
||||
choice = self.display_menu()
|
||||
if int(choice) == 1:
|
||||
self.validate_lambda_exists('Test_1')
|
||||
elif int(choice) == 2:
|
||||
self.validate_s3_buckets_initial('Test_2')
|
||||
elif int(choice) == 3:
|
||||
self.check_end_to_end('Test_3')
|
||||
elif int(choice) == 4:
|
||||
self.validate_bucket_objects('Test_4',bucket_num=1)
|
||||
elif int(choice) == 5:
|
||||
self.validate_bucket_objects('Test_5',bucket_num=2)
|
||||
elif int(choice) == 6:
|
||||
self.check_correctness('Test_6')
|
||||
elif int(choice) == 0:
|
||||
break
|
||||
print(f"Total points : {self.total_points}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Grading Script')
|
||||
parser.add_argument('--access_key', type=str, help='ACCESS KEY ID of the grading IAM user')
|
||||
parser.add_argument('--secret_key', type=str, help='SECRET KEY of the grading IAM user')
|
||||
parser.add_argument('--asu_id', type=str, help='10-digit ASU ID')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
access_key = args.access_key
|
||||
secret_key = args.secret_key
|
||||
asu_id = args.asu_id
|
||||
input_bucket = asu_id+"-input"
|
||||
output_bucket = asu_id+"-output"
|
||||
stage_1_bucket = asu_id+"-stage-1"
|
||||
buckets = [input_bucket, stage_1_bucket, output_bucket]
|
||||
lambda_names = ["video-splitting", "face-recognition"]
|
||||
region = 'us-east-1'
|
||||
|
||||
aws_obj = aws_grader(access_key, secret_key, buckets, lambda_names,region, asu_id)
|
||||
aws_obj.main()
|
||||
54
Project-2/Part-2/src/face-recognition/Dockerfile
Normal file
@@ -0,0 +1,54 @@
#__copyright__ = "Copyright 2024, VISA Lab"
#__license__ = "MIT"

# Define global args
ARG FUNCTION_DIR="/home/app/"
ARG RUNTIME_VERSION="3.8"
ARG DISTRO_VERSION="3.12"

FROM alpine:latest
FROM python:${RUNTIME_VERSION} AS python-alpine

RUN python${RUNTIME_VERSION} -m pip install --upgrade pip

FROM python-alpine AS build-image

# Include global args in this stage of the build
ARG FUNCTION_DIR
ARG RUNTIME_VERSION
# Create function directory
RUN mkdir -p ${FUNCTION_DIR}

# Install Lambda Runtime Interface Client for Python
RUN python${RUNTIME_VERSION} -m pip install awslambdaric --target ${FUNCTION_DIR}

# Stage 3 - final runtime image
# Grab a fresh copy of the Python image
FROM python-alpine
# Include global arg in this stage of the build
ARG FUNCTION_DIR
# Set working directory to function root directory
WORKDIR ${FUNCTION_DIR}
# Copy in the built dependencies
COPY --from=build-image ${FUNCTION_DIR} ${FUNCTION_DIR}
# (Optional) Add Lambda Runtime Interface Emulator and use a script in the ENTRYPOINT for simpler local runs
ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/bin/aws-lambda-rie
RUN chmod 755 /usr/bin/aws-lambda-rie

# Install torch for CPU
RUN python${RUNTIME_VERSION} -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --target ${FUNCTION_DIR}

# Install other dependencies
COPY requirements.txt ${FUNCTION_DIR}
RUN python${RUNTIME_VERSION} -m pip install -r requirements.txt --target ${FUNCTION_DIR}

# Copy function code and data
COPY entry.sh /
RUN chmod 777 /entry.sh

COPY handler.py data.pt face_recognition_code.py dummy_lambda_invocation_event.json ${FUNCTION_DIR}
COPY facenet_pytorch/ ${FUNCTION_DIR}facenet_pytorch/

# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
ENTRYPOINT [ "/entry.sh" ]
CMD [ "handler.handler" ]
4
Project-2/Part-2/src/face-recognition/README.md
Normal file
@@ -0,0 +1,4 @@
# Stage 2: face recognition

- invoked from stage 1 (see the invocation sketch below)
- the provided model code and data are used to produce the text output
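The stage-1 function that performs this invocation is not part of this diff, so the following is only a minimal sketch, assuming an asynchronous `Event` invocation; the function name and payload keys are taken from the grader script and `dummy_lambda_invocation_event.json`.

```python
# Hypothetical call site (the real one lives in the stage-1 video-splitting function).
import json
import boto3

lambda_client = boto3.client("lambda")

def invoke_face_recognition(stage_1_bucket, image_file_name):
    # Fire-and-forget so stage 1 does not block on the recognition result (assumption).
    lambda_client.invoke(
        FunctionName="face-recognition",
        InvocationType="Event",
        Payload=json.dumps({
            "bucket_name": stage_1_bucket,       # e.g. "<ASU ID>-stage-1"
            "image_file_name": image_file_name,  # e.g. "test_00.jpg"
        }),
    )
```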
BIN
Project-2/Part-2/src/face-recognition/data.pt
Normal file
Binary file not shown.
4
Project-2/Part-2/src/face-recognition/dummy_lambda_invocation_event.json
Normal file
@@ -0,0 +1,4 @@
{
    "bucket_name": "1229569564-stage-1",
    "image_file_name": "jellyfish_jam.jpg"
}
6
Project-2/Part-2/src/face-recognition/entry.sh
Normal file
@@ -0,0 +1,6 @@
#!/bin/sh
if [ -z "${AWS_LAMBDA_RUNTIME_API}" ]; then
    exec /usr/bin/aws-lambda-rie /usr/local/bin/python -m awslambdaric $1
else
    exec /usr/local/bin/python -m awslambdaric $1
fi
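Because the image bundles the Lambda Runtime Interface Emulator and `entry.sh` falls back to it when `AWS_LAMBDA_RUNTIME_API` is unset, the container can be smoke-tested locally. A minimal sketch, assuming the container was started with `docker run -p 9000:8080 <image>`:

```python
# Post the dummy event to the locally running container via the standard RIE endpoint.
import json
import urllib.request

with open("dummy_lambda_invocation_event.json", "rb") as f:
    event = f.read()

req = urllib.request.Request(
    "http://localhost:9000/2015-03-31/functions/function/invocations",
    data=event,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read()))
```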
42
Project-2/Part-2/src/face-recognition/face-recognition.yaml
Normal file
@@ -0,0 +1,42 @@
# This AWS SAM template has been generated from your function's configuration. If
# your function has one or more triggers, note that the AWS resources associated
# with these triggers aren't fully specified in this template and include
# placeholder values. Open this template in AWS Application Composer or your
# favorite IDE and modify it to specify a serverless application with other AWS
# resources.
AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31
Description: An AWS Serverless Application Model template describing your function.
Resources:
  facerecognition:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: .
      Description: ''
      MemorySize: 1024
      Timeout: 60
      Architectures:
        - x86_64
      EphemeralStorage:
        Size: 512
      EventInvokeConfig:
        MaximumEventAgeInSeconds: 21600
        MaximumRetryAttempts: 2
      ImageUri: >-
        146064153251.dkr.ecr.us-east-1.amazonaws.com/546-proj2-p2-stage2@sha256:82aa3b656579c8a12dda71aaddf419e5ad175685f6bc006b4c85a37e1b2527c8
      PackageType: Image
      Policies:
        - Statement:
            - Effect: Allow
              Action:
                - logs:PutLogEvents
                - logs:CreateLogGroup
                - logs:CreateLogStream
              Resource: arn:aws:logs:*:*:*
            - Effect: Allow
              Action:
                - s3:GetObject
                - s3:PutObject
              Resource: arn:aws:s3:::*/*
      SnapStart:
        ApplyOn: None
49
Project-2/Part-2/src/face-recognition/face_recognition_code.py
Normal file
@@ -0,0 +1,49 @@
import os

os.environ["TORCH_HOME"] = "/tmp/"
import cv2
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch


mtcnn = MTCNN(
    image_size=240, margin=0, min_face_size=20
)  # initializing mtcnn for face detection
resnet = InceptionResnetV1(
    pretrained="vggface2"
).eval()  # initializing resnet for face image to embedding conversion


def face_recognition_function(key_path):
    # Face extraction
    img = cv2.imread(key_path, cv2.IMREAD_COLOR)
    boxes, _ = mtcnn.detect(img)

    # Face recognition
    key = os.path.splitext(os.path.basename(key_path))[0].split(".")[0]
    img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    face, prob = mtcnn(img, return_prob=True, save_path=None)
    saved_data = torch.load(os.path.join(os.path.dirname(__file__), "data.pt"))
    # loading data.pt file
    if face is not None:
        emb = resnet(
            face.unsqueeze(0)
        ).detach()  # detach so the embedding does not require gradients
        embedding_list = saved_data[0]  # getting embedding data
        name_list = saved_data[1]  # getting list of names
        dist_list = (
            []
        )  # list of matched distances; the minimum distance identifies the person
        for idx, emb_db in enumerate(embedding_list):
            dist = torch.dist(emb, emb_db).item()
            dist_list.append(dist)
        idx_min = dist_list.index(min(dist_list))

        # Save the result name in a file
        with open("/tmp/" + key + ".txt", "w+") as f:
            f.write(name_list[idx_min])
        return name_list[idx_min]
    else:
        print("No face was detected")
        return
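The Dockerfile's `CMD [ "handler.handler" ]` points at a `handler.py` that is not included in this diff. A hypothetical sketch of that glue code, assuming the `<ASU ID>-stage-1` / `<ASU ID>-output` bucket naming used by the grader script:

```python
# Hypothetical handler.py: download the frame, run recognition, upload the result.
import os
import boto3
from face_recognition_code import face_recognition_function

s3 = boto3.client("s3")

def handler(event, context):
    bucket = event["bucket_name"]                # e.g. "1229569564-stage-1"
    image_file_name = event["image_file_name"]   # e.g. "test_00.jpg"

    local_path = os.path.join("/tmp", image_file_name)
    s3.download_file(bucket, image_file_name, local_path)

    name = face_recognition_function(local_path)  # also writes /tmp/<key>.txt
    if name:
        key = os.path.splitext(image_file_name)[0]
        out_bucket = bucket.replace("-stage-1", "-output")  # assumed naming convention
        s3.upload_file(f"/tmp/{key}.txt", out_bucket, f"{key}.txt")
    return name
```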
10
Project-2/Part-2/src/face-recognition/facenet_pytorch/__init__.py
Normal file
@@ -0,0 +1,10 @@
from .models.inception_resnet_v1 import InceptionResnetV1
from .models.mtcnn import MTCNN
from .models.utils.detect_face import extract_face

import warnings
warnings.filterwarnings(
    action="ignore",
    message="This overload of nonzero is deprecated:\n\tnonzero()",
    category=UserWarning
)
Binary file not shown.
Binary file not shown.
Binary file not shown.
351
Project-2/Part-2/src/face-recognition/facenet_pytorch/models/inception_resnet_v1.py
Normal file
@@ -0,0 +1,351 @@
|
||||
import os
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from .utils.download import download_url_to_file
|
||||
|
||||
|
||||
class BasicConv2d(nn.Module):
|
||||
|
||||
def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
|
||||
super().__init__()
|
||||
self.conv = nn.Conv2d(
|
||||
in_planes,
|
||||
out_planes,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
bias=False,
|
||||
) # verify bias false
|
||||
self.bn = nn.BatchNorm2d(
|
||||
out_planes,
|
||||
eps=0.001, # value found in tensorflow
|
||||
momentum=0.1, # default pytorch value
|
||||
affine=True,
|
||||
)
|
||||
self.relu = nn.ReLU(inplace=False)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.relu(x)
|
||||
return x
|
||||
|
||||
|
||||
class Block35(nn.Module):
|
||||
|
||||
def __init__(self, scale=1.0):
|
||||
super().__init__()
|
||||
|
||||
self.scale = scale
|
||||
|
||||
self.branch0 = BasicConv2d(256, 32, kernel_size=1, stride=1)
|
||||
|
||||
self.branch1 = nn.Sequential(
|
||||
BasicConv2d(256, 32, kernel_size=1, stride=1),
|
||||
BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
|
||||
)
|
||||
|
||||
self.branch2 = nn.Sequential(
|
||||
BasicConv2d(256, 32, kernel_size=1, stride=1),
|
||||
BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
|
||||
BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1),
|
||||
)
|
||||
|
||||
self.conv2d = nn.Conv2d(96, 256, kernel_size=1, stride=1)
|
||||
self.relu = nn.ReLU(inplace=False)
|
||||
|
||||
def forward(self, x):
|
||||
x0 = self.branch0(x)
|
||||
x1 = self.branch1(x)
|
||||
x2 = self.branch2(x)
|
||||
out = torch.cat((x0, x1, x2), 1)
|
||||
out = self.conv2d(out)
|
||||
out = out * self.scale + x
|
||||
out = self.relu(out)
|
||||
return out
|
||||
|
||||
|
||||
class Block17(nn.Module):
|
||||
|
||||
def __init__(self, scale=1.0):
|
||||
super().__init__()
|
||||
|
||||
self.scale = scale
|
||||
|
||||
self.branch0 = BasicConv2d(896, 128, kernel_size=1, stride=1)
|
||||
|
||||
self.branch1 = nn.Sequential(
|
||||
BasicConv2d(896, 128, kernel_size=1, stride=1),
|
||||
BasicConv2d(128, 128, kernel_size=(1, 7), stride=1, padding=(0, 3)),
|
||||
BasicConv2d(128, 128, kernel_size=(7, 1), stride=1, padding=(3, 0)),
|
||||
)
|
||||
|
||||
self.conv2d = nn.Conv2d(256, 896, kernel_size=1, stride=1)
|
||||
self.relu = nn.ReLU(inplace=False)
|
||||
|
||||
def forward(self, x):
|
||||
x0 = self.branch0(x)
|
||||
x1 = self.branch1(x)
|
||||
out = torch.cat((x0, x1), 1)
|
||||
out = self.conv2d(out)
|
||||
out = out * self.scale + x
|
||||
out = self.relu(out)
|
||||
return out
|
||||
|
||||
|
||||
class Block8(nn.Module):
|
||||
|
||||
def __init__(self, scale=1.0, noReLU=False):
|
||||
super().__init__()
|
||||
|
||||
self.scale = scale
|
||||
self.noReLU = noReLU
|
||||
|
||||
self.branch0 = BasicConv2d(1792, 192, kernel_size=1, stride=1)
|
||||
|
||||
self.branch1 = nn.Sequential(
|
||||
BasicConv2d(1792, 192, kernel_size=1, stride=1),
|
||||
BasicConv2d(192, 192, kernel_size=(1, 3), stride=1, padding=(0, 1)),
|
||||
BasicConv2d(192, 192, kernel_size=(3, 1), stride=1, padding=(1, 0)),
|
||||
)
|
||||
|
||||
self.conv2d = nn.Conv2d(384, 1792, kernel_size=1, stride=1)
|
||||
if not self.noReLU:
|
||||
self.relu = nn.ReLU(inplace=False)
|
||||
|
||||
def forward(self, x):
|
||||
x0 = self.branch0(x)
|
||||
x1 = self.branch1(x)
|
||||
out = torch.cat((x0, x1), 1)
|
||||
out = self.conv2d(out)
|
||||
out = out * self.scale + x
|
||||
if not self.noReLU:
|
||||
out = self.relu(out)
|
||||
return out
|
||||
|
||||
|
||||
class Mixed_6a(nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
self.branch0 = BasicConv2d(256, 384, kernel_size=3, stride=2)
|
||||
|
||||
self.branch1 = nn.Sequential(
|
||||
BasicConv2d(256, 192, kernel_size=1, stride=1),
|
||||
BasicConv2d(192, 192, kernel_size=3, stride=1, padding=1),
|
||||
BasicConv2d(192, 256, kernel_size=3, stride=2),
|
||||
)
|
||||
|
||||
self.branch2 = nn.MaxPool2d(3, stride=2)
|
||||
|
||||
def forward(self, x):
|
||||
x0 = self.branch0(x)
|
||||
x1 = self.branch1(x)
|
||||
x2 = self.branch2(x)
|
||||
out = torch.cat((x0, x1, x2), 1)
|
||||
return out
|
||||
|
||||
|
||||
class Mixed_7a(nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
self.branch0 = nn.Sequential(
|
||||
BasicConv2d(896, 256, kernel_size=1, stride=1),
|
||||
BasicConv2d(256, 384, kernel_size=3, stride=2),
|
||||
)
|
||||
|
||||
self.branch1 = nn.Sequential(
|
||||
BasicConv2d(896, 256, kernel_size=1, stride=1),
|
||||
BasicConv2d(256, 256, kernel_size=3, stride=2),
|
||||
)
|
||||
|
||||
self.branch2 = nn.Sequential(
|
||||
BasicConv2d(896, 256, kernel_size=1, stride=1),
|
||||
BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1),
|
||||
BasicConv2d(256, 256, kernel_size=3, stride=2),
|
||||
)
|
||||
|
||||
self.branch3 = nn.MaxPool2d(3, stride=2)
|
||||
|
||||
def forward(self, x):
|
||||
x0 = self.branch0(x)
|
||||
x1 = self.branch1(x)
|
||||
x2 = self.branch2(x)
|
||||
x3 = self.branch3(x)
|
||||
out = torch.cat((x0, x1, x2, x3), 1)
|
||||
return out
|
||||
|
||||
|
||||
class InceptionResnetV1(nn.Module):
|
||||
"""Inception Resnet V1 model with optional loading of pretrained weights.
|
||||
|
||||
Model parameters can be loaded based on pretraining on the VGGFace2 or CASIA-Webface
|
||||
datasets. Pretrained state_dicts are automatically downloaded on model instantiation if
|
||||
requested and cached in the torch cache. Subsequent instantiations use the cache rather than
|
||||
redownloading.
|
||||
|
||||
Keyword Arguments:
|
||||
pretrained {str} -- Optional pretraining dataset. Either 'vggface2' or 'casia-webface'.
|
||||
(default: {None})
|
||||
classify {bool} -- Whether the model should output classification probabilities or feature
|
||||
embeddings. (default: {False})
|
||||
num_classes {int} -- Number of output classes. If 'pretrained' is set and num_classes not
|
||||
equal to that used for the pretrained model, the final linear layer will be randomly
|
||||
initialized. (default: {None})
|
||||
dropout_prob {float} -- Dropout probability. (default: {0.6})
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pretrained=None,
|
||||
classify=False,
|
||||
num_classes=None,
|
||||
dropout_prob=0.6,
|
||||
device=None,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
# Set simple attributes
|
||||
self.pretrained = pretrained
|
||||
self.classify = classify
|
||||
self.num_classes = num_classes
|
||||
|
||||
if pretrained == "vggface2":
|
||||
tmp_classes = 8631
|
||||
elif pretrained == "casia-webface":
|
||||
tmp_classes = 10575
|
||||
elif pretrained is None and self.classify and self.num_classes is None:
|
||||
raise Exception(
|
||||
'If "pretrained" is not specified and "classify" is True, "num_classes" must be specified'
|
||||
)
|
||||
|
||||
# Define layers
|
||||
self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)
|
||||
self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1)
|
||||
self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
|
||||
self.maxpool_3a = nn.MaxPool2d(3, stride=2)
|
||||
self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)
|
||||
self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1)
|
||||
self.conv2d_4b = BasicConv2d(192, 256, kernel_size=3, stride=2)
|
||||
self.repeat_1 = nn.Sequential(
|
||||
Block35(scale=0.17),
|
||||
Block35(scale=0.17),
|
||||
Block35(scale=0.17),
|
||||
Block35(scale=0.17),
|
||||
Block35(scale=0.17),
|
||||
)
|
||||
self.mixed_6a = Mixed_6a()
|
||||
self.repeat_2 = nn.Sequential(
|
||||
Block17(scale=0.10),
|
||||
Block17(scale=0.10),
|
||||
Block17(scale=0.10),
|
||||
Block17(scale=0.10),
|
||||
Block17(scale=0.10),
|
||||
Block17(scale=0.10),
|
||||
Block17(scale=0.10),
|
||||
Block17(scale=0.10),
|
||||
Block17(scale=0.10),
|
||||
Block17(scale=0.10),
|
||||
)
|
||||
self.mixed_7a = Mixed_7a()
|
||||
self.repeat_3 = nn.Sequential(
|
||||
Block8(scale=0.20),
|
||||
Block8(scale=0.20),
|
||||
Block8(scale=0.20),
|
||||
Block8(scale=0.20),
|
||||
Block8(scale=0.20),
|
||||
)
|
||||
self.block8 = Block8(noReLU=True)
|
||||
self.avgpool_1a = nn.AdaptiveAvgPool2d(1)
|
||||
self.dropout = nn.Dropout(dropout_prob)
|
||||
self.last_linear = nn.Linear(1792, 512, bias=False)
|
||||
self.last_bn = nn.BatchNorm1d(512, eps=0.001, momentum=0.1, affine=True)
|
||||
|
||||
if pretrained is not None:
|
||||
self.logits = nn.Linear(512, tmp_classes)
|
||||
load_weights(self, pretrained)
|
||||
|
||||
if self.classify and self.num_classes is not None:
|
||||
self.logits = nn.Linear(512, self.num_classes)
|
||||
|
||||
self.device = torch.device("cpu")
|
||||
if device is not None:
|
||||
self.device = device
|
||||
self.to(device)
|
||||
|
||||
def forward(self, x):
|
||||
"""Calculate embeddings or logits given a batch of input image tensors.
|
||||
|
||||
Arguments:
|
||||
x {torch.tensor} -- Batch of image tensors representing faces.
|
||||
|
||||
Returns:
|
||||
torch.tensor -- Batch of embedding vectors or multinomial logits.
|
||||
"""
|
||||
x = self.conv2d_1a(x)
|
||||
x = self.conv2d_2a(x)
|
||||
x = self.conv2d_2b(x)
|
||||
x = self.maxpool_3a(x)
|
||||
x = self.conv2d_3b(x)
|
||||
x = self.conv2d_4a(x)
|
||||
x = self.conv2d_4b(x)
|
||||
x = self.repeat_1(x)
|
||||
x = self.mixed_6a(x)
|
||||
x = self.repeat_2(x)
|
||||
x = self.mixed_7a(x)
|
||||
x = self.repeat_3(x)
|
||||
x = self.block8(x)
|
||||
x = self.avgpool_1a(x)
|
||||
x = self.dropout(x)
|
||||
x = self.last_linear(x.view(x.shape[0], -1))
|
||||
x = self.last_bn(x)
|
||||
if self.classify:
|
||||
x = self.logits(x)
|
||||
else:
|
||||
x = F.normalize(x, p=2, dim=1)
|
||||
return x
|
||||
|
||||
|
||||
def load_weights(mdl, name):
|
||||
"""Download pretrained state_dict and load into model.
|
||||
|
||||
Arguments:
|
||||
mdl {torch.nn.Module} -- Pytorch model.
|
||||
name {str} -- Name of dataset that was used to generate pretrained state_dict.
|
||||
|
||||
Raises:
|
||||
ValueError: If 'pretrained' not equal to 'vggface2' or 'casia-webface'.
|
||||
"""
|
||||
if name == "vggface2":
|
||||
path = "https://github.com/timesler/facenet-pytorch/releases/download/v2.2.9/20180402-114759-vggface2.pt"
|
||||
elif name == "casia-webface":
|
||||
path = "https://github.com/timesler/facenet-pytorch/releases/download/v2.2.9/20180408-102900-casia-webface.pt"
|
||||
else:
|
||||
raise ValueError(
|
||||
'Pretrained models only exist for "vggface2" and "casia-webface"'
|
||||
)
|
||||
|
||||
model_dir = os.path.join(get_torch_home(), "checkpoints")
|
||||
os.makedirs(model_dir, exist_ok=True)
|
||||
|
||||
cached_file = os.path.join(model_dir, os.path.basename(path))
|
||||
if not os.path.exists(cached_file):
|
||||
download_url_to_file(path, cached_file)
|
||||
|
||||
state_dict = torch.load(cached_file)
|
||||
mdl.load_state_dict(state_dict)
|
||||
|
||||
|
||||
def get_torch_home():
|
||||
torch_home = os.path.expanduser(
|
||||
os.getenv(
|
||||
"TORCH_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "torch")
|
||||
)
|
||||
)
|
||||
return torch_home
|
||||
519
Project-2/Part-2/src/face-recognition/facenet_pytorch/models/mtcnn.py
Normal file
@@ -0,0 +1,519 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
from .utils.detect_face import detect_face, extract_face
|
||||
|
||||
|
||||
class PNet(nn.Module):
|
||||
"""MTCNN PNet.
|
||||
|
||||
Keyword Arguments:
|
||||
pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True})
|
||||
"""
|
||||
|
||||
def __init__(self, pretrained=True):
|
||||
super().__init__()
|
||||
|
||||
self.conv1 = nn.Conv2d(3, 10, kernel_size=3)
|
||||
self.prelu1 = nn.PReLU(10)
|
||||
self.pool1 = nn.MaxPool2d(2, 2, ceil_mode=True)
|
||||
self.conv2 = nn.Conv2d(10, 16, kernel_size=3)
|
||||
self.prelu2 = nn.PReLU(16)
|
||||
self.conv3 = nn.Conv2d(16, 32, kernel_size=3)
|
||||
self.prelu3 = nn.PReLU(32)
|
||||
self.conv4_1 = nn.Conv2d(32, 2, kernel_size=1)
|
||||
self.softmax4_1 = nn.Softmax(dim=1)
|
||||
self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1)
|
||||
|
||||
self.training = False
|
||||
|
||||
if pretrained:
|
||||
state_dict_path = os.path.join(os.path.dirname(__file__), '../data/pnet.pt')
|
||||
state_dict = torch.load(state_dict_path)
|
||||
self.load_state_dict(state_dict)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.prelu1(x)
|
||||
x = self.pool1(x)
|
||||
x = self.conv2(x)
|
||||
x = self.prelu2(x)
|
||||
x = self.conv3(x)
|
||||
x = self.prelu3(x)
|
||||
a = self.conv4_1(x)
|
||||
a = self.softmax4_1(a)
|
||||
b = self.conv4_2(x)
|
||||
return b, a
|
||||
|
||||
|
||||
class RNet(nn.Module):
|
||||
"""MTCNN RNet.
|
||||
|
||||
Keyword Arguments:
|
||||
pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True})
|
||||
"""
|
||||
|
||||
def __init__(self, pretrained=True):
|
||||
super().__init__()
|
||||
|
||||
self.conv1 = nn.Conv2d(3, 28, kernel_size=3)
|
||||
self.prelu1 = nn.PReLU(28)
|
||||
self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True)
|
||||
self.conv2 = nn.Conv2d(28, 48, kernel_size=3)
|
||||
self.prelu2 = nn.PReLU(48)
|
||||
self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True)
|
||||
self.conv3 = nn.Conv2d(48, 64, kernel_size=2)
|
||||
self.prelu3 = nn.PReLU(64)
|
||||
self.dense4 = nn.Linear(576, 128)
|
||||
self.prelu4 = nn.PReLU(128)
|
||||
self.dense5_1 = nn.Linear(128, 2)
|
||||
self.softmax5_1 = nn.Softmax(dim=1)
|
||||
self.dense5_2 = nn.Linear(128, 4)
|
||||
|
||||
self.training = False
|
||||
|
||||
if pretrained:
|
||||
state_dict_path = os.path.join(os.path.dirname(__file__), '../data/rnet.pt')
|
||||
state_dict = torch.load(state_dict_path)
|
||||
self.load_state_dict(state_dict)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.prelu1(x)
|
||||
x = self.pool1(x)
|
||||
x = self.conv2(x)
|
||||
x = self.prelu2(x)
|
||||
x = self.pool2(x)
|
||||
x = self.conv3(x)
|
||||
x = self.prelu3(x)
|
||||
x = x.permute(0, 3, 2, 1).contiguous()
|
||||
x = self.dense4(x.view(x.shape[0], -1))
|
||||
x = self.prelu4(x)
|
||||
a = self.dense5_1(x)
|
||||
a = self.softmax5_1(a)
|
||||
b = self.dense5_2(x)
|
||||
return b, a
|
||||
|
||||
|
||||
class ONet(nn.Module):
|
||||
"""MTCNN ONet.
|
||||
|
||||
Keyword Arguments:
|
||||
pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True})
|
||||
"""
|
||||
|
||||
def __init__(self, pretrained=True):
|
||||
super().__init__()
|
||||
|
||||
self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
|
||||
self.prelu1 = nn.PReLU(32)
|
||||
self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True)
|
||||
self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
|
||||
self.prelu2 = nn.PReLU(64)
|
||||
self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True)
|
||||
self.conv3 = nn.Conv2d(64, 64, kernel_size=3)
|
||||
self.prelu3 = nn.PReLU(64)
|
||||
self.pool3 = nn.MaxPool2d(2, 2, ceil_mode=True)
|
||||
self.conv4 = nn.Conv2d(64, 128, kernel_size=2)
|
||||
self.prelu4 = nn.PReLU(128)
|
||||
self.dense5 = nn.Linear(1152, 256)
|
||||
self.prelu5 = nn.PReLU(256)
|
||||
self.dense6_1 = nn.Linear(256, 2)
|
||||
self.softmax6_1 = nn.Softmax(dim=1)
|
||||
self.dense6_2 = nn.Linear(256, 4)
|
||||
self.dense6_3 = nn.Linear(256, 10)
|
||||
|
||||
self.training = False
|
||||
|
||||
if pretrained:
|
||||
state_dict_path = os.path.join(os.path.dirname(__file__), '../data/onet.pt')
|
||||
state_dict = torch.load(state_dict_path)
|
||||
self.load_state_dict(state_dict)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.prelu1(x)
|
||||
x = self.pool1(x)
|
||||
x = self.conv2(x)
|
||||
x = self.prelu2(x)
|
||||
x = self.pool2(x)
|
||||
x = self.conv3(x)
|
||||
x = self.prelu3(x)
|
||||
x = self.pool3(x)
|
||||
x = self.conv4(x)
|
||||
x = self.prelu4(x)
|
||||
x = x.permute(0, 3, 2, 1).contiguous()
|
||||
x = self.dense5(x.view(x.shape[0], -1))
|
||||
x = self.prelu5(x)
|
||||
a = self.dense6_1(x)
|
||||
a = self.softmax6_1(a)
|
||||
b = self.dense6_2(x)
|
||||
c = self.dense6_3(x)
|
||||
return b, c, a
|
||||
|
||||
|
||||
class MTCNN(nn.Module):
|
||||
"""MTCNN face detection module.
|
||||
|
||||
This class loads pretrained P-, R-, and O-nets and returns images cropped to include the face
|
||||
only, given raw input images of one of the following types:
|
||||
- PIL image or list of PIL images
|
||||
- numpy.ndarray (uint8) representing either a single image (3D) or a batch of images (4D).
|
||||
Cropped faces can optionally be saved to file
|
||||
also.
|
||||
|
||||
Keyword Arguments:
|
||||
image_size {int} -- Output image size in pixels. The image will be square. (default: {160})
|
||||
margin {int} -- Margin to add to bounding box, in terms of pixels in the final image.
|
||||
Note that the application of the margin differs slightly from the davidsandberg/facenet
|
||||
repo, which applies the margin to the original image before resizing, making the margin
|
||||
dependent on the original image size (this is a bug in davidsandberg/facenet).
|
||||
(default: {0})
|
||||
min_face_size {int} -- Minimum face size to search for. (default: {20})
|
||||
thresholds {list} -- MTCNN face detection thresholds (default: {[0.6, 0.7, 0.7]})
|
||||
factor {float} -- Factor used to create a scaling pyramid of face sizes. (default: {0.709})
|
||||
post_process {bool} -- Whether or not to post process images tensors before returning.
|
||||
(default: {True})
|
||||
select_largest {bool} -- If True, if multiple faces are detected, the largest is returned.
|
||||
If False, the face with the highest detection probability is returned.
|
||||
(default: {True})
|
||||
selection_method {string} -- Which heuristic to use for selection. Default None. If
|
||||
specified, will override select_largest:
|
||||
"probability": highest probability selected
|
||||
"largest": largest box selected
|
||||
"largest_over_threshold": largest box over a certain probability selected
|
||||
"center_weighted_size": box size minus weighted squared offset from image center
|
||||
(default: {None})
|
||||
keep_all {bool} -- If True, all detected faces are returned, in the order dictated by the
|
||||
select_largest parameter. If a save_path is specified, the first face is saved to that
|
||||
path and the remaining faces are saved to <save_path>1, <save_path>2 etc.
|
||||
(default: {False})
|
||||
device {torch.device} -- The device on which to run neural net passes. Image tensors and
|
||||
models are copied to this device before running forward passes. (default: {None})
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, image_size=160, margin=0, min_face_size=20,
|
||||
thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
|
||||
select_largest=True, selection_method=None, keep_all=False, device=None
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
self.image_size = image_size
|
||||
self.margin = margin
|
||||
self.min_face_size = min_face_size
|
||||
self.thresholds = thresholds
|
||||
self.factor = factor
|
||||
self.post_process = post_process
|
||||
self.select_largest = select_largest
|
||||
self.keep_all = keep_all
|
||||
self.selection_method = selection_method
|
||||
|
||||
self.pnet = PNet()
|
||||
self.rnet = RNet()
|
||||
self.onet = ONet()
|
||||
|
||||
self.device = torch.device('cpu')
|
||||
if device is not None:
|
||||
self.device = device
|
||||
self.to(device)
|
||||
|
||||
if not self.selection_method:
|
||||
self.selection_method = 'largest' if self.select_largest else 'probability'
|
||||
|
||||
def forward(self, img, save_path=None, return_prob=False):
|
||||
"""Run MTCNN face detection on a PIL image or numpy array. This method performs both
|
||||
detection and extraction of faces, returning tensors representing detected faces rather
|
||||
than the bounding boxes. To access bounding boxes, see the MTCNN.detect() method below.
|
||||
|
||||
Arguments:
|
||||
img {PIL.Image, np.ndarray, or list} -- A PIL image, np.ndarray, torch.Tensor, or list.
|
||||
|
||||
Keyword Arguments:
|
||||
save_path {str} -- An optional save path for the cropped image. Note that when
|
||||
self.post_process=True, although the returned tensor is post processed, the saved
|
||||
face image is not, so it is a true representation of the face in the input image.
|
||||
If `img` is a list of images, `save_path` should be a list of equal length.
|
||||
(default: {None})
|
||||
return_prob {bool} -- Whether or not to return the detection probability.
|
||||
(default: {False})
|
||||
|
||||
Returns:
|
||||
Union[torch.Tensor, tuple(torch.tensor, float)] -- If detected, cropped image of a face
|
||||
with dimensions 3 x image_size x image_size. Optionally, the probability that a
|
||||
face was detected. If self.keep_all is True, n detected faces are returned in an
|
||||
n x 3 x image_size x image_size tensor with an optional list of detection
|
||||
probabilities. If `img` is a list of images, the item(s) returned have an extra
|
||||
dimension (batch) as the first dimension.
|
||||
|
||||
Example:
|
||||
>>> from facenet_pytorch import MTCNN
|
||||
>>> mtcnn = MTCNN()
|
||||
>>> face_tensor, prob = mtcnn(img, save_path='face.png', return_prob=True)
|
||||
"""
|
||||
|
||||
# Detect faces
|
||||
batch_boxes, batch_probs, batch_points = self.detect(img, landmarks=True)
|
||||
# Select faces
|
||||
if not self.keep_all:
|
||||
batch_boxes, batch_probs, batch_points = self.select_boxes(
|
||||
batch_boxes, batch_probs, batch_points, img, method=self.selection_method
|
||||
)
|
||||
# Extract faces
|
||||
faces = self.extract(img, batch_boxes, save_path)
|
||||
|
||||
if return_prob:
|
||||
return faces, batch_probs
|
||||
else:
|
||||
return faces
|
||||
|
||||
def detect(self, img, landmarks=False):
|
||||
"""Detect all faces in PIL image and return bounding boxes and optional facial landmarks.
|
||||
|
||||
This method is used by the forward method and is also useful for face detection tasks
|
||||
that require lower-level handling of bounding boxes and facial landmarks (e.g., face
|
||||
tracking). The functionality of the forward function can be emulated by using this method
|
||||
followed by the extract_face() function.
|
||||
|
||||
Arguments:
|
||||
img {PIL.Image, np.ndarray, or list} -- A PIL image, np.ndarray, torch.Tensor, or list.
|
||||
|
||||
Keyword Arguments:
|
||||
landmarks {bool} -- Whether to return facial landmarks in addition to bounding boxes.
|
||||
(default: {False})
|
||||
|
||||
Returns:
|
||||
tuple(numpy.ndarray, list) -- For N detected faces, a tuple containing an
|
||||
Nx4 array of bounding boxes and a length N list of detection probabilities.
|
||||
Returned boxes will be sorted in descending order by detection probability if
|
||||
self.select_largest=False, otherwise the largest face will be returned first.
|
||||
If `img` is a list of images, the items returned have an extra dimension
|
||||
(batch) as the first dimension. Optionally, a third item, the facial landmarks,
|
||||
are returned if `landmarks=True`.
|
||||
|
||||
Example:
|
||||
>>> from PIL import Image, ImageDraw
|
||||
>>> from facenet_pytorch import MTCNN, extract_face
|
||||
>>> mtcnn = MTCNN(keep_all=True)
|
||||
>>> boxes, probs, points = mtcnn.detect(img, landmarks=True)
|
||||
>>> # Draw boxes and save faces
|
||||
>>> img_draw = img.copy()
|
||||
>>> draw = ImageDraw.Draw(img_draw)
|
||||
>>> for i, (box, point) in enumerate(zip(boxes, points)):
|
||||
... draw.rectangle(box.tolist(), width=5)
|
||||
... for p in point:
|
||||
... draw.rectangle((p - 10).tolist() + (p + 10).tolist(), width=10)
|
||||
... extract_face(img, box, save_path='detected_face_{}.png'.format(i))
|
||||
>>> img_draw.save('annotated_faces.png')
|
||||
"""
|
||||
|
||||
with torch.no_grad():
|
||||
batch_boxes, batch_points = detect_face(
|
||||
img, self.min_face_size,
|
||||
self.pnet, self.rnet, self.onet,
|
||||
self.thresholds, self.factor,
|
||||
self.device
|
||||
)
|
||||
|
||||
boxes, probs, points = [], [], []
|
||||
for box, point in zip(batch_boxes, batch_points):
|
||||
box = np.array(box)
|
||||
point = np.array(point)
|
||||
if len(box) == 0:
|
||||
boxes.append(None)
|
||||
probs.append([None])
|
||||
points.append(None)
|
||||
elif self.select_largest:
|
||||
box_order = np.argsort((box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]))[::-1]
|
||||
box = box[box_order]
|
||||
point = point[box_order]
|
||||
boxes.append(box[:, :4])
|
||||
probs.append(box[:, 4])
|
||||
points.append(point)
|
||||
else:
|
||||
boxes.append(box[:, :4])
|
||||
probs.append(box[:, 4])
|
||||
points.append(point)
|
||||
boxes = np.array(boxes)
|
||||
probs = np.array(probs)
|
||||
points = np.array(points)
|
||||
|
||||
if (
|
||||
not isinstance(img, (list, tuple)) and
|
||||
not (isinstance(img, np.ndarray) and len(img.shape) == 4) and
|
||||
not (isinstance(img, torch.Tensor) and len(img.shape) == 4)
|
||||
):
|
||||
boxes = boxes[0]
|
||||
probs = probs[0]
|
||||
points = points[0]
|
||||
|
||||
if landmarks:
|
||||
return boxes, probs, points
|
||||
|
||||
return boxes, probs
|
||||
|
||||
def select_boxes(
|
||||
self, all_boxes, all_probs, all_points, imgs, method='probability', threshold=0.9,
|
||||
center_weight=2.0
|
||||
):
|
||||
"""Selects a single box from multiple for a given image using one of multiple heuristics.
|
||||
|
||||
Arguments:
|
||||
all_boxes {np.ndarray} -- Ix0 ndarray where each element is a Nx4 ndarray of
|
||||
bounding boxes for N detected faces in I images (output from self.detect).
|
||||
all_probs {np.ndarray} -- Ix0 ndarray where each element is a Nx0 ndarray of
|
||||
probabilities for N detected faces in I images (output from self.detect).
|
||||
all_points {np.ndarray} -- Ix0 ndarray where each element is a Nx5x2 array of
|
||||
points for N detected faces. (output from self.detect).
|
||||
imgs {PIL.Image, np.ndarray, or list} -- A PIL image, np.ndarray, torch.Tensor, or list.
|
||||
|
||||
Keyword Arguments:
|
||||
method {str} -- Which heuristic to use for selection:
|
||||
"probability": highest probability selected
|
||||
"largest": largest box selected
|
||||
"largest_over_theshold": largest box over a certain probability selected
|
||||
"center_weighted_size": box size minus weighted squared offset from image center
|
||||
(default: {'probability'})
|
||||
threshold {float} -- threshold for "largest_over_threshold" method. (default: {0.9})
|
||||
center_weight {float} -- weight for squared offset in center weighted size method.
|
||||
(default: {2.0})
|
||||
|
||||
Returns:
|
||||
tuple(numpy.ndarray, numpy.ndarray, numpy.ndarray) -- nx4 ndarray of bounding boxes
|
||||
for n images. Ix0 array of probabilities for each box, array of landmark points.
|
||||
"""
|
||||
|
||||
#copying batch detection from extract, but would be easier to ensure detect creates consistent output.
|
||||
batch_mode = True
|
||||
if (
|
||||
not isinstance(imgs, (list, tuple)) and
|
||||
not (isinstance(imgs, np.ndarray) and len(imgs.shape) == 4) and
|
||||
not (isinstance(imgs, torch.Tensor) and len(imgs.shape) == 4)
|
||||
):
|
||||
imgs = [imgs]
|
||||
all_boxes = [all_boxes]
|
||||
all_probs = [all_probs]
|
||||
all_points = [all_points]
|
||||
batch_mode = False
|
||||
|
||||
selected_boxes, selected_probs, selected_points = [], [], []
|
||||
for boxes, points, probs, img in zip(all_boxes, all_points, all_probs, imgs):
|
||||
|
||||
if boxes is None:
|
||||
selected_boxes.append(None)
|
||||
selected_probs.append([None])
|
||||
selected_points.append(None)
|
||||
continue
|
||||
|
||||
# If at least 1 box found
|
||||
boxes = np.array(boxes)
|
||||
probs = np.array(probs)
|
||||
points = np.array(points)
|
||||
|
||||
if method == 'largest':
|
||||
box_order = np.argsort((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]))[::-1]
|
||||
elif method == 'probability':
|
||||
box_order = np.argsort(probs)[::-1]
|
||||
elif method == 'center_weighted_size':
|
||||
box_sizes = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
|
||||
img_center = (img.width / 2, img.height/2)
|
||||
box_centers = np.array(list(zip((boxes[:, 0] + boxes[:, 2]) / 2, (boxes[:, 1] + boxes[:, 3]) / 2)))
|
||||
offsets = box_centers - img_center
|
||||
offset_dist_squared = np.sum(np.power(offsets, 2.0), 1)
|
||||
box_order = np.argsort(box_sizes - offset_dist_squared * center_weight)[::-1]
|
||||
elif method == 'largest_over_threshold':
|
||||
box_mask = probs > threshold
|
||||
boxes = boxes[box_mask]
|
||||
box_order = np.argsort((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]))[::-1]
|
||||
if sum(box_mask) == 0:
|
||||
selected_boxes.append(None)
|
||||
selected_probs.append([None])
|
||||
selected_points.append(None)
|
||||
continue
|
||||
|
||||
box = boxes[box_order][[0]]
|
||||
prob = probs[box_order][[0]]
|
||||
point = points[box_order][[0]]
|
||||
selected_boxes.append(box)
|
||||
selected_probs.append(prob)
|
||||
selected_points.append(point)
|
||||
|
||||
if batch_mode:
|
||||
selected_boxes = np.array(selected_boxes)
|
||||
selected_probs = np.array(selected_probs)
|
||||
selected_points = np.array(selected_points)
|
||||
else:
|
||||
selected_boxes = selected_boxes[0]
|
||||
selected_probs = selected_probs[0][0]
|
||||
selected_points = selected_points[0]
|
||||
|
||||
return selected_boxes, selected_probs, selected_points
|
||||
|
||||
    def extract(self, img, batch_boxes, save_path):
        # Determine if a batch or single image was passed
        batch_mode = True
        if (
                not isinstance(img, (list, tuple)) and
                not (isinstance(img, np.ndarray) and len(img.shape) == 4) and
                not (isinstance(img, torch.Tensor) and len(img.shape) == 4)
        ):
            img = [img]
            batch_boxes = [batch_boxes]
            batch_mode = False

        # Parse save path(s)
        if save_path is not None:
            if isinstance(save_path, str):
                save_path = [save_path]
        else:
            save_path = [None for _ in range(len(img))]

        # Process all bounding boxes
        faces = []
        for im, box_im, path_im in zip(img, batch_boxes, save_path):
            if box_im is None:
                faces.append(None)
                continue

            if not self.keep_all:
                box_im = box_im[[0]]

            faces_im = []
            for i, box in enumerate(box_im):
                face_path = path_im
                if path_im is not None and i > 0:
                    save_name, ext = os.path.splitext(path_im)
                    face_path = save_name + '_' + str(i + 1) + ext

                face = extract_face(im, box, self.image_size, self.margin, face_path)
                if self.post_process:
                    face = fixed_image_standardization(face)
                faces_im.append(face)

            if self.keep_all:
                faces_im = torch.stack(faces_im)
            else:
                faces_im = faces_im[0]

            faces.append(faces_im)

        if not batch_mode:
            faces = faces[0]

        return faces


def fixed_image_standardization(image_tensor):
    processed_tensor = (image_tensor - 127.5) / 128.0
    return processed_tensor


def prewhiten(x):
    mean = x.mean()
    std = x.std()
    std_adj = std.clamp(min=1.0/(float(x.numel())**0.5))
    y = (x - mean) / std_adj
    return y

@ -0,0 +1,377 @@
import torch
from torch.nn.functional import interpolate
from torchvision.transforms import functional as F
from torchvision.ops.boxes import batched_nms
from PIL import Image
import numpy as np
import os

# OpenCV is optional, but required if using numpy arrays instead of PIL
try:
    import cv2
except:
    pass


def fixed_batch_process(im_data, model):
    batch_size = 512
    out = []
    for i in range(0, len(im_data), batch_size):
        batch = im_data[i:(i+batch_size)]
        out.append(model(batch))

    return tuple(torch.cat(v, dim=0) for v in zip(*out))


def detect_face(imgs, minsize, pnet, rnet, onet, threshold, factor, device):
    if isinstance(imgs, (np.ndarray, torch.Tensor)):
        if isinstance(imgs, np.ndarray):
            imgs = torch.as_tensor(imgs.copy(), device=device)

        if isinstance(imgs, torch.Tensor):
            imgs = torch.as_tensor(imgs, device=device)

        if len(imgs.shape) == 3:
            imgs = imgs.unsqueeze(0)
    else:
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]
        if any(img.size != imgs[0].size for img in imgs):
            raise Exception("MTCNN batch processing only compatible with equal-dimension images.")
        imgs = np.stack([np.uint8(img) for img in imgs])
        imgs = torch.as_tensor(imgs.copy(), device=device)

    model_dtype = next(pnet.parameters()).dtype
    imgs = imgs.permute(0, 3, 1, 2).type(model_dtype)

    batch_size = len(imgs)
    h, w = imgs.shape[2:4]
    m = 12.0 / minsize
    minl = min(h, w)
    minl = minl * m

    # Create scale pyramid
    scale_i = m
    scales = []
    while minl >= 12:
        scales.append(scale_i)
        scale_i = scale_i * factor
        minl = minl * factor

    # First stage
    boxes = []
    image_inds = []

    scale_picks = []

    all_i = 0
    offset = 0
    for scale in scales:
        im_data = imresample(imgs, (int(h * scale + 1), int(w * scale + 1)))
        im_data = (im_data - 127.5) * 0.0078125
        reg, probs = pnet(im_data)

        boxes_scale, image_inds_scale = generateBoundingBox(reg, probs[:, 1], scale, threshold[0])
        boxes.append(boxes_scale)
        image_inds.append(image_inds_scale)

        pick = batched_nms(boxes_scale[:, :4], boxes_scale[:, 4], image_inds_scale, 0.5)
        scale_picks.append(pick + offset)
        offset += boxes_scale.shape[0]

    boxes = torch.cat(boxes, dim=0)
    image_inds = torch.cat(image_inds, dim=0)

    scale_picks = torch.cat(scale_picks, dim=0)

    # NMS within each scale + image
    boxes, image_inds = boxes[scale_picks], image_inds[scale_picks]

    # NMS within each image
    pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
    boxes, image_inds = boxes[pick], image_inds[pick]

    regw = boxes[:, 2] - boxes[:, 0]
    regh = boxes[:, 3] - boxes[:, 1]
    qq1 = boxes[:, 0] + boxes[:, 5] * regw
    qq2 = boxes[:, 1] + boxes[:, 6] * regh
    qq3 = boxes[:, 2] + boxes[:, 7] * regw
    qq4 = boxes[:, 3] + boxes[:, 8] * regh
    boxes = torch.stack([qq1, qq2, qq3, qq4, boxes[:, 4]]).permute(1, 0)
    boxes = rerec(boxes)
    y, ey, x, ex = pad(boxes, w, h)

    # Second stage
    if len(boxes) > 0:
        im_data = []
        for k in range(len(y)):
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (24, 24)))
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125

        # This is equivalent to out = rnet(im_data) to avoid GPU out of memory.
        out = fixed_batch_process(im_data, rnet)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        score = out1[1, :]
        ipass = score > threshold[1]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        # NMS within each image
        pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        boxes, image_inds, mv = boxes[pick], image_inds[pick], mv[pick]
        boxes = bbreg(boxes, mv)
        boxes = rerec(boxes)

    # Third stage
    points = torch.zeros(0, 5, 2, device=device)
    if len(boxes) > 0:
        y, ey, x, ex = pad(boxes, w, h)
        im_data = []
        for k in range(len(y)):
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (48, 48)))
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125

        # This is equivalent to out = onet(im_data) to avoid GPU out of memory.
        out = fixed_batch_process(im_data, onet)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        out2 = out[2].permute(1, 0)
        score = out2[1, :]
        points = out1
        ipass = score > threshold[2]
        points = points[:, ipass]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        w_i = boxes[:, 2] - boxes[:, 0] + 1
        h_i = boxes[:, 3] - boxes[:, 1] + 1
        points_x = w_i.repeat(5, 1) * points[:5, :] + boxes[:, 0].repeat(5, 1) - 1
        points_y = h_i.repeat(5, 1) * points[5:10, :] + boxes[:, 1].repeat(5, 1) - 1
        points = torch.stack((points_x, points_y)).permute(2, 1, 0)
        boxes = bbreg(boxes, mv)

        # NMS within each image using "Min" strategy
        # pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        pick = batched_nms_numpy(boxes[:, :4], boxes[:, 4], image_inds, 0.7, 'Min')
        boxes, image_inds, points = boxes[pick], image_inds[pick], points[pick]

    boxes = boxes.cpu().numpy()
    points = points.cpu().numpy()

    image_inds = image_inds.cpu()

    batch_boxes = []
    batch_points = []
    for b_i in range(batch_size):
        b_i_inds = np.where(image_inds == b_i)
        batch_boxes.append(boxes[b_i_inds].copy())
        batch_points.append(points[b_i_inds].copy())

    batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points)

    return batch_boxes, batch_points


def bbreg(boundingbox, reg):
    if reg.shape[1] == 1:
        reg = torch.reshape(reg, (reg.shape[2], reg.shape[3]))

    w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    h = boundingbox[:, 3] - boundingbox[:, 1] + 1
    b1 = boundingbox[:, 0] + reg[:, 0] * w
    b2 = boundingbox[:, 1] + reg[:, 1] * h
    b3 = boundingbox[:, 2] + reg[:, 2] * w
    b4 = boundingbox[:, 3] + reg[:, 3] * h
    boundingbox[:, :4] = torch.stack([b1, b2, b3, b4]).permute(1, 0)

    return boundingbox


def generateBoundingBox(reg, probs, scale, thresh):
    stride = 2
    cellsize = 12

    reg = reg.permute(1, 0, 2, 3)

    mask = probs >= thresh
    mask_inds = mask.nonzero()
    image_inds = mask_inds[:, 0]
    score = probs[mask]
    reg = reg[:, mask].permute(1, 0)
    bb = mask_inds[:, 1:].type(reg.dtype).flip(1)
    q1 = ((stride * bb + 1) / scale).floor()
    q2 = ((stride * bb + cellsize - 1 + 1) / scale).floor()
    boundingbox = torch.cat([q1, q2, score.unsqueeze(1), reg], dim=1)
    return boundingbox, image_inds


def nms_numpy(boxes, scores, threshold, method):
    if boxes.size == 0:
        return np.empty((0, 3))

    x1 = boxes[:, 0].copy()
    y1 = boxes[:, 1].copy()
    x2 = boxes[:, 2].copy()
    y2 = boxes[:, 3].copy()
    s = scores
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    I = np.argsort(s)
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while I.size > 0:
        i = I[-1]
        pick[counter] = i
        counter += 1
        idx = I[0:-1]

        xx1 = np.maximum(x1[i], x1[idx]).copy()
        yy1 = np.maximum(y1[i], y1[idx]).copy()
        xx2 = np.minimum(x2[i], x2[idx]).copy()
        yy2 = np.minimum(y2[i], y2[idx]).copy()

        w = np.maximum(0.0, xx2 - xx1 + 1).copy()
        h = np.maximum(0.0, yy2 - yy1 + 1).copy()

        inter = w * h
        if method == 'Min':
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        I = I[np.where(o <= threshold)]

    pick = pick[:counter].copy()
    return pick


def batched_nms_numpy(boxes, scores, idxs, threshold, method):
    device = boxes.device
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=device)
    # strategy: in order to perform NMS independently per class,
    # we add an offset to all the boxes. The offset is dependent
    # only on the class idx, and is large enough so that boxes
    # from different classes do not overlap
    max_coordinate = boxes.max()
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    boxes_for_nms = boxes + offsets[:, None]
    boxes_for_nms = boxes_for_nms.cpu().numpy()
    scores = scores.cpu().numpy()
    keep = nms_numpy(boxes_for_nms, scores, threshold, method)
    return torch.as_tensor(keep, dtype=torch.long, device=device)


def pad(boxes, w, h):
    boxes = boxes.trunc().int().cpu().numpy()
    x = boxes[:, 0]
    y = boxes[:, 1]
    ex = boxes[:, 2]
    ey = boxes[:, 3]

    x[x < 1] = 1
    y[y < 1] = 1
    ex[ex > w] = w
    ey[ey > h] = h

    return y, ey, x, ex


def rerec(bboxA):
    h = bboxA[:, 3] - bboxA[:, 1]
    w = bboxA[:, 2] - bboxA[:, 0]

    l = torch.max(w, h)
    bboxA[:, 0] = bboxA[:, 0] + w * 0.5 - l * 0.5
    bboxA[:, 1] = bboxA[:, 1] + h * 0.5 - l * 0.5
    bboxA[:, 2:4] = bboxA[:, :2] + l.repeat(2, 1).permute(1, 0)

    return bboxA


def imresample(img, sz):
    im_data = interpolate(img, size=sz, mode="area")
    return im_data


def crop_resize(img, box, image_size):
    if isinstance(img, np.ndarray):
        img = img[box[1]:box[3], box[0]:box[2]]
        out = cv2.resize(
            img,
            (image_size, image_size),
            interpolation=cv2.INTER_AREA
        ).copy()
    elif isinstance(img, torch.Tensor):
        img = img[box[1]:box[3], box[0]:box[2]]
        out = imresample(
            img.permute(2, 0, 1).unsqueeze(0).float(),
            (image_size, image_size)
        ).byte().squeeze(0).permute(1, 2, 0)
    else:
        out = img.crop(box).copy().resize((image_size, image_size), Image.BILINEAR)
    return out


def save_img(img, path):
    if isinstance(img, np.ndarray):
        cv2.imwrite(path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    else:
        img.save(path)


def get_size(img):
    if isinstance(img, (np.ndarray, torch.Tensor)):
        return img.shape[1::-1]
    else:
        return img.size


def extract_face(img, box, image_size=160, margin=0, save_path=None):
    """Extract face + margin from PIL Image given bounding box.

    Arguments:
        img {PIL.Image} -- A PIL Image.
        box {numpy.ndarray} -- Four-element bounding box.
        image_size {int} -- Output image size in pixels. The image will be square.
        margin {int} -- Margin to add to bounding box, in terms of pixels in the final image.
            Note that the application of the margin differs slightly from the davidsandberg/facenet
            repo, which applies the margin to the original image before resizing, making the margin
            dependent on the original image size.
        save_path {str} -- Save path for extracted face image. (default: {None})

    Returns:
        torch.tensor -- tensor representing the extracted face.
    """
    margin = [
        margin * (box[2] - box[0]) / (image_size - margin),
        margin * (box[3] - box[1]) / (image_size - margin),
    ]
    raw_image_size = get_size(img)
    box = [
        int(max(box[0] - margin[0] / 2, 0)),
        int(max(box[1] - margin[1] / 2, 0)),
        int(min(box[2] + margin[0] / 2, raw_image_size[0])),
        int(min(box[3] + margin[1] / 2, raw_image_size[1])),
    ]

    face = crop_resize(img, box, image_size)

    if save_path is not None:
        os.makedirs(os.path.dirname(save_path) + "/", exist_ok=True)
        save_img(face, save_path)

    face = F.to_tensor(np.float32(face))

    return face
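
A minimal usage sketch of extract_face (not part of the committed file; the MTCNN instance and file names are placeholders, not values from the repo):

# Hypothetical usage on one video frame; `mtcnn` is assumed to be a
# facenet_pytorch-style MTCNN detector and the paths are illustrative.
# from PIL import Image
# frame = Image.open("frame_0.jpg")
# boxes, _ = mtcnn.detect(frame)
# if boxes is not None:
#     face_tensor = extract_face(frame, boxes[0], image_size=160, save_path="face_0.png")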
@ -0,0 +1,102 @@
import hashlib
import os
import shutil
import sys
import tempfile

from urllib.request import urlopen, Request

try:
    from tqdm.auto import tqdm  # automatically select proper tqdm submodule if available
except ImportError:
    try:
        from tqdm import tqdm
    except ImportError:
        # fake tqdm if it's not installed
        class tqdm(object):  # type: ignore

            def __init__(self, total=None, disable=False,
                         unit=None, unit_scale=None, unit_divisor=None):
                self.total = total
                self.disable = disable
                self.n = 0
                # ignore unit, unit_scale, unit_divisor; they're just for real tqdm

            def update(self, n):
                if self.disable:
                    return

                self.n += n
                if self.total is None:
                    sys.stderr.write("\r{0:.1f} bytes".format(self.n))
                else:
                    sys.stderr.write("\r{0:.1f}%".format(100 * self.n / float(self.total)))
                sys.stderr.flush()

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc_val, exc_tb):
                if self.disable:
                    return

                sys.stderr.write('\n')


def download_url_to_file(url, dst, hash_prefix=None, progress=True):
    r"""Download object at the given URL to a local path.
    Args:
        url (string): URL of the object to download
        dst (string): Full path where object will be saved, e.g. `/tmp/temporary_file`
        hash_prefix (string, optional): If not None, the SHA256 of the downloaded file should start with `hash_prefix`.
            Default: None
        progress (bool, optional): whether or not to display a progress bar to stderr
            Default: True
    Example:
        >>> torch.hub.download_url_to_file('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth', '/tmp/temporary_file')
    """
    file_size = None
    # We use a different API for python2 since urllib(2) doesn't recognize the CA
    # certificates in older Python
    req = Request(url, headers={"User-Agent": "torch.hub"})
    u = urlopen(req)
    meta = u.info()
    if hasattr(meta, 'getheaders'):
        content_length = meta.getheaders("Content-Length")
    else:
        content_length = meta.get_all("Content-Length")
    if content_length is not None and len(content_length) > 0:
        file_size = int(content_length[0])

    # We deliberately save it in a temp file and move it after
    # download is complete. This prevents a local working checkpoint
    # being overridden by a broken download.
    dst = os.path.expanduser(dst)
    dst_dir = os.path.dirname(dst)
    f = tempfile.NamedTemporaryFile(delete=False, dir=dst_dir)

    try:
        if hash_prefix is not None:
            sha256 = hashlib.sha256()
        with tqdm(total=file_size, disable=not progress,
                  unit='B', unit_scale=True, unit_divisor=1024) as pbar:
            while True:
                buffer = u.read(8192)
                if len(buffer) == 0:
                    break
                f.write(buffer)
                if hash_prefix is not None:
                    sha256.update(buffer)
                pbar.update(len(buffer))

        f.close()
        if hash_prefix is not None:
            digest = sha256.hexdigest()
            if digest[:len(hash_prefix)] != hash_prefix:
                raise RuntimeError('invalid hash value (expected "{}", got "{}")'
                                   .format(hash_prefix, digest))
        shutil.move(f.name, dst)
    finally:
        f.close()
        if os.path.exists(f.name):
            os.remove(f.name)
35
Project-2/Part-2/src/face-recognition/handler.py
Normal file
@ -0,0 +1,35 @@
import os
import json
import boto3
from face_recognition_code import face_recognition_function

print("Loading function")

# execution policies and IAM roles are attached to the deployed Lambda
sesh = boto3.Session()

s3_client = sesh.client("s3", region_name="us-east-1")


def handler(event, context):
    # get the object's bucket and key from the event info
    image = event["image_file_name"]
    in_bucket = event["bucket_name"]
    download_path = "/tmp/" + image
    s3_client.download_file(in_bucket, image, download_path)

    # process it
    face_output = face_recognition_function(download_path)

    # upload output object
    if face_output is not None:
        key = os.path.splitext(image)[0]
        s3_client.upload_file("/tmp/" + key + ".txt", "1229569564-output", key + ".txt")
        return 0
    else:
        return 1


if __name__ == "__main__":
    with open("dummy_lambda_invocation_event.json", "r") as dummy_event:
        event = json.loads(dummy_event.read())
        handler(event, None)
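
A sketch of what dummy_lambda_invocation_event.json is assumed to contain for the local test above; only the two keys read by handler() are required, and the bucket and object names here are placeholders:

# Assumed shape of dummy_lambda_invocation_event.json (illustrative values only)
sample_event = {
    "bucket_name": "1229569564-stage-1",
    "image_file_name": "test_0_frame_0.jpg",
}
# handler(sample_event, None)  # same call the __main__ block makes with the JSON file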
5
Project-2/Part-2/src/face-recognition/requirements.txt
Normal file
@ -0,0 +1,5 @@
tqdm
numpy
pillow
opencv-python-headless
boto3
22
Project-2/Part-2/src/face-recognition/stage2_policy.json
Normal file
@ -0,0 +1,22 @@
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "logs:PutLogEvents",
                "logs:CreateLogGroup",
                "logs:CreateLogStream"
            ],
            "Resource": "arn:aws:logs:*:*:*"
        },
        {
            "Effect": "Allow",
            "Action": [
                "s3:GetObject",
                "s3:PutObject"
            ],
            "Resource": "arn:aws:s3:::*/*"
        }
    ]
}
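
A minimal sketch (not part of the committed files) of wiring stage2_policy.json to a Lambda execution role with boto3; the role and policy names are assumptions:

# Hypothetical helper: create an execution role for the stage-2 function and
# attach stage2_policy.json as an inline policy (names are placeholders).
import json
import boto3

iam = boto3.client("iam")

trust_policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {"Service": "lambda.amazonaws.com"},
        "Action": "sts:AssumeRole",
    }],
}

iam.create_role(
    RoleName="face-recognition-execution-role",
    AssumeRolePolicyDocument=json.dumps(trust_policy),
)
with open("stage2_policy.json") as f:
    iam.put_role_policy(
        RoleName="face-recognition-execution-role",
        PolicyName="stage2-inline-policy",
        PolicyDocument=f.read(),
    )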
4
Project-2/Part-2/src/video-splitting/README.md
Normal file
@ -0,0 +1,4 @@
# Stage 1: video splitting

- same as part 1
- added functionality: asynchronous invocation of the stage-2 (face-recognition) lambda, as sketched below
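
A minimal sketch of how that asynchronous stage-2 invocation can be issued from the splitting handler with boto3; the payload keys match what the face-recognition handler reads, while the region and call site are assumptions:

# Hypothetical async hand-off from the video-splitting lambda to stage 2.
import json
import boto3

lambda_client = boto3.client("lambda", region_name="us-east-1")

def invoke_stage2(bucket_name, image_file_name):
    # InvocationType="Event" queues the call and returns immediately,
    # so the splitter does not wait for face recognition to finish.
    lambda_client.invoke(
        FunctionName="face-recognition",
        InvocationType="Event",
        Payload=json.dumps({
            "bucket_name": bucket_name,
            "image_file_name": image_file_name,
        }),
    )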
78
Project-2/Part-2/src/video-splitting/video-splitting.yaml
Normal file
@ -0,0 +1,78 @@
# This AWS SAM template has been generated from your function's configuration. If
# your function has one or more triggers, note that the AWS resources associated
# with these triggers aren't fully specified in this template and include
# placeholder values. Open this template in AWS Application Composer or your
# favorite IDE and modify it to specify a serverless application with other AWS
# resources.
AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31
Description: An AWS Serverless Application Model template describing your function.
Resources:
  videosplitting:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: .
      Description: ''
      MemorySize: 512
      Timeout: 60
      Architectures:
        - x86_64
      EphemeralStorage:
        Size: 512
      EventInvokeConfig:
        MaximumEventAgeInSeconds: 21600
        MaximumRetryAttempts: 2
      ImageUri: >-
        146064153251.dkr.ecr.us-east-1.amazonaws.com/546-proj2-p2-stage1@sha256:114c28af83143f52258bceb3dd1b3e38f4447b2d2c4f7790af94b97f07d2542a
      PackageType: Image
      Policies:
        - Statement:
            - Effect: Allow
              Action:
                - logs:PutLogEvents
                - logs:CreateLogGroup
                - logs:CreateLogStream
              Resource: arn:aws:logs:*:*:*
            - Effect: Allow
              Action:
                - s3:GetObject
                - s3:PutObject
              Resource: arn:aws:s3:::*/*
            - Effect: Allow
              Action:
                - lambda:InvokeFunction
              Resource: '*'
      SnapStart:
        ApplyOn: None
      Events:
        BucketEvent1:
          Type: S3
          Properties:
            Bucket:
              Ref: Bucket1
            Events:
              - s3:ObjectCreated:*
  Bucket1:
    Type: AWS::S3::Bucket
    Properties:
      VersioningConfiguration:
        Status: Enabled
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
  BucketPolicy1:
    Type: AWS::S3::BucketPolicy
    Properties:
      Bucket: Bucket1
      PolicyDocument:
        Statement:
          - Action: s3:*
            Effect: Deny
            Principal: '*'
            Resource:
              - arn:aws:s3:::Bucket1/*
              - arn:aws:s3:::Bucket1
            Condition:
              Bool:
                aws:SecureTransport: false
120
Project-2/Part-2/workload_generator_p2.py
Normal file
@ -0,0 +1,120 @@
from boto3 import client as boto3_client
import os
import argparse
import time
from datetime import datetime
import json


timestamps = {}

start_time = time.time()
parser = argparse.ArgumentParser(description='Upload videos to input S3')
# parser.add_argument('--num_request', type=int, help='one video per request')
parser.add_argument('--access_key', type=str, help='ACCESS KEY of the grading IAM user')
parser.add_argument('--secret_key', type=str, help='SECRET KEY of the grading IAM user')
parser.add_argument('--asu_id', type=str, help='10-digit ASU ID, e.g. 1234567890')
parser.add_argument('--testcase_folder', type=str,
                    help='the path of the folder where videos are saved, e.g. test_cases/test_case_1/')

args = parser.parse_args()

access_key = args.access_key
secret_key = args.secret_key
asu_id = args.asu_id
input_bucket = asu_id + "-input"
stage1_bucket = asu_id + "-stage-1"
output_bucket = asu_id + "-output"
test_cases = args.testcase_folder
region = 'us-east-1'

s3 = boto3_client('s3', aws_access_key_id=access_key,
                  aws_secret_access_key=secret_key, region_name=region)


def clear_input_bucket(input_bucket):
    global s3
    list_obj = s3.list_objects_v2(Bucket=input_bucket)
    print(list_obj)
    try:
        for item in list_obj["Contents"]:
            key = item["Key"]
            s3.delete_object(Bucket=input_bucket, Key=key)
    except:
        print("Nothing to clear in input bucket")


def clear_output_bucket(output_bucket):
    global s3
    list_obj = s3.list_objects_v2(Bucket=output_bucket)
    try:
        for item in list_obj["Contents"]:
            key = item["Key"]
            s3.delete_object(Bucket=output_bucket, Key=key)
    except:
        print("Nothing to clear in output bucket")


def upload_to_input_bucket_s3(input_bucket, path, name):
    global s3
    s3.upload_file(path + name, input_bucket, name)


def write_to_file(outfilename, save_dict):
    with open(outfilename, 'w') as f:
        f.write(json.dumps(save_dict))


def upload_files(input_bucket, test_dir):
    for filename in os.listdir(test_dir):
        if filename.endswith(".mp4") or filename.endswith(".MP4"):
            print("Uploading to input bucket.. name: " + str(filename))
            filename_raw = filename.split(".mp4")[0]
            timestamps[filename_raw] = time.time()
            upload_to_input_bucket_s3(input_bucket, test_dir, filename)


# Stagger the requests by sleeping 1 second between uploads
def upload_files_v2(input_bucket, test_dir):
    for filename in os.listdir(test_dir):
        if filename.endswith(".mp4") or filename.endswith(".MP4"):
            print("Uploading to input bucket.. name: " + str(filename))
            filename_raw = filename.split(".mp4")[0]
            timestamps[filename_raw] = datetime.timestamp(datetime.now())
            upload_to_input_bucket_s3(input_bucket, test_dir, filename)
            time.sleep(1)


print("Clearing all the buckets ...")
clear_input_bucket(input_bucket)
clear_input_bucket(stage1_bucket)
clear_input_bucket(output_bucket)

print("Starting the upload in 3 sec ...")
time.sleep(3)
# upload_files(input_bucket, test_cases)
upload_files_v2(input_bucket, test_cases)

end_time = time.time()
print("Time to run = ", end_time - start_time, "(seconds)")
print(f"Timestamps: start {start_time}, end {end_time}")

print("Waiting for 10 sec to finish all the processing of the functions ...")
time.sleep(10)
if timestamps:
    for filename in os.listdir(test_cases):
        out_bucket = output_bucket
        response = s3.list_objects(Bucket=out_bucket, Prefix=filename.split(".mp4")[0])
        if "Contents" in response:
            time_lastmodified = datetime.timestamp(response['Contents'][0]['LastModified'])
            timestamps[filename.split(".mp4")[0]] = time_lastmodified - timestamps[filename.split(".mp4")[0]]

    filtered_values = [value for value in timestamps.values() if 0 <= value <= 200]
    if filtered_values:
        minimum = min(filtered_values)
        maximum = max(filtered_values)
        average = sum(filtered_values) / len(filtered_values)
        print("Minimum:", minimum)
        print("Maximum:", maximum)
        print("Average:", average)
    else:
        print("No values between 0 and 200 found in the dictionary.")