# Import Packages
import os
import re
import pandas as pd
from bs4 import BeautifulSoup as soup
from PIL import Image


# Annotation Extraction Function
def Extract(path):
    """Read one XML annotation file and return a record per labelled object.

    The image index is the first run of digits in the annotation's file
    name. Only the file name is inspected (not the whole path) so that
    digits in a parent directory are never mistaken for the index; this
    matches how the image index is derived from image file names.

    Args:
        path: Path to the XML annotation file.

    Returns:
        A list with one dict per ``<object>`` element. Each dict has the
        keys ``index``, ``width``, ``height``, ``depth`` (shared by every
        object of the file, read from ``<size>``), ``label`` (text of
        ``<name>``) and the integer box bounds ``xmin``, ``xmax``, ``ymin``,
        ``ymax``. The list is empty when the file has no ``<object>``.

    Raises:
        AttributeError: If the file name has no digits, or an expected tag
            is missing from the document.
        ValueError: If a numeric tag does not hold an integer.
    """
    # Read the whole document, then release the file handle before parsing
    with open(path) as annotation:
        rawXML = annotation.read()
    bsXML = soup(rawXML, "xml")

    # Image metadata shared by every object in this annotation
    index = int(re.search(r'\d+', os.path.basename(path)).group())
    size = bsXML.find('size')
    width = int(size.find('width').get_text())
    height = int(size.find('height').get_text())
    depth = int(size.find('depth').get_text())

    # One record per labelled object; key order fixes the DataFrame column order
    faces = []
    for face in bsXML.find_all('object'):
        faces.append({
            'index': index,
            'width': width,
            'height': height,
            'depth': depth,
            'label': face.find('name').get_text(),
            'xmin': int(face.find('xmin').get_text()),
            'xmax': int(face.find('xmax').get_text()),
            'ymin': int(face.find('ymin').get_text()),
            'ymax': int(face.find('ymax').get_text()),
        })
    return faces


# Collect Images
# Build a table mapping each image's numeric id (first run of digits in its
# file name) to the image path.
imagePath = "./Data/images/"
imageList = []
for f in os.listdir(imagePath):
    fullPath = os.path.join(imagePath, f)
    idMatch = re.search(r'\d+', f)
    # Skip sub-directories and stray files that carry no numeric id
    # (e.g. hidden OS files); they cannot be matched to an annotation.
    if idMatch is None or not os.path.isfile(fullPath):
        continue
    imageList.append([int(idMatch.group()), fullPath])
images = pd.DataFrame(imageList, columns=['index', 'path']).set_index('index')


# Collect Labels
# Flatten the per-file object records returned by Extract into one table
# (one row per labelled face).
annotationPath = "./Data/annotations/"
labelList = []
for f in os.listdir(annotationPath):
    fullPath = os.path.join(annotationPath, f)
    # Only regular files can be parsed; sub-directories would make open() fail
    if not os.path.isfile(fullPath):
        continue
    labelList.extend(Extract(fullPath))
labels = pd.DataFrame(labelList)


# Find and remove labels with incorrectly worn mask label or faces smaller than 16x16
boxWidth = labels['xmax'] - labels['xmin']
boxHeight = labels['ymax'] - labels['ymin']
# A face is rejected when either side is under 16 px, so width and height
# are each tested on their own.
exclude = labels[
    (labels['label'] == 'mask_weared_incorrect')
    | (boxWidth < 16)
    | (boxHeight < 16)
].index
labels.drop(exclude, inplace=True)
labels = labels.set_index('index')
# Keep only the images that still have at least one usable label
images = images.loc[labels.index.unique()]


# Move balanced images to Train (300 with / 300 without) & Validation (100 with / 100 without) Directories
# Each face crop goes to the train split until that class's train quota is
# full, then to the validation split, and is discarded once both are full.
TRAIN_QUOTA = 300
VALIDATION_QUOTA = 100
train_with = 0      # crops saved to train/with_mask
train_without = 0   # crops saved to train/without_mask
test_with = 0       # crops saved to validation/with_mask
test_without = 0    # crops saved to validation/without_mask

# Make sure every output directory exists before the first save
for outDir in ("./Data/train/with_mask/", "./Data/train/without_mask/",
               "./Data/validation/with_mask/", "./Data/validation/without_mask/"):
    os.makedirs(outDir, exist_ok=True)

# Series.items() replaces iteritems(), which was removed in pandas 2.0
for idx, path in images['path'].items():
    # Nothing more can be saved once all four quotas are full
    if (train_with >= TRAIN_QUOTA and train_without >= TRAIN_QUOTA
            and test_with >= VALIDATION_QUOTA and test_without >= VALIDATION_QUOTA):
        break
    with Image.open(path) as im:
        # Normalise to RGB first, then reduce to a single grayscale channel
        imGRAY = im.convert('RGB').convert('L')
        # Double brackets keep a DataFrame even when the image has one face
        for i, face in enumerate(labels.loc[[idx]].itertuples(index=False)):
            savePath = None
            # File name is "<image id>_<face number within the image>"
            tindex = str(idx) + "_" + str(i)
            if face.label == 'with_mask':
                if train_with < TRAIN_QUOTA:
                    savePath = "./Data/train/with_mask/" + tindex + ".jpg"
                    train_with += 1
                elif test_with < VALIDATION_QUOTA:
                    savePath = "./Data/validation/with_mask/" + tindex + ".jpg"
                    test_with += 1
            else:
                if train_without < TRAIN_QUOTA:
                    savePath = "./Data/train/without_mask/" + tindex + ".jpg"
                    train_without += 1
                elif test_without < VALIDATION_QUOTA:
                    savePath = "./Data/validation/without_mask/" + tindex + ".jpg"
                    test_without += 1
            if savePath:
                # Crop box is (left, upper, right, lower)
                c = imGRAY.crop((face.xmin, face.ymin, face.xmax, face.ymax))
                c.save(savePath)

# Detection and Classification of People With and Without Masks in Images

# Preprocessing