In [1]:
import zipfile
import PIL
from zipfile import ZipFile
from PIL import Image
import pytesseract
import cv2 as cv
import numpy as np
import inspect
import kraken
from kraken import pageseg
import PIL
from PIL import Image
from PIL import ImageDraw, ImageFont 
from PIL import ImageEnhance
from PIL import ImageColor
# Load the face detection classifier once at module level; get_face() reuses
# this single instance for every page. Judging by the file name this is the
# stock Haar cascade for frontal faces.
face_cascade = cv.CascadeClassifier('readonly/haarcascade_frontalface_default.xml')

# the rest is up to you
def get_image_from_zip(url):
    """Load every member of a zip archive as a fully decoded PIL image.

    Args:
        url: Path (or file-like object) of the archive, handed to ``ZipFile``.

    Returns:
        list: One PIL image per archive member, in ``namelist()`` order, so
        index ``k`` of the result corresponds to ``namelist()[k]``.
    """
    img_list = []
    with ZipFile(url, 'r') as myzip:
        for member_name in myzip.namelist():
            # Close each member stream as soon as its pixels are decoded
            # instead of leaving one open handle per page behind.
            with myzip.open(member_name) as member:
                # copy() forces a full decode, so the returned image stays
                # usable after the stream and the archive are closed.
                img_list.append(Image.open(member).copy())
    return img_list
def get_face(img):
    """Detect faces in a PIL image and return them as small thumbnails.

    Args:
        img: PIL image of the page to scan.

    Returns:
        list: One cropped PIL image per detection, each shrunk by
        ``thumbnail`` to fit within 100x100 pixels. Empty when the
        classifier reports nothing.
    """
    # PIL hands out channels in RGB order, not OpenCV's BGR, so let PIL build
    # the grayscale plane. This also copes with palette, RGBA and
    # already-grayscale pages, which a 3-channel colour conversion would not.
    gray = np.asarray(img.convert('L'))
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

    out_face = []
    # Iterating directly works whether the detector returned an array of
    # (x, y, w, h) rows or an empty result, so no blanket ``except`` is needed
    # and genuine errors are no longer hidden.
    for x, y, w, h in faces:
        left, top = int(x), int(y)
        bounding_box = (left, top, left + int(w), top + int(h))
        new = img.crop(bounding_box)
        new.thumbnail((100, 100))
        out_face.append(new)
    return out_face
def contact_image(img_list,width1,height1):
    """Tile images left-to-right, top-to-bottom onto a single contact sheet.

    The first image decides the sheet's mode and the size of one grid cell.

    Args:
        img_list: Sequence of PIL images to lay out.
        width1: Width of the contact sheet in pixels.
        height1: Height of the contact sheet in pixels.

    Returns:
        A new PIL image of size ``(width1, height1)``. When ``img_list`` is
        empty the very same ``img_list`` object is returned instead; ``re``
        in this notebook relies on receiving a non-image in that case.
    """
    # Explicit guard instead of a bare ``except`` around the whole body, so
    # real failures are no longer silently turned into "nothing to show".
    if not img_list:
        return img_list

    first_image = img_list[0]
    contact_sheet = PIL.Image.new(first_image.mode, (width1, height1))
    cell_width, cell_height = first_image.width, first_image.height

    x = 0
    y = 0
    for img in img_list:
        contact_sheet.paste(img, (x, y))
        x += cell_width
        # Start a new row as soon as the next cell would not fit. The previous
        # equality test only fired when the sheet width was an exact multiple
        # of the cell width; otherwise images ran off the right edge forever.
        if x + cell_width > contact_sheet.width:
            x = 0
            y += cell_height
    return contact_sheet
# The rest is adapted from the course material
def word_in_page(word,img):
    """Tell whether ``word`` occurs in the OCR text of ``img``.

    Args:
        word: Substring to look for; the match is case-sensitive.
        img: Image passed to ``pytesseract.image_to_string``.

    Returns:
        bool: ``True`` when ``word`` appears in the recognised text, ``False``
        otherwise (previously the miss case fell through and returned
        ``None``).
    """
    # Imported locally, as before, so the helper stays self-contained.
    import pytesseract
    text = pytesseract.image_to_string(img)
    return word in text
    
def export_faces_from_img(img):
    """Detect the faces on one page and lay them out on a 500x200 sheet.

    Args:
        img: PIL image of the page.

    Returns:
        Whatever ``contact_image`` produces for the faces ``get_face`` found.
    """
    return contact_image(get_face(img), 500, 200)

# Pool of 13 blank 500x240 canvases, one slot per page index, that the result
# panels are drawn onto. enhance(1) is called once per slot to obtain a
# separate image for each (presumably an unmodified copy of the white
# template; confirm against the Pillow docs).
img = Image.new("RGB", (500, 240), "White")
enhancer = ImageEnhance.Brightness(img)
images = [enhancer.enhance(1) for _ in range(0, 13)]

# Font shared by every caption drawn in re().
font = ImageFont.truetype("readonly/fanwood-webfont.ttf", 20)
def re(img1,img2,name=0):
    """Draw one result panel: a caption plus the face sheet (or a notice).

    Args:
        img1: Canvas image; it is modified in place.
        img2: Face contact sheet as a PIL image. Anything that is not a PIL
            image (``export_faces_from_img`` yields an empty list when no
            face was found) is treated as "no faces".
        name: Label inserted into the caption, normally the file name.

    Returns:
        ``img1``, the same object that was passed in.
    """
    has_faces = isinstance(img2, Image.Image)
    if has_faces:
        # One paste replaces the per-pixel copy (which re-ran img2.load()
        # three times per pixel). The 40px offset keeps the caption row free.
        # paste() clips oversized sheets instead of failing half-way through.
        img1.paste(img2, (0, 40))

    draw = ImageDraw.Draw(img1)
    # Same caption in both cases. The old no-faces branch wrapped the file
    # name in "a-{}.png" a second time (e.g. "a-a-0.png.png").
    draw.text((0,10),"Results found in file {} ".format(name),"black", font)
    if not has_faces:
        draw.text((0,50),"But there were no faces in that file! ","black", font)
    return img1
print("def functions")
#---------------------------------------------------------------------
# Load every page of the archive plus the member names (same order), which
# are used to label the results.
raw_paper = get_image_from_zip('readonly/images.zip')
with ZipFile('readonly/images.zip', 'r') as myzip:
    img_name = myzip.namelist()
# One blank canvas per page, sized from the archive itself rather than a fixed
# pool of 13, so images[i] exists for every page index.
images = [enhancer.enhance(1) for _ in raw_paper]
print('Got files and their names')
#---------------------------------------------------------------------
# Indices of the pages whose OCR text contains the search word.
true_list = [i for i, page in enumerate(raw_paper) if word_in_page("Mark", page)]

print(true_list)
print("Got list of files that contain string")
#---------------------------------------------------------------------
# One captioned panel per matching page. The panels are as wide as the sheet,
# so contact_image stacks them vertically.
tub0 = [re(images[i], export_faces_from_img(raw_paper[i]), img_name[i]) for i in true_list]
display(contact_image(tub0,500,240*len(tub0)))
print("Got contact")
def functions
Got files and their names
[0, 1, 2, 5, 6, 7, 12]
Got list of files that contain string
Got contact
In [5]:
# Load every page of the small archive plus the member names (same order).
raw2 = get_image_from_zip('readonly/small_img.zip')
with ZipFile('readonly/small_img.zip', 'r') as myzip2:
    img_name2 = myzip2.namelist()
# One blank 500x240 canvas per page, sized from the archive itself rather than
# a fixed pool of 13, so images2[i] exists for every page index.
img2 = Image.new("RGB",(500,240),"White")
enhancer2 = ImageEnhance.Brightness(img2)
images2 = [enhancer2.enhance(1) for _ in raw2]
print('Got files and their names')
#---------------------------------------------------------------------
# Indices of the pages whose OCR text contains the search word.
true_list2 = [i for i, page in enumerate(raw2) if word_in_page("Christopher", page)]

print(true_list2)
print("Got list of files that contain string")
#---------------------------------------------------------------------
# One captioned panel per matching page. The panels are as wide as the sheet,
# so contact_image stacks them vertically.
tub2 = [re(images2[i], export_faces_from_img(raw2[i]), img_name2[i]) for i in true_list2]
display(contact_image(tub2,500,240*len(tub2)))
print("Got contact")
Got files and their names
[0, 3]
Got list of files that contain string
Got contact