Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

OpenCV

OpenCV

OpenCV Installation

  • Install both OpenCV for Python and the external contribution packages
pip install opencv-python
pip install opencv-contrib-python

Download images

  • Visit Duck Duck Go.
  • Search for "cats" (or whatever other animal you like) and click on "Images".
  • Click on an image and then on the "View File" button.
  • Right-click and "Save Image As".
  • Make sure you remember in which folder you saved the file.

OpenCV Read image

  • imread
  • imshow
  • waitKey
import cv2 as cv
import sys

# Exactly one argument is required: the path of the image to display.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image_path = args[0]

# imread hands the pixels back as a numpy.ndarray; print its dimensions.
image = cv.imread(image_path)
print(image.shape)

# Show the picture in a window titled 'Original' and wait for a key press.
cv.imshow('Original', image)
cv.waitKey(0)
  • press any key to stop the display

OpenCV - read image and fail

  • File does not exist
  • File is not in the correct format.
cv2.error: OpenCV(4.5.3) /tmp/pip-req-build-agffqapq/opencv/modules/highgui/src/window.cpp:1006: error: (-215:Assertion failed) size.width>0 && size.height>0 in function 'imshow'

OpenCV resize (rescale) images

  • shape

  • resize

  • INTER_AREA

  • To reduce the computation power needed to process the image (or video)

import cv2 as cv
import sys

# Two arguments are required: the image path and the scaling factor.
args = sys.argv[1:]
if len(args) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME SCALE  where scale is 0.75 or some similar number between 0 and 1")

image_path = args[0]
factor = float(args[1])

image = cv.imread(image_path)
cv.imshow('Original', image)

# shape is (height, width, colour channels); only the first two are scaled.
rows, cols, _channels = image.shape
target_size = (int(cols * factor), int(rows * factor))  # passed as (width, height)

shrunk = cv.resize(image, target_size, interpolation=cv.INTER_AREA)
cv.imshow('Resized', shrunk)
cv.waitKey(0)
  • Works on images, videos, live videos

  • Try to resize the image to be larger than the original using either INTER_AREA or INTER_LINEAR or INTER_CUBIC.

  • Cubic is slower but better quality

OpenCV cropping image

import cv2 as cv
import sys

# Exactly one argument is required: the path of the image to crop.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
print(image.shape)
cv.imshow('Image', image)

# Cropping is plain array slicing: the row range first, then the column range.
row_range = slice(450, 800)
col_range = slice(600, 1000)
part = image[row_range, col_range]

print(part.shape)
cv.imshow('Cropped', part)

cv.waitKey(0)

OpenCV crop, resize and save image

  • imwrite
import cv2 as cv
import sys

# Two arguments are required: the input image and the file to write.
args = sys.argv[1:]
if len(args) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME OUTFILE")

source_path, target_path = args

image = cv.imread(source_path)
print(image.shape)
cv.imshow('Original', image)

# Step 1: cut out rows 400:1200 and columns 100:900.
region = image[400:1200, 100:900]
print(region.shape)
cv.imshow('Cropped', region)

# Step 2: shrink the cropped region to 70% of its size.
factor = 0.7
rows, cols, _channels = region.shape
target_size = (int(cols * factor), int(rows * factor))  # passed as (width, height)
shrunk = cv.resize(region, target_size, interpolation=cv.INTER_AREA)
print(shrunk.shape)
cv.imshow('Resized', shrunk)

# Step 3: write the result to the requested output file.
cv.imwrite(target_path, shrunk)

cv.waitKey(0)

OpenCV - draw on new images

import cv2 as cv
import numpy as np
import sys


# Start from a 500x500 image with 3 colour channels, every value zero.
canvas = np.zeros(shape=(500, 500, 3), dtype='uint8')
print(canvas.shape)  # (500, 500, 3)

# Colour components used by the experiments that are commented out below.
blue, green, red = 255, 0, 0

canvas[:] = (255, 255, 255)  # paint the whole image white

# Other things to try (uncomment one at a time):
#canvas[:] = blue,green,red          # paint the whole image
#canvas[0:100] = blue,green,red      # 100 rows on the top of the image
#canvas[:,0:100] = blue,green,red    # 100 columns on the left of the image
#canvas[100:200, 200:300] = blue,green,red  # A square on the image

#cv.line(canvas, (30, 70), (150, 90), color=(blue, green, red), thickness=3)

#cv.putText(canvas, text="Hello World", org=(20, 100), fontFace=cv.FONT_HERSHEY_COMPLEX, fontScale=1.0, color=(blue, green, red), thickness=2)

cv.imshow('Image', canvas)
cv.waitKey(0)

OpenCV - draw rectangle

  • rectangle
import cv2 as cv
import numpy as np
import sys


canvas = np.zeros(shape=(500, 500, 3), dtype='uint8')
print(canvas.shape)  # (500, 500, 3)

# Colours are written in (blue, green, red) order.
BLUE = (255, 0, 0)
RED = (0, 0, 255)
GREEN = (0, 255, 0)

# Each rectangle is described by two opposite corner points.
cv.rectangle(canvas, (100, 100), (250, 250), color=BLUE, thickness=2)
cv.rectangle(canvas, (200, 200), (350, 350), color=RED, thickness=5)
# thickness=cv.FILLED (== -1) fills the rectangle instead of outlining it.
cv.rectangle(canvas, (350, 50), (450, 150), color=GREEN, thickness=cv.FILLED)

cv.imshow('Image', canvas)
cv.waitKey(0)

OpenCV - draw circle

  • circle
import cv2 as cv
import numpy as np
import sys


canvas = np.zeros(shape=(500, 500, 3), dtype='uint8')
print(canvas.shape)  # (500, 500, 3)

# (center, radius, colour in blue-green-red order, line thickness)
circles = [
    ((200, 200), 150, (255, 0, 0), 2),         # blue outline
    ((100, 100), 75, (0, 0, 255), 5),          # thicker red outline
    ((300, 100), 75, (0, 255, 0), cv.FILLED),  # filled green disc
]
for middle, size, bgr, stroke in circles:
    cv.circle(canvas, center=middle, radius=size, color=bgr, thickness=stroke)

cv.imshow('Image', canvas)
cv.waitKey(0)

OpenCV convert BGR to greyscale and back to BGR

  • COLOR_BGR2GRAY

  • COLOR_GRAY2BGR

  • Grayscale is the distribution of pixel intensities.

  • Converting from grey to BGR just replicates the same 2-dimensional matrix 3 times for Blue, Green, and Red

import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to convert.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)
print(image.shape)  # 3 dimensions

grey_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
cv.imshow('gray', grey_image)

restored = cv.cvtColor(grey_image, cv.COLOR_GRAY2BGR)
cv.imshow('Back', restored)

print(grey_image.shape)  # 2 dimensions
print(restored.shape)  # 3 dimensions

# Each of the three restored channels is compared with the grey matrix.
for channel in range(3):
    print(np.array_equal(grey_image, restored[:, :, channel]))  # True

cv.waitKey(0)

OpenCV blur image

  • blur

  • Average the intensity of all the pixels in the kernel (window)

  • Median - helps reduce the noise in the image

  • Gaussian blur - less blurring than the average blurring, but more natural

  • Increase the ksize (kernel size) and the image will be more blurred.

import cv2 as cv
import sys

# Two arguments are required: the image path and the kernel size.
args = sys.argv[1:]
if len(args) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME KERNEL")

image_path = args[0]
window = int(args[1])

image = cv.imread(image_path)
cv.imshow('Original', image)

# The averaging kernel is square: the same size in both directions.
smoothed = cv.blur(image, ksize=(window, window))

cv.imshow('Blurred', smoothed)
cv.waitKey(0)

OpenCV Gaussian blur

  • GaussianBlur
import cv2 as cv
import sys

# Exactly one argument is required: the path of the image to blur.
if len(sys.argv) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME")

filename = sys.argv[1]

original = cv.imread(filename)

# sigmaX is the standard deviation of the Gaussian, not a border mode.
# Passing cv.BORDER_DEFAULT there used that constant's numeric value as sigma.
# sigmaX=0 asks OpenCV to derive sigma from the kernel size; the border mode
# belongs in the separate borderType argument.
blurred = cv.GaussianBlur(original, ksize=(3, 3), sigmaX=0, borderType=cv.BORDER_DEFAULT)

cv.imshow('Blurred', blurred)
cv.waitKey(0)

OpenCV Median blur

  • medianBlur

  • The kernel here is always square (for whatever reason) and thus we only pass a single number

  • Usually used with small kernel sizes (less than 7)

import cv2 as cv
import sys

# Two arguments are required: the image path and the kernel size.
args = sys.argv[1:]
if len(args) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME KERNEL")

image_path = args[0]
window = int(args[1])

image = cv.imread(image_path)
cv.imshow('Original', image)

# medianBlur takes a single number because its kernel is always square.
smoothed = cv.medianBlur(image, ksize=window)

cv.imshow('Median Blurred', smoothed)
cv.waitKey(0)

OpenCV Bilateral blur

  • bilateralFilter
import cv2 as cv
import sys

# Two arguments are required: the image path and the filter diameter.
args = sys.argv[1:]
if len(args) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME DIAMETER")

image_path = args[0]
neighbourhood = int(args[1])

image = cv.imread(image_path)
cv.imshow('Original', image)

# The two sigma values are fixed; only the diameter comes from the command line.
smoothed = cv.bilateralFilter(image, d=neighbourhood, sigmaColor=50, sigmaSpace=15)
cv.imshow('Bilateral Blurred', smoothed)

cv.waitKey(0)

OpenCV blur part of an image

import cv2 as cv
import sys

# Five arguments: image path, kernel size, and the top/left/size of the square to blur.
args = sys.argv[1:]
if len(args) != 5:
    exit(f"Usage: {sys.argv[0]} FILENAME KERNEL TOP LEFT SIZE")

image_path = args[0]
window, top, left, size = (int(value) for value in args[1:])

image = cv.imread(image_path)
cv.imshow('Original', image)

# The square region to blur, expressed once as (row range, column range).
area = (slice(top, top + size), slice(left, left + size))

# Work on a copy so the original stays untouched, then overwrite only the region.
result = image.copy()
result[area] = cv.blur(image[area], ksize=(window, window))
cv.imshow('Blurred', result)

cv.waitKey(0)
python blur_average_part.py  ~/Dropbox/Data/img/yello-flower.jpeg 35 400 400 300

OpenCV - Thresholding

  • thresholding

  • binarization of image - where pixels are either black or white

  • eg. we pick a number and any pixel less than that number becomes black and any pixel with more than that number becomes white

OpenCV - Simple Thresholding

  • threshold
  • THRESH_BINARY
  • THRESH_BINARY_INV
import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to binarize.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)

# An all-zero image with the same shape as the original.
black = np.zeros(image.shape, dtype='uint8')
#black[:] = 255 # white
cv.imshow('Blank', black)

grey_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
cv.imshow('Grey', grey_image)

CUTOFF = 125
BRIGHTEST = 255

used_cutoff, binary = cv.threshold(grey_image, thresh=CUTOFF, maxval=BRIGHTEST, type=cv.THRESH_BINARY)
print(used_cutoff)  # The thresh value we passed in
cv.imshow('Simple Threshold', binary)

# Same cut-off, but with the inverted threshold type.
used_cutoff, binary_inverted = cv.threshold(grey_image, thresh=CUTOFF, maxval=BRIGHTEST, type=cv.THRESH_BINARY_INV)
cv.imshow('Simple Inverted Threshold', binary_inverted)

cv.waitKey(0)

OpenCV - Adaptive Thresholding

  • adaptiveThreshold
  • ADAPTIVE_THRESH_MEAN_C
  • ADAPTIVE_THRESH_GAUSSIAN_C
import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to binarize.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)

# An all-zero image with the same shape as the original.
black = np.zeros(image.shape, dtype='uint8')
#black[:] = 255 # white
cv.imshow('Blank', black)

grey = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
cv.imshow('Grey', grey)

# blockSize works like a kernel size: it is the size of the box for which the
# algorithm computes a threshold. With ADAPTIVE_THRESH_MEAN_C the mean of each
# block serves as the threshold for the center of that block.
adaptive = cv.adaptiveThreshold(
    grey,
    maxValue=255,
    adaptiveMethod=cv.ADAPTIVE_THRESH_MEAN_C,
    thresholdType=cv.THRESH_BINARY,
    blockSize=11,
    C=3,
)
cv.imshow('Adaptive Threshold', adaptive)

# Variations to try:
# inverted_threshold = cv.adaptiveThreshold(grey, maxValue=255, adaptiveMethod=cv.ADAPTIVE_THRESH_MEAN_C, thresholdType=cv.THRESH_BINARY_INV, blockSize=11, C=3)
# cv.imshow('Adaptive Inverted Threshold', inverted_threshold)

# gaussian_threshold = cv.adaptiveThreshold(grey, maxValue=255, adaptiveMethod=cv.ADAPTIVE_THRESH_GAUSSIAN_C, thresholdType=cv.THRESH_BINARY, blockSize=11, C=3)
# cv.imshow('Adaptive Gaussian Threshold', gaussian_threshold)

# For comparison, the simple (global) inverted threshold:
# thresh, threshold_inverted = cv.threshold(grey, thresh=125, maxval=255, type=cv.THRESH_BINARY_INV)
# cv.imshow('Simple Inverted Threshold', threshold_inverted)

cv.waitKey(0)

OpenCV - Gradients and Edges

OpenCV - finding edges using Canny

import cv2 as cv
import sys

# Exactly one argument is required: the path of the image to analyse.
if len(sys.argv) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME")

filename = sys.argv[1]

original = cv.imread(filename)
cv.imshow('Original', original)

# Canny takes two thresholds; they are passed positionally, as in the other
# Canny examples of this chapter.
edges = cv.Canny(original, 125, 175)
cv.imshow('Edges', edges)
cv.waitKey(0)
  • If we first blur the image and then run Canny on the blurred image, we get a lot less edges.

Dilate an image using structuring elements

OpenCV - Laplacian Gradients

import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to analyse.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
print(image.shape)

cv.imshow('Original', image)

grey_image = cv.cvtColor(image, code=cv.COLOR_BGR2GRAY)
cv.imshow('Gray', grey_image)

# The Laplacian is computed with 64-bit float output (ddepth=cv.CV_64F).
laplacian = cv.Laplacian(grey_image, ddepth=cv.CV_64F)
cv.imshow('Laplacian', laplacian)

# Second view: absolute values converted to 8-bit unsigned integers.
laplacian_abs = np.abs(laplacian).astype('uint8')
cv.imshow('Laplacian 2', laplacian_abs)

cv.waitKey(0)

OpenCV - Sobel Gradients

import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to analyse.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
print(image.shape)

cv.imshow('Original', image)

grey_image = cv.cvtColor(image, code=cv.COLOR_BGR2GRAY)
cv.imshow('Gray', grey_image)

# One derivative along x (dx=1, dy=0) and one along y (dx=0, dy=1).
gradient_x = cv.Sobel(grey_image, ddepth=cv.CV_64F, dx=1, dy=0)
gradient_y = cv.Sobel(grey_image, ddepth=cv.CV_64F, dx=0, dy=1)
# The two results are merged with a bitwise OR.
gradient_both = cv.bitwise_or(gradient_x, gradient_y)

cv.imshow('Sobel X', gradient_x)
cv.imshow('Sobel Y', gradient_y)
cv.imshow('Sobel', gradient_both)

cv.waitKey(0)

OpenCV - Canny Dilate Erode

  • canny
  • dilate
  • erode
import cv2 as cv
import sys

# Exactly one argument is required: the path of the image to process.
if len(sys.argv) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME")

filename = sys.argv[1]

original = cv.imread(filename)
cv.imshow('Original', original)

# Shrink to a fixed width of 400 pixels, keeping the aspect ratio.
height, width, colors = original.shape
new_width = 400
new_height = int(height * new_width / width)

smaller = cv.resize(original, (new_width, new_height), interpolation=cv.INTER_AREA)
cv.imshow('Smaller', smaller)

canny = cv.Canny(smaller, 125, 175) # providing two thresholds
cv.imshow('Canny', canny)

# dilate/erode expect a structuring element (a matrix), not a size tuple.
# A bare (7, 7) is converted to a tiny array holding the two numbers rather
# than a 7x7 neighbourhood, so build the kernels explicitly.
dilate_kernel = cv.getStructuringElement(cv.MORPH_RECT, (7, 7))
dilated = cv.dilate(canny, dilate_kernel, iterations=3)
cv.imshow('Dilated', dilated)

erode_kernel = cv.getStructuringElement(cv.MORPH_RECT, (3, 3))
eroded = cv.erode(dilated, erode_kernel, iterations=1)
cv.imshow('Eroded', eroded)

cv.waitKey(0)
import cv2 as cv
import sys

# Exactly one argument is required: the path of the image to crop.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

picture = cv.imread(args[0])
print(picture.shape)
cv.imshow('Image', picture)

# Boundaries of the cut-out: rows top:bottom and columns left:right.
top, bottom = 450, 800
left, right = 600, 1000
part = picture[top:bottom, left:right]

print(part.shape)
cv.imshow('Cropped', part)

cv.waitKey(0)

OpenCV Move of an Image

  • Shifting the image along the X or Y axes.
import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to shift.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)


def move(img, x, y):
    """Return img shifted by x pixels along the X axis and y along the Y axis.

    Direction of the shift:
      x < 0 = left,  x > 0 = right
      y < 0 = up,    y > 0 = down
    The output is given the same width and height as the input.
    """
    rows, cols = img.shape[:2]
    translation = np.float32([
        [1, 0, x],
        [0, 1, y],
    ])
    return cv.warpAffine(img, translation, (cols, rows))


shifted = move(image, 100, 100)
cv.imshow('Moved', shifted)

cv.waitKey(0)

OpenCV - Rotate Image

import cv2 as cv
import sys
#import numpy as np

# Two arguments are required: the image path and the rotation in degrees.
args = sys.argv[1:]
if len(args) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME DEGREES")

image_path = args[0]
turn = float(args[1])

image = cv.imread(image_path)
cv.imshow('Original', image)


def rotate(img, angle, center=None):
    """Return img rotated by angle around center, at scale 1.0.

    When center is None the middle of the image (integer division of the
    width and height by two) is used. The output keeps the width and height
    of the input.
    """
    rows, cols = img.shape[0:2]
    pivot = (cols // 2, rows // 2) if center is None else center
    matrix = cv.getRotationMatrix2D(pivot, angle, 1.0)
    return cv.warpAffine(img, matrix, (cols, rows))


turned = rotate(image, turn)

cv.imshow('Rotated', turned)

cv.waitKey(0)
  • Enlarge (add empty parts around it) before rotating.

OpenCV - Flip Image

import cv2 as cv
import sys

# Two arguments are required: the image path and the flip code.
args = sys.argv[1:]
if len(args) != 2:
    exit(f"""
       Usage: {sys.argv[0]} FILENAME CODE
       Where
         0 - vertical flip
         1 - horizontal flip
        -1 - 180 degrees rotation (flipping both vertical and horiziontal)
    """)

image_path = args[0]
direction = int(args[1])

image = cv.imread(image_path)
cv.imshow('Original', image)

# The meaning of the flip code is listed in the usage text above.
mirrored = cv.flip(image, direction)

cv.imshow('Flipped', mirrored)

cv.waitKey(0)

OpenCV - Contours

  • Similar, but not the same as edges

  • Contour line

  • First convert the image to grey-scale

  • Then find the edges of the image

  • contours is a python list of the coordinates of the contour of the image

  • hierarchies is the hierarchical representation of the contours

import cv2 as cv
import sys

# Exactly one argument is required: the path of the image to analyse.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)

grey = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
cv.imshow('Grey', grey)

# Edges are detected on the grey image (whether to use the original instead
# is an open question), then the contours are collected from the edge image.
edges = cv.Canny(grey, 125, 175)
contours, hierarchies = cv.findContours(edges, cv.RETR_LIST, cv.CHAIN_APPROX_NONE)

# Retrieval modes to try as well:
#   cv.RETR_EXTERNAL
#   cv.RETR_TREE
#
# Approximation methods:
#   cv.CHAIN_APPROX_NONE - return all the contours
#   cv.CHAIN_APPROX_SIMPLE - compress to endpoints
#
# The number of contours is the length of the list.
#
# Blurring the image before finding the edges and the contours will probably
# reduce the number of contours.
#
# An alternative to Canny is to binarize the image first:
#   ret, threshold = cv.threshold(img, 125, 255, cv.THRESH_BINARY)

print(contours)
cv.waitKey(0)

OpenCV - Draw contours of an image on a blank image

import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to analyse.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)

# An all-zero image of the same shape; the contours are drawn onto it.
canvas = np.zeros(image.shape, dtype='uint8')
#canvas[:] = 255 # white
cv.imshow('Blank', canvas)

grey_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
cv.imshow('Grey', grey_image)

# Binarize the grey image, then look for contours in the binary image.
_cutoff, binary = cv.threshold(grey_image, thresh=125, maxval=255, type=cv.THRESH_BINARY)
cv.imshow('Threshold', binary)

contours, hierarchies = cv.findContours(binary, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
print(f'Number of countours: {len(contours)}')

# contourIdx selects which contour to draw; a negative value draws all of them.
cv.drawContours(canvas, contours=contours, contourIdx=-1, color=(0, 255, 255), thickness=2)
cv.imshow('Contours', canvas)

# Things to try:
# - compare the result to the result of Canny
# - pass the Canny image instead of the threshold image to findContours
# In general it is probably better to use Canny instead of threshold for finding contours.

print(contours)
cv.waitKey(0)

OpenCV - ColorSpaces

OpenCV - BGR to RGB

  • COLOR_BGR2RGB
  • COLOR_RGB2BGR
import cv2 as cv
import sys
import numpy as np
import matplotlib.pyplot as plt

# Exactly one argument is required: the path of the image to convert.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)
print(image.shape)

as_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
cv.imshow('RGB', as_rgb)
print(as_rgb.shape)

as_bgr_again = cv.cvtColor(as_rgb, cv.COLOR_RGB2BGR)
cv.imshow('BGR', as_bgr_again)
print(as_bgr_again.shape)

# The original note here says this prints False.
# NOTE(review): reordering the channels twice looks lossless, so True would be
# expected - verify by running the script.
print(np.array_equal(image, as_bgr_again))

# Element-wise difference between the original and the round-tripped image.
difference = image - as_bgr_again
cv.imshow('Diff', difference)

# The distinct values that occur in the difference.
print(set(difference.flatten()))

cv.waitKey(0)

# The image could also be displayed with matplotlib:
#plt.imshow(image)
#plt.show()

OpenCV - BGR to HSV

  • COLOR_BGR2HSV

  • COLOR_HSV2BGR

  • The conversion is not loss-less.

  • Better use images that are 16-bit than images that are 8-bit (as my camera takes them)

  • Even with 16 bit there might be some changes as the conversions are floating point.

import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to convert.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)
print(image.shape)

as_hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)
cv.imshow('HSV', as_hsv)
print(as_hsv.shape)

restored = cv.cvtColor(as_hsv, cv.COLOR_HSV2BGR)
cv.imshow('Back', restored)
print(restored.shape)

# Does the round trip give back exactly the same pixels?
print(np.array_equal(image, restored))

# Element-wise difference between the original and the round-tripped image.
difference = image - restored
cv.imshow('Diff', difference)
print(difference.shape)

# The distinct values that occur in the difference.
print(set(difference.flatten()))

cv.waitKey(0)
  • Check the yellow flower

OpenCV - BGR to LAB

  • COLOR_BGR2LAB
  • COLOR_LAB2BGR
import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to convert.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)
print(image.shape)

as_lab = cv.cvtColor(image, cv.COLOR_BGR2LAB)
cv.imshow('LAB', as_lab)
print(as_lab.shape)

restored = cv.cvtColor(as_lab, cv.COLOR_LAB2BGR)
cv.imshow('Back', restored)
print(restored.shape)

# Does the round trip give back exactly the same pixels?
print(np.array_equal(image, restored))  # False

# Element-wise difference between the original and the round-tripped image.
difference = image - restored
cv.imshow('Diff', difference)
print(difference.shape)

# The distinct values that occur in the difference.
print(set(difference.flatten()))

cv.waitKey(0)

OpenCV - Split and merge color channels

  • split

  • merge

  • The Channels: R, G, B

  • Creates 3 greyscale images (2D matrices)

import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to split.
if len(sys.argv) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME")

filename = sys.argv[1]

original = cv.imread(filename)
#print(type(original))  # numpy.ndarray

cv.imshow('Original', original)
print(original.shape) # 3 dimensional matrix

# split returns one 2-dimensional matrix per channel, in blue, green, red order.
blue, green, red = cv.split(original)
cv.imshow('Blue', blue)
print(blue.shape) # 2 dimensional matrix

cv.imshow('Green', green)
print(green.shape) # 2 dimensional matrix

cv.imshow('Red', red)
print(red.shape) # 2 dimensional matrix

# Merging the three channels again gives back the original image.
bgr = cv.merge([blue, green, red])
cv.imshow('BGR', bgr)
print(bgr.shape) # 3 dimensional matrix

print(np.array_equal(original, bgr)) # True
print(np.array_equal(original[:,:,0], blue)) # True
print(np.array_equal(original[:,:,1], green)) # True
print(np.array_equal(original[:,:,2], red)) # True

black = np.zeros(original.shape[:2], dtype='uint8') # aka. blank

# Merge each channel with two all-zero channels to show it in its own colour.
real_blue = cv.merge([blue, black, black])
cv.imshow('Real blue', real_blue)
print(np.array_equal(original[:,:,0], real_blue[:, :, 0])) # True

real_green = cv.merge([black, green, black])
cv.imshow('Real green', real_green)
print(np.array_equal(original[:,:,1], real_green[:, :, 1])) # True

real_red = cv.merge([black, black, red])
cv.imshow('Real red', real_red)
# Compare against real_red: channel 2 of real_green is the all-zero matrix.
print(np.array_equal(original[:,:,2], real_red[:, :, 2])) # True

diff = original-bgr
cv.imshow('Diff', diff)
print(set(diff.flatten())) # {0}

cv.waitKey(0)

OpenCV - bitwise operations

  • bitwise_and
  • bitwise_or
  • bitwise_xor
  • bitwise_not
import cv2 as cv
import numpy as np

# A 400x400 single-channel image of zeros; both shapes are drawn on copies of it.
empty = np.zeros(shape=(400, 400), dtype='uint8')

square = cv.rectangle(empty.copy(), pt1=(50, 50), pt2=(350, 350), color=255, thickness=cv.FILLED)
circle = cv.circle(empty.copy(), center=(200, 200), radius=180, color=255, thickness=cv.FILLED)

# Window title -> image, in the order in which the windows are opened.
views = {
    'square': square,
    'circle': circle,
    'Bitwise AND': cv.bitwise_and(square, circle),
    'Bitwise OR': cv.bitwise_or(square, circle),
    'Bitwise XOR': cv.bitwise_xor(square, circle),
    'Bitwise NOT Square': cv.bitwise_not(square),
    'Bitwise NOT Circle': cv.bitwise_not(circle),
}
for title, picture in views.items():
    cv.imshow(title, picture)

cv.waitKey(0)

OpenCV - masking

  • Using bitwise AND on the same image using a mask parameter as well
import cv2 as cv
import sys
import numpy as np

# Exactly one argument is required: the path of the image to mask.
if len(sys.argv) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME")

filename = sys.argv[1]

original = cv.imread(filename)
print(original.shape)
height, width, _ = original.shape
print(width)
cv.imshow('Original', original)

# Single-channel image of zeros with the same height and width as the original.
blank = np.zeros(shape=original.shape[:2], dtype='uint8')
cv.imshow('Blank', blank)

# The mask is a filled circle in the middle of the image.
mask = cv.circle(blank.copy(), center=(int(width/2), int(height/2)), radius=int(min(width/3, height/3)), color=255, thickness=cv.FILLED)
cv.imshow('Mask', mask)
print(mask.shape) # 2D

# Variant 1: AND the image with itself, restricted by the 2D mask.
masked = cv.bitwise_and(original, original, mask=mask)
cv.imshow('Masked', masked)

# Variant 2: expand the mask to 3 channels and AND it with the image directly.
full_mask = cv.cvtColor(mask, cv.COLOR_GRAY2BGR)
print(full_mask.shape) #3D

masked2 = cv.bitwise_and(original, full_mask)
cv.imshow('Masked 2', masked2)
print(np.array_equal(masked, masked2)) #True

cv.waitKey(0)

OpenCV - grayscale histograms

  • calcHist

  • Visualize the distribution of pixel intensities in a picture

import cv2 as cv
import sys
import matplotlib.pyplot as plt
import numpy as np

# Exactly one argument is required: the path of the image to analyse.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
cv.imshow('Original', image)

gray = cv.cvtColor(image, code=cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)

# No mask by default. Uncomment the lines below to restrict the histogram to a
# circle in the middle of the picture.
mask = None
# height, width = gray.shape[:2]
# blank = np.zeros(shape=gray.shape[:2], dtype='uint8')
# mask = cv.circle(blank.copy(), center=(int(width/2), int(height/2)), radius=int(min(width/5, height/5)), color=255, thickness=cv.FILLED)
# cv.imshow('Mask', mask)
# masked = cv.bitwise_and(gray, gray, mask=mask)
# cv.imshow('Masked', masked)

cv.waitKey(0)

# histSize is the number of bins.
BINS = 256
histogram = cv.calcHist([gray], channels=[0], mask=mask, histSize=[BINS], ranges=[0, BINS])

tick_positions = [0, 50, 100, 150, 200, 255]
tick_labels = ["0\nBlack", 50, "100", 150, 200, "255\nWhite"]

plt.figure()
plt.title("GrayScale Histogram")
plt.xlabel("Bins")
plt.xticks(ticks=tick_positions, labels=tick_labels)
plt.ylabel("# of pixels")
plt.plot(histogram)
plt.xlim([0, BINS])
plt.show()

OpenCV - color histograms

  • calcHist
import cv2 as cv
import sys
import matplotlib.pyplot as plt
import numpy as np

# Exactly one argument is required: the path of the image to analyse.
if len(sys.argv) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME")

filename = sys.argv[1]

original = cv.imread(filename)
cv.imshow('Original', original)

# No mask by default. Uncomment the lines below to restrict the histogram to a
# circle in the middle of the picture.
mask = None
# height, width = original.shape[:2]
# blank = np.zeros(shape=original.shape[:2], dtype='uint8')
# mask = cv.circle(blank.copy(), center=(int(width/2), int(height/2)), radius=int(min(width/5, height/5)), color=255, thickness=cv.FILLED)
# cv.imshow('Mask', mask)
# masked = cv.bitwise_and(original, original, mask=mask)
# cv.imshow('Masked', masked)

cv.waitKey(0)

plt.figure()
plt.title("Color Histogram")
plt.xlabel("Bins")
plt.xticks(ticks=[0, 50, 100, 150, 200, 255], labels=["0\nDark", 50, "100", 150, 200, "255\nBright"])
plt.ylabel("# of pixels")
plt.xlim([0, 256])

# Channel index -> colour name; the channels are in blue, green, red order.
colors = ('blue', 'green', 'red')
for ix, color in enumerate(colors):
    hist = cv.calcHist([original], channels=[ix], mask=mask, histSize=[256], ranges=[0, 256])
    # Draw each curve in the colour of its channel; without color= matplotlib
    # picks its own default colours, which do not match the channels.
    plt.plot(hist, color=color)

plt.show()

OpenCV - Face detection

  • CascadeClassifier

  • detectMultiScale

  • Using a pretrained classifier provided by OpenCV

  • Haarcascade

  • Local binary pattern

  • Get the haarcascade_frontalface_default.xml XML file from GitHub and save it locally

import cv2 as cv
import sys

# Exactly one argument is required: the path of the image to search for faces.
args = sys.argv[1:]
if len(args) != 1:
    exit(f"Usage: {sys.argv[0]} FILENAME")

image = cv.imread(args[0])
print(image.shape)
cv.imshow('Original', image)

# Detection runs on the grey version of the picture.
grey_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
cv.imshow('Grey', grey_image)

# The pretrained classifier is read from an XML file in the current directory.
detector = cv.CascadeClassifier('haarcascade_frontalface_default.xml')

found = detector.detectMultiScale(grey_image, scaleFactor=1.1, minNeighbors=3)
# print(found)
print(f"Number of faces: {len(found)}")

# Draw a rectangle around every hit on a copy of the original image.
annotated = image.copy()
for (left, top, box_width, box_height) in found:
    top_left = (left, top)
    bottom_right = (left + box_width, top + box_height)
    cv.rectangle(annotated, top_left, bottom_right, color=(0, 0, 255), thickness=2)
cv.imshow('Faces', annotated)

cv.waitKey(0)

OpenCV - Face recognition

  • To be finished!
import cv2 as cv
import sys
import numpy as np


def get_faces(filename, haar_classifier=None):
    """Detect the faces in an image file and return them as cropped grey images.

    Parameters:
        filename: path of the image to read.
        haar_classifier: a cv.CascadeClassifier used for the detection. When it
            is omitted, 'haarcascade_frontalface_default.xml' is loaded from
            the current directory.

    Returns:
        A list with one dict per detected face:
          "img" - copy of the grey-scale pixels inside the detected rectangle
          "loc" - the rectangle itself as (x, y, width, height)
    """
    if haar_classifier is None:
        haar_classifier = cv.CascadeClassifier('haarcascade_frontalface_default.xml')

    original = cv.imread(filename)
    gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
    faces_rect = haar_classifier.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=3)

    faces = []
    for face in faces_rect:
        (x, y, width, height) = face
        # Rows (y) come first when slicing, then columns (x).
        faces.append({"img": gray[y:y+height, x:x+width].copy(), "loc": face})
    return faces

def collect_data(directory):
    """Write the training data files 'features.npy' and 'labels.npy'.

    Unfinished: the loop that walks `directory` is only sketched in the
    comments below, so both files are currently saved as empty arrays.
    """
    haar_classifier = cv.CascadeClassifier('haarcascade_frontalface_default.xml')

    # Plan: have a bunch of pictures, one person in each picture, with a
    # mapping to the name of the person. Run face detection on each file and
    # crop the recognized part.
    features, labels = [], []  # cropped faces and the corresponding names

    # for each subdirectory in directory (we assume it is the name of the person)
    #    for each image in the subdirectory
    #    features.append(crop(path_to_image)["img"])
    #    labels.append(name_of_person)

    np.save('features.npy', np.array(features, dtype='object'))
    np.save('labels.npy', np.array(labels, dtype='object'))

def create_recognizer():
    """Train an LBPH face recognizer and save it as 'face_trained.yml'.

    Reads the training data from 'features.npy' and 'labels.npy' in the
    current directory.
    """
    # Both files are saved as object arrays, which numpy only loads when
    # allow_pickle=True. Unpickling is only safe for trusted files - these are
    # expected to be the ones written by collect_data.
    features = np.load('features.npy', allow_pickle=True)
    labels = np.load('labels.npy', allow_pickle=True)

    face_recognizer = cv.face.LBPHFaceRecognizer_create()
    # NOTE(review): the labels are stored as an object array of names; the
    # recognizer presumably needs integer labels - confirm before finishing.
    face_recognizer.train(features, labels)
    face_recognizer.save("face_trained.yml")


def recognize_image(filename):
    """Recognize the faces in an image file and show the labelled image.

    Loads the trained recognizer from 'face_trained.yml', predicts a label for
    every face detected in `filename`, prints the prediction and writes the
    label on the image before displaying it.
    """
    original = cv.imread(filename)

    face_recognizer = cv.face.LBPHFaceRecognizer_create()
    face_recognizer.read("face_trained.yml")

    # get_faces needs the classifier that finds the faces to crop.
    haar_classifier = cv.CascadeClassifier('haarcascade_frontalface_default.xml')
    faces = get_faces(filename, haar_classifier)
    for face in faces:
        label, confidence = face_recognizer.predict(face["img"])
        print(f"This image is {label} with a confidence of {confidence}") # location: face["loc"]
        cv.putText(original, text=str(label), org=(20, 20), fontFace=cv.FONT_HERSHEY_COMPLEX, fontScale=1.0, color=(0, 0, 255), thickness=2)
    cv.imshow('Faces', original)
    cv.waitKey(0)

# Two arguments are required: the directory with the training pictures and
# the image in which faces should be recognized.
if len(sys.argv) != 3:
    exit(f"Usage: {sys.argv[0]} DIRECTORY FILENAME")

directory = sys.argv[1]
filename = sys.argv[2]

# Collect the training data, train the recognizer, then apply it to the image.
collect_data(directory)
create_recognizer()
recognize_image(filename)

Jupyter notebook

examples/opencv/image_representation.ipynb examples/opencv/opencv.ipynb

Download movies

  • Install pytube
pip install pytube
  • Visit YouTube
  • Search for "chameleon changing color"
  • Take the URL of any video
pytube https://www.youtube.com/watch?v=hXMZ214pNZ4

OpenCV Read video

import cv2 as cv
import sys

# Exactly one argument is required: the path of the video to play.
if len(sys.argv) != 2:
    exit(f"Usage: {sys.argv[0]} FILENAME")

filename = sys.argv[1]

capture = cv.VideoCapture(filename)

while True:
    success, frame = capture.read()
    # read() reports failure once no frame could be read (for example at the
    # end of the video). Showing the missing frame would make imshow fail with
    # the -215 assertion error, so stop here instead.
    if not success:
        break
    cv.imshow('Video', frame)
    #cv.waitKey(0)
    # press d to quit the video in the middle
    if cv.waitKey(20) & 0xFF == ord('d'):
        break

capture.release()
cv.destroyAllWindows()

  • Instead of a filename, pass a number (0, 1, 2, ...) to capture video using a camera
# cv2.error: OpenCV(4.5.3) /tmp/pip-req-build-agffqapq/opencv/modules/highgui/src/window.cpp:1006:
#    error: (-215:Assertion failed) size.width>0 && size.height>0 in function 'imshow'

# This -215 means we ran out of the frames.

Only for Live video: capture.set(3, width) capture.set(4, height)

Show images - accept key presses

  • React to various keys
  • Close application if the user closes the image window
  • Add slider to change the size of the image
import cv2 as cv
import sys
import os

WAIT_TIME = 100
START_HEIGHT = 300
MAX_HEIGHT = 600

def resize(img, current_height):
    """Shrink img so that it is at most current_height pixels high.

    Images that are not higher than current_height are returned unchanged;
    higher images are scaled down with the same factor in both directions.

    Parameters:
        img: the image as a 3-dimensional array (height, width, colors).
        current_height: the requested maximum height in pixels.

    Returns:
        The original image, or a resized copy of it.
    """
    height, width, colors = img.shape

    if height > current_height:
        scale = current_height/height
        # The slider can be dragged down to 0, which would ask for an image of
        # 0x0 pixels. Never go below one pixel in either direction.
        new_height = max(1, int(height * scale))
        new_width = max(1, int(width * scale))
        img = cv.resize(img, (new_width, new_height), interpolation=cv.INTER_AREA)
    return img

def slider_changed(event):
    """Trackbar callback that deliberately does nothing.

    The argument is ignored; the slider position is read separately with
    getTrackbarPos in get_size.
    """
    # To debug, print the received value here: print('slider', event)
    return None

def get_size():
    """Return the height at which the current image should be shown.

    While the 'img' window reports a visibility property above 0 the position
    of its "Size" trackbar is returned; otherwise START_HEIGHT is returned.
    """
    window_visible = cv.getWindowProperty('img', cv.WND_PROP_VISIBLE) > 0
    if not window_visible:
        return START_HEIGHT
    return cv.getTrackbarPos("Size", "img")


def main():
    """Show the files of a directory one by one in a window with a size slider.

    Keys:
        n - next image
        p - previous image
        q or x - quit
    The application also exits when the image window is closed. Files that
    cannot be read as an image are reported and skipped.
    """
    if len(sys.argv) != 2:
        exit(f"Usage: {sys.argv[0]} DIRNAME")

    dirname = sys.argv[1]
    # Every entry of the directory that is not itself a directory.
    candidates = (os.path.join(dirname, name) for name in os.listdir(dirname))
    files = [path for path in candidates if not os.path.isdir(path)]
    if not files:
        # Without this check the loop below would fail on files[idx].
        exit(f"No files found in {dirname}")

    # Remember what is currently displayed so the image is only reloaded when
    # the selected file or the requested height changes.
    prev_idx = -1
    prev_height = 0
    idx = 0

    cv.namedWindow("img")
    cv.createTrackbar("Size", "img", START_HEIGHT, MAX_HEIGHT, slider_changed)
    ##cv.resizeWindow("slider", 640, 45)

    while True:
        current_height = get_size()
        if idx != prev_idx or current_height != prev_height:
            img = cv.imread(files[idx])
            if img is None:
                # imread returns None for files it cannot decode. Drop the
                # file from the list and try the one that takes its place.
                print(f"Skipping unreadable file: {files[idx]}")
                del files[idx]
                if not files:
                    cv.destroyAllWindows()
                    exit(f"No readable images found in {dirname}")
                idx %= len(files)
                prev_idx = -1  # force a reload on the next iteration
                continue

            prev_idx = idx
            prev_height = current_height
            cv.imshow('img', resize(img, current_height))

        # Make sure application exits if we close the window of the image
        visible = cv.getWindowProperty('img', cv.WND_PROP_VISIBLE)
        if visible == 0.0:
            cv.destroyAllWindows()
            break

        key = cv.waitKey(WAIT_TIME)
        if key == -1:
            # No key was pressed within WAIT_TIME milliseconds.
            continue
        if key in (ord('q'), ord('x')):
            cv.destroyAllWindows()
            break
        if key == ord('n'):
            idx += 1
        elif key == ord('p'):
            idx -= 1
        # Wrap around at both ends of the list.
        idx %= len(files)


if __name__ == "__main__":
    main()

OpenCV Resources

pip install caer      # just some extra package