One project needed to run multiple models within the same Python application. These models were trained using the Cognitive Services Custom Vision Service. Using an exported model takes two steps:
- Prepare the image
- Classify the image
Prepare an image for prediction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from PIL import Image | |
import numpy as np | |
import cv2 | |
def convert_to_opencv(image):
    """Convert a PIL-style image into a BGR pixel array for OpenCV.

    Args:
        image: Object exposing ``convert('RGB')`` whose result NumPy can turn
            into an array with three channels on its last axis (for example
            a ``PIL.Image.Image``).

    Returns:
        A NumPy array with the same row/column layout as the input and the
        channels reordered to blue, green, red.
    """
    # RGB -> BGR conversion is performed as well.
    rgb = np.array(image.convert('RGB'))
    # Transposing moves the channel axis to the front so it can be unpacked.
    red, green, blue = rgb.T
    # Re-stack as B, G, R and transpose back to (row, column, channel).
    return np.array([blue, green, red]).transpose()
def crop_center(img, cropx, cropy):
    """Return the centered ``cropx``-wide, ``cropy``-tall window of ``img``.

    Args:
        img: Array whose first two axes are rows (height) and columns (width).
        cropx: Width of the window, in pixels.
        cropy: Height of the window, in pixels.

    Returns:
        A slice of ``img`` (a view, not a copy, for NumPy arrays).
    """
    h, w = img.shape[:2]
    # Plain ASCII minus: the listing used an en dash here, which Python
    # rejects as an invalid character.
    startx = w // 2 - (cropx // 2)
    starty = h // 2 - (cropy // 2)
    return img[starty:starty + cropy, startx:startx + cropx]
def resize_down_to_1600_max_dim(image):
    """Shrink ``image`` so its larger side is 1600 pixels, keeping aspect ratio.

    Images whose sides are both at most 1600 pixels are returned unchanged
    (the same object, not a copy).

    Args:
        image: Array whose first two axes are height and width.

    Returns:
        The original image, or a resized copy produced with bilinear
        interpolation.
    """
    h, w = image.shape[:2]
    # "<=" so an image that is already exactly 1600 on its longest side is
    # not pointlessly resampled to its own size.
    if max(h, w) <= 1600:
        return image
    # cv2.resize takes the target size as (width, height). The short side is
    # clamped to 1 so extreme aspect ratios never request a 0-pixel dimension.
    if h > w:
        new_size = (max(1, 1600 * w // h), 1600)
    else:
        new_size = (1600, max(1, 1600 * h // w))
    return cv2.resize(image, new_size, interpolation=cv2.INTER_LINEAR)
def resize_to_256_square(image):
    """Resize ``image`` to 256x256 pixels using bilinear interpolation.

    The aspect ratio is not preserved, so callers that care should pass an
    already-square image.
    """
    return cv2.resize(image, (256, 256), interpolation=cv2.INTER_LINEAR)
def update_orientation(image):
    """Apply the EXIF orientation tag to ``image``, if it has one.

    Args:
        image: Image object. Only objects exposing ``_getexif()`` and
            ``transpose()`` (as PIL images do) are modified.

    Returns:
        The flipped/transposed image, or ``image`` itself when it has no
        ``_getexif`` method, no EXIF data, or no orientation tag.
    """
    exif_orientation_tag = 0x0112
    if not hasattr(image, '_getexif'):
        return image
    exif = image._getexif()
    if exif is None or exif_orientation_tag not in exif:
        return image
    # orientation is 1 based, shift to zero based and flip/transpose based on
    # 0-based values
    orientation = exif[exif_orientation_tag] - 1
    if orientation >= 4:
        image = image.transpose(Image.TRANSPOSE)
    if orientation in (2, 3, 6, 7):
        image = image.transpose(Image.FLIP_TOP_BOTTOM)
    if orientation in (1, 2, 5, 6):
        image = image.transpose(Image.FLIP_LEFT_RIGHT)
    return image
def prepare_image(image, network_input_size=244):
    """Turn a loaded image into the square BGR array fed to the model.

    Steps: apply EXIF orientation, convert to an OpenCV-style BGR array,
    cap the largest side at 1600 pixels, take the largest center square,
    resize it to 256x256 and finally crop the center to the network size.

    Args:
        image: Image object accepted by ``update_orientation`` and
            ``convert_to_opencv``.
        network_input_size: Side length, in pixels, of the final square crop.
            The original listing passed the literal 244 for the width and an
            undefined ``network_input_size`` for the height; the same value
            is now used for both.
            NOTE(review): 244 is kept only because it is the value in the
            listing - confirm it against the model's input placeholder shape.

    Returns:
        Array of shape (network_input_size, network_input_size, channels).

    Raises:
        ValueError: If ``network_input_size`` does not fit in the 256x256
            intermediate image.
    """
    if not 0 < network_input_size <= 256:
        raise ValueError(
            'network_input_size must be between 1 and 256, got %r'
            % (network_input_size,)
        )

    # Update orientation based on EXIF tags, if the file has orientation info.
    image = update_orientation(image)

    # Convert to OpenCV format
    image = convert_to_opencv(image)

    # If the image has either w or h greater than 1600 we resize it down
    # respecting aspect ratio such that the largest dimension is 1600
    image = resize_down_to_1600_max_dim(image)

    # We next get the largest center square
    h, w = image.shape[:2]
    min_dim = min(w, h)
    max_square_image = crop_center(image, min_dim, min_dim)

    # Resize that square down to 256x256
    augmented_image = resize_to_256_square(max_square_image)

    # Crop the center to the size the network expects
    return crop_center(augmented_image, network_input_size, network_input_size)
Classify the image
Running multiple models in Python was fairly simple: call tf.reset_default_graph() after saving the loaded session into memory.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import numpy as np | |
# The category name and probability percentage
class CategoryScore:
    """Pairs a category label with the probability predicted for it."""

    def __init__(self, category, probability: float):
        # category: the label, stored as passed in
        self.category = category
        # probability: the score the model assigned to that label
        self.probability = probability

    def __repr__(self):
        return '{}(category={!r}, probability={!r})'.format(
            type(self).__name__, self.category, self.probability
        )
# The categorizer handles running tensorflow models
class Categorizer:
    """Loads one frozen TensorFlow graph and scores images against it.

    Each instance owns its own ``tf.Graph`` and ``tf.Session``, so several
    categorizers can coexist in one process without touching the default
    graph.
    """

    def __init__(self, model_file_path: str, map: list):
        """Load the model stored at ``model_file_path``.

        Args:
            model_file_path: Path to a serialized ``GraphDef`` file.
            map: Labels indexed by the model's output position.
        """
        self.map = map
        self.graph = tf.Graph()
        self.input_node = 'Placeholder:0'
        output_layer = 'loss:0'

        self.graph_def = self.graph.as_graph_def()
        with tf.gfile.GFile(model_file_path, 'rb') as f:
            self.graph_def.ParseFromString(f.read())

        # as_default() only takes effect as a context manager; the bare call
        # in the original left self.graph empty and imported the model into
        # the process-wide default graph instead.
        with self.graph.as_default():
            tf.import_graph_def(self.graph_def, name='')

        # Binding the session to this instance's graph removes the need to
        # reset the default graph while a session is still open.
        self.sess = tf.Session(graph=self.graph)
        self.prob_tensor = self.graph.get_tensor_by_name(output_layer)

    def score(self, image):
        """Run the model on one prepared image.

        Args:
            image: Input fed, wrapped in a single-item batch, to the
                ``Placeholder:0`` tensor.

        Returns:
            A list of ``CategoryScore``, one per model output in output
            order, with probabilities rounded to 8 decimal places.
        """
        predictions, = self.sess.run(self.prob_tensor, {self.input_node: [image]})
        return [
            CategoryScore(self.map[index], np.float64(np.round(p, 8)))
            for index, p in enumerate(predictions)
        ]

    def close(self):
        """Release the TensorFlow session held by this categorizer."""
        self.sess.close()
After the Categorizer is created, just call score and it will score the image against the labels in the map.