Commit 7d7e0025 authored by Merlijn Wajer's avatar Merlijn Wajer
Browse files

Initial commit

Pipeline #55292 failed with stages
in 0 seconds
#!/usr/bin/env python3
# Autocrop debug tool
import argparse
from PIL import Image, ImageDraw
import numpy as np
from internetarchiveautocrop.const import VERSION
from internetarchiveautocrop.autocrop import autocrop_session, \
autocrop_load_model, autocrop_image
def autocrop(in_img, out_img, model_path, rot=None):
    """Run the autocrop model on one image and write a debug composite.

    The composite stacks three pictures vertically: the (downscaled) input
    with the detected page quadrilateral drawn on top, the binary page mask,
    and the page probability map.

    :param in_img: input image, handed to ``Image.open``
    :param out_img: destination of the PNG composite, handed to ``Image.save``
    :param model_path: model directory, handed to ``autocrop_load_model``
    :param rot: optional rotation in degrees applied to the input
                (``Image.rotate``) before detection; ``None`` for no rotation
    :raises ValueError: if ``autocrop_image`` reports no page (returns None)
    """
    # NOTE(review): the right-hand side of this assignment was missing from
    # the file as received; Image.open(in_img) is a reconstruction - confirm.
    image = Image.open(in_img)
    if rot is not None:
        image = image.rotate(rot)

    session = autocrop_session()
    with session:
        model = autocrop_load_model(model_path)

        # TODO: image rotation from scandata Q.Q
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        image.thumbnail((1000, 1500), Image.LANCZOS)
        image = image.convert('RGB')
        image_width, image_height = image.size

        result = autocrop_image(model, image, debug=True)

    # autocrop_image returns None when no box is found; fail with a readable
    # message instead of a tuple-unpacking TypeError.
    if result is None:
        raise ValueError('autocrop_image did not find a page in %r' % (in_img,))
    rect, skew, quad, probs, bins = result

    probs_i = Image.fromarray((probs * 255).astype(np.uint8))
    bins_i = Image.fromarray(bins * 255)

    probs_i_width, probs_i_height = probs_i.size
    bins_i_width, bins_i_height = bins_i.size

    # Vertical layout: wide enough for the widest picture, tall enough for
    # all three on top of each other.
    # NOTE(review): the Image.new( call was missing from the file as
    # received and has been reconstructed - confirm.
    newimg = Image.new('RGB',
                       (max(image_width, bins_i_width, probs_i_width),
                        image_height + bins_i_height + probs_i_height))

    newimg.paste(image, (0, 0))

    # Outline the detected quadrilateral on top of the input picture.
    draw = ImageDraw.Draw(newimg)
    tl, tr, bl, br = map(tuple, quad)
    draw.line([tl, tr], fill=128, width=5)
    draw.line([tr, br], fill=128, width=5)
    draw.line([br, bl], fill=128, width=5)
    draw.line([bl, tl], fill=128, width=5)

    newimg.paste(bins_i, (0, image_height))
    newimg.paste(probs_i, (0, image_height + bins_i_height))

    # NOTE(review): only "format='PNG')" of this call survived in the file
    # as received; newimg.save(out_img, ...) is a reconstruction - confirm.
    newimg.save(out_img, format='PNG')
if __name__ == '__main__':
    # NOTE(review): every call in this block was cut off mid-argument-list in
    # the file as received. The flag names, types and defaults below are
    # original; the help texts, the '% VERSION' operand, required=True and the
    # rot= keyword are reconstructed - confirm against the real script.
    parser = argparse.ArgumentParser(description='Autocrop debug tool %s'
                                                 % VERSION)
    # The tool cannot do anything without these three, so let argparse
    # report a missing one instead of failing later on a None path.
    parser.add_argument('-i', '--input-image', type=str, default=None,
                        required=True, help='Input image path')
    parser.add_argument('-o', '--output-image', type=str, default=None,
                        required=True, help='Output (debug PNG) image path')
    parser.add_argument('-m', '--model-path', type=str, default=None,
                        required=True, help='Path to the model directory')
    parser.add_argument('-r', '--rotate-input-image', type=int, default=None,
                        help='Rotate the input image by this many degrees')
    args = parser.parse_args()

    autocrop(args.input_image, args.output_image, args.model_path,
             rot=args.rotate_input_image)
from . import const
from . import autocrop
import tensorflow as tf
import numpy as np
import cv2
from math import atan2, degrees, sqrt, dist
from dh_segment.inference import LoadedModel
from dh_segment.post_processing import boxes_detection, binarization
def autocrop_load_model(model_dir):
    """Load the segmentation model stored in *model_dir*.

    :param model_dir: model directory, passed straight to dhSegment's
                      ``LoadedModel``
    :return: a ``LoadedModel`` created with ``predict_mode='image'``
    """
    return LoadedModel(model_dir, predict_mode='image')
def gpu_available():
    """Return True if TensorFlow lists at least one physical 'GPU' device."""
    return len(tf.config.list_physical_devices('GPU')) > 0
def autocrop_session():
    """Create the TensorFlow (v1 compat) session used for autocrop runs.

    :return: a ``tf.compat.v1.Session``; usable as a context manager
    """
    gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.05)

    # Limit to just one thread, we don't want to use more than one for our use
    # case, since it doesn't scale linearly (8 parallel autocrops is more
    # efficient than one autocrop with 8 threads)
    # NOTE(review): the ConfigProto argument list was missing from the file
    # as received. The arguments below are reconstructed from the comment
    # above and from the otherwise unused gpu_options - confirm.
    session_conf = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
        gpu_options=gpu_options)

    return tf.compat.v1.Session(config=session_conf)
def autocrop_image(m, pil_image, debug=False):
    """Detect the page in an image and describe it as a rectangle plus angle.

    :param m: loaded model; only its ``predict`` method is used
    :param pil_image: image handed to ``m.predict``
    :param debug: when true, also return the intermediate results
    :return: ``None`` if no box was found; otherwise ``(rect, skew)``, or
             ``(rect, skew, quad, probs, bin_upscaled)`` when *debug* is set.
             ``rect`` and ``skew`` come from ``quad_to_skewed_rect``, ``quad``
             from ``order_quad``, ``probs`` is the normalised class-1
             probability map and ``bin_upscaled`` the resized binary mask.
    """
    # XXX: TODO: some of this is from dhSegment
    prediction_outputs = m.predict(pil_image)
    probs = prediction_outputs['probs'][0]
    original_shape = prediction_outputs['original_shape']

    # Take only class '1' (class 0 is the background, class 1 is the page)
    probs = probs[:, :, 1]

    # Normalize to be in [0, 1]; an all-zero map is left untouched so that we
    # do not divide by zero and feed NaNs into the thresholding below.
    peak = np.max(probs)
    if peak > 0:
        probs = probs / peak

    # Binarize the predictions
    page_bin = page_make_binary_mask(probs)

    # Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for
    # giving shapes)
    bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),
                              tuple(original_shape[::-1]),
                              interpolation=cv2.INTER_NEAREST)

    # Find quadrilateral enclosing the page
    # TODO: min_rectangle is probably finding an area that's a bit too large,
    # especially if we have things sticking out on the side
    # We could try the quadrilateral mode, maybe for skew, but I don't think
    # that makes much sense either. Maybe we should do our own contour finding.
    # XXX: we currently ignore min_area in find_boxes
    # NOTE(review): the keyword arguments of this call were missing from the
    # file as received; mode='min_rectangle', n_max_boxes=1 is reconstructed
    # from the TODO above and the single-box handling below - confirm.
    pred_page_coords = boxes_detection.find_boxes(
        bin_upscaled.astype(np.uint8, copy=False),
        mode='min_rectangle', n_max_boxes=1)

    if pred_page_coords is None:
        return None

    quad = order_quad(pred_page_coords)
    rect, skew = quad_to_skewed_rect(quad)

    if debug:
        return rect, skew, quad, probs, bin_upscaled

    return rect, skew
def order_quad(quad):
    """Order the four corners of a quadrilateral.

    The points are split into a left and a right pair by x coordinate; within
    each pair the point with the smaller y is treated as the top one.

    :param quad: iterable of four points, each indexable as ``[0]`` (x) and
                 ``[1]`` (y)
    :return: tuple ``(top_left, top_right, bottom_left, bottom_right)``
    """
    by_x = sorted(quad, key=lambda pt: pt[0])

    left_pair = sorted(by_x[0:2], key=lambda pt: pt[1])
    right_pair = sorted(by_x[2:], key=lambda pt: pt[1])

    top_left, bottom_left = left_pair[0], left_pair[1]
    top_right, bottom_right = right_pair[0], right_pair[1]

    return (top_left, top_right, bottom_left, bottom_right)
def quad_to_skewed_rect(quad):
    """Turn an ordered quadrilateral into a rectangle and an angle.

    :param quad: ``(top_left, top_right, bottom_left, bottom_right)``, each an
                 ``(x, y)`` point (the order produced by ``order_quad``)
    :return: ``(((x1, y1), (x2, y2)), avg_angle)``. ``(x1, y1)`` is the
             top-left corner; ``x2``/``y2`` add the length of the top edge /
             left edge to it. ``avg_angle`` is the mean, in radians, of the
             ``atan2`` direction of the four edges.
    """
    top_left, top_right, bottom_left, bottom_right = (
        quad[0], quad[1], quad[2], quad[3])

    # Direction of the two (roughly) horizontal edges, left to right.
    x_top_angle = atan2(float(top_right[1] - top_left[1]),
                        float(top_right[0] - top_left[0]))
    x_bottom_angle = atan2(float(bottom_right[1] - bottom_left[1]),
                           float(bottom_right[0] - bottom_left[0]))

    # Direction of the two (roughly) vertical edges, top to bottom.
    y_left_angle = atan2(float(bottom_left[1] - top_left[1]),
                         float(bottom_left[0] - top_left[0]))
    y_right_angle = atan2(float(bottom_right[1] - top_right[1]),
                          float(bottom_right[0] - top_right[0]))

    # NOTE(review): the vertical-edge angles are averaged in as-is, so an
    # axis-aligned quad yields pi/4 rather than 0. Left unchanged because
    # callers are not visible here - confirm this is what they expect.
    avg_angle = (x_top_angle + x_bottom_angle
                 + y_left_angle + y_right_angle) / 4.

    # Anchor at the top-left corner; extents are the edge lengths, not the
    # coordinates of the opposite corner.
    x1 = top_left[0]
    y1 = top_left[1]
    x2 = top_left[0] + dist(top_left, top_right)
    y2 = top_left[1] + dist(top_left, bottom_left)

    return ((x1, y1), (x2, y2)), avg_angle
# XXX: from dhsegment
def page_make_binary_mask(probs: np.ndarray, threshold: float=-1) -> np.ndarray:
    """
    Computes the binary mask of the detected Page from the probabilities output by the network

    :param probs: array with values in range [0, 1]
    :param threshold: threshold between [0 and 1], if negative Otsu's adaptive threshold will be used
    :return: binary mask
    """
    mask = binarization.thresholding(probs, threshold)
    # Second pass over the thresholded mask with dhSegment's cleaning helper.
    mask = binarization.cleaning_binary(mask, kernel_size=5)
    return mask
# Version string of the package.
VERSION = '0.0.1'
# Same value under the conventional dunder name.
__version__ = VERSION
from setuptools import setup
from distutils.util import convert_path
# Execute the version module into a scratch namespace so the version can be
# read without importing the package (and its heavy dependencies).
main_ns = {}
# NOTE(review): the file name after 'internetarchiveautocrop/' and the
# argument to exec() were missing from the file as received; 'const.py' and
# ver_file.read() are reconstructed - confirm.
ver_path = convert_path('internetarchiveautocrop/const.py')
with open(ver_path) as ver_file:
    exec(ver_file.read(), main_ns)

# NOTE(review): the opening of the setup() call (and any arguments before
# 'description') was missing from the file as received; name, version and
# packages below are reconstructed - confirm.
setup(name='internetarchiveautocrop',
      version=main_ns['__version__'],
      packages=['internetarchiveautocrop'],
      description='Internet Archive book autocrop tools',
      author='Merlijn Boris Wolf Wajer',
      #scripts=['bin/recode_pdf', 'tools/mrcview'],
      #package_data={'internetarchivepdf': ['data/*']},
      #zip_safe=False, # XXX: maybe this can go
      )
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment