# Generate ground truth (score map and geo map) for RBOX

# -*- coding: utf-8 -*-

"""

Created on Thu Feb 13 10:59:35 2020

@author: zchen

Convert an image and its bounding boxes into a score map and a geo map for machine learning

An image is of width x height dimensions

The bounding boxes are a list of row vector [x1, y1, x2, y2, ... x4, y4] which represents

the coordinates of the 4 vertices of a quadrangle.

The labels indicate if a bounding box contains text, 1 is true and 0 is false.

"""

import numpy as np

import cv2

from PIL import Image

import math

import matplotlib.pyplot as plt

def cal_distance(x1, y1, x2, y2):
    '''Return the Euclidean distance between the points (x1, y1) and (x2, y2).

    math.hypot is used instead of squaring the differences by hand, so very
    large coordinate differences do not overflow while being squared.
    The result is a Python float.
    '''
    return math.hypot(x1 - x2, y1 - y2)

def move_points(vertices, index1, index2, r, coef):
    '''Shrink one edge by pulling its two end points towards each other.

    vertices -- flat sequence [x1, y1, x2, y2, x3, y3, x4, y4]; it is
                modified in place and the same object is returned
    index1, index2 -- vertex numbers of the two end points of the edge;
                values of 4 or more are accepted and wrapped with % 4
    r -- per-vertex reference lengths, r[k] belongs to vertex k
    coef -- fraction of the reference length by which each point moves

    Each end point moves along the edge by r[k] * coef. An edge whose
    length is not greater than 1 is left untouched.
    '''
    first = index1 % 4
    second = index2 % 4

    # vertex k occupies flat positions 2k (x) and 2k + 1 (y)
    ax, ay = first * 2, first * 2 + 1
    bx, by = second * 2, second * 2 + 1

    reach_first = r[first]    # reference length of the first end point
    reach_second = r[second]  # reference length of the second end point

    # vector pointing from the second point to the first; may be negative
    delta_x = vertices[ax] - vertices[bx]
    delta_y = vertices[ay] - vertices[by]
    edge_len = cal_distance(vertices[ax], vertices[ay], vertices[bx], vertices[by])

    if not edge_len > 1:
        return vertices

    # first point travels against the delta vector, i.e. towards the second
    pull = (reach_first * coef) / edge_len
    vertices[ax] += (-delta_x) * pull
    vertices[ay] += (-delta_y) * pull

    # second point travels along the delta vector, i.e. towards the first
    pull = (reach_second * coef) / edge_len
    vertices[bx] += delta_x * pull
    vertices[by] += delta_y * pull
    return vertices

def shrink_poly(vertices, coef=0.3):
    '''Shrink a quadrangle towards its inside and return the new vertices.

    vertices -- the 8 values [x1, y1, x2, y2, x3, y3, x4, y4]; not modified
    coef -- fraction of each vertex's shortest adjacent edge by which the
            vertex is moved along every edge it belongs to (default 0.3)

    The pair of opposite edges with the larger summed length is shrunk
    first, then the other pair.

    Returns a new float64 numpy array of 8 values. The work is done on a
    float copy on purpose: move_points updates the array in place, and an
    integer array would silently truncate every fractional offset.
    '''
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices

    # length of each of the four edges, computed once
    edge_12 = cal_distance(x1, y1, x2, y2)
    edge_23 = cal_distance(x2, y2, x3, y3)
    edge_34 = cal_distance(x3, y3, x4, y4)
    edge_41 = cal_distance(x4, y4, x1, y1)

    # for every vertex: the shorter of its two adjacent edges
    r = [
        min(edge_12, edge_41),  # vertex 1
        min(edge_12, edge_23),  # vertex 2
        min(edge_23, edge_34),  # vertex 3
        min(edge_41, edge_34),  # vertex 4
    ]

    # offset 0: edges (v1-v2) and (v3-v4) are the longer pair
    # offset 1: edges (v2-v3) and (v4-v1) are the longer pair
    offset = 0 if edge_12 + edge_34 > edge_23 + edge_41 else 1

    v = np.array(vertices, dtype=np.float64)  # float copy, see docstring
    # longer pair first (starts 0 and 2), then the shorter pair (1 and 3);
    # move_points wraps indices >= 4 back into range
    for start in (0, 2, 1, 3):
        move_points(v, start + offset, start + 1 + offset, r, coef)
    return v

def get_rotate_mat(theta):
    '''Return the 2x2 rotation matrix for the angle theta (in radians).

    With the y axis pointing up, a positive theta rotates counter
    clockwise. In image coordinates the y axis points down, so the same
    matrix appears as a clockwise rotation when looking at the image.

    Derivation: let (x1, y1) be a vector of radius r at angle alpha and
    (x2, y2) the same vector at angle alpha + theta.
        x1 / r = cos(alpha),  y1 / r = sin(alpha)
        x2 / r = cos(alpha + theta) = cos(alpha)cos(theta) - sin(alpha)sin(theta)
        y2 / r = sin(alpha + theta) = cos(alpha)sin(theta) + sin(alpha)cos(theta)
    Substituting cos(alpha) and sin(alpha) and cancelling r gives
        x2 = x1 cos(theta) - y1 sin(theta)
        y2 = x1 sin(theta) + y1 cos(theta)
    that is
        [x2, y2] = [[cos(theta), -sin(theta)], [sin(theta), cos(theta)]] <dot> [x1, y1]
    '''
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    return np.array([[cos_t, -sin_t],
                     [sin_t, cos_t]])

def rotate_vertices(vertices, theta, anchor=None):
    '''Rotate the four vertices by theta (radians) around an anchor point.

    vertices -- numpy array of 8 values [x1, y1, ..., x4, y4]
    theta -- rotation angle in radians, see get_rotate_mat
    anchor -- 2x1 column [[x], [y]] to rotate around; when None the first
              vertex is used

    Returns a flat array of 8 values in the same order as the input.
    '''
    # one column per vertex, so the 2x2 matrix can be applied to all at once
    columns = vertices.reshape((4, 2)).T
    pivot = columns[:, :1] if anchor is None else anchor
    # shift to the pivot, rotate, shift back
    turned = np.dot(get_rotate_mat(theta), columns - pivot)
    # back to one row per vertex, then flatten
    return (turned + pivot).T.reshape(-1)

def get_boundary(vertices):
    '''Return the axis-aligned rectangle that just encloses the quadrangle.

    vertices -- the 8 values [x1, y1, x2, y2, x3, y3, x4, y4]

    Returns the tuple (x_min, x_max, y_min, y_max).
    '''
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices
    xs = (x1, x2, x3, x4)
    ys = (y1, y2, y3, y4)
    return min(xs), max(xs), min(ys), max(ys)

def cal_error(vertices):
    '''Return how far the quadrangle is from its enclosing upright rectangle.

    The value is the sum of the four distances from vertex k to corner k
    of the rectangle given by get_boundary, with the corners taken in the
    order (x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max).
    A small value means every vertex stays close to its matching corner,
    which favours candidates that need less rotation.
    '''
    x_min, x_max, y_min, y_max = get_boundary(vertices)
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices

    pairs = (
        (x1, y1, x_min, y_min),
        (x2, y2, x_max, y_min),
        (x3, y3, x_max, y_max),
        (x4, y4, x_min, y_max),
    )
    err = cal_distance(*pairs[0])
    for px, py, cx, cy in pairs[1:]:
        err = err + cal_distance(px, py, cx, cy)
    return err

def find_min_rect_angle(vertices):
    '''Find the rotation angle of the smallest rectangle enclosing the quad.

    The quadrangle itself is rotated (around its first vertex, the default
    anchor of rotate_vertices) rather than the rectangle; this is the same
    as rotating the rectangle in the opposite direction.

    The angle found is the best angle for rotating the quadrangle in a
    bottom-up coordinate system (y grows upwards), where a positive angle
    means counter clockwise. The best angle for rotating the rectangle in
    that system would be the negative of it. Because an image uses a
    top-down system (y grows downwards) the sign flips once more, so:
        best angle for rotating the quad in a bottom-up system
        = best angle for rotating the rectangle in a top-down system

    Procedure: every whole degree from -90 to 89 is tried and the area of
    the upright bounding rectangle of the rotated quadrangle is recorded.
    Among the 10 angles with the smallest area, the one with the smallest
    cal_error is chosen.

    Returns the chosen angle in radians.
    '''
    angle_interval = 1
    rank_num = 10  # how many of the smallest rectangles are compared by error
    angle_list = list(range(-90, 90, angle_interval))

    def upright_area(degrees):
        # area of the upright rectangle around the quad rotated by `degrees`
        x1, y1, x2, y2, x3, y3, x4, y4 = rotate_vertices(vertices, degrees / 180 * math.pi)
        width = max(x1, x2, x3, x4) - min(x1, x2, x3, x4)
        height = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
        return width * height

    area_list = [upright_area(degrees) for degrees in angle_list]

    # positions into angle_list, ordered by ascending area
    by_area = sorted(range(len(area_list)), key=lambda k: area_list[k])

    best_index = -1
    min_error = float('inf')
    for index in by_area[:rank_num]:
        # rotate again, this time to measure the error
        candidate = rotate_vertices(vertices, angle_list[index] / 180 * math.pi)
        candidate_error = cal_error(candidate)
        if candidate_error < min_error:
            min_error, best_index = candidate_error, index

    return angle_list[best_index] / 180 * math.pi

def rotate_all_pixels(rotate_mat, anchor_x, anchor_y, img_height, img_width):
    '''Return the rotated location of every pixel of an image.

    rotate_mat -- 2x2 rotation matrix, applied to column vectors [x, y]
    anchor_x, anchor_y -- the point the pixels are rotated around
    img_height, img_width -- size of the pixel grid

    Returns two arrays of shape (img_height, img_width): the first holds
    the rotated x coordinate of every pixel, the second the rotated y
    coordinate. Entry [row, col] belongs to the pixel at x=col, y=row.

    The grid size is taken from the img_height / img_width arguments, not
    from any module-level image object.
    '''
    width_range = np.arange(0, int(img_width))
    height_range = np.arange(0, int(img_height))
    index_width, index_height = np.meshgrid(width_range, height_range)

    # flatten both grids and stack them: row 0 is x, row 1 is y
    x_lin = index_width.reshape((1, index_width.size))
    y_lin = index_height.reshape((1, index_height.size))
    coord_mat = np.concatenate((x_lin, y_lin), 0)

    # shift to the anchor, rotate every (x, y) column, shift back
    anchor = np.array([[anchor_x], [anchor_y]])
    rotated_coord = np.dot(rotate_mat, coord_mat - anchor) + anchor

    # restore the 2-d grid layout
    rotated_x = rotated_coord[0, :].reshape(index_width.shape)
    rotated_y = rotated_coord[1, :].reshape(index_height.shape)
    return rotated_x, rotated_y

img_path = r'C:\Work\Python\EAST\1000_train_images\img_7.jpg'
gt_path = r'C:\Work\Python\EAST\1000_train_images_gt\gt_img_7.txt'

img = Image.open(img_path)

# Read the ground-truth file. The first 8 comma-separated fields of every
# line are the quadrangle coordinates; a line containing '###' is labelled 0
# (no text), every other line 1.
labels = []
vertices = []
with open(gt_path, encoding='utf-8') as f:  # closed again when the block ends
    for line in f:
        line = line.rstrip('\n').lstrip('\ufeff')
        if not line.strip():
            continue  # a blank line carries no box and would break int()
        vertices.append(list(map(int, line.split(',')[:8])))
        labels.append(0 if '###' in line else 1)

vertices, labels = np.array(vertices), np.array(labels)

# score for every pixel
score_map = np.zeros((int(img.height), int(img.width), 1), np.float32)
# the rboxes: 4 distances and 1 theta per pixel
geo_map = np.zeros((int(img.height), int(img.width), 5), np.float32)
# the shrunk polys of all text areas
polys = []

for vertice, label in zip(vertices, labels):
    if label == 0:  # this poly has no text in it
        continue

    # shrink the poly and round it to whole pixel coordinates
    poly = np.around(shrink_poly(vertice).reshape((4, 2))).astype(np.int32)
    polys.append(poly)

    # mask that is 1 inside the shrunk poly and 0 everywhere else
    temp_mask = np.zeros(score_map.shape[:-1], np.float32)
    cv2.fillPoly(temp_mask, [poly], 1)

    # best rotation angle for the original (not shrunk) quadrangle
    theta = find_min_rect_angle(vertice)
    rotate_mat = get_rotate_mat(theta)

    # bounding rectangle of the quadrangle after rotating it by theta
    rotated_vertices = rotate_vertices(vertice, theta)
    x_min, x_max, y_min, y_max = get_boundary(rotated_vertices)

    # rotate every pixel around (x1, y1) with the same matrix, so pixels and
    # rectangle are in the same frame when the distances are measured
    rotated_x, rotated_y = rotate_all_pixels(rotate_mat, vertice[0], vertice[1], img.height, img.width)

    # distance of every pixel to the four sides of the rectangle, in the
    # channel order y_min, y_max, x_min, x_max; negative values are set to 0
    distances = (
        rotated_y - y_min,
        y_max - rotated_y,
        rotated_x - x_min,
        x_max - rotated_x,
    )
    # only the values inside the shrunk text region (mask) are kept;
    # NOTE: += means values add up where masks of different polys overlap
    for channel, distance in enumerate(distances):
        geo_map[:, :, channel] += np.maximum(distance, 0) * temp_mask
    geo_map[:, :, 4] += theta * temp_mask

cv2.fillPoly(score_map, polys, 1)

# show the image, the score map and the five geo map channels one by one
plt.imshow(img)
plt.show()

plt.imshow(score_map.reshape((img.height, img.width)))
plt.show()

for channel in range(5):
    plt.imshow(geo_map[:, :, channel])
    plt.show()

Page updated

Google Sites

Report abuse