# Generate ground truth (gt) for rbox
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 13 10:59:35 2020
@author: zchen
Convert an image and its bounding boxes into a score map and a geo_map for machine learning.
An image has width x height dimensions.
The bounding boxes are a list of row vectors [x1, y1, x2, y2, ... x4, y4], each of which represents
the coordinates of the 4 vertices of a quadrangle.
The labels indicate whether a bounding box contains text: 1 is true and 0 is false.
"""
import numpy as np
import cv2
from PIL import Image
import math
import matplotlib.pyplot as plt
def cal_distance(x1, y1, x2, y2):
    '''Return the Euclidean distance between the points (x1, y1) and (x2, y2).'''
    dx = x1 - x2
    dy = y1 - y2
    return math.sqrt(dx**2 + dy**2)
def move_points(vertices, index1, index2, r, coef):
    '''Shrink one edge by pulling its two end points towards each other.

    vertices: flat sequence [x1, y1, x2, y2, x3, y3, x4, y4]; modified in place.
    index1, index2: vertex numbers of the edge's end points. They are taken
        modulo 4, so callers may pass values of 4 or more.
    r: per-vertex reference lengths, indexed by vertex number (0..3).
    coef: fraction of the reference length each end point is moved by.

    Each end point travels along the edge by (r[vertex] * coef). Edges whose
    length is not greater than 1 are left unchanged.
    Returns the same (mutated) vertices object.
    '''
    # Wrap the vertex numbers into the range 0..3.
    first = index1 % 4
    second = index2 % 4

    # Vertex k is stored at position 2k (x) and 2k + 1 (y) of the flat array.
    ax, ay = first * 2 + 0, first * 2 + 1
    bx, by = second * 2 + 0, second * 2 + 1

    reach_first = r[first]
    reach_second = r[second]

    # Signed offsets pointing from the second end point to the first one,
    # captured before either point is moved.
    delta_x = vertices[ax] - vertices[bx]
    delta_y = vertices[ay] - vertices[by]
    edge_length = cal_distance(vertices[ax], vertices[ay], vertices[bx], vertices[by])

    # Nothing to shrink on an edge that is at most one unit long.
    if not edge_length > 1:
        return vertices

    # The first point moves against the offset, i.e. towards the second point.
    fraction = (reach_first * coef) / edge_length
    vertices[ax] += fraction * (-delta_x)
    vertices[ay] += fraction * (-delta_y)

    # The second point moves along the offset, i.e. towards the first point.
    fraction = (reach_second * coef) / edge_length
    vertices[bx] += fraction * delta_x
    vertices[by] += fraction * delta_y
    return vertices
def shrink_poly(vertices, coef=0.3):
    '''Shrink a quadrangle so that it bounds the text more tightly.

    vertices: flat sequence [x1, y1, x2, y2, x3, y3, x4, y4].
    coef: fraction of the reference length each vertex is moved by (default 0.3).

    For every vertex the reference length is the shorter of its two adjacent
    edges. The pair of opposite edges with the larger combined length is
    shrunk first, then the other pair.

    Returns a new float64 numpy array of 8 values; the input is not modified.
    The work is done on a float copy on purpose: when the input is an integer
    array, an integer copy would silently truncate every fractional move made
    by move_points.
    '''
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices

    # Length of each of the four edges, computed once.
    edge_12 = cal_distance(x1, y1, x2, y2)
    edge_23 = cal_distance(x2, y2, x3, y3)
    edge_34 = cal_distance(x3, y3, x4, y4)
    edge_41 = cal_distance(x4, y4, x1, y1)

    # Reference length per vertex: the shorter of its two adjacent edges.
    r = [
        min(edge_12, edge_41),  # vertex 1
        min(edge_12, edge_23),  # vertex 2
        min(edge_23, edge_34),  # vertex 3
        min(edge_41, edge_34),  # vertex 4
    ]

    # offset 0: edges (v1-v2) and (v3-v4) are the longer pair.
    # offset 1: edges (v2-v3) and (v4-v1) are the longer pair (also used on a tie).
    offset = 0 if edge_12 + edge_34 > edge_23 + edge_41 else 1

    # Float copy so the in-place updates in move_points keep their fractions.
    v = np.array(vertices, dtype=np.float64)
    v = move_points(v, 0 + offset, 1 + offset, r, coef)  # offset 0: v1-v2, else v2-v3
    v = move_points(v, 2 + offset, 3 + offset, r, coef)  # offset 0: v3-v4, else v4-v1
    v = move_points(v, 1 + offset, 2 + offset, r, coef)  # offset 0: v2-v3, else v3-v4
    v = move_points(v, 3 + offset, 4 + offset, r, coef)  # offset 0: v4-v1, else v1-v2
    return v
def get_rotate_mat(theta):
    '''Build the 2x2 rotation matrix for the angle theta (in radians).

    Multiplying the matrix with a column vector [x, y] gives
        x' = x * cos(theta) - y * sin(theta)
        y' = x * sin(theta) + y * cos(theta)
    which follows from the angle-sum identities for a vector at angle alpha
    rotated to alpha + theta.

    With y pointing up, a positive theta turns the vector counter clockwise.
    In image coordinates, where y points down, the same matrix appears as a
    clockwise rotation.
    '''
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    return np.array([[cos_t, -sin_t], [sin_t, cos_t]])
def rotate_vertices(vertices, theta, anchor=None):
    '''Rotate the 4 vertices by theta (radians) around an anchor point.

    vertices: numpy array of 8 values [x1, y1, ..., x4, y4].
    anchor: column vector [[x], [y]] to rotate around; when omitted the first
        vertex is used.
    Returns a flat array of the 8 rotated coordinates in the same order.
    '''
    # One column per vertex, so the 2x2 rotation matrix can be applied directly.
    points = vertices.reshape((4, 2)).T
    pivot = points[:, :1] if anchor is None else anchor
    # Shift to the pivot, rotate, then shift back.
    turned = np.dot(get_rotate_mat(theta), points - pivot)
    return (turned + pivot).T.reshape(-1)
def get_boundary(vertices):
    '''Return the axis-aligned rectangle that just encloses the quadrangle.

    vertices: sequence of 8 values [x1, y1, ..., x4, y4].
    Returns the tuple (x_min, x_max, y_min, y_max).
    '''
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices
    xs = (x1, x2, x3, x4)
    ys = (y1, y2, y3, y4)
    return min(xs), max(xs), min(ys), max(ys)
def cal_error(vertices):
    '''Sum of the distances from each vertex to its matching bounding-rectangle corner.

    Vertex 1 is matched with (x_min, y_min), vertex 2 with (x_max, y_min),
    vertex 3 with (x_max, y_max) and vertex 4 with (x_min, y_max). A small
    value means the vertices stay close to the corners of the enclosing
    axis-aligned rectangle, which favours rotations that keep the vertex order.
    '''
    x_min, x_max, y_min, y_max = get_boundary(vertices)
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices
    d_vertex1 = cal_distance(x1, y1, x_min, y_min)
    d_vertex2 = cal_distance(x2, y2, x_max, y_min)
    d_vertex3 = cal_distance(x3, y3, x_max, y_max)
    d_vertex4 = cal_distance(x4, y4, x_min, y_max)
    return d_vertex1 + d_vertex2 + d_vertex3 + d_vertex4
def find_min_rect_angle(vertices):
    '''Find the rotation angle (radians) that best fits a rectangle to the quadrangle.

    The quadrangle itself is rotated, not the rectangle: for every whole degree
    from -90 up to 89 the vertices are rotated around the first vertex and the
    area of the axis-aligned rectangle enclosing them is measured. Among the
    10 angles with the smallest area, the one with the smallest cal_error is
    returned, converted to radians.

    Rotating the quadrangle by an angle is equivalent to rotating the rectangle
    by the opposite angle. Because image coordinates have y pointing down, the
    sign flips once more, so the angle found here in a y-up system is also the
    rectangle's rotation angle in the image.
    '''
    angle_interval = 1
    rank_num = 10
    degree_candidates = list(range(-90, 90, angle_interval))

    # Area of the enclosing axis-aligned rectangle for every candidate angle.
    areas = []
    for degrees in degree_candidates:
        turned = rotate_vertices(vertices, degrees / 180 * math.pi)
        xs = turned[0::2]
        ys = turned[1::2]
        areas.append((max(xs) - min(xs)) * (max(ys) - min(ys)))

    # Candidate positions ordered from smallest to largest area (stable on ties).
    positions_by_area = sorted(range(len(areas)), key=areas.__getitem__)

    # Among the rank_num smallest rectangles keep the first angle with the lowest error.
    best_index = -1
    min_error = float('inf')
    for position in positions_by_area[:rank_num]:
        turned = rotate_vertices(vertices, degree_candidates[position] / 180 * math.pi)
        error = cal_error(turned)
        if error < min_error:
            min_error, best_index = error, position
    return degree_candidates[best_index] / 180 * math.pi
def rotate_all_pixels(rotate_mat, anchor_x, anchor_y, img_height, img_width):
    '''Compute the rotated location of every pixel of an image.

    rotate_mat: 2x2 rotation matrix applied to each (x, y) pixel coordinate.
    anchor_x, anchor_y: point the coordinates are rotated around.
    img_height, img_width: size of the pixel grid; converted with int().

    Returns two arrays of shape (img_height, img_width): the rotated x
    coordinate and the rotated y coordinate of every pixel.
    '''
    # The grid size comes from the parameters, not from any module-level image.
    width_range = np.arange(0, int(img_width))
    height_range = np.arange(0, int(img_height))
    index_width, index_height = np.meshgrid(width_range, height_range)

    # Flatten both grids to rows and stack them: row 0 holds x, row 1 holds y.
    x_lin = index_width.reshape((1, index_width.size))
    y_lin = index_height.reshape((1, index_height.size))
    coord_mat = np.concatenate((x_lin, y_lin), 0)

    # Shift to the anchor, rotate every (x, y) column, then shift back.
    anchor = np.array([[anchor_x], [anchor_y]])
    rotated_coord = np.dot(rotate_mat, coord_mat - anchor) + anchor

    # Restore the original 2-D grid layout.
    rotated_x = rotated_coord[0, :].reshape(index_width.shape)
    rotated_y = rotated_coord[1, :].reshape(index_height.shape)
    return rotated_x, rotated_y
img_path = r'C:\Work\Python\EAST\1000_train_images\img_7.jpg'
gt_path = r'C:\Work\Python\EAST\1000_train_images_gt\gt_img_7.txt'
img = Image.open(img_path)

# Read the ground-truth file; the context manager closes the handle afterwards.
with open(gt_path, encoding='utf-8') as f:
    lines = f.readlines()

# Each line holds 8 comma-separated integer coordinates followed by more fields.
# A line containing '###' is labelled 0 (no text), any other line 1.
labels = []
vertices = []
for line in lines:
    if not line.strip():
        continue  # skip blank lines, which carry no coordinates
    vertices.append(list(map(int, line.rstrip('\n').lstrip('\ufeff').split(',')[:8])))
    labels.append(0 if '###' in line else 1)
vertices, labels = np.array(vertices), np.array(labels)

# score for every pixel
score_map = np.zeros((int(img.height), int(img.width), 1), np.float32)
# the rboxes: 4 distances and 1 theta per pixel
geo_map = np.zeros((int(img.height), int(img.width), 5), np.float32)
# the collection of shrunk text-area polys
polys = []

for i, vertice in enumerate(vertices):
    if labels[i] == 0:  # the poly area has no text in it
        continue

    # shrink the poly and round it to integer pixel coordinates
    poly = np.around(shrink_poly(vertice).reshape((4, 2))).astype(np.int32)
    polys.append(poly)

    # mask that is 1 inside the shrunk poly (text area) and 0 elsewhere
    temp_mask = np.zeros(score_map.shape[:-1], np.float32)
    cv2.fillPoly(temp_mask, [poly], 1)

    # best rotation angle for the original (unshrunk) quadrangle and its matrix
    theta = find_min_rect_angle(vertice)
    rotate_mat = get_rotate_mat(theta)

    # bounding rectangle of the quadrangle after rotating it by theta
    rotated_vertices = rotate_vertices(vertice, theta)
    x_min, x_max, y_min, y_max = get_boundary(rotated_vertices)

    # rotate every pixel around (x1, y1), the same anchor used for the vertices,
    # so that pixels and rectangle share one coordinate frame
    rotated_x, rotated_y = rotate_all_pixels(rotate_mat, vertice[0], vertice[1], img.height, img.width)

    # distances to the rectangle's four boundaries; negative values are set to 0
    d1 = rotated_y - y_min
    d1[d1 < 0] = 0
    d2 = y_max - rotated_y
    d2[d2 < 0] = 0
    d3 = rotated_x - x_min
    d3[d3 < 0] = 0
    d4 = x_max - rotated_x
    d4[d4 < 0] = 0

    # only the values inside the shrunk text region (mask) are kept
    geo_map[:, :, 0] += d1 * temp_mask
    geo_map[:, :, 1] += d2 * temp_mask
    geo_map[:, :, 2] += d3 * temp_mask
    geo_map[:, :, 3] += d4 * temp_mask
    geo_map[:, :, 4] += theta * temp_mask

# mark every shrunk text poly on the score map
cv2.fillPoly(score_map, polys, 1)

# show the image, the score map and then each of the 5 geo_map channels
plt.imshow(img)
plt.show()
plt.imshow(score_map.reshape((img.height, img.width)))
plt.show()
for channel in range(geo_map.shape[-1]):
    plt.imshow(geo_map[:, :, channel].reshape((img.height, img.width)))
    plt.show()