# Generate ground truth (gt) for rbox

# -*- coding: utf-8 -*-

"""

Created on Thu Feb 13 10:59:35 2020

@author: zchen

Convert an image and its bounding boxes into a score map and a geo_map for machine learning

An image is of width x height dimensions

The bounding boxes are a list of row vectors [x1, y1, x2, y2, ..., x4, y4], each of which represents

the coordinates of the 4 vertices of a quadrangle.

The labels indicate if a bounding box contains text, 1 is true and 0 is false.

"""

import numpy as np

import cv2

from PIL import Image

import math

import matplotlib.pyplot as plt

def cal_distance(x1, y1, x2, y2):
    '''Return the Euclidean distance between the points (x1, y1) and (x2, y2).

    The result is a float; the order of the two points does not matter.
    '''
    return math.sqrt((x1 - x2)**2 + (y1 - y2)**2)

def move_points(vertices, index1, index2, r, coef):
    '''Shrink one edge of a quadrangle by pulling its two end points together.

    vertices : flat sequence [x1, y1, x2, y2, x3, y3, x4, y4]; it is modified
               in place and also returned.
    index1, index2 : vertex numbers of the two end points of the edge. They are
               taken modulo 4, so a caller may pass 4 to mean vertex 0.
    r        : per-vertex reference lengths (indexed by vertex number 0..3).
    coef     : fraction of the reference length each end point is moved by.

    Each end point travels along the edge towards the other end point by
    r[vertex] * coef. Edges whose length is not greater than 1 are left
    untouched.
    '''
    first = index1 % 4
    second = index2 % 4

    # reference length (shortest adjacent edge) of each end point
    ref_first = r[first]
    ref_second = r[second]

    # a vertex number 0..3 maps to positions 2k (x) and 2k + 1 (y) in the flat array
    ax, ay = first * 2, first * 2 + 1
    bx, by = second * 2, second * 2 + 1

    # signed components of the edge, measured from the second point to the first
    dx = vertices[ax] - vertices[bx]
    dy = vertices[ay] - vertices[by]
    edge_len = cal_distance(vertices[ax], vertices[ay], vertices[bx], vertices[by])

    if not edge_len > 1:
        return vertices

    # move the first point towards the second one (hence the negated components)
    share = (ref_first * coef) / edge_len
    vertices[ax] += share * (-dx)
    vertices[ay] += share * (-dy)

    # move the second point towards the first one
    share = (ref_second * coef) / edge_len
    vertices[bx] += share * dx
    vertices[by] += share * dy

    return vertices

 

def shrink_poly(vertices, coef=0.3):
    '''Shrink a text quadrangle towards its interior.

    vertices : flat sequence [x1, y1, x2, y2, x3, y3, x4, y4].
    coef     : fraction of a vertex's shortest adjacent edge that the vertex
               is moved by along each edge (default 0.3).

    Returns a new float64 numpy array of 8 values; the input is not modified.

    The pair of opposite edges with the larger total length is shrunk first,
    then the other pair.

    The working copy is always floating point. Copying an integer array
    as-is would make every in-place shift done by move_points get cast back
    to an integer, silently dropping the fractional part of each move.
    '''
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices

    # length of every edge, computed once
    edge_12 = cal_distance(x1, y1, x2, y2)
    edge_23 = cal_distance(x2, y2, x3, y3)
    edge_34 = cal_distance(x3, y3, x4, y4)
    edge_41 = cal_distance(x4, y4, x1, y1)

    # for each vertex, the length of the shorter of its two adjacent edges;
    # a vertex moves by this length times coef
    r = [
        min(edge_12, edge_41),
        min(edge_12, edge_23),
        min(edge_23, edge_34),
        min(edge_41, edge_34),
    ]

    # offset 0: the longer pair is (v1-v2, v3-v4); offset 1: (v2-v3, v4-v1)
    offset = 0 if edge_12 + edge_34 > edge_23 + edge_41 else 1

    # float working copy so sub-pixel shifts are not truncated
    v = np.array(vertices, dtype=np.float64)

    # starts 0 and 2 are the longer pair of edges, 1 and 3 the shorter pair;
    # move_points wraps indices with % 4, so index 4 means vertex 0
    for start in (0, 2, 1, 3):
        v = move_points(v, start + offset, start + 1 + offset, r, coef)

    return v

    

def get_rotate_mat(theta):
    '''Build the 2x2 rotation matrix for the angle theta (in radians).

    Multiplying the matrix with a column vector [x, y] rotates that vector
    by theta:

        x' = x cos(theta) - y sin(theta)
        y' = x sin(theta) + y cos(theta)

    Derivation: write the vector in polar form, x = r cos(alpha),
    y = r sin(alpha). Rotating by theta gives
        x' = r cos(alpha + theta) = r cos(alpha)cos(theta) - r sin(alpha)sin(theta)
        y' = r sin(alpha + theta) = r cos(alpha)sin(theta) + r sin(alpha)cos(theta)
    and substituting x and y back yields the two equations above.

    Direction: in a bottom-up coordinate system (y grows upwards) a positive
    theta rotates counter clockwise. In image coordinates y grows downwards,
    so the same matrix appears to rotate clockwise when looking at an image.
    '''
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    first_row = [cos_t, -sin_t]
    second_row = [sin_t, cos_t]
    return np.array([first_row, second_row])

def rotate_vertices(vertices, theta, anchor=None):
    '''Rotate the 4 vertices of a quadrangle around an anchor point by theta.

    vertices : numpy array of 8 values [x1, y1, ..., x4, y4].
    theta    : rotation angle in radians.
    anchor   : 2x1 column vector to rotate around; when None, the first
               vertex (x1, y1) is used.

    Returns a new flat array of 8 values in the same layout as the input.
    '''
    # one (x, y) point per column, so the 2x2 matrix can be applied to all at once
    points = vertices.reshape((4, 2)).T
    pivot = points[:, :1] if anchor is None else anchor
    rotation = get_rotate_mat(theta)
    # shift to the pivot, rotate, then shift back
    moved = rotation.dot(points - pivot) + pivot
    # back to the flat [x1, y1, ..., x4, y4] layout
    return moved.T.reshape(-1)

def get_boundary(vertices):
    '''Return the axis-aligned bounding rectangle of a quadrangle.

    vertices : 8 values [x1, y1, x2, y2, x3, y3, x4, y4].

    Returns the tuple (x_min, x_max, y_min, y_max).
    '''
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices
    xs = (x1, x2, x3, x4)
    ys = (y1, y2, y3, y4)
    return min(xs), max(xs), min(ys), max(ys)

    

def cal_error(vertices):
    '''Measure how far a quadrangle is from its axis-aligned bounding rectangle.

    The error is the sum of the distances from vertex 1..4 to the rectangle
    corners (x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max),
    in that order. A small error means every vertex stays close to "its"
    corner, which favours orientations that need less rotation.
    '''
    x_min, x_max, y_min, y_max = get_boundary(vertices)
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices
    # each vertex paired with the rectangle corner it is compared against
    vertex_to_corner = (
        (x1, y1, x_min, y_min),
        (x2, y2, x_max, y_min),
        (x3, y3, x_max, y_max),
        (x4, y4, x_min, y_max),
    )
    return sum(cal_distance(*pair) for pair in vertex_to_corner)

    

def find_min_rect_angle(vertices):
    r'''Find the rotation angle (radians) of the smallest rectangle enclosing the poly.

    vertices : numpy array of 8 values [x1, y1, ..., x4, y4].

    The poly is rotated (around its first vertex) by every whole degree from
    -90 to 89. For each rotation the area of the axis-aligned rectangle that
    encloses the rotated poly is computed. Among the 10 rotations with the
    smallest area, the one with the lowest cal_error is returned.

    Note on direction: the search rotates the poly, not the rectangle, which
    is the same as rotating the rectangle the opposite way. The angle found is
    for rotating the poly in a bottom-up coordinate system (y grows upwards),
    where a positive angle is counter clockwise. The best angle for rotating
    the rectangle in that system would be -angle, but in image coordinates
    y grows downwards, which flips the sign once more. Therefore

        best angle of rotating the poly in the bottom-up system
            = best angle of rotating the rectangle in the top-down system

       -------->    /
        \ theta    /  theta
         \        ---------->

    (The docstring is a raw string because the diagram contains a backslash
    followed by a space, which is not a valid escape sequence.)
    '''
    # candidate angles in degrees: -90, -89, ..., 89
    angle_interval = 1
    angle_list = list(range(-90, 90, angle_interval))

    # rotate once per candidate and keep the result for the error pass below
    rotations = [rotate_vertices(vertices, theta / 180 * math.pi) for theta in angle_list]

    # area of the axis-aligned rectangle enclosing each rotated poly
    area_list = []
    for rotated in rotations:
        x1, y1, x2, y2, x3, y3, x4, y4 = rotated
        span_x = max(x1, x2, x3, x4) - min(x1, x2, x3, x4)
        span_y = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
        area_list.append(span_x * span_y)

    # candidate indices ordered by enclosing area, smallest first
    sorted_area_index = sorted(range(len(area_list)), key=area_list.__getitem__)

    # among the rank_num smallest rectangles pick the one with the lowest error
    rank_num = 10
    min_error = float('inf')
    best_index = -1
    for index in sorted_area_index[:rank_num]:
        temp_error = cal_error(rotations[index])
        if temp_error < min_error:
            min_error = temp_error
            best_index = index

    return angle_list[best_index] / 180 * math.pi

def rotate_all_pixels(rotate_mat, anchor_x, anchor_y, img_height, img_width):
    '''Compute the rotated location of every pixel of an image.

    rotate_mat : 2x2 rotation matrix.
    anchor_x, anchor_y : point the pixels are rotated around.
    img_height, img_width : size of the pixel grid. These parameters define
        the grid; no module-level image object is consulted.

    Returns (rotated_x, rotated_y), two arrays of shape
    (img_height, img_width). Element [row, col] holds the x (respectively y)
    coordinate of pixel (x=col, y=row) after rotation.
    '''
    # x index (column) and y index (row) of every pixel
    grid_x, grid_y = np.meshgrid(np.arange(0, int(img_width)),
                                 np.arange(0, int(img_height)))

    # 2 x N matrix of coordinates: first row is x, second row is y
    coord_mat = np.stack((grid_x.reshape(-1), grid_y.reshape(-1)))

    # rotate around the anchor: shift, apply the matrix, shift back
    anchor = np.array([[anchor_x], [anchor_y]])
    rotated_coord = np.dot(rotate_mat, coord_mat - anchor) + anchor

    # back to the 2-D image layout
    rotated_x = rotated_coord[0, :].reshape(grid_x.shape)
    rotated_y = rotated_coord[1, :].reshape(grid_y.shape)
    return rotated_x, rotated_y

 

# Input image and its ground-truth annotation file.
img_path = r'C:\Work\Python\EAST\1000_train_images\img_7.jpg'
gt_path = r'C:\Work\Python\EAST\1000_train_images_gt\gt_img_7.txt'

img = Image.open(img_path)

# Read the annotations inside a context manager so the file handle is
# closed even if reading fails.
with open(gt_path, encoding='utf-8') as f:
    lines = f.readlines()

labels = []
vertices = []
for line in lines:
    # drop the trailing newline and a possible byte-order mark
    record = line.rstrip('\n').lstrip('\ufeff')
    if not record.strip():
        continue  # skip blank lines instead of failing on int('')
    # the first 8 comma-separated fields are x1, y1, ..., x4, y4
    vertices.append(list(map(int, record.split(',')[:8])))
    # a line containing '###' is labelled 0 (no text), anything else 1
    labels.append(0 if '###' in line else 1)

vertices, labels = np.array(vertices), np.array(labels)

# score for every pixel
score_map = np.zeros((int(img.height), int(img.width), 1), np.float32)
# the rboxes: 4 distances (channels 0-3) and 1 theta (channel 4)
geo_map = np.zeros((int(img.height), int(img.width), 5), np.float32)

# the shrunk polys of all text areas
polys = []

for vertice, label in zip(vertices, labels):
    if label == 0:  # the poly area has no text in it
        continue

    # shrink the poly and round it to integer pixel coordinates
    poly = np.around(shrink_poly(vertice).reshape((4, 2))).astype(np.int32)
    polys.append(poly)

    # mask that is 1 inside the shrunk poly and 0 everywhere else
    temp_mask = np.zeros(score_map.shape[:-1], np.float32)
    cv2.fillPoly(temp_mask, [poly], 1)

    # best rotation angle for the (unshrunk) quadrangle and its matrix
    theta = find_min_rect_angle(vertice)
    rotate_mat = get_rotate_mat(theta)

    # bounding rectangle of the rotated quadrangle
    rotated_vertices = rotate_vertices(vertice, theta)
    x_min, x_max, y_min, y_max = get_boundary(rotated_vertices)

    # rotate every pixel around (x1, y1) with the same matrix so pixels and
    # rectangle are in the same frame when measuring distances
    rotated_x, rotated_y = rotate_all_pixels(rotate_mat, vertice[0], vertice[1], img.height, img.width)

    # distances from each pixel to the 4 sides of the rectangle, in the
    # channel order y_min, y_max, x_min, x_max; negative values are set to 0
    distances = (
        rotated_y - y_min,
        y_max - rotated_y,
        rotated_x - x_min,
        x_max - rotated_x,
    )
    # only pixels inside the shrunk text region (mask) keep their values
    for channel, distance in enumerate(distances):
        geo_map[:, :, channel] += np.maximum(distance, 0) * temp_mask
    geo_map[:, :, 4] += theta * temp_mask

# Fill the score map once, after all polys are collected. Doing this inside
# the loop would redraw every earlier poly on each iteration for the same result.
if polys:
    cv2.fillPoly(score_map, polys, 1)

    

# Display the source image, then the score map, then each geo_map channel
# (4 distance channels followed by the angle channel), one figure at a time.
plt.imshow(img)
plt.show()

map_shape = (img.height, img.width)

plt.imshow(score_map.reshape(map_shape))
plt.show()

for channel in range(5):
    plt.imshow(geo_map[:, :, channel].reshape(map_shape))
    plt.show()