% Simple OCR code
% Only handles capital English letters in a single font and font size;
% other characters can easily be added to the letter set.
% Slightly inclined (slanted) letters are allowed.
% The picture must be rotated beforehand so that the text lines are horizontal.
% Written by Xiaozhou 12/15/2010
% ---- Configuration ------------------------------------------------------
fontname='Times New Roman';
fontsize=10.5;
letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ';
nlt=length(letters);
hltim=round(fontsize*1.5); % minimum height of the comparison canvas
wltim=hltim;               % minimum width of the comparison canvas
filename=[fontname num2str(nlt) '_' num2str(fontsize) '.mat'];
islibsaved=0; % 0 - need to create letter image file when use first time; 1 - letter image file exists

% ---- Letter library -----------------------------------------------------
if islibsaved==0
    ltimage=lt2im(fontname,fontsize,letters); % get the image of each letter
    save(filename,'ltimage');
else
    % Load into a struct so the MAT-file cannot overwrite other variables.
    lib=load(filename,'ltimage');
    ltimage=lib.ltimage;
end

% ---- Input picture ------------------------------------------------------
picname='test.jpg';
imobj=imread(picname);
imbw=1-im2bw(imobj,0.8); % convert to black and white pic, then invert color (text = 1)
noise=0.01; % a row/column whose pixel sum is below this is treated as blank
iread=1;    % index of the next letter to be written into textocr
textocr=''; % recognised text; stays empty when the picture contains no text
lincl=5;    % left inclination
rincl=5;    % right inclination
anginc=5;   % angle increment for testing
angs=-rincl:anginc:lincl;
nang=numel(angs);

% ---- Reference templates ------------------------------------------------
% The rotated and trimmed reference letters do not depend on the scanned
% glyph, so build them once instead of once per glyph.
templates=cell(nlt,nang);
for k=1:nlt
    imlt=squeeze(1-ltimage(k,:,:)); % invert so that the letter pixels are 1
    for a=1:nang
        templates{k,a}=trimim(imrotate(imlt,angs(a)),1);
    end
end

% ---- Segmentation and recognition ---------------------------------------
[h,w]=size(imbw);
startrow=1;
% find the first row of pixels containing text
% (the bound check keeps a completely blank picture from indexing past h)
while(startrow<=h && sum(imbw(startrow,:))<noise)
    startrow=startrow+1;
end
while(startrow<=h)
    endrow=startrow;
    % find the last row of pixels for the txt line
    while(endrow<=h && sum(imbw(endrow,:))>noise)
        endrow=endrow+1;
    end
    imbw1=imbw(startrow:endrow-1,:); % image of the txt line
    startcol=1;
    % find the first col of pixels containing text
    while(startcol<=w && sum(imbw1(:,startcol))<noise)
        startcol=startcol+1;
    end
    while(startcol<=w)
        endcol=startcol;
        % find the last col of pixels for the letter
        while(endcol<=w && sum(imbw1(:,endcol))>noise)
            endcol=endcol+1;
        end
        imbw2=imbw1(:,startcol:endcol-1); % image of the letter
        imbw3=trimim(imbw2,1);            % trim the blank margin
        [h1,w1]=size(imbw3);
        corrim1=0; % highest correlation factor between tested image and standard letter image
        indlt=1;   % index of letter best fit
        anglt=0;   % inclination angle best fit
        for k=1:nlt
            for a=1:nang
                imlt2=templates{k,a};
                [h2,w2]=size(imlt2);
                % Both images are placed in the top-left corner of
                % zero-filled canvases of identical size. The canvas is
                % hltim-by-wltim unless one of the images is larger, in
                % which case it is enlarged so that corr2 always receives
                % equally sized inputs.
                hc=max([hltim h1 h2]);
                wc=max([wltim w1 w2]);
                tmp1=zeros(hc,wc);
                tmp2=tmp1;
                tmp1(1:h1,1:w1)=imbw3;
                tmp2(1:h2,1:w2)=imlt2;
                corrim=corr2(tmp1,tmp2);
                % A NaN correlation (constant image) never passes this test.
                if corrim>corrim1
                    corrim1=corrim;
                    indlt=k;
                    anglt=angs(a);
                end
            end
        end
        textocr(iread)=letters(indlt);
        iread=iread+1;
        % go to the next letter
        startcol=endcol;
        while(startcol<=w && sum(imbw1(:,startcol))<noise)
            startcol=startcol+1;
        end
    end
    % go to the next row
    startrow=endrow;
    while(startrow<=h && sum(imbw(startrow,:))<noise)
        startrow=startrow+1;
    end
end
% lt2im.m
% LT2IM  Render each character of LETTERS and return the glyph bitmaps.
%
%   ltimage = lt2im(fontname,fontsize,letters)
%
%   fontname - font name passed to the text object's 'FontName' property
%   fontsize - font size passed to the text object's 'Fontsize' property
%   letters  - character vector; one glyph image is produced per character
%
%   ltimage  - nlt-by-H-by-W array. ltimage(i,:,:) holds the trimmed
%              black-and-white image of letters(i) in its top-left corner;
%              letter pixels are 0 and the remaining pixels are 1.
%              H and W are round(fontsize*1.8) unless a glyph is larger,
%              in which case the array is enlarged to fit it.
%
%   Each glyph is drawn in a figure, printed to a temporary TIFF file at
%   screen resolution, read back and trimmed with trimim.
function ltimage=lt2im(fontname,fontsize,letters)
s=figure;
nlt=length(letters);
hmax=round(fontsize*1.8);
wmax=hmax;
ltimage=ones(nlt,hmax,wmax);
% Use a unique file in the temporary folder so that no file in the current
% folder is overwritten or deleted.
filename=[tempname '.tif'];
for i=1:nlt
    % Draw into the figure created above rather than whatever figure
    % happens to be current.
    ax=axes('Parent',s);
    axis(ax,'off');
    ht=text(0.5,0.5,letters(i),'Parent',ax);
    set(ht,'FontName',fontname,'Fontsize',fontsize);
    print(s,'-dtiff','-r0',filename);
    A=imread(filename);
    delete(filename);
    B=im2bw(A);
    B1=trimim(B,0); % crop to the bounding box of the black (letter) pixels
    % B2=imresize(B1,[size(B1,1),round(size(B1,2)*1.1)]);
    [hb,wb]=size(B1);
    if hb>size(ltimage,2) || wb>size(ltimage,3)
        % Enlarge the library explicitly with background (1) pixels.
        % Implicit growth on assignment would pad with 0, i.e. with
        % letter-coloured pixels, and corrupt every stored glyph.
        grown=ones(nlt,max(hb,size(ltimage,2)),max(wb,size(ltimage,3)));
        grown(:,1:size(ltimage,2),1:size(ltimage,3))=ltimage;
        ltimage=grown;
    end
    ltimage(i,1:hb,1:wb)=B1;
    clf(s);
end
% Close only the figure created here; other open figures are left alone.
close(s)
% TRIMIM  Crop an image to the bounding box of the pixels equal to FCOLOR.
%
%   im1 = trimim(im0,fcolor)
%
%   im0    - image matrix
%   fcolor - pixel value that marks the content to keep
%   im1    - sub-matrix of im0 spanning the first to the last row and the
%            first to the last column that contain a pixel equal to fcolor
%            (empty when no pixel matches)
function im1=trimim(im0,fcolor)
[rr,cc]=find(im0==fcolor);   % row/column subscripts of the matching pixels
rowspan=min(rr):max(rr);
colspan=min(cc):max(cc);
im1=im0(rowspan,colspan);