% simple ocr code
% only for capital English letters in the same font and font size
% other letters can be easily included
% inclination of letters is allowed
% Picture rotated beforehand so that text lines are horizontally oriented
% written by Xiaozhou 12/15/2010
% ---- configuration ------------------------------------------------------
fontname='Times New Roman';
fontsize=10.5;
letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ';
nlt=length(letters);
% Minimum size of the canvas on which two glyphs are compared. The canvas
% is enlarged on demand (see the matching loop) so that neither glyph can
% silently grow one of the two matrices and make corr2 fail.
hltim=round(fontsize*1.5);
wltim=hltim;
filename=[fontname  num2str(nlt) '_' num2str(fontsize) '.mat'];
islibsaved=0; % 0 -  need to create letter image file when use first time; 1 - letter image file exists
if islibsaved==0
    ltimage=lt2im(fontname,fontsize,letters);  % get the image of each letter
    save(filename,'ltimage');
else
    % Load only the variable we need instead of dumping the whole MAT-file
    % into the workspace.
    lib=load(filename,'ltimage');
    ltimage=lib.ltimage;
end

% ---- read and binarize the picture --------------------------------------
picname='test.jpg';
imobj=imread(picname);
imbw=1-im2bw(imobj,0.8);    % convert to black and white pic, then invert color (text pixels become 1)
noise=0.01;  % a row/column whose pixel sum is below this is treated as empty
iread=1;  % index of the next letter to be written into textocr
lincl=5;  % left inclination
rincl=5; % right inclination
anginc=5; % angle increment for testing
angs=-rincl:anginc:lincl;
nang=length(angs);
[h,w]=size(imbw);
textocr='';  % recognized text; stays empty when the picture contains no text

% ---- precompute the rotated, trimmed reference glyphs -------------------
% These depend only on the letter library and the tested angles, so they
% are built once instead of once per segmented letter.
templates=cell(nlt,nang);
for k=1:nlt
    imlt=squeeze(1-ltimage(k,:,:));  % invert so that text pixels are 1, as in imbw
    for a=1:nang
        templates{k,a}=trimim(imrotate(imlt,angs(a)),1);
    end
end

% ---- find the first row of pixels containing text -----------------------
% The bound check keeps a completely blank picture from indexing past the
% last row.
startrow=1;
while(startrow<=h && sum(imbw(startrow,:))<noise)
    startrow=startrow+1;
end

while(startrow<=h)
    % find the last row of pixels for the txt line
    endrow=startrow;
    while(endrow<=h && sum(imbw(endrow,:))>noise)
        endrow=endrow+1;
    end
    imbw1=imbw(startrow:endrow-1,:); % image of the txt line

    % find the first col of pixels containing text
    startcol=1;
    while(startcol<=w && sum(imbw1(:,startcol))<noise)
        startcol=startcol+1;
    end

    while(startcol<=w)
        % find the last col of pixels for the letter
        endcol=startcol;
        while(endcol<=w && sum(imbw1(:,endcol))>noise)
            endcol=endcol+1;
        end
        imbw2=imbw1(:,startcol:endcol-1); % image of the letter
        imbw3=trimim(imbw2,1); % trim the margin around the letter
        [h1,w1]=size(imbw3);

        corrim1=0; % highest correlation factor between tested image and standard letter image
        indlt=1; % index of letter best fit
        anglt=0; % inclination angle best fit
        for k=1:nlt
            for a=1:nang
                imlt2=templates{k,a};
                [h2,w2]=size(imlt2);
                % Both glyphs are placed in the top-left corner of equally
                % sized zero canvases; the canvas is at least
                % hltim-by-wltim and large enough for both glyphs.
                ch=max([hltim,h1,h2]);
                cw=max([wltim,w1,w2]);
                tmp1=zeros(ch,cw);
                tmp2=tmp1;
                tmp1(1:h1,1:w1)=imbw3;
                tmp2(1:h2,1:w2)=imlt2;
                corrim=corr2(tmp1,tmp2);

                if corrim>corrim1
                    corrim1=corrim;
                    indlt=k;
                    anglt=angs(a);
                end
            end
        end
        textocr(iread)=letters(indlt);
        iread=iread+1;

        % go to the next letter
        startcol=endcol;
        while(startcol<=w && sum(imbw1(:,startcol))<noise)
            startcol=startcol+1;
        end
    end

    % go to the next row
    startrow=endrow;
    while(startrow<=h && sum(imbw(startrow,:))<noise)
        startrow=startrow+1;
    end
end
% lt2im.m
% get image data of letters
function ltimage=lt2im(fontname,fontsize,letters)
%LT2IM Render each character of LETTERS and return the glyph bitmaps.
%   ltimage = lt2im(fontname,fontsize,letters) draws every character of
%   LETTERS with the given font name and font size in a scratch figure,
%   prints the figure to a temporary TIFF file at screen resolution,
%   binarizes it with im2bw and crops it with trimim(...,0).
%
%   Inputs
%     fontname - value for the text object's 'FontName' property
%     fontsize - value for the text object's 'FontSize' property
%     letters  - char vector; one glyph image is produced per character
%
%   Output
%     ltimage  - nlt-by-h-by-w array, nlt = length(letters). Glyph i sits
%                in the top-left corner of ltimage(i,:,:); the remaining
%                entries are 1. h and w are round(fontsize*1.8), enlarged
%                if a trimmed glyph turns out to be bigger than that.
%
%   NOTE(review): trimim is defined outside this file; it is assumed to
%   crop the margin around the pixels equal to its second argument -
%   confirm against its source.

nlt=length(letters);
basesize=round(fontsize*1.8);
hmax=basesize;
wmax=basesize;
glyphs=cell(1,nlt);

fig=figure;
% Use a unique file in the temp directory so nothing in the current
% directory (e.g. an existing 1.tif) can be overwritten and deleted.
tmpfile=[tempname '.tif'];
for i=1:nlt
    % Draw into the scratch figure explicitly rather than into whatever
    % figure happens to be current.
    ax=axes('Parent',fig,'Visible','off');
    text(0.5,0.5,letters(i),'Parent',ax,'FontName',fontname,'FontSize',fontsize);
    print(fig,'-dtiff','-r0',tmpfile);
    A=imread(tmpfile);
    delete(tmpfile);
    B=im2bw(A);
    glyphs{i}=trimim(B,0);
%    B2=imresize(B1,[size(B1,1),round(size(B1,2)*1.1)]);
    hmax=max(hmax,size(glyphs{i},1));
    wmax=max(wmax,size(glyphs{i},2));
    clf(fig);
end
% Close only the scratch figure; other open figures are left alone.
close(fig);

% Allocate after all glyph sizes are known: an oversized glyph would
% otherwise make MATLAB grow the array with zeros, which are text-coloured
% pixels, in the slices of every other letter.
ltimage=ones(nlt,hmax,wmax);
for i=1:nlt
    g=glyphs{i};
    ltimage(i,1:size(g,1),1:size(g,2))=g;
end