BibTex2Word2007: a BibTeX to Word 2007 Bibliography (References) converter written in the awk programming language

This awk script converts a BibTeX file generated by, or conforming to the format of, the JabRef bibliography manager into the Microsoft Word 2007 bibliography format.

This is a work in progress.

Download BibTex2Word2007 here v0.2.1

Download BibTex2Word2007 here v0.2 

 

Also check this out if you use the JabRef bibliography manager: you can back up selected PDF files by feeding in a list of entries in a BibTeX file. This is useful if you want to send them to your supervisor or a colleague.

Download JabRefPDF here 

#!/usr/bin/awk -f

# To Do: Eliminate Latex code from BIB file such as \&
#
# BibTex2Word2007 Bibliography Converter v 0.2.1
# (c) 2007 Fawzi Sdudah <fawzi sdudah gmail com>
# http://sdudah.googlepages.com
# Download: http://sdudah.googlepages.com/BibTex2Word2007.awk
#
# This awk script converts a Bibtex file generated or conformant
# to JabRef Bibliography manager into Microsoft Word 2007 Bibliography format.
#
# Usage: BibTex2Word2007 infile.bib > outfile.xml
# Word's XML file is in C:\Documents and Settings\<USER>\Application Data\Microsoft\Bibliography
#
# Limitations:
# 1. Values of BibTex entry keys should fit in one line only.
# e.g. the title should not take more than one line.
# 2. It handles the following Bibliography types: Journal Articles, Books,
# Inproceedings, Incollections, PhD Theses and Masters Theses.
# 3. Due to lack of information, this script is built by reverse engineering
# Word's sources xml format, so expect bugs.
# 4. Latex code such as \& and special characters (or page code) cause XML errors.
#
# Disclaimer: Written for my own one-time use; NOT thoroughly tested.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# This software is distributed under the GPL license, please
# read the license www.gnu.org

# v0.2.1 release notes:
# -fixed a bug in middle names. First and middle names are combined together.
# -fixed a bug in title section where title disappears because of a leading
# space and double braces code.
# -code matches title twice; in title and booktitle, so it prints title twice.
# -limitations notes above have been updated.

# strip(): remove the BibTeX value delimiters from the current record ($0).
#
# Takes no arguments and returns nothing; it edits $0 in place, which makes
# awk re-split the fields, so callers read the cleaned value from $2.
#
#   title = {The {DNA}, and more},   ->   title = The DNA, and more
#   year = 2007,                     ->   year = 2007
function strip()
{
    # Drop the comma that separates this field from the next one, together
    # with any trailing blanks or a carriage return left by CRLF input.
    # Only a comma at the very end of the line is removed, so commas inside
    # the value (e.g. "Last, First") are kept.
    sub(/[ \t\r]*,?[ \t\r]*$/, "");

    # Remove every brace, not just the first pair: the outer pair delimits
    # the value and inner pairs only protect capitalisation in BibTeX.
    # Bracket expressions are used because a bare "{" and "\," are not
    # portable in POSIX extended regular expressions.
    gsub(/[{}]/, "");
}

BEGIN {
    # BibTeX fields have the form "key = value", so split every input
    # line at the equals sign instead of at whitespace.
    FS = "=";

    # Emit the XML declaration, then the opening tag of the root element
    # (the matching closing tag is written by the END rule).
    printf "%s\n", "<?xml version=\"1.0\" ?>";
    printf "%s\n", "<b:Sources SelectedStyle=\"\" xmlns:b=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\" xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/bibliography\">";
}



# Return Word's <b:SourceType> value for an upper-cased BibTeX entry type
# (e.g. "ARTICLE"), or "" when the entry type is not supported.
function b2w_source_type(bibtype)
{
    if (bibtype == "ARTICLE")
        return "JournalArticle";
    # Word has no booklet source type; Book is the closest fit.
    if (bibtype == "BOOK" || bibtype == "BOOKLET")
        return "Book";
    # Book Section is the best fit Word offers for an incollection.
    if (bibtype == "INCOLLECTION")
        return "BookSection";
    # BibTeX defines @conference as an alias of @inproceedings.
    if (bibtype == "INPROCEEDINGS" || bibtype == "CONFERENCE")
        return "ConferenceProceedings";
    # Theses become Word reports, qualified by b2w_thesis_type().
    if (bibtype == "MASTERSTHESIS" || bibtype == "PHDTHESIS")
        return "Report";
    return "";
}

# Return the <b:ThesisType> text for a thesis entry type, or "" otherwise.
function b2w_thesis_type(bibtype)
{
    if (bibtype == "MASTERSTHESIS")
        return "Masters Thesis";
    if (bibtype == "PHDTHESIS")
        return "PhD Thesis";
    return "";
}

# Return s without leading and trailing blanks.
function b2w_trim(s)
{
    sub(/^[ \t]+/, "", s);
    sub(/[ \t]+$/, "", s);
    return s;
}

# Convert one name written as "Last, First Middle" into a <b:Person> element.
# Everything after the first comma goes into <b:First> (first and middle
# names are deliberately kept together); a name without a comma is emitted
# as a last name only.
function b2w_person(name,    parts, n, xml)
{
    n = split(name, parts, ",");
    xml = "<b:Person><b:Last>" b2w_trim(parts[1]) "</b:Last>";
    if (n > 1)
        xml = xml "<b:First>" b2w_trim(parts[2]) "</b:First>";
    return xml "</b:Person>";
}

# Main rule: runs once per input line and translates it to Word XML.
#
# State kept between lines (awk globals):
#   insource            - true while inside a supported BibTeX entry
#   author[], editor[]  - formatted <b:Person> elements of the current entry
#   numauthors, numeditors - number of valid elements in those arrays
#
# Fields not converted: annote, chapter, crossref, howpublished, institution,
# key, organization, series, type.
#
# NOTE: because FS is "=", a value that itself contains "=" is cut off at
# that character ($2 only holds the text up to the next "=").
{
    # Drop trailing blanks and the carriage return of CRLF files, so that a
    # closing brace is recognised even when the line is "}\r" or "} ".
    sub(/[ \t\r]+$/, "");

    # Clean each line from double curlies "{{" and "}},". Single curlies
    # cannot be removed here because a lone "}" marks the end of an entry.
    sub(/[{][{]/, "");
    sub(/[}][}],/, "");
    sub(/[}][}]/, "");

    # Delete leading whitespace (spaces, tabs) from the front of the line.
    sub(/^[ \t]+/, "");

    ##### End of an entry: a lonely right curly bracket #####
    if ($0 == "}") {
        # Only close what was opened; a "}" that ends an unsupported entry
        # must not produce a stray </b:Source>.
        if (insource) {
            # Word's format nests authors and editors in one outer <b:Author>.
            print "<b:Author>"

            print "<b:Author> <b:NameList>";
            for (j = 1; j <= numauthors; j++)
                print author[j];
            print "</b:NameList></b:Author>";

            print "<b:Editor> <b:NameList>";
            for (j = 1; j <= numeditors; j++)
                print editor[j];
            print "</b:NameList></b:Editor>";

            # Seal authors/editors, then the whole entry.
            print "</b:Author>";
            print "</b:Source>";
        }
        # Forget the names so they cannot leak into the next entry.
        numauthors = 0;
        numeditors = 0;
        insource = 0;
        next;
    }

    ##### Start of an entry: "@TYPE{citationkey," #####
    if ($0 ~ /^@/) {
        # Entry types are case-insensitive in BibTeX (@ARTICLE, @Article, ...).
        bibtype = toupper(substr($0, 2, index($0, "{") - 2));
        gsub(/[ \t]/, "", bibtype);
        srctype = b2w_source_type(bibtype);
        insource = (srctype != "");

        if (insource) {
            # The tag is the citation key: the text after "{" minus the comma.
            split($0, tag, "{");
            sub(/,/, "", tag[2]);
            print "<b:Source>";
            print "<b:Tag>", tag[2], "</b:Tag>";
            print "<b:SourceType>" srctype "</b:SourceType>";
            # ThesisType is written directly after SourceType; whether Word
            # depends on this order has not been verified.
            if (b2w_thesis_type(bibtype) != "")
                print "<b:ThesisType>" b2w_thesis_type(bibtype) "</b:ThesisType>";
        }
        else if (bibtype != "COMMENT" && bibtype != "STRING" && bibtype != "PREAMBLE") {
            # Tell the user instead of silently dropping a reference.
            print ("BibTex2Word2007: skipping unsupported entry type @" bibtype) > "/dev/stderr";
        }
        next;
    }

    # Lines outside a supported entry would end up outside any <b:Source>.
    if (!insource)
        next;

    ##### Fields #####
    # The key is the text before "=", without trailing blanks. BibTeX field
    # names are case-insensitive, so compare in lower case.
    key = tolower($1);
    sub(/[ \t]+$/, "", key);

    # Remove the value delimiters; afterwards $2 holds the bare value.
    strip();

    if (key == "author") {
        # Authors are separated by the word "and" surrounded by blanks;
        # requiring the blanks keeps names such as "Anderson" intact.
        numauthors = split($2, names, /[ \t]+and[ \t]+/);
        for (i = 1; i <= numauthors; i++)
            author[i] = b2w_person(names[i]);
    }
    else if (key == "editor") {
        # Editors are parsed exactly like authors.
        numeditors = split($2, names, /[ \t]+and[ \t]+/);
        for (i = 1; i <= numeditors; i++)
            editor[i] = b2w_person(names[i]);
    }
    else if (key == "booktitle") {
        # Conference Publication Name is the best fit in Word.
        print "<b:ConferenceName>",$2,"</b:ConferenceName>";
    }
    else if (key == "address") {
        # Address, Province/Region, Country all go into City.
        print "<b:City>", $2,"</b:City>";
    }
    else if (key == "edition") {
        print "<b:Edition>", $2, "</b:Edition>";
    }
    else if (key == "isbn") {
        print "<b:StandardNumber>",$2,"</b:StandardNumber>";
    }
    else if (key == "journal") {
        print "<b:JournalName> ",$2," </b:JournalName>";
    }
    else if (key == "month") {
        print "<b:Month>",$2,"</b:Month>";
    }
    else if (key == "number") {
        # BibTeX "number" is taken to be the journal issue.
        print "<b:Issue>",$2 ,"</b:Issue>";
    }
    else if (key == "pages") {
        # BibTeX writes page ranges with two hyphens, Word with one.
        # Collapse runs of hyphens so that "12--15" and "12-15" both
        # come out as "12-15".
        pages = $2;
        gsub(/-+/, "-", pages);
        print "<b:Pages> ",pages," </b:Pages>";
    }
    else if (key == "publisher") {
        print "<b:Publisher>", $2,"</b:Publisher>";
    }
    else if (key == "school") {
        # BibTeX thesis --> Word report; the school becomes the institution
        # (not to be confused with the BibTeX "institution" field).
        print "<b:Institution>", $2,"</b:Institution>";
    }
    else if (key == "title") {
        print "<b:Title>",$2,"</b:Title>";
    }
    else if (key == "volume") {
        print "<b:Volume> ",$2 ," </b:Volume>";
    }
    else if (key == "year") {
        print "<b:Year> ",$2 ," </b:Year>";
    }
    else if (key == "comment" || key == "note") {
        # Exact comparison: a substring match on "note" would also catch
        # "annote".
        print "<b:Comments>", $2, "</b:Comments>";
    }
}
############## End ##############
# After the last input line: close the root element opened in BEGIN.
END {
    printf "%s\n", "</b:Sources>";
}