#include <stdio.h> // for getchar(), putchar(), printf(), EOF, NULL
#include <string.h> // for strcmp(), strlen(), strcpy()
#include <ctype.h> // for isspace(), isalpha(), isalnum()
#define MAXWORD 100
struct tnode // tree node
{
char *word; // points to the text
int count; // number of occurrences
struct tnode *left; // left child (smaller)
struct tnode *right; // right child (bigger)
};
struct tnode * // add node to the tree
addtnode (struct tnode *, char *);
int treeprint(struct tnode *); // start with the root node
int getword(char *, int);
// word frequency count
int main()
{
int c;
struct tnode *root;
char word[MAXWORD];
root = NULL; // initialize
while ((c = getword(word, MAXWORD)) != EOF)
{
if (isalpha(c) || c == '_')
{root = addtnode(root, word);}
}
c = treeprint(root);
if (c > 0) // root != NULL
{
c--; // to account for the last incrementation in treeprint()
if (c % 5 != 4) {putchar('\n');}
}
return 0;
}
struct tnode *
nalloc(void); // allocate node
// duplicate string:
char * strDup(char *); // strdup() is declared by string.h
struct tnode *
addtnode(struct tnode *p, char *w)
{
int cond;
if (p == NULL) // a new word has arrived
{
p = nalloc(); // make a new node
p->word = strDup(w);
p->count = 1;
p->left = p->right = NULL;
}
else if ((cond = strcmp(w, p->word)) == 0)
{p->count++;} // repeated word
else if (cond < 0) // less than into left subtree
{p->left = addtnode(p->left, w);}
else // greater than into right subtree
{p->right = addtnode(p->right, w);}
return p;
}
// in-order print of tree with root p (left, node, right)
int treeprint (struct tnode *p)
{ // return count of nodes
static int i = 0;
if (p != NULL)
{
treeprint(p->left);
printf("(%d, %s),%s", p->count, p->word, (i % 5 == 4)? "\n" : " ");
i++;
treeprint(p->right);
}
return i;
}
#include <stdlib.h> // for malloc()
struct tnode *
nalloc(void) // allocate space for a tree node
{ // make a tnode
return (struct tnode *) malloc (sizeof(struct tnode));
}
char * strDup(char *s) // make a duplicate of s
{
char *p;
// we assume a char is stored on a byte
p = (char *) malloc (strlen(s) + 1); // +1 for ending '\0'
if (p != NULL)
{strcpy(p, s);}
return p;
}
int getch(void);
void ungetch(int);
// get next word or character from input
int getword(char *word, int lim)
{
int c;
char *w = word;
while (isspace(c = getch()))
{} // skip beginning whitespace
if (c != EOF)
{*w++ = c;}
if (!isalpha(c) && c != '_')
{
*w = '\0';
return c;
}
for ( ; --lim > 0; w++)
{
if (!isalnum(*w = getch()) && *w != '_')
{
ungetch(*w);
break; // out of for(), w++ is not executed
}
}
*w = '\0';
return word[0];
}
// buffer for ungetch():
int buf = EOF-1; // not a real character, not even EOF
int getch(void) // get a (possibly pushed-back) character
{
if (buf < EOF)
{
return getchar();
}
int temp = buf; // buf >= EOF
buf = EOF-1; // reset buf
return temp;
}
// push character back on input (make it available for the next getch()):
void ungetch(int c)
{
buf = c;
}
/*
gcc tree.c -o tree
./tree < tree.c
(9, EOF), (4, MAXWORD), (8, NULL), (4, _), (10, a),
(2, account), (2, add), (6, addtnode), (3, allocate), (2, arrived),
(2, assume), (2, available), (3, back), (2, beginning), (2, bigger),
(2, break), (8, buf), (2, buffer), (2, by), (2, byte),
(20, c), (15, char), (5, character), (3, child), (4, cond),
(7, count), (2, ctype), (2, d), (2, declared), (2, define),
(3, duplicate), (4, else), (2, ending), (2, even), (2, executed),
(12, for), (2, frequency), (2, from), (2, gcc), (3, get),
(6, getch), (3, getchar), (4, getword), (2, greater), (6, h),
(2, has), (5, i), (13, if), (3, in), (5, include),
(2, incrementation), (2, initialize), (3, input), (19, int), (3, into),
(4, is), (3, isalnum), (4, isalpha), (3, isspace), (2, it),
(2, last), (9, left), (2, less), (3, lim), (2, main),
(5, make), (4, malloc), (3, n), (4, nalloc), (3, new),
(3, next), (8, node), (2, nodes), (4, not), (2, number),
(2, o), (2, occurrences), (6, of), (3, on), (2, or),
(2, order), (2, out), (27, p), (2, points), (2, possibly),
(2, print), (3, printf), (2, push), (2, pushed), (3, putchar),
(2, real), (2, repeated), (2, reset), (11, return), (9, right),
(9, root), (7, s), (2, sizeof), (2, skip), (2, smaller),
(2, space), (2, start), (2, static), (2, stdio), (2, stdlib),
(2, stored), (4, strDup), (3, strcmp), (3, strcpy), (2, strdup),
(4, string), (3, strlen), (15, struct), (3, subtree), (1, talloc),
(3, temp), (2, text), (3, than), (6, the), (16, tnode),
(4, to), (9, tree), (7, treeprint), (1, trees), (5, ungetch),
(7, void), (15, w), (2, we), (3, while), (2, whitespace),
(3, with), (15, word),
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
Exercise 6-2. Write a program that reads a C program and prints in alphabetical order each group of variable names that are identical in the first 6 characters, but different somewhere thereafter. Don't count words within strings and comments. Make 6 a parameter that can be set from the command line.
#include <stdio.h> // for getchar(), putchar(), printf(), EOF
// test multiline preprocessor command:
#define FALSE \
0
#define TRUE 1
// testing a \
multiline \
C++ style \
comment
int main()
{
int c1, c2;
int insideComment = FALSE; // for C style comments
while ((c1 = getchar()) != EOF)
{
if (c1 == '\\') // escape character or sequence
{ /* skip \', \", \\ */ // \ continues a C++ style comment on the next line
c2 = getchar();
if (c2 == EOF)
{ // \ newline is part of a multiline preprocessor command or C++ comment
printf("\nEscape character not ended properly\n"); // Error message
return 1; // return from main(), end program with an error message
}
if (c2 == '\n') // not an escape, but a multiline command or comment
{
putchar(c1); // '\\'
putchar(c2); // '\n'
}
// else remove escape (print nothing)
}
else if (c1 == '\'') // character literal
{ // skip '\\', '"', '\"', '*', '#'
// remove character literal (print nothing)
while ((c1 = getchar()) != '\'')
{
if (c1 == EOF || c1 == '\n')
{
printf("\nCharacter literal not ended properly\n"); // Error message
return 1; // end program with an error message
}
// else
if (c1 == '\\') // escape char or sequence
{
// print nothing
c1 = getchar();
if (c1 == EOF)
{ // \ newline is part of a multiline preprocessor command or C++ comment
printf("\nEscape character not ended properly\n"); // Error message
return 1; // end program with an error message
}
// else skip char
}
// else skip char
} // here c1 == '\''
}
else if (c1 == '\"') // strings are on a single line
{ // Skip strings: "/*", "*/", "//", "'", "\"", "\'", "\\", "#"
// skip char (print nothing)
while ((c1 = getchar()) != '\"')
{
if (c1 == EOF || c1 == '\n')
{
printf("\nQuoted string not ended properly\n"); // Error message
return 1; // end program with an error message
}
else if (c1 == '\\') // escape char or sequence
{
// skip char (print nothing)
c1 = getchar();
if (c1 == EOF || c1 == '\n')
{
printf("\nEscape character not ended properly\n"); // Error message
return 1; // end program with an error message
}
// else skip char
}
// else skip char
} // here c1 == '\"'
} // continue with getchar() in outer while() to skip last '\"'
else if (c1 == '/')
{
c2 = /* testing */ getchar();
if (c2 == '/') // beginning of a C++ style comment
{
while ((c2 = getchar()) != EOF)
{ // \ continues a C++ style comment on the next line
if (c1 != '\\' && c2 == '\n')
{break;} // out of inner while()
c1 = c2; // c1 holds the penultimate char read
} // here c2 == '\n' or EOF, end of C++ style comment
if (c2 == EOF)
{return 0;} // end program normally, exit main()
putchar(c2); // putchar('\n'); // C++ style comment ends with '\n'
}
else if (c2 == '*') // beginning of a C style comment
{
insideComment = TRUE;
while (insideComment) // insideComment == 1 (insideComment != 0)
{
while ((c1 = getchar()) != '*')
{
if (c1 == EOF)
{
printf("\nC style comment not closed properly\n"); // Error message
return 1; // end program with an error message
}
// else skip chars till '*'
}
c2 = getchar();
if (c2 == EOF)
{
printf("\nC style comment not closed properly\n"); // Error message
return 1; // end program with an error message
}
else if (c2 == '/') // end of C style comment
{
insideComment = FALSE; // reset
break; // out of while(insideComment)
}
// else continue; // C style comment continues
} // end of while (insideComment)
}
else // no comment, false alarm
{
putchar(c1);
if (c2 != EOF) {putchar(c2);}
else {return 0;} // end program normally, exit main()
}
}
else if (c1 == '#') // skip preprocessor control lines
{
while ((c2 = getchar()) != EOF)
{ // \ continues a preprocessor command on the next line
if (c1 != '\\' && c2 == '\n')
{break;} // out of inner while()
c1 = c2; // c1 holds the penultimate char read
} // here c2 == '\n' or EOF, end of preprocessor command
if (c2 == EOF)
{return 0;} // end program normally
// else skip line (don't print newline)
}
else {putchar(c1);} // no comment
} // end of outer while()
return 0;
}
/*
gcc nocomment.c -o nocomment
./nocomment < groups.c > copy1.c
gcc nokeywords.c -o nokeywords
./nokeywords < copy1.c > copy2.c
rm copy1.c copy2.c // clean
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
#include <stdio.h> // for getchar(), putchar(), printf(), EOF
#include <string.h> // for strcmp()
#include <ctype.h> // for isspace(), isalpha(), isalnum()
#define MAXWORD 100
#define NKEYS (sizeof (keytab) / sizeof(keytab[0]))
char * keytab[] =
{"auto", "break", "case", "char", "const", "continue",
"default", "define", "do", "double", "else", "enum",
"extern", "float", "for", "goto", "if", "include",
"int", "long", "register", "return", "short", "signed",
"sizeof", "static", "struct", "switch", "typedef",
"union", "unsigned", "void", "volatile", "while"};
// get next word or character from input, print whitespace
int getword(char *, int);
int binsearch(char *, char *[], int); // find C keywords
int main()
{
int c, n;
char word[MAXWORD];
while((c = getword(word, MAXWORD)) != EOF)
{
if (isalpha(c) || c == '_')
{
if ((n = binsearch(word, keytab, NKEYS)) < 0)
{printf("%s", word);}
}
else {printf("%s", word);}
}
return 0;
}
// find word in tab[0] ... tab[n-1]
int binsearch(char *word, char * tab[], int n)
{
int cond;
int low, high, mid;
low = 0;
high = n-1;
while (low <= high)
{
mid = (low + high) / 2;
if ((cond = strcmp(word, tab[mid])) < 0)
{high = mid - 1;}
else if (cond > 0)
{low = mid + 1;}
else {return mid;} // found it
}
return -1; // not found
}
int getch(void);
void ungetch(int);
// get next word or character from input, print whitespace
int getword(char *word, int lim)
{
int c;
char *w = word;
while (isspace(c = getch()))
{putchar(c);} // print beginning whitespace
if (c != EOF)
{*w++ = c;}
if (!isalpha(c) && c != '_')
{
*w = '\0';
return c;
}
for ( ; --lim > 0; w++)
{
if (!isalnum(*w = getch()) && *w != '_')
{
ungetch(*w);
break; // out of for(), w++ is not executed
}
}
*w = '\0';
return word[0];
}
// buffer for ungetch():
int buf = EOF-1; // not a real character, not even EOF
int getch(void) // get a (possibly pushed-back) character
{
if (buf < EOF)
{
return getchar();
}
int temp = buf; // buf >= EOF
buf = EOF-1; // reset buf
return temp;
}
// push character back on input (make it available for the next getch()):
void ungetch(int c)
{
buf = c;
}
/*
gcc nokeywords.c -o nokeywords
gcc nocomment.c -o nocomment
./nocomment < groups.c > copy1.c
./nokeywords < copy1.c > copy2.c
rm copy1.c copy2.c // clean
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
#include <stdio.h> // for getchar(), putchar(), printf(), EOF, NULL
#include <string.h> // for strcmp(), strncmp(), strlen(), strcpy()
#include <stdlib.h> // for atoi(), malloc()
#include <ctype.h> // for isspace(), isalpha(), isalnum()
#define MAXWORD 100 // max word length
#define MAXLENGTH 10 // for words comparison
enum bool {FALSE, TRUE};
struct tnode // tree node
{
char *word; // points to the text
enum bool lastnode; // used for printing
struct tnode *left; // left child (smaller)
struct tnode *right; // right child (bigger)
};
struct tree
{
struct tnode *root;
struct tree *left; // left child (smaller)
struct tree *right; // right child (bigger)
};
struct tnode * // add node to the tree
addtnode (struct tnode *, char *);
struct tree *
addtree (struct tree *, char *, int);
int count; // used for printing
void treeprint(struct tnode *); // start with the root node
void printalltrees(struct tree *); // start with the root tree
int getword(char *, int);
// print alphabetically groups of words similar in the first n chars
int main(int argc, char *argv[])
{
if (argc > 2)
{
printf("Usage: ./groups n\n");
printf("0 <= n <= %d\n", MAXLENGTH);
return 1; // end program, signalling error
}
int n = 0; // default length for words comp. (compare first n chars)
if (argc == 2)
{
n = atoi(argv[1]);
if (n < 0 || n > 10)
{
printf("Usage: ./groups n\n");
printf("0 <= n <= %d\n", MAXLENGTH);
return 1; // end program, signalling error
}
}
struct tree *rtree; // root tree
char word[MAXWORD];
rtree = NULL; // initialize
int c;
while ((c = getword(word, MAXWORD)) != EOF)
{
if (isalpha(c) || c == '_')
{rtree = addtree(rtree, word, n);}
}
printalltrees(rtree);
return 0;
}
struct tree *
talloc(void); // allocate tree
struct tnode *
nalloc(void); // allocate node
// duplicate string:
char * strDup(char *); // strdup() is declared by string.h
struct tree *
addtree(struct tree *p, char *w, int n)
{
int cond;
if (p == NULL) // a new word has arrived
{
p = talloc(); // make a new tree
p->root = nalloc(); // make a new node
p->root->word = strDup(w);
p->root->lastnode = FALSE;
p->root->left = p->root->right = NULL;
p->left = p->right = NULL;
}
else if ((cond = strncmp(w, p->root->word, n)) == 0)
{ // same type of word
addtnode(p->root, w); // add to the current tree
}
else if (cond < 0) // less than into left subforest
{p->left = addtree(p->left, w, n);}
else // greater than into right subforest
{p->right = addtree(p->right, w, n);}
return p;
}
struct tnode *
addtnode(struct tnode *p, char *w)
{
int cond;
if (p == NULL) // a new word has arrived
{
p = nalloc(); // make a new node
p->word = strDup(w);
p->lastnode = FALSE;
p->left = p->right = NULL;
}
else if ((cond = strcmp(w, p->word)) < 0) // less than into left subtree
{p->left = addtnode(p->left, w);}
else if (cond > 0) // greater than into right subtree
{p->right = addtnode(p->right, w);}
// else // repeated word, do nothing
return p;
}
struct tree *
talloc(void) // allocate space for a tree node
{ // make a tnode
return (struct tree *) malloc (sizeof(struct tree));
}
struct tnode *
nalloc(void) // allocate space for a tree node
{ // make a tnode
return (struct tnode *) malloc (sizeof(struct tnode));
}
char * strDup(char *s) // make a duplicate of s
{
char *p;
// we assume a char is stored on a byte
p = (char *) malloc (strlen(s) + 1); // +1 for ending '\0'
if (p != NULL)
{strcpy(p, s);}
return p;
}
// in-order print of tree with root p (left, node, right)
void treeprint (struct tnode *p)
{
if (p != NULL)
{
treeprint(p->left);
if (p->lastnode == TRUE)
{printf("%s", p->word);}
else
{
printf("%s,%s", p->word, (count % 5 == 4)? "\n" : " ");
count++;
treeprint(p->right);
}
}
}
// in-order print of trees with root tree p (left, node, right)
void printalltrees(struct tree *p) // start with the root tree
{
struct tnode *t;
if (p != NULL)
{
printalltrees(p->left);
putchar('(');
count = 0; // reset
t = p->root;
while (t->right != NULL)
{t = t->right;}
t->lastnode = TRUE;
treeprint(p->root);
putchar(')'), putchar('\n');
printalltrees(p->right);
}
}
int getch(void);
void ungetch(int);
// get next word or character from input
int getword(char *word, int lim)
{
int c;
char *w = word;
while (isspace(c = getch()))
{} // skip beginning whitespace
if (c != EOF)
{*w++ = c;}
if (!isalpha(c) && c != '_')
{
*w = '\0';
return c;
}
for ( ; --lim > 0; w++)
{
if (!isalnum(*w = getch()) && *w != '_')
{
ungetch(*w);
break; // out of for(), w++ is not executed
}
}
*w = '\0';
return word[0];
}
// buffer for ungetch():
int buf = EOF-1; // not a real character, not even EOF
int getch(void) // get a (possibly pushed-back) character
{
if (buf < EOF)
{
return getchar();
}
int temp = buf; // buf >= EOF
buf = EOF-1; // reset buf
return temp;
}
// push character back on input (make it available for the next getch()):
void ungetch(int c)
{
buf = c;
}
/*
gcc groups.c -o groups
gcc nocomment.c -o nocomment
./nocomment < groups.c > copy1.c
gcc nokeywords.c -o nokeywords
./nokeywords < copy1.c > copy2.c
./groups < copy2.c
./groups 0 < copy2.c
(EOF, FALSE, MAXLENGTH, MAXWORD, NULL,
TRUE, addtnode, addtree, argc, argv,
atoi, bool, buf, c, cond,
count, getch, getchar, getword, isalnum,
isalpha, isspace, lastnode, left, lim,
main, malloc, n, nalloc, p,
printalltrees, printf, putchar, right, root,
rtree, s, strDup, strcmp, strcpy,
strlen, strncmp, t, talloc, temp,
tnode, tree, treeprint, ungetch, w,
word)
./groups -1
Usage: ./groups n
0 <= n <= 10
./groups 11
Usage: ./groups n
0 <= n <= 10
./groups 1 < copy2.c
(EOF)
(FALSE)
(MAXLENGTH, MAXWORD)
(NULL)
(TRUE)
(addtnode, addtree, argc, argv, atoi)
(bool, buf)
(c, cond, count)
(getch, getchar, getword)
(isalnum, isalpha, isspace)
(lastnode, left, lim)
(main, malloc)
(n, nalloc)
(p, printalltrees, printf, putchar)
(right, root, rtree)
(s, strDup, strcmp, strcpy, strlen,
strncmp)
(t, talloc, temp, tnode, tree,
treeprint)
(ungetch)
(w, word)
./groups 3 < copy2.c
./groups 6 < copy2.c
rm copy1.c copy2.c // clean
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
Exercise 6-3. Write a cross-referencer that prints a list of all words in a document, and, for each word, a list of the line numbers on which it occurs. Remove noise words like "the," "and," and so on.
#include <stdio.h> // for getchar(), putchar(), printf(), EOF, NULL
#include <string.h> // for strcmp(), strlen(), strcpy()
#include <ctype.h> // for tolower(), isspace(), isalpha(), isalnum()
#define MAXWORD 100 // max word length
#define MAXNOLINES 100 // max no of lines
#define NOISEWORDSLEN (sizeof(noiseWords) / sizeof(noiseWords[0]))
struct tnode // tree node
{
char *word; // points to the text
int nlines; // no of lines on which the word appears
int lines[MAXNOLINES]; // line numbers on which the word appears
struct tnode *left; // left child (smaller)
struct tnode *right; // right child (bigger)
};
struct tnode * // add node to the tree
addtnode (struct tnode *, char *);
void treeprint(struct tnode *); // start with the root node
int getword(char *, int);
int lineno = 1; // line number (first line is 1)
char *noiseWords[] =
{"a", "about", "above", "after", "again", "against", "ain", "all", "am",
"an", "and", "any", "are", "aren", "as", "at", "b", "be", "because", "been",
"before", "being", "below", "between", "both", "but", "by", "c", "can",
"couldn", "d", "did", "didn", "do", "does", "doesn", "doing", "don", "down",
"during", "e", "each", "f", "few", "for", "from", "further", "g", "h", "had",
"hadn", "has", "hasn", "have", "haven", "having", "he", "her", "here", "hers",
"herself", "him", "himself", "his", "how", "i", "if", "in", "into", "is",
"isn", "it", "its", "itself", "j", "just", "k", "l", "ll", "m", "ma", "me",
"mightn", "more", "most", "mustn", "my", "myself", "n", "needn", "no", "nor",
"not", "now", "o", "of", "off", "on", "once", "only", "or", "other", "our",
"ours", "ourselves", "out", "over", "own", "p", "q", "r", "re", "s", "same",
"shan", "she", "should", "shouldn", "so", "some", "such", "t", "than", "that",
"the", "their", "theirs", "them", "themselves", "then", "there", "these",
"they", "this", "those", "through", "to", "too", "u", "under", "until", "up",
"v", "ve", "very", "w", "was", "wasn", "we", "were", "weren", "what", "when",
"where", "which", "while", "who", "whom", "why", "will", "with", "won",
"wouldn", "x", "y", "you", "your", "yours", "yourself", "yourselves", "z"};
long binsearch(char *word);
// cross-referencer: print words and lines on which they appear
int main()
{
int c;
struct tnode *root;
char word[MAXWORD];
root = NULL; // initialize
while ((c = getword(word, MAXWORD)) != EOF)
{
if (isalpha(c) && (binsearch(word) == -1))
{root = addtnode(root, word);}
}
treeprint(root);
return 0;
}
// case insensitive string comparison
int stricmp(char *s1, char *s2); // from C++
// find word in noiseWords[], noiseWords[] must be in alphabetical order
long binsearch(char *word)
{
int cond;
long unsigned mid, low = 0, high = NOISEWORDSLEN - 1;
while (low <= high)
{
mid = (low + high) / 2;
cond = stricmp(word, noiseWords[mid]); // case-insensitive comparison
if (cond < 0)
{high = mid - 1;}
else if (cond > 0)
{low = mid + 1;}
else {return mid;} // found
}
return -1; // not found
}
// case insensitive string comparison
int stricmp(char *s1, char *s2) // from C++
{
int diff;
while (*s1 && *s2)
{
diff = tolower(*s1) - tolower(*s2);
if (diff != 0) {return diff;}
s1++, s2++;
}
return tolower(*s1) - tolower(*s2);
}
struct tnode *
nalloc(void); // allocate node
// duplicate string:
char * strDup(char *); // strdup() is declared by string.h
struct tnode *
addtnode(struct tnode *p, char *w)
{
int cond;
if (p == NULL) // a new word has arrived
{
p = nalloc(); // make a new node
p->word = strDup(w);
p->lines[0] = lineno;
p->nlines = 1; // 1 line
p->left = p->right = NULL;
}
else if ((cond = strcmp(w, p->word)) == 0)
{ // repeated word, maybe on another line
if (p->lines[p->nlines - 1] != lineno)
{p->lines[p->nlines++] = lineno;}
}
else if (cond < 0) // less than into left subtree
{p->left = addtnode(p->left, w);}
else // greater than into right subtree
{p->right = addtnode(p->right, w);}
return p;
}
// in-order print of tree with root p (left, node, right)
void treeprint (struct tnode *p)
{ // return count of nodes
int i;
if (p != NULL)
{
treeprint(p->left);
printf("(%s: ", p->word);
for (i = 0; i < p->nlines - 1; i++)
{printf("%d,%s", p->lines[i], (i % 10 == 9) ? "\n" : " ");}
printf("%d)\n", p->lines[i]); // p->lines[p->nlines - 1]
i++;
treeprint(p->right);
}
}
#include <stdlib.h> // for malloc()
struct tnode *
nalloc(void) // allocate space for a tree node
{ // make a tnode
return (struct tnode *) malloc (sizeof(struct tnode));
}
char * strDup(char *s) // make a duplicate of s
{
char *p;
// we assume a char is stored on a byte
p = (char *) malloc (strlen(s) + 1); // +1 for ending '\0'
if (p != NULL)
{strcpy(p, s);}
return p;
}
int getch(void);
void ungetch(int);
// get next word or character from input
int getword(char *word, int lim)
{
int c;
char *w = word;
while (isspace(c = getch()))
{ // skip beginning whitespace
if (c == '\n') {lineno++;}
}
if (c != EOF)
{*w++ = c;}
if (!isalpha(c))
{
*w = '\0';
return c;
}
for ( ; --lim > 0; w++)
{
if (!isalnum(*w = getch()))
{
ungetch(*w);
break; // out of for(), w++ is not executed
}
}
*w = '\0';
return word[0];
}
// buffer for ungetch():
int buf = EOF-1; // not a real character, not even EOF
int getch(void) // get a (possibly pushed-back) character
{
if (buf < EOF)
{
return getchar();
}
int temp = buf; // buf >= EOF
buf = EOF-1; // reset buf
return temp;
}
// push character back on input (make it available for the next getch()):
void ungetch(int c)
{
buf = c;
}
/*
gcc cref.c -o cref
./cref < cref.c
(EOF: 1, 55, 185, 208, 212, 217, 218, 230)
(MAXNOLINES: 6, 13, 231)
(MAXWORD: 5, 51, 55, 232)
(NOISEWORDSLEN: 7, 73, 233)
(NULL: 1, 53, 113, 119, 139, 165, 234)
(add: 18, 235)
(addtnode: 19, 58, 109, 127, 129, 236)
(allocate: 104, 154, 237)
(alphabetical: 69, 238)
(another: 122, 239)
(appear: 46, 240)
(appears: 12, 13, 241)
(arrived: 113, 242)
(assume: 162, 243)
(available: 222, 244)
(back: 210, 222, 245)
(beginning: 181, 246)
(bigger: 15, 247)
(binsearch: 44, 57, 70, 248)
(break: 199, 249)
(buf: 208, 212, 217, 218, 225, 250)
(buffer: 207, 251)
(byte: 162, 252)
(case: 66, 78, 89, 253)
(char: 11, 19, 21, 25, 44, 51, 67, 70, 90, 106,
109, 159, 161, 162, 163, 175, 178, 254)
(character: 174, 208, 210, 222, 256)
(child: 14, 15, 257)
(comparison: 66, 78, 89, 258)
(cond: 72, 78, 79, 81, 111, 121, 126, 259)
(count: 136, 260)
(cref: 228, 229, 261)
(cross: 46, 262)
(ctype: 3, 263)
(declared: 106, 264)
(define: 5, 6, 7, 265)
(diff: 92, 95, 96, 266)
(duplicate: 105, 159, 267)
(else: 81, 83, 121, 126, 128, 268)
(ending: 163, 269)
(even: 208, 270)
(executed: 199, 271)
(find: 69, 272)
(first: 23, 273)
(found: 83, 86, 274)
(gcc: 228, 275)
(get: 174, 210, 276)
(getch: 171, 180, 196, 210, 222, 277)
(getchar: 1, 214, 278)
(getword: 21, 55, 175, 279)
(greater: 128, 280)
(high: 73, 75, 77, 80, 281)
(include: 1, 2, 3, 151, 282)
(initialize: 53, 283)
(input: 174, 222, 284)
(insensitive: 66, 78, 89, 285)
(int: 12, 13, 21, 23, 47, 49, 67, 72, 90, 92,
111, 137, 171, 172, 175, 177, 208, 210, 217, 223,
286)
(isalnum: 3, 196, 289)
(isalpha: 3, 57, 188, 290)
(isspace: 3, 180, 291)
(left: 14, 119, 126, 127, 134, 141, 292)
(length: 5, 293)
(less: 126, 294)
(lim: 175, 194, 295)
(line: 13, 23, 118, 122, 296)
(lineno: 23, 117, 123, 124, 182, 297)
(lines: 6, 12, 13, 46, 117, 123, 124, 144, 145, 298)
(long: 44, 70, 73, 299)
(low: 73, 75, 77, 82, 300)
(main: 47, 301)
(make: 115, 155, 159, 222, 302)
(malloc: 151, 156, 163, 303)
(max: 5, 6, 304)
(maybe: 122, 305)
(mid: 73, 77, 78, 80, 82, 83, 306)
(must: 69, 307)
(nalloc: 104, 115, 154, 308)
(new: 113, 115, 309)
(next: 174, 222, 310)
(nlines: 12, 118, 123, 124, 143, 145, 311)
(node: 9, 18, 20, 104, 115, 134, 154, 312)
(nodes: 136, 313)
(noiseWords: 7, 25, 69, 78, 314)
(number: 23, 315)
(numbers: 13, 316)
(order: 69, 134, 317)
(points: 11, 318)
(possibly: 210, 319)
(print: 46, 134, 320)
(printf: 1, 142, 144, 145, 321)
(push: 222, 322)
(pushed: 210, 323)
(putchar: 1, 324)
(real: 208, 325)
(referencer: 46, 326)
(repeated: 122, 327)
(reset: 218, 328)
(return: 63, 83, 86, 96, 100, 131, 136, 156, 168, 191,
204, 214, 220, 329)
(right: 15, 119, 128, 129, 134, 147, 331)
(root: 20, 50, 53, 58, 61, 134, 332)
(s1: 67, 90, 93, 95, 97, 100, 333)
(s2: 67, 90, 93, 95, 97, 100, 334)
(sizeof: 7, 156, 335)
(skip: 181, 336)
(smaller: 14, 337)
(space: 154, 338)
(start: 20, 339)
(stdio: 1, 340)
(stdlib: 151, 341)
(stored: 162, 342)
(strDup: 106, 116, 159, 343)
(strcmp: 2, 121, 344)
(strcpy: 2, 166, 345)
(strdup: 106, 346)
(stricmp: 67, 78, 90, 347)
(string: 2, 66, 89, 105, 106, 348)
(strlen: 2, 163, 349)
(struct: 9, 14, 15, 18, 19, 20, 50, 103, 108, 109,
135, 153, 156, 350)
(subtree: 126, 128, 352)
(temp: 217, 220, 353)
(text: 11, 354)
(tnode: 9, 14, 15, 18, 19, 20, 50, 103, 108, 109,
135, 153, 155, 156, 355)
(tolower: 3, 95, 100, 357)
(tree: 9, 18, 134, 154, 358)
(treeprint: 20, 61, 135, 141, 147, 359)
(ungetch: 172, 198, 207, 223, 360)
(unsigned: 73, 361)
(void: 20, 104, 135, 154, 171, 172, 210, 223, 362)
(whitespace: 181, 363)
(word: 5, 11, 12, 13, 44, 51, 55, 57, 58, 69,
70, 78, 113, 116, 121, 122, 142, 174, 175, 178,
204, 364)
(words: 46, 367)
*/
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
*****************************************************************************************
Exercise 6-4. Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.
#include <stdio.h> // for getchar(), putchar(), printf(), EOF, NULL
#include <string.h> // for strcmp(), strlen(), strcpy()
#include <ctype.h> // for isspace(), isalpha(), isalnum()
#define MAXWORD 100 // max word length
#define MAXNOWORDS 1000 // max no of words (with the same frequency)
struct tnode // tree node
{
char *word; // points to the text
int count; // number of occurrences
struct tnode *left; // left child (smaller)
struct tnode *right; // right child (bigger)
};
struct fnode // frequency node
{
int nw; // no of words with the same frequency, size of words[]
char *words[MAXNOWORDS]; // words with the same frequency
int count; // number of occurrences (frequency)
struct fnode *left; // left child (smaller)
struct fnode *right; // right child (bigger)
};
struct tnode * // add node to the tree
addtnode (struct tnode *, char *);
struct fnode * // add tnode to fnode (frequency) tree
addfnode (struct fnode *, struct tnode *);
struct fnode * // build a frequency tree from existing tree
addAllNodes (struct fnode *, struct tnode *);
void printftree(struct fnode *); // start with the root node
int getword(char *, int);
// word frequency count
int main()
{
int c;
struct tnode *root;
char word[MAXWORD];
root = NULL; // initialize
while ((c = getword(word, MAXWORD)) != EOF)
{
if (isalpha(c) || c == '_')
{root = addtnode(root, word);}
}
struct fnode *fr = NULL; // root of the frequency tree
fr = addAllNodes(fr, root);
printftree(fr);
return 0;
}
struct tnode *
nalloc(void); // allocate tree node
// duplicate string:
char * strDup(char *); // strdup() is declared by string.h
struct tnode *
addtnode(struct tnode *p, char *w)
{
int cond;
if (p == NULL) // a new word has arrived
{
p = nalloc(); // make a new node
p->word = strDup(w);
p->count = 1;
p->left = p->right = NULL;
}
else if ((cond = strcmp(w, p->word)) == 0)
{p->count++;} // repeated word
else if (cond < 0) // less than into left subtree
{p->left = addtnode(p->left, w);}
else // greater than into right subtree
{p->right = addtnode(p->right, w);}
return p;
}
struct fnode *
falloc(void); // allocate frequency node
struct fnode *
addfnode(struct fnode *p, struct tnode *t)
{
if (p == NULL) // a new frequency has arrived
{
p = falloc(); // make a new frequency node
p->count = t->count;
p->words[0] = t->word; // no need to allocate space here
p->nw = 1;
p->left = p->right = NULL;
}
else if (p->count == t->count)
{ // repeated frequency, different words
p->words[p->nw++] = t->word; // no need to allocate space
}
else if (p->count < t->count) // greater than into left subtree
{p->left = addfnode(p->left, t);}
else // left than into right subtree
{p->right = addfnode(p->right, t);}
return p;
}
struct fnode * // build a frequency tree from existing tree
addAllNodes (struct fnode *fr, struct tnode *tr)
{
if (tr != NULL)
{ // pre-order traversal of tree (node, left, right)
fr = addfnode(fr, tr);
fr = addAllNodes(fr, tr->left);
fr = addAllNodes(fr, tr->right);
}
}
// in-order print of tree with root p (left, node, right)
void printftree (struct fnode *p)
{
int i;
if (p != NULL)
{
printftree(p->left);
printf("(%d: ", p->count);
for (i = 0; i < p->nw - 1; i++)
{printf("%s,%s", p->words[i], (i % 5 == 4) ? "\n" : " ");}
printf("%s)\n", p->words[i]); // p->words[p->nw - 1]
printftree(p->right);
}
}
#include <stdlib.h> // for malloc()
struct tnode *
nalloc(void) // allocate space for a tree node
{ // make a tnode
return (struct tnode *) malloc (sizeof(struct tnode));
}
struct fnode *
falloc(void) // allocate space for a frequency node
{ // make a fnode
return (struct fnode *) malloc (sizeof(struct fnode));
}
char * strDup(char *s) // make a duplicate of s
{
char *p;
// we assume a char is stored on a byte
p = (char *) malloc (strlen(s) + 1); // +1 for ending '\0'
if (p != NULL)
{strcpy(p, s);}
return p;
}
int getch(void);
void ungetch(int);
// get next word or character from input
int getword(char *word, int lim)
{
int c;
char *w = word;
while (isspace(c = getch()))
{} // skip beginning whitespace
if (c != EOF)
{*w++ = c;}
if (!isalpha(c) && c != '_')
{
*w = '\0';
return c;
}
for ( ; --lim > 0; w++)
{
if (!isalnum(*w = getch()) && *w != '_')
{
ungetch(*w);
break; // out of for(), w++ is not executed
}
}
*w = '\0';
return word[0];
}
// buffer for ungetch():
int buf = EOF-1; // not a real character, not even EOF
int getch(void) // get a (possibly pushed-back) character
{
if (buf < EOF)
{
return getchar();
}
int temp = buf; // buf >= EOF
buf = EOF-1; // reset buf
return temp;
}
// push character back on input (make it available for the next getch()):
void ungetch(int c)
{
buf = c;
}
/*
gcc freq.c -o freq
./freq < freq.c
(48: p)
(35: struct)
(21: fnode)
(19: int, tnode)
(18: left)
(17: right, word)
(16: char, a, c, frequency)
(15: if, w)
(13: for, count, node, tree)
(12: fr, words)
(11: NULL, of, return, void)
(9: EOF, root, the, t)
(8: buf, i, s)
(7: allocate, else, make)
(6: h, addtnode, addfnode, addAllNodes, getch,
nw, printftree, with, to, tr)
(5: include, child, character, freq, printf,
into, malloc, no, new, space,
subtree, than, ungetch)
(4: MAXWORD, _, cond, falloc, from,
getword, isalpha, is, nalloc, not,
same, string, strDup)
(3: MAXNOWORDS, bigger, add, arrived, back,
build, define, existing, duplicate, getchar,
get, greater, has, isspace, isalnum,
input, max, lim, n, need,
next, number, occurrences, order, on,
repeated, smaller, sizeof, strcmp, strcpy,
strlen, temp, while)
(2: ctype, assume, beginning, available, break,
buffer, by, byte, declared, d,
different, ending, executed, even, gcc,
here, in, stdio, initialize, length,
it, main, less, o, points,
or, out, pre, possibly, print,
putchar, pushed, push, real, reset,
size, skip, start, stdlib, stored,
strdup, text, traversal, we, whitespace)
*/