Appendix: Basic Programming Elements
Diomidis Spinellis
Department of Management Science and Technology
Athens University of Economics and Business
Athens, Greece
dds@aueb.gr
A Complete Program
/* $NetBSD: echo.c,v 1.7 1997/07/20 06:07:03 thorpej Exp $ */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#ifndef lint
__COPYRIGHT(
"@(#) Copyright (c) 1989, 1993\n\
The Regents of the University of California. All rights reserved.\n");
#endif /* not lint */
#ifndef lint
#if 0
static char sccsid[] = "@(#)echo.c 8.1 (Berkeley) 5/31/93";
#else
__RCSID("$NetBSD: echo.c,v 1.7 1997/07/20 06:07:03 thorpej Exp $");
#endif
#endif /* not lint */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int __Pmain ((int, char *[]));
main(argc, argv)
int argc;
char *argv[];
{
int nflag;
/* This utility may NOT do getopt(3) option parsing. */
if (*++argv && !strcmp(*argv, "-n")) {
++argv;
nflag = 1;
}
else
nflag = 0;
while (*argv) {
(void)printf("%s", *argv);
if (*++argv)
putchar(' ');
}
if (!nflag)
putchar('\n');
exit(0);
}
Important issues:
- Headers
- Old-style declarations vs.
int main(int argc, char *argv[]);
- getopt
- strcmp return value. Common idiom:
#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
- Java argv processing:
if (isConfig) {
configFile = args[i];
isConfig = false;
} else if (args[i].equals("-config")) {
isConfig = true;
} else if (args[i].equals("-debug")) {
debug = true;
} else if (args[i].equals("-nonaming")) {
- Variable initialization (nflag)
- printf and format specifications
- printf and putchar return value
if (ferror(stdout))
err(1, "stdout");
Expand: A Larger Example
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
/*
* expand - expand tabs to equivalent spaces
*/
int nstops;
int tabstops[100];
static void getstops __P((char *));
int main __P((int, char **));
static void usage __P((void));
int
main(argc, argv)
int argc;
char *argv[];
{
int c, column;
int n;
/* handle obsolete syntax */
while (argc > 1 && argv[1][0] && isdigit(argv[1][1])) {
getstops(&argv[1][1]);
argc--; argv++;
}
while ((c = getopt (argc, argv, "t:")) != -1) {
switch (c) {
case 't':
getstops(optarg);
break;
case '?':
default:
usage();
/* NOTREACHED */
}
}
argc -= optind;
argv += optind;
do {
if (argc > 0) {
if (freopen(argv[0], "r", stdin) == NULL) {
perror(argv[0]);
exit(1);
}
argc--, argv++;
}
column = 0;
while ((c = getchar()) != EOF) {
switch (c) {
case '\t':
if (nstops == 0) {
do {
putchar(' ');
column++;
} while (column & 07);
continue;
}
if (nstops == 1) {
do {
putchar(' ');
column++;
} while (((column - 1) % tabstops[0]) != (tabstops[0] - 1));
continue;
}
for (n = 0; n < nstops; n++)
if (tabstops[n] > column)
break;
if (n == nstops) {
putchar(' ');
column++;
continue;
}
while (column < tabstops[n]) {
putchar(' ');
column++;
}
continue;
case '\b':
if (column)
column--;
putchar('\b');
continue;
default:
putchar(c);
column++;
continue;
case '\n':
putchar(c);
column = 0;
continue;
}
}
} while (argc > 0);
exit(0);
}
static void
getstops(cp)
char *cp;
{
int i;
nstops = 0;
for (;;) {
i = 0;
while (*cp >= '0' && *cp <= '9')
i = i * 10 + *cp++ - '0';
if (i <= 0 || i > 256) {
bad:
fprintf(stderr, "Bad tab stop spec\n");
exit(1);
}
if (nstops > 0 && i <= tabstops[nstops-1])
goto bad;
tabstops[nstops++] = i;
if (*cp == 0)
break;
if (*cp != ',' && *cp != ' ')
goto bad;
cp++;
}
}
static void
usage()
{
(void)fprintf (stderr, "usage: expand [-t tablist] [file ...]\n");
exit(1);
}
Declarations and visibility
int nstops;
int tabstops[100];
static void getstops (char *);
int main (int, char **);
static void usage(void);
Issues:
- Global functions and variables
- Declare before use
- Use of the static keyword
Functions and Global Variables
Functions can help you identify a program's major parts.
To find what a function does:
Guess, based on the function name.
Read the comment at the beginning of the function.
Examine how the function is used.
Read the code in the function body.
Consult external program documentation.
while Loops, Conditions, and Blocks
while ((c = getopt (argc, argv, "t:")) != -1) {
switch (c) {
case 't':
getstops(optarg);
break;
case '?':
default:
usage();
/* NOTREACHED */
}
}
Issues:
switch Statements
- Example: expand getopt processing
- The use of fallthrough:
case 'a':
fts_options |= FTS_SEEDOT;
/* FALLTHROUGH */
case 'A':
f_listdot = 1;
break;
- Defensive handling of default statements:
switch (program) {
case ATQ:
/* [...] */
case BATCH:
writefile(time(NULL), 'b');
break;
default:
panic("Internal error");
break;
}
for Loops
- Typical case:
for (i = 0; i < len; i++) {
- Atypical cases
-
for ( i = 0; i <= extrknt; i++ )
-
for (i = 1; i < month; i++)
-
for (i = 1; i <= nargs; i++)
-
for (code = 255; code >= 0; code--) {
- Library function loops:
if ((dd = opendir(dir)) == NULL)
return (CC_ERROR);
for (dp = readdir(dd); dp != NULL; dp = readdir(dd)) {
- Use of multiple expressions:
for (cnt = 1, t = p; cnt <= cnt_orig; ++t, ++cnt) {
- Infinite loop:
for (;;) {
s = (state_t) (*s)();
quiet = 0;
}
Loop Control Statements
- break: exit loop
- continue: restart loop
- Perl equivalents: last, next
- continue as placeholder
for (; *string && isdigit(*string); string++)
continue;
- Java's labeled loops and control statements:
skip:
for ( /* [...] */ ) {
if ( ch == limit.charAt(0) ) {
for (int i = 1 ; i < limlen ; i++) {
if ( /* [...] */ )
continue skip;
}
return ret;
}
}
- Used for clarity:
comp : while(prev < length) {
/* [...] */
if (pos >= length || pos == -1) {
/* [...] */
break comp;
}
}
Character Expressions
- Range membership:
while ('0' <= *cp && *cp <= '9')
- Case conversion:
if ('a' <= *s && *s <= 'z')
*s -= ('a' - 'A');
- Both examples above illustrate non-portable code.
- A more complex example:
i = 0;
while (*cp >= '0' && *cp <= '9')
i = i * 10 + *cp++ - '0';
Boolean Expressions
goto Statements
- Can lead to spaghetti code
- Not supported in Java
- Used to exit after an error:
for (;;) {
i = 0;
while (*cp >= '0' && *cp <= '9')
i = i * 10 + *cp++ - '0';
if (i <= 0 || i > 256) {
bad:
fprintf(stderr, "Bad tab stop spec\n");
exit(1);
}
if (nstops > 0 && i <= tabstops[nstops-1])
goto bad;
tabstops[nstops++] = i;
if (*cp == 0)
break;
if (*cp != ',' && *cp != ' ')
goto bad;
cp++;
}
- Execute code again:
struct servent *
getservent()
{
char *p;
register char *cp, **q;
if (servf == NULL && (servf = fopen(_PATH_SERVICES, "r" )) == NULL)
return (NULL);
again:
if ((p = fgets(line, BUFSIZ, servf)) == NULL)
return (NULL);
if (*p == '#')
goto again;
cp = strpbrk(p, "#\n");
if (cp == NULL)
goto again;
*cp = '\0';
serv.s_name = p;
p = strpbrk(p, " \t");
if (p == NULL)
goto again;
*p++ = '\0';
while (*p == ' ' || *p == '\t')
p++;
cp = strpbrk(p, ",/");
- Exit from deep control structures (instead of break):
for (;;) {
/* [...] */
/* Gather incoming message bytes if needed. */
if ((sc->sc_state & NCR_DROP_MSGIN) == 0) {
if (n >= NCR_MAX_MSG_LEN) {
ncr_sched_msgout(sc, SEND_REJECT);
sc->sc_state |= NCR_DROP_MSGIN;
} else {
*sc->sc_imp++ = *sc->sci_data;
n++;
/*
* This testing is suboptimal, but most
* messages will be of the one byte variety, so
* it should not affect performance
* significantly.
*/
if (n == 1 && IS1BYTEMSG(sc->sc_imess[0]))
goto have_msg;
if (n == 2 && IS2BYTEMSG(sc->sc_imess[0]))
goto have_msg;
if (n >= 3 && ISEXTMSG(sc->sc_imess[0]) &&
n == sc->sc_imess[1] + 2)
goto have_msg;
}
}
/* [...] */
/* back to nextbyte */
}
have_msg:
/* We now have a complete message. Parse it. */
Refactoring in the Small
- Keep code's functional properties, while improving non-functional
properties
- Readability
- Maintainability
- Portability
- Reliability
- Can be performed in the large (architecture), or in the small (code elements)
Illustrative Example
if (*cp == 0)
break;
if (*cp != ',' && *cp != ' ')
goto bad;
cp++;
Changed into:
if (*cp == 0)
break;
else if (*cp != ',' && *cp != ' ')
goto bad;
else
cp++;
Refactoring Example
Original Code
(worms game)
op = &(!x ? (!y ? upleft : (y == bottom ? lowleft : left)) :
(x == last ? (!y ? upright : (y == bottom ? lowright : right)) :
(!y ? upper : (y == bottom ? lower : normal))))[w->orientation];
What is going one here?
More Readable Representation
(Formatted as cascading if-else)
op = &(
!x ? (
!y ?
upleft
: ( y == bottom ?
lowleft
:
left
)
) : ( x == last ? (
!y ?
upright
: ( y == bottom ?
lowright
:
right
)
) : (
!y ?
upper
: ( y == bottom ?
lower
:
normal
)
)
))[w->orientation];
Re-implementation
struct options *locations[3][3] = {
{upleft, upper, upright},
{left, normal, right},
{lowleft, lower, lowright},
};
int xlocation, ylocation;
if (x == 0)
xlocation = 0;
else if (x == last)
xlocation = 2;
else
xlocation = 1;
if (y == 0)
ylocation = 0;
else if (y == bottom)
ylocation = 2;
else
ylocation = 1;
op = &(locations[ylocation][xlocation])[w];
Other Implementation
(Suggested by Guy Steele)
op =
&( !y ? (!x ? upleft : x!=last ? upper : upright ) :
y!=bottom ? (!x ? left : x!=last ? normal : right ) :
(!x ? lowleft : x!=last ? lower : lowright )
)[w->orientation];
Other Refactoring Changes
- Add brackets in expressions
- Add missing comments
- But "don't comment bad code, rewrite it"
- Use better variable names
- Fix/improve indentation
- The indent program may be helpful (apply with care)
- Beware of forks; do it only on code you own
- Use a version control system
- Separate style commits from functional commits
Assignment Expressions
- Assignment in an if statement:
if ((p = q))
q[-1] = '\n';
- Avoided with explicit comparison:
if ((p = strchr(name, '=')) != NULL) {
p++;
- Made explicit with a non-modifiable lvalue:
if (0 == serconsole)
serconsinit = 0;
- In Java and C# the above matter only for boolean expressions
Bit Manipulation
- Example:
do {
putchar(' ');
column++;
} while (column & 07);
Rule:
a & b /* is sometimes used for */ a % (b + 1) /* b = 2**n - 1 */
- Example:
n = ((dp - cp) << 2) + 1; /* 4 times + NULL */
Rule:
a << n /* is sometimes used for */ a * k /* k = 2**n */
- Example:
bp = bp1 + ((bp2 - bp1) >> 1);
Rule:
a >> n /* is sometimes used for */ a / k /* k = 2**n */
Reading Control Structures
In structured programs:
- Treat the contents of a loop as a separate block.
Read
while (xenum.hasMoreElements()) {
/* [...] */
if (object instanceof Resource) {
/* [...] */
if (!copy(is, os))
/* [...] */
} else if (object instanceof InputStream) {
/* [...] */
if (!copy((InputStream) object, os))
/* [...] */
} else if (object instanceof DirContext) {
/* [...] */
}
}
as
while (xenum.hasMoreElements()) {
/* [...] */
}
- In a body of a structure, consider its controlling
expression as an asserion
if (!links.contains(next)) {
links.add(next);
}
- The above may not hold for sequences that include
- return
- goto
- break
- continue
- exceptions
Further Reading
- Edsger Wybe Dijkstra.
Go to statement considered harmful.
Communications of the ACM, 11(3):147–148, March 1968.
- Martin Fowler.
Refactoring: Improving the Design of Existing Code.
Addison-Wesley, Boston, MA, 2000.
With contributions by Kent Beck, John Brant, William Opdyke, and Don Roberts.
- C. A. R. Hoare.
Proof of a program: Find.
Communications of the ACM, 14(1):39–45, January 1971.
- Brian W. Kernighan
and P. J. Plauger.
The
Elements of Programming Style.
McGraw-Hill, New York, second edition, 1978.
- Brian W. Kernighan
and Dennis M. Ritchie.
The C
Programming Language.
Prentice-Hall, Englewood Cliffs, NJ, second edition, 1988.
- Donald E. Knuth.
The
Art of Computer Programming, volume 3: Sorting and Searching.
Addison-Wesley, Reading, MA, second edition, 1998.
- Peter Van Der Linden.
Expert
C Programming, pages 226–231.
Prentice-Hall, 1994.
- Richard J. Miara,
Joyce A. Musselman, Juan A. Navarro, and Ben Shneiderman.
Program indentation and comprehensibility.
Communications of the ACM, 26(11):861–867, 1983.
- Microsoft
Corporation.
Microsoft C# Language Specifications.
Microsoft Press, Redmond, WA, 2001.
- Paul W. Oman and Curtis R.
Cook.
Typographic style is more than cosmetic.
Communications of the ACM, 33(5):506–520, May 1990.
- Dennis M. Ritchie.
The C programming language—reference manual.
In Unix Programmer's Manual [Unix Programmer's Manual, 1979].
Also available online http://plan9.bell-labs.com/7thEdMan/.
- Diomidis Spinellis.
Code Reading: The Open
Source Perspective, pages 19–60.
Effective Software Development Series. Addison-Wesley, Boston, MA, 2003.
- Bjarne Stroustrup.
The
C++ Programming Language.
Addison-Wesley, Reading, MA, third edition, 1997.
- Ted Tenny.
Program readability: Procedures versus comments.
IEEE Transactions on Software Engineering, 14(9):1271–1279,
September 1988.
- Edward J. Thomas and
Paul W. Oman.
A bibliography of programming style.
ACM SIGPLAN Notices, 25(2):7–16, February 1990.
- UNIX
Programmer's Manual. Volume 2—Supplementary Documents.
Bell Telephone Laboratories, Murray Hill, NJ, seventh edition, 1979.
Also available online http://plan9.bell-labs.com/7thEdMan/.
- Larry Wall, Tom
Christiansen, Randal L. Schwartz, and Stephen Potter.
Programming Perl.
O'Reilly and Associates, Sebastopol, CA, third edition, 2000.
Exercises and Discussion Topics
-
Experiment to find-out how your C, C++, and Java compilers deal with
uninitialized variables.
Outline your results and propose an inspection procedure for
locating uninitialized variables.
-
Look in the course's source code repository for programs
that do not verify the result of library calls.
Propose practical fixes.
-
Examine the visibility of functions and variables in programs in your
environment.
Can it be improved (made more conservative)?
-
Discover how the editor you are using can identify matching braces and parentheses.
If it can not, consider to start using another editor.
-
Provide five examples from the course's source code collection
where the code structure can be improved to make it more readable.
-
You can find tens of intentionally unreadable C programs at the
International Obfuscated C Code Contest web site (http://www.ioccc.org).
Most of them use several layers of obfuscation to hide their algorithms.
See how gradual code changes can help you untangle their code.
If you are not familiar with the C preprocessor try to avoid programs
with a large number of #define lines.
-
The Perl language mandates the use of braces for all its control
structures.
Comment on how this affects the readability of Perl programs.
-
The code body of switch statements in the course's source code
collection is formatted differently from the other statements.
Express the formatting rule used, and explain its rationale.
-
The for statement in the C language family is very flexible.
Examine the source code provided to create a list of ten different uses.
-
Locate ten occurrences of break and continue in the source
code provided with the course.
For each case indicate the point where execution will transfer
after the corresponding statement is executed, and explain why
the statement is used.
Do not try to understand in full the logic of the code;
simply provide an explanation based on the statement's use pattern.
-
Find, simplify, and reason about five non-trivial boolean expressions in
the source-code base.
Do not spend time on understanding what the expression elements mean,
concentrate on the conditions that will make the expression become true
or false.
Where possible, identify and utilize the properties of short-circuit evaluation.
-
Study Section 2.11 from the book.
Provide a proof about the correctness of a sort algorithm,
found in the course's soruce repository.