,
, , , , , , , , , ,
,
Others can be added to the string array at the top of the source file. It
is then necessary to recompile the source for the new tags to be included
in scans. */
import java.io.*;
/**
 * Walks a directory tree, scans every ".html" file found in it and reports
 * the tag-types whose start-tags and end-tags do not balance.
 *
 * Two text files are written to the current working directory:
 *   badtags.txt     - for each offending file, its path followed by the list
 *                     of tag-types whose start/end counts differ;
 *   table_tags.txt  - the path of every file containing a <table> start-tag.
 *
 * The scanner is deliberately simple: whatever lies between a '<' and the
 * next '>' is treated as a tag. HTML comments, scripts and quoted attribute
 * values receive no special treatment, so a '<' or '>' inside them is
 * interpreted like any other bracket.
 */
class tag_checker {

  /** Document base directory used when no command-line arguments are given. */
  private static final String DEFAULT_BASE_DIR = "/home/rob/Private";

  /** Sub-directory of the base directory used when no arguments are given. */
  private static final String DEFAULT_SUB_DIR = "website";

  /**
   * Directories whose full path ends with one of these strings are not
   * descended into. The match is a plain suffix test on the path.
   */
  private static final String[] SKIPPED_DIR_SUFFIXES = {
    "webtools", "images", "applets", "java_progs", "C-programs"
  };

  private static Writer
      bt,  // report of files with unbalanced tags ("badtags.txt")
      tb;  // list of files that contain a <table> tag ("table_tags.txt")

  /*
   * The 29 HTML tags that must have corresponding end-tags. By convention
   * they are kept longest-first and alphabetical within each length. Lookup
   * is by exact string equality, so the order does not affect the result of
   * a scan, only the order in which unmatched tags are listed in the report.
   */
  private static final String[] Tags = {
    "blockquote", "address", "strong", "lists", "small", "table", "title",
    "body", "cite", "code", "font", "head", "html", "nobr", "samp", "big",
    "div", "pre", "sub", "sup", "em", "h1", "h2", "h3", "h4", "tt", "b", "i", "u"
  };

  /**
   * Per-file balance for each entry of Tags: +1 for every start-tag seen and
   * -1 for every end-tag seen. Zero at end of file means the counts match.
   */
  private static final int[] TagNest = new int[Tags.length];

  /** Set by tagCapture() when a <table> start-tag is seen; cleared by scan(). */
  private static boolean TABLE = false;

  /** Full path of the HTML file that tagCapture() examines. */
  private static String fp;

  /**
   * Scans the file named by the static field fp and counts, for each
   * tag-type in Tags, start-tags minus end-tags.
   *
   * Tag names are compared case-insensitively, and anything from the first
   * whitespace character onwards (attributes) is ignored. As a side effect
   * TABLE is set to true if a <table> start-tag is encountered.
   *
   * A file that cannot be opened or read is reported on System.err; the
   * method then returns whatever the counts gathered so far imply, so one
   * bad file never aborts a whole scan.
   *
   * @return "" when every tracked tag-type balances (or fp is null);
   *         otherwise fp, a newline, the comma-separated list of unbalanced
   *         tag-types in Tags order, and two newlines.
   */
  static String tagCapture() {
    for (int i = 0; i < TagNest.length; i++) {
      TagNest[i] = 0;
    }
    if (fp == null) {
      return "";  // nothing to scan
    }

    Reader in = null;
    try {
      in = new BufferedReader(new FileReader(fp));
      countTags(in);
    } catch (IOException e) {
      System.err.println("tag_checker: cannot read " + fp + ": " + e.getMessage());
    } finally {
      if (in != null) {
        try {
          in.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if closing a file opened for reading fails.
        }
      }
    }

    StringBuilder unmatched = new StringBuilder();
    for (int i = 0; i < Tags.length; i++) {
      if (TagNest[i] != 0) {
        if (unmatched.length() > 0) {
          unmatched.append(", ");
        }
        unmatched.append('<').append(Tags[i]).append('>');
      }
    }
    if (unmatched.length() == 0) {
      return "";
    }
    return fp + "\n" + unmatched + "\n\n";
  }

  /**
   * Reads characters until end of input, capturing the text between each '<'
   * and the following '>' and handing it to recordTag(). The capture state is
   * local, so an unterminated '<' at the end of one file cannot leak into the
   * scan of the next.
   */
  private static void countTags(Reader in) throws IOException {
    boolean inTag = false;
    StringBuilder captured = new StringBuilder();
    int c;
    while ((c = in.read()) != -1) {
      if (c == '<') {
        // A new '<' always restarts the capture, discarding anything pending.
        inTag = true;
        captured.setLength(0);
      } else if (inTag) {
        if (c == '>') {
          inTag = false;
          recordTag(captured.toString());
        } else {
          captured.append((char) c);
        }
      }
    }
  }

  /**
   * Updates TABLE and TagNest for one captured tag, given the text found
   * between '<' and '>'. Tags not listed in Tags are ignored.
   */
  private static void recordTag(String rawTag) {
    String name = tagName(rawTag);
    if (name.equals("table")) {
      TABLE = true;
    }
    int delta = 1;  // +1 = start-tag, -1 = end-tag
    if (name.startsWith("/")) {
      name = name.substring(1);
      delta = -1;
    }
    for (int i = 0; i < Tags.length; i++) {
      if (name.equals(Tags[i])) {
        TagNest[i] += delta;
        break;
      }
    }
  }

  /**
   * Returns the lower-cased text of a captured tag up to, but excluding, its
   * first whitespace character. Any whitespace (space, tab, newline) ends the
   * name, because attributes may be separated from it by any of them.
   */
  private static String tagName(String rawTag) {
    int end = 0;
    while (end < rawTag.length() && !Character.isWhitespace(rawTag.charAt(end))) {
      end++;
    }
    // A fixed locale keeps "TITLE" -> "title" even under a Turkish default locale.
    return rawTag.substring(0, end).toLowerCase(java.util.Locale.ENGLISH);
  }

  /** Tells whether the directory at the given path is excluded from scanning. */
  private static boolean isSkippedDirectory(String path) {
    for (int i = 0; i < SKIPPED_DIR_SUFFIXES.length; i++) {
      if (path.endsWith(SKIPPED_DIR_SUFFIXES[i])) {
        return true;
      }
    }
    return false;
  }

  /**
   * Examines every entry of directory d. Sub-directories are scanned
   * recursively unless excluded; each regular file whose path ends in ".html"
   * is passed to tagCapture(), and its report (if any) is written to bt. A
   * file in which a <table> tag was seen has its path written to tb.
   *
   * A directory whose contents cannot be listed is reported on System.err
   * and skipped.
   *
   * @param d path of the directory to scan
   * @throws IOException if writing to either output file fails
   */
  private static void scan(String d) throws IOException {
    String[] entries = new File(d).list();
    if (entries == null) {  // list() returns null on an I/O error or a non-directory
      System.err.println("tag_checker: cannot list directory " + d);
      return;
    }
    for (int i = 0; i < entries.length; i++) {
      String path = d + "/" + entries[i];
      File entry = new File(path);
      if (entry.isDirectory()) {
        if (!isSkippedDirectory(path)) {
          scan(path);
        }
      } else if (entry.isFile() && path.endsWith(".html")) {
        fp = path;  // tagCapture() takes its input from this field
        String report = tagCapture();
        if (!report.equals("")) {
          bt.write(report);
        }
        if (TABLE) {
          TABLE = false;
          tb.write(path + '\n');
        }
      }
    }
  }

  /**
   * Entry point. With two or more arguments, args[0] is the document base
   * directory and args[1] the sub-directory within it to scan; with fewer,
   * the built-in defaults are used. Both output files are closed even if the
   * scan fails part-way.
   *
   * @param args optional: base directory, sub-directory
   * @throws Exception if an output file cannot be created or written
   */
  public static void main(String args[]) throws Exception {
    String bd = DEFAULT_BASE_DIR;
    String sd = DEFAULT_SUB_DIR;
    if (args != null && args.length >= 2) {
      bd = args[0];
      sd = args[1];
    }
    String d = bd + "/" + sd;

    if (!new File(d).isDirectory()) {
      System.out.println(d + " is not a directory.");
      return;
    }

    bt = new FileWriter("badtags.txt");
    try {
      tb = new FileWriter("table_tags.txt");
      try {
        scan(d);
      } finally {
        tb.close();
      }
    } finally {
      bt.close();
    }
  }
}