/**
* Web Site Indexer.
* @author Robert John Morton
* @version 29 June 2009, adapted for CSS version of website 11/09/2018 */
/* This program does NOT generate the java search engine index.
Look in the folder 'search-engine' for this.
This program creates a file articles_index.html containing a list of
hyperlinked titles and descriptions for all the chapter and article
files in the website.
This program excludes files that carry a robots "noindex" meta tag
from articles_index.html and instead writes them to the file
articles_noindex.html
It also produces standard sitemap.xml and robots.txt files for web site.
Sample command line entries:
to output in file website_index.htm
java website_indexer /home/rob/website
to include the HTML description under each title
output in website_descriptions.htm
java website_indexer /home/rob/website descriptions */
import java.io.*;
import java.util.*;
import java.text.SimpleDateFormat;
class articles_indexer {
/* Pattern used by now() to format the current date. */
public static final String DATE_FORMAT_NOW = "yyyy-MM-dd";

/* Counters shared by the scanning, sorting and output methods. */
static int
    dir_level = 0,   // current directory depth inside scan(); 0 = base directory
    dl,              // length of base directory path name + terminating '/'
    arts = 0,        // number of article entries held in the parallel arrays
    pdfs = 0,        // number of entries held in PDF[] and PDM[]
    frms = 0,        // counter for frame, title and contents files
    pt = 0,          // number of entries held in PT[]
    ru = 0,          // number of entries held in RU[]
    accepts_en = 0,  // list number of the latest entry in articles_index
    accepts_pt = 0,  // list number of the latest entry in articles_index_br
    accepts_ru = 0,  // list number of the latest entry in articles_index_ru
    rejects = 0;     // list number of the latest entry in articles_noindex

/* FL holds the length of the longest HTML file seen so far by scan();
   fl is scratch space for the length of the file being examined. */
static long FL = 0, fl = 0;

/* Output streams; each is opened by the method that writes the file. */
static Writer
    fslist_en,  // for articles_index.html
    fslist_pt,  // for articles_index_br.html
    fslist_ru,  // for articles_index_ru.html
    fsreject,   // for noindex articles
    fsshort,    // for shortlist files
    sitemap,    // for sitemap.xml
    robots;     // robots.txt

static String
    /* Top-level folders that scan() is allowed to descend into. */
    FOLDERS[] = {
        "book","chaos","computers","home","internet","landshare",
        "navigation","poems","radio","science","software"
    },
    MONTHS[] = {
        "Jan","Feb","Mar","Apr","May","Jun",
        "Jul","Aug","Sep","Oct","Nov","Dec"
    },
    siteURL = "https://robmorton.website/",
    SM[] = new String[1000],   // string array in which to build site map index
    A[] = new String[1000],    // articles' filespecs relative to the base directory
    T[] = new String[1000],    // articles' titles
    Q[] = new String[1000],    // articles' descriptions
    PT[] = new String[1000],   // filespecs of files written in Portuguese
    RU[] = new String[1000],   // filespecs of files written in Russian
    PDF[] = new String[400],   // filenames of the PDF articles
    Descr = "",                // description of the HTML file being examined
    bd;                        // base directory of the website

/* A, T, Q, DM, B, P and R are parallel arrays: element i of each one
   describes the same article. */
static long
    DM[] = new long[1000],     // articles' last-modified dates
    /* Last-modified dates of the PDF files. listPDFs() fills PDF[] and
       PDM[] with the same index, so both must have the same capacity
       (PDM was previously 300 against PDF's 400). */
    PDM[] = new long[400];

static boolean
    GotDescription = false,    // set by isDescriptionTag() for the current file
    GotTheNoIndexTag = false,  // set by isNoIndexTag() for the current file
    B[] = new boolean[1000],   // array of noindex flags
    P[] = new boolean[1000],   // array of Portuguese flags
    R[] = new boolean[1000];   // array of Russian flags
/* HTML HEADER CODING FOR THE FILE: articles_index.html WHICH LISTS ALL THE
ARTICLES WRITTEN IN ENGLISH. Called from only one place in artIdx().
Writes one concatenated string (page title plus an introductory paragraph)
to the fslist_en writer, which artIdx() opens before calling this method.
Throws IOException if the write fails.
NOTE(review): several literals below contain nothing but "\n" and two are
split across physical lines, which is not valid Java. It looks as though
the HTML markup that used to sit inside them has been lost - restore it
from the master copy before compiling. */
private static void EnglishHeader() throws IOException {
fslist_en.write(
"\n"
+ "
\n"
+ "\n"
+ "\n"
+ "Index to Book, Articles & Essays by Robert John Morton\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n\n"
+ "\n\n"
+ "\n\n"
+ "
\n\n"
+ "Browse the article titles and summaries or use your browser's FIND facility to search the list for what you want. Alternatively, use this website's dedicated meta-tag search engine, which, specifically for this website, is vastly more precise than the mainstream public search engines.\n"
+ "
\n"
);
}
/* HTML HEADER CODING FOR THE FILE: articles_index_br.html WHICH LISTS ALL
THE ARTICLES WRITTEN IN PORTUGUESE. Called from 1 place in artIdx().
Writes one concatenated string (page title plus an introductory paragraph
in Portuguese) to the fslist_pt writer, which artIdx() opens before calling
this method. Throws IOException if the write fails.
NOTE(review): several literals below contain nothing but "\n" and two are
split across physical lines, which is not valid Java. It looks as though
the HTML markup that used to sit inside them has been lost - restore it
from the master copy before compiling. */
private static void PortugueseHeader() throws IOException {
fslist_pt.write(
"\n"
+ "\n"
+ "\n"
+ "\n"
+ "Índice para livro, artigos e ensaios por Robert John Morton\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n\n"
+ "\n\n"
+ "\n\n"
+ "
\n\n"
+ "Navegue pelos títulos e resumos dos artigos ou use a facilidade FIND do seu navegador para pesquisar na lista o que deseja. Como alternativa, use a meta tag dedicada do site buscador, que, especificamente para este site, é muito mais preciso do que os principais motores de busca públicos.\n\n"
+ "
\n"
);
}
/* HTML HEADER CODING FOR THE FILE: articles_index_ru.html WHICH LISTS ALL
THE ARTICLES WRITTEN IN RUSSIAN. Called from 1 place in artIdx().
Writes one concatenated string (page title plus an introductory paragraph
in Russian) to the fslist_ru writer, which artIdx() opens before calling
this method. Throws IOException if the write fails.
NOTE(review): several literals below contain nothing but "\n" and two are
split across physical lines, which is not valid Java. It looks as though
the HTML markup that used to sit inside them has been lost - restore it
from the master copy before compiling. */
private static void RussianHeader() throws IOException {
fslist_ru.write(
"\n"
+ "\n"
+ "\n"
+ "\n"
+ "Указатель к книгам и эссе Роберта Джона Мортона\n"
+ "\n"
+ "\n"
+ "\n"
+ "\n\n"
+ "\n\n"
+ "\n\n"
+ "
\n\n"
+ "Просмотрите заголовки и резюме эссе или используйте функцию НАЙТИ вашего браузера, чтобы найти в списке то, что вы хотите. В качестве альтернативы используйте специальный метатег этого веб-сайта.поисковый движок, который, особенно для этого веб-сайта, намного точнее, чем основные общедоступные поисковые системы.\n"
+ "
\n"
);
}
/* HTML HEADER CODE FOR THE FILE articles_noindex.html WHICH LISTS ALL THE
NON-INDEXABLE FILES. Called from only one place in artIdx().
Writes the heading text to the fsreject writer, which artIdx() opens before
calling this method. Throws IOException if the write fails.
NOTE(review): the first literal is empty and the last one is split across
physical lines, which is not valid Java. It looks as though the HTML markup
that used to sit inside them has been lost - restore it from the master
copy before compiling. */
private static void Reject() throws IOException {
fsreject.write(
""
+ "Articles with noindex Meta Tag"
+ "
\n"
);
}
/* COMPILE A LIST OF THE PDF ARTICLE FILE NAMES
   Looks in <base directory>/articles-pdf and, for every regular file whose
   name ends in ".pdf" or ".png", stores the file name in PDF[] and its
   last-modified time in PDM[] at the same index, advancing pdfs.
   A missing or unreadable directory is reported on System.err and leaves
   the list unchanged instead of failing with a NullPointerException.
   Throws IllegalStateException if more files are found than the arrays
   can hold.
   The only reference to this method in main() is currently commented out. */
private static void listPDFs() throws IOException {
    String d = bd + "/articles-pdf";
    String[] names = new File(d).list();

    // File.list() returns null when d is not a directory or cannot be read.
    if (names == null) {
        System.err.println("Cannot list PDF directory: " + d);
        return;
    }

    // Both arrays are filled with the same index, so the smaller one rules.
    int capacity = Math.min(PDF.length, PDM.length);

    for (int i = 0; i < names.length; i++) {
        String name = names[i];                 // file name within the directory
        File fs = new File(d + "/" + name);     // file object for its full path
        boolean wanted = name.endsWith(".pdf") || name.endsWith(".png");
        if (fs.isFile() && wanted) {
            if (pdfs >= capacity) {
                throw new IllegalStateException(
                    "More than " + capacity + " PDF/PNG files in " + d
                    + "; enlarge the PDF and PDM arrays");
            }
            PDF[pdfs] = name;
            PDM[pdfs++] = fs.lastModified();
        }
    }
}
/* CHECK TO SEE IF THE CAPTURED TAG IS A DESCRIPTION META TAG.
   Tag is the text found between '<' and '>'. Once a description has been
   captured for the current file this method does nothing. Otherwise, if
   the tag text starts with "meta", contains "description" and contains
   "content", the text between the first pair of double quotes following
   "content" is stored in Descr and GotDescription is set - but only when
   that text is not empty.
   Called from only one place in HTMLtitle(). */
static void isDescriptionTag(String Tag) {
    if (GotDescription) {
        return;
    }
    if (!Tag.startsWith("meta") || !Tag.contains("description")) {
        return;
    }

    int attr = Tag.indexOf("content");
    if (attr == -1) {
        return;
    }

    // Skip the 7 letters of "content", then step past the opening quote.
    int open = Tag.indexOf('\"', attr + 7) + 1;
    int close = Tag.indexOf('\"', open);

    // close > open means a closing quote exists and encloses some text.
    if (close > open) {
        Descr = Tag.substring(open, close);
        GotDescription = true;
    }
}
/* CHECK TO SEE IF THE CAPTURED TAG IS A noindex META TAG
   Tag is the text found between '<' and '>'. Sets GotTheNoIndexTag when
   the tag text starts with "meta" and contains both "robots" and
   "noindex". Once the flag is set, later calls leave it set.
   Called from only one place in HTMLtitle(). */
static void isNoIndexTag(String Tag) {
    if (GotTheNoIndexTag) {
        return;                     // already found for this file
    }
    boolean isMeta = Tag.startsWith("meta");
    boolean namesRobots = Tag.contains("robots");
    boolean saysNoIndex = Tag.contains("noindex");
    GotTheNoIndexTag = isMeta && namesRobots && saysNoIndex;
}
/* EXAMINE THE CONTENTS OF THE HTML FILE
   Reads the file fp character by character and returns the text found
   between the "title" and "/title" tags, less one leading and one trailing
   newline. As side effects it resets and then fills the static fields
   Descr, GotDescription and GotTheNoIndexTag for this file: each tag met
   is passed to isDescriptionTag() and isNoIndexTag(), and if no
   description has been captured by the time "/head" is reached, Descr
   becomes "No Description".
   If the file cannot be read, a warning goes to System.err and an empty
   title is returned, so the caller still lists the file.
   Called from only one place in scan(). */
static String HTMLtitle(String fp) {
    StringBuilder tag = new StringBuilder();    // text of the tag being read
    StringBuilder title = new StringBuilder();  // text collected inside the title
    boolean inTag = false;                      // true between '<' and '>'
    boolean inTitle = false;                    // true between title and /title

    Descr = "";                                 // clear per-file results
    GotDescription = false;
    GotTheNoIndexTag = false;

    Reader in = null;
    try {
        in = new BufferedReader(new FileReader(fp));
        int x;
        while ((x = in.read()) != -1) {
            char c = (char) x;
            if (c == '<') {
                inTag = true;
            } else if (c == '>') {
                String t = tag.toString();
                if (t.equals("title")) {
                    inTitle = true;
                } else if (t.equals("/title")) {
                    inTitle = false;
                } else if (t.equals("/head")) {
                    // End of the head section: supply the fallback text.
                    if (Descr.equals("")) {
                        Descr = "No Description";
                    }
                } else {
                    isDescriptionTag(t);
                    isNoIndexTag(t);
                }
                tag.setLength(0);               // ready for the next tag
                inTag = false;
            } else if (inTag) {
                tag.append(c);
            } else if (inTitle) {
                title.append(c);
            }
        }
    } catch (IOException e) {
        // Best effort: report the problem and index the file without a title.
        System.err.println("Could not read " + fp + ": " + e);
        title.setLength(0);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // nothing useful can be done if close fails
            }
        }
    }

    /* Cut out a possible leading and trailing '\n'. startsWith/endsWith
       are used so that a title consisting only of "\n", or one with a
       newline in the middle, is handled correctly. */
    String result = title.toString();
    if (result.startsWith("\n")) {
        result = result.substring(1);
    }
    if (result.endsWith("\n")) {
        result = result.substring(0, result.length() - 1);
    }
    return result;
}
/* This method is recursive. It lists the entries of directory d and
   examines each one.
   - A sub-directory is descended into when shouldDescend() accepts it.
   - A regular file below the base directory whose name ends in ".html" is
     recorded as an article unless its path relative to the base directory
     contains "index" or "contents", or it is error404.html or
     error404_br.html. For a recorded file the title, description, noindex
     flag, last-modified date, relative filespec and language flags are
     stored at index arts of the parallel arrays, and _br / _ru files are
     also appended to PT[] / RU[].
   The filter is applied to the relative filespec so that a base directory
   whose own path happens to contain "index" or "contents" does not exclude
   every file.
   A directory that cannot be listed is reported on System.err and skipped.
   Throws IllegalStateException when more articles are found than the
   arrays can hold.
   Called only by itself and from one place in main(). */
private static void scan(String d) throws IOException {
    String[] entries = new File(d).list();

    // File.list() returns null when d cannot be read as a directory.
    if (entries == null) {
        System.err.println("Cannot list directory: " + d);
        return;
    }

    for (int i = 0; i < entries.length; i++) {
        String dd = entries[i];          // name of next sub-directory or file
        String fp = d + "/" + dd;        // its full path name
        File fs = new File(fp);

        if (fs.isDirectory()) {
            if (shouldDescend(dd)) {
                dir_level++;             // step down one level, recurse,
                scan(fp);
                dir_level--;             // and step back up again
            }
            continue;
        }

        // Files sitting directly in the base directory are never articles.
        if (dir_level == 0 || !fs.isFile() || !fp.endsWith(".html")) {
            continue;
        }

        // Filespec as seen from the base directory.
        String rfs = fp.substring(dl, fp.length());

        // "index" also covers every name containing "noindex".
        if (rfs.indexOf("index") != -1
                || rfs.indexOf("contents") != -1
                || rfs.endsWith("error404.html")
                || rfs.endsWith("error404_br.html")) {
            continue;
        }

        if (arts >= A.length) {
            throw new IllegalStateException(
                "More than " + A.length + " articles found; "
                + "enlarge the article arrays");
        }

        // Track the longest HTML file.
        fl = fs.length();
        if (fl > FL) {
            FL = fl;
        }

        /* HTMLtitle() also sets GotTheNoIndexTag and Descr for this
           file, so it must run before they are copied below. */
        T[arts] = HTMLtitle(fp);
        B[arts] = GotTheNoIndexTag;
        DM[arts] = fs.lastModified();
        Q[arts] = Descr;
        A[arts] = rfs;

        // The file name suffix decides the language; default is English.
        boolean portuguese = dd.endsWith("_br.html");
        boolean russian = !portuguese && dd.endsWith("_ru.html");
        P[arts] = portuguese;
        R[arts] = russian;
        if (portuguese) {
            PT[pt++] = rfs;
        } else if (russian) {
            RU[ru++] = rfs;
        }
        arts++;
    }
}

/* Decide whether scan() should enter the sub-directory called name. In the
   base directory only the folders named in FOLDERS qualify; below it every
   folder does. Folders whose names contain "images", "applets",
   "C-programs" or "java_progs" are never entered. */
private static boolean shouldDescend(String name) {
    boolean wanted = (dir_level != 0);
    if (!wanted) {
        for (int j = 0; j < FOLDERS.length; j++) {
            if (name.equals(FOLDERS[j])) {
                wanted = true;
                break;
            }
        }
    }
    return wanted
        && name.indexOf("images") == -1
        && name.indexOf("applets") == -1
        && name.indexOf("C-programs") == -1
        && name.indexOf("java_progs") == -1;
}
// Called from only one place in artIdx().
static String mhld(String L, String D, int x) {
String a = "";
if(x < 10) a = "00";
else if(x < 100) a = "0";
/* Hyperlink title between definition title tags
file description between definition detail tags. */
return "
" + a + x + " " + L + "
\n"
+ "
"
+ D + "
\n";
}
/* Make filespec into a hyperlink.
   s is the link target and T the text to show; when T is empty the target
   itself is used as the link text. Returns the anchor element enclosing
   that text. Previously s never reached the result, so no link was
   produced; the anchor markup had been lost from the literals.
   Neither argument is escaped: callers pass site-relative filespecs and
   title text taken from the site's own HTML files.
   Called from artIdx() and shrtlst(). */
static String mhl(String s, String T) {
    String text = T.equals("") ? s : T;
    return "<a href=\"" + s + "\">" + text + "</a>";
}
/* C A R HOARE'S QUICK SORT, KEYED ON THE ARTICLE TITLES IN T[].
   Sorts elements LO..HI (inclusive) of T into ascending String.compareTo
   order and carries the matching elements of A, Q, B, P, R and DM along,
   so the parallel arrays stay aligned. Does nothing when HI <= LO.
   The method is recursive.
   Called by itself and from one place in main(). */
static void qs(int LO, int HI) throws IOException {
    if (HI <= LO) {
        return;                                  // nothing to sort
    }

    int left = LO;                               // scans upward from LO
    int right = HI;                              // scans downward from HI
    String pivot = T[(LO + HI) >> 1];            // title of the middle element

    while (left <= right) {
        // Skip elements already on the correct side of the pivot.
        while (left < HI && T[left].compareTo(pivot) < 0) {
            left++;
        }
        while (right > LO && T[right].compareTo(pivot) > 0) {
            right--;
        }

        if (left <= right) {
            // Exchange the two entries in every parallel array.
            String str;
            str = T[left]; T[left] = T[right]; T[right] = str;
            str = A[left]; A[left] = A[right]; A[right] = str;
            str = Q[left]; Q[left] = Q[right]; Q[right] = str;

            boolean flag;
            flag = B[left]; B[left] = B[right]; B[right] = flag;
            flag = P[left]; P[left] = P[right]; P[right] = flag;
            flag = R[left]; R[left] = R[right]; R[right] = flag;

            long stamp = DM[left]; DM[left] = DM[right]; DM[right] = stamp;

            left++;
            right--;
        }
    }

    if (LO < right) {
        qs(LO, right);                           // sort lower partition
    }
    if (left < HI) {
        qs(left, HI);                            // sort upper partition
    }
}
/* C A R HOARE'S QUICK SORT, KEYED ON THE ARTICLE FILESPECS IN A[].
   Sorts elements LO..HI (inclusive) of A into ascending String.compareTo
   order and carries the matching elements of every other parallel array
   (T, Q, DM, B, P and R) along. Q and R used not to be exchanged, which
   left descriptions and Russian flags attached to the wrong articles
   after this sort had run.
   Does nothing when HI <= LO. The method is recursive.
   Called only by itself and from one place in siteMap(). */
static void qs2(int LO, int HI) throws IOException {
    int lo = LO;                      // moving index from the LO end
    int hi = HI;                      // moving index from the HI end
    if (HI > LO) {
        String mid = A[(LO + HI) >> 1];   // filespec of the middle element
        while (lo <= hi) {
            /* Advance lo past entries below the middle key and pull hi
               back past entries above it. */
            while (lo < HI && A[lo].compareTo(mid) < 0) lo++;
            while (hi > LO && A[hi].compareTo(mid) > 0) hi--;

            if (lo <= hi) {
                // Exchange the two entries in every parallel array.
                String x;
                x = A[lo]; A[lo] = A[hi]; A[hi] = x;
                x = T[lo]; T[lo] = T[hi]; T[hi] = x;
                x = Q[lo]; Q[lo] = Q[hi]; Q[hi] = x;

                long dm = DM[lo]; DM[lo] = DM[hi]; DM[hi] = dm;

                boolean b;
                b = B[lo]; B[lo] = B[hi]; B[hi] = b;
                b = P[lo]; P[lo] = P[hi]; P[hi] = b;
                b = R[lo]; R[lo] = R[hi]; R[hi] = b;

                lo++;
                hi--;
            }
        }
        if (LO < hi)
            qs2(LO, hi);              // sort lower partition
        if (lo < HI)
            qs2(lo, HI);              // sort upper partition
    }
}
/* C A R HOARE'S QUICK SORT APPLIED TO THE PT[] ARRAY.
   Sorts elements LO..HI (inclusive) of PT into ascending String.compareTo
   order. No other array is touched. Does nothing when HI <= LO.
   The method is recursive.
   Called by itself, from list_pt() and from list_ru(); note that it sorts
   PT[] whichever caller invokes it. */
static void qs3(int LO, int HI) throws IOException {
    if (HI <= LO) {
        return;                                  // nothing to sort
    }

    int left = LO;                               // scans upward from LO
    int right = HI;                              // scans downward from HI
    String pivot = PT[(LO + HI) >> 1];           // middle element

    while (left <= right) {
        // Skip elements already on the correct side of the pivot.
        while (left < HI && PT[left].compareTo(pivot) < 0) {
            left++;
        }
        while (right > LO && PT[right].compareTo(pivot) > 0) {
            right--;
        }

        if (left <= right) {
            String held = PT[left];              // exchange the two entries
            PT[left] = PT[right];
            PT[right] = held;
            left++;
            right--;
        }
    }

    if (LO < right) {
        qs3(LO, right);                          // sort lower partition
    }
    if (left < HI) {
        qs3(left, HI);                           // sort upper partition
    }
}
/* REMOVE "The" OR "A" FROM THE START OF A FILE TITLE FOR INDEXING.
   For each of the first arts entries of T[], a leading "The " (4 chars)
   or, failing that, a leading "A " (2 chars) is cut off. The match is
   case-sensitive and at most one prefix is removed per title.
   Called from only one place in main(). */
static void killThe() {
    int i = 0;
    while (i < arts) {
        String title = T[i];
        if (title.startsWith("The ")) {
            T[i] = title.substring(4);
        } else if (title.startsWith("A ")) {
            T[i] = title.substring(2);
        }
        i++;
    }
}
/* Date Converter: milliseconds since the epoch to yyyy-MM-dd,
   e.g. the instant shown as "Wed Jul 22 14:42:15 BRT 2009" gives
   "2009-07-22".
   dm is a time such as File.lastModified() returns. The date is taken in
   the JVM's default time zone, as before. The standard formatter replaces
   the former slicing of Date.toString() and month-name lookup; it uses the
   same pattern as DATE_FORMAT_NOW, with Locale.ROOT so the digits and
   calendar do not vary with the user's locale.
   Called from siteMap(). */
static String ISO8601(long dm) {
    // SimpleDateFormat is not thread-safe, so a fresh one is made per call.
    SimpleDateFormat iso = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
    return iso.format(new Date(dm));
}
/* GET CURRENT DATE.
   Returns the current date formatted with the DATE_FORMAT_NOW pattern
   (yyyy-MM-dd). Called from siteMap(). */
static String now() {
    Date today = Calendar.getInstance().getTime();
    return new SimpleDateFormat(DATE_FORMAT_NOW).format(today);
}
/* GENERATE THE 'robots.txt' FILE.
   Creates (or overwrites) robots.txt in the base directory. The file holds
   a Sitemap line pointing at siteURL + "sitemap.xml" followed by a
   "User-agent: *" line. Throws IOException if the file cannot be written.
   Called from only one place in main(). */
private static void robotsTxt() throws IOException {
    String sitemapLine = "Sitemap: " + siteURL + "sitemap.xml\n";
    String agentLine = "User-agent: * #allow all robots\n\n";

    robots = new FileWriter(bd + "/robots.txt");
    robots.write(sitemapLine + agentLine);
    robots.close();
}
/* GENERATE THE 'sitemap.xml' FILE.
   Sorts the article arrays by filespec with qs2(), then writes sitemap.xml
   in the base directory: an XML declaration and <urlset> root, one <url>
   entry (with <loc> and <lastmod>) for the site root, index.html and the
   three articles-index pages dated today, one <url> entry for every
   article without a noindex flag dated by its last-modified time, and the
   closing </urlset> tag.
   The element names follow the sitemaps.org 0.9 protocol; the previous
   literals contained only whitespace, so the file had no markup at all.
   NOTE(review): filespecs are written unescaped and the file is written in
   the platform default charset - this assumes they contain only ASCII
   characters that need no XML escaping; confirm.
   Throws IOException if the file cannot be written.
   Called from only one place in main(). */
private static void siteMap() throws IOException {
    qs2(0, arts - 1);                                // sort by filespec

    sitemap = new FileWriter(bd + "/sitemap.xml");
    try {
        sitemap.write(
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
            + "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n"
        );

        // Fixed fragments from which each <url> entry is assembled.
        String h = "\t<url>\n\t\t<loc>" + siteURL;   // up to the site prefix
        String lq = "</loc>\n\t\t<lastmod>";         // between location and date
        String lp = "</lastmod>\n\t</url>\n";        // after the date
        String lr = lq + now() + lp;                 // tail for entries dated today

        // Entries for the site URL itself and the top-level index pages.
        sitemap.write(
            h + lr
            + h + "index.html" + lr
            + h + "articles-index/articles_index.html" + lr
            + h + "articles-index/articles_index_br.html" + lr
            + h + "articles-index/articles_index_ru.html" + lr
        );

        // One entry per article that is not flagged noindex.
        for (int i = 0; i < arts; i++) {
            if (!B[i]) {
                sitemap.write(h + A[i] + lq + ISO8601(DM[i]) + lp);
            }
        }

        /* PDF entries are deliberately not written at present:
        for(int i = 0; i < pdfs; i++)
            sitemap.write(h + "articles-pdf/" + PDF[i]
                + lq + ISO8601(PDM[i]) + lp
            ); */

        sitemap.write("</urlset>\n");                // final tag in sitemap.xml
    } finally {
        sitemap.close();                             // close even if a write failed
    }
}
/* CREATE LIST OF ARTICLES WRITTEN IN PORTUGUESE
   Creates (or overwrites) articles-index/portuguese.txt under the base
   directory, sorts the first pt entries of PT[] with qs3() and writes
   them, one filespec per line, after a two-line '#' comment header.
   Throws IOException if the file cannot be written.
   Called from only one place in main(). */
private static void list_pt() throws IOException {
    Writer out = new FileWriter(bd + "/articles-index/portuguese.txt");
    qs3(0, pt - 1);                              // sort filespecs
    out.write("#Files containing articles written in Portuguese.\n#\n");
    int i = 0;
    while (i < pt) {
        out.write(PT[i] + "\n");
        i++;
    }
    out.close();
}
/* CREATE LIST OF ARTICLES WRITTEN IN RUSSIAN
   Sorts the first ru entries of RU[] into ascending String order, then
   creates (or overwrites) articles-index/russian.txt under the base
   directory and writes them, one filespec per line, after a two-line '#'
   comment header.
   The sort is done with Arrays.sort on RU[] itself. The former call to
   qs3() sorted PT[] instead, so the Russian list was never sorted and a
   NullPointerException could occur when ru exceeded pt.
   Throws IOException if the file cannot be written.
   Called from only one place in main(). */
private static void list_ru() throws IOException {
    Arrays.sort(RU, 0, ru);                      // sort only the filled part

    Writer F = new FileWriter(bd + "/articles-index/russian.txt");
    try {
        F.write("#Files containing articles written in Russian.\n#\n");
        for (int i = 0; i < ru; i++) {
            F.write(RU[i] + "\n");
        }
    } finally {
        F.close();                               // close even if a write failed
    }
}
/* Generate the articles shortlist.
   Creates (or overwrites) articles-index/articles_shortlist.htm under the
   base directory and writes one mhl() hyperlink line for every article, in
   current array order, that has no noindex flag and whose filespec
   contains neither "book/" nor "poems/".
   Throws IOException if the file cannot be written.
   Called from 1 place in main(). */
private static void shrtlst() throws IOException {
    String target = bd + "/articles-index/articles_shortlist.htm";
    fsshort = new FileWriter(target);

    for (int i = 0; i < arts; i++) {
        String spec = A[i];                      // next filespec in the list
        if (B[i]) {
            continue;                            // flagged noindex
        }
        boolean inBook = spec.indexOf("book/") != -1;
        boolean inPoems = spec.indexOf("poems/") != -1;
        if (inBook || inPoems) {
            continue;                            // book and poems are left out
        }
        fsshort.write(mhl(spec, T[i]) + " \n");
    }

    fsshort.close();
}
/* GENERATE THE ENGLISH, PORTUGUESE & RUSSIAN ARTICLES INDEX FILES PLUS AN
INDEX OF ARTICLES THAT ARE NOT TO BE INCLUDED IN ANY OF THE OTHER INDEXES.
Opens the four output files in <base directory>/articles-index, writes
their headers, then walks the article arrays in their current order and
writes one numbered mhld() entry per article to exactly one file: the
rejects file if the article is flagged noindex, otherwise the Portuguese,
Russian or English index according to its language flags. Finally writes
the closing text and closes all four files.
Throws IOException if a file cannot be opened or written.
NOTE(review): the three closing literals near the end are split across
physical lines, which is not valid Java; it looks as though the closing
HTML tags they held have been lost - restore them from the master copy.
Called from only one place in main(). */
private static void artIdx() throws IOException {
// Open output streams for the articles-index files.
String s = bd + "/articles-index/articles_";
fslist_en = new FileWriter(s + "index.html");
fslist_pt = new FileWriter(s + "index_br.html");
fslist_ru = new FileWriter(s + "index_ru.html");
fsreject = new FileWriter(s + "noindex.html");
// Generate the HTML file header code for each index file.
EnglishHeader();
PortugueseHeader();
RussianHeader();
Reject();
for(int i = 0; i < arts; i++) { // for each HTML file in the list
s = "../" + A[i]; // [next] article's filespec, relative to articles-index/
String L = mhl(s,T[i]); // create hyperlinked title
/* If it is a file that contains a "noindex" tag, write the file's
title, link and description to the rejects file; otherwise, write
them to the index file for the article's language. Each file has
its own running list number, incremented just before use. */
if(B[i])
fsreject.write(mhld(L,Q[i],++rejects) + " \n");
else {
if(P[i])
fslist_pt.write(mhld(L,Q[i],++accepts_pt) + " \n");
else if(R[i])
fslist_ru.write(mhld(L,Q[i],++accepts_ru) + " \n");
else
fslist_en.write(mhld(L,Q[i],++accepts_en) + " \n");
}
/* CREATE THE SEARCH ENGINE'S SUMMARY FILE FOR EACH HTML FILE
h = new FileWriter(bd + s + "index.summ");
h.write(T[i]);
h.write(D[i]);
h.close(); */
}
/* Write the closing text to the three language index files and to
the rejects file, then close all four files. */
fslist_en.write("
\n
\n\n\n");
fslist_pt.write("
\n
\n\n\n");
fslist_ru.write("
\n
\n\n\n");
fsreject.write("\n\n\n");
fslist_en.close();
fslist_pt.close();
fslist_ru.close();
fsreject.close();
}
/* PROGRAM ENTRY POINT.
   args[0], when present, names the website's base directory; otherwise
   "../.." is used. If that path is not a directory a message is printed
   and nothing else happens. Otherwise the site is scanned, the titles are
   stripped of a leading "The "/"A " and sorted, and the index pages,
   robots.txt, sitemap.xml, the Portuguese and Russian lists and the
   shortlist are generated in that order. The length of the longest HTML
   file found is printed at the end.
   Throws IOException if any output file cannot be written. */
public static void main(String args[]) throws IOException {
    // Base directory: command line argument, else up to the website root.
    bd = (args.length > 0) ? args[0] : "../..";
    dl = bd.length() + 1;           // length of base path + terminating '/'

    if (!new File(bd).isDirectory()) {
        System.out.println( bd + " is not a directory.");
        return;
    }

    scan(bd);                       // fill the article arrays
    killThe();                      // drop leading "The " / "A " from titles
    qs(0, arts - 1);                // sort the article arrays by title
    artIdx();                       // write the index pages and rejects page
    robotsTxt();                    // generate the robots.txt file
    siteMap();                      // generate the sitemap.xml file
    list_pt();                      // list of files written in Portuguese
    list_ru();                      // list of files written in Russian
    shrtlst();                      // generate shortlist of articles
    // listPDFs();                  // generate list of PDF article files
    System.out.println("Longest HTML file: " + FL + " bytes.");
}
}