/**
 * Web Site Indexer.
 * @author Robert John Morton
 * @version 29 June 2009, adapted for CSS version of website 11/09/2018
 */

/* This program does NOT generate the java search engine index. Look in the
   folder 'search-engine' for this.

   This program scans the website's article folders and writes, under the
   base directory:

     articles-index/articles_index.html      English articles (title + description)
     articles-index/articles_index_br.html   Portuguese articles (*_br.html)
     articles-index/articles_index_ru.html   Russian articles (*_ru.html)
     articles-index/articles_noindex.html    articles carrying a robots "noindex" meta tag
     articles-index/articles_shortlist.htm   hyperlinked titles, excluding book/ and poems/
     articles-index/portuguese.txt           filespecs of the Portuguese articles
     articles-index/russian.txt              filespecs of the Russian articles
     sitemap.xml and robots.txt              for the web site as a whole

   Command line: the only argument read by main() is the optional base
   directory of the website; it defaults to "../..".

       java articles_indexer /home/rob/website

   NOTE(review): the string literals in the four header methods appear to have
   had their HTML markup stripped at some point (only text and line breaks
   remain). They are reproduced here as found; restore the markup from the
   site template before relying on the generated pages.
*/

import java.io.*;
import java.util.*;
import java.text.SimpleDateFormat;

class articles_indexer {

    public static final String DATE_FORMAT_NOW = "yyyy-MM-dd";

    static int
        dir_level = 0,    // current directory level used in scan()
        dl,               // length of parent directory path name + terminating '/'
        arts = 0,         // chapter+articles files counter
        pdfs = 0,         // index for pdf articles
        frms = 0,         // counter for frame, title and contents files
        pt = 0,           // counter for files written in Portuguese
        ru = 0,           // counter for articles written in Russian
        accepts_en = 0,   // list number of the article entry in articles_index
        accepts_pt = 0,   // list number of the article entry in articles_index_br
        accepts_ru = 0,   // list number of the article entry in articles_index_ru
        rejects = 0;      // list number of the article entry in articles_noindex

    static long FL = 0, fl = 0;   // for capturing the length of the longest file

    static Writer
        fslist_en,        // for articles_index.html
        fslist_pt,        // for articles_index_br.html
        fslist_ru,        // for articles_index_ru.html
        fsreject,         // for noindex articles
        fsshort,          // for shortlist files
        sitemap,          // for sitemap.xml
        robots;           // robots.txt

    static String
        FOLDERS[] = {     // top-level folders that scan() is allowed to enter
            "book","chaos","computers","home","internet","landshare",
            "navigation","poems","radio","science","software"
        },
        MONTHS[] = {
            "Jan","Feb","Mar","Apr","May","Jun",
            "Jul","Aug","Sep","Oct","Nov","Dec"
        },
        siteURL = "https://robmorton.website/",
        SM[]  = new String[1000],   // string array in which to build site map index
        A[]   = new String[1000],   // String array for the articles' filespecs
        T[]   = new String[1000],   // String array for the articles' titles
        Q[]   = new String[1000],   // String array for the articles' descriptions
        PT[]  = new String[1000],   // for files in Portuguese
        RU[]  = new String[1000],   // for files in Russian
        PDF[] = new String[400],    // for the filenames of the PDF articles
        Descr = "",                 // for HTML file description
        bd;                         // base directory of the website

    static long
        DM[]  = new long[1000],     // array for files modified dates
        PDM[] = new long[400];      // dates modified for PDF files (same capacity as PDF[])

    static boolean
        GotDescription = false,
        GotTheNoIndexTag = false,
        B[] = new boolean[1000],    // array of noindex flags
        P[] = new boolean[1000],    // array of Portuguese flags
        R[] = new boolean[1000];    // array of Russian flags


    /* HEADER TEXT FOR THE FILE articles_index.html WHICH LISTS ALL THE
       ARTICLES WRITTEN IN ENGLISH. Called from only one place in artIdx().
       Line breaks that were embedded raw in the literals are written as \n. */
    private static void EnglishHeader() throws IOException {
        fslist_en.write(
            "\n" + "\n" + "\n" + "\n"
            + "Index to Book, Articles & Essays by Robert John Morton\n"
            + "\n" + "\n" + "\n"
            + "\n\n" + "\n\n" + "\n\n"
            + "\n\n\n"
            + "\n\n"
            + "\n\n\n"
            + "\n" + "\n"
            + "\nRobert John Morton\n\n\n\n"
            + "\n\nWeb-Book\n"
            + "\nThe Lost Inheritance\n"
            + "\nMy Poems\n\n"
            + "\n\nProjects\n"
            + "\nLandshare Project\n"
            + "\nSustainable Food\n"
            + "\nSustainable Energy\n\n"
            + "\n\nInterests\n"
            + "\nGlobal Navigation\n"
            + "\nShort-Wave Radio\n"
            + "\nChaos Theory\n"
            + "\nThe Universe\n"
            + "\nThe Internet\n"
            + "\nMy Software\n"
            + "\nMy Ideal PC\n"
            + "\nAutism & Society\n\n"
            + "\n\nFacilities\n"
            + "\nHome Page\n"
            + "\nSite Index\n"
            + " | Search\n"
            + "\nAbout This Site\n"
            + "\nAbout Me\n"
            + " | Email\n"
            + "\nSecurity †\n\n"
            + "\n\n\n\n"
            + "\n\nIndex to Articles by Robert John Morton\n\n\n\n"
            + "\n\n\n\n"
            + "Browse the article titles and summaries or use your browser's "
            + "FIND facility to search the list for what you want. "
            + "Alternatively, use this website's dedi\u00ADcated meta-tag search "
            + "engine, which, specifically for this website, is vastly more "
            + "precise than the mainstream public search engines.\n"
            + "\n\n\n"
            + "portugês | \n"
            + "русский | \n"
            + "Home Page\n\n"
            + "\n\n\n\n"
            + "\n\n"
        );
    }


    /* HEADER TEXT FOR THE FILE articles_index_br.html WHICH LISTS ALL THE
       ARTICLES WRITTEN IN PORTUGUESE. Called from one place in artIdx().
       Line breaks that were embedded raw in the literals are written as \n. */
    private static void PortugueseHeader() throws IOException {
        fslist_pt.write(
            "\n" + "\n" + "\n" + "\n"
            + "Índice para livro, artigos e ensaios por Robert John Morton\n"
            + "\n" + "\n" + "\n"
            + "\n\n" + "\n\n" + "\n\n"
            + "\n\n\n"
            + "\n\n"
            + "\n\n\n"
            + "\n" + "\n"
            + "\nRobert John Morton\n\n\n\n"
            + "\n\nWeb-Book\n"
            + "\nThe Lost Inheritance\n"
            + "\nMy Poems\n\n"
            + "\n\nProjects\n"
            + "\nLandshare Project\n"
            + "\nSustainable Food\n"
            + "\nSustainable Energy\n\n"
            + "\n\nInterests\n"
            + "\nGlobal Navigation\n"
            + "\nShort-Wave Radio\n"
            + "\nChaos Theory\n"
            + "\nThe Universe\n"
            + "\nThe Internet\n"
            + "\nMy Software\n"
            + "\nMy Ideal PC\n\n"
            + "\n\nFacilities\n"
            + "\nHome Page\n"
            + "\nArticles Index\n"
            + "\nSearch Site\n"
            + "\nAbout This Site\n"
            + "\nAbout Me, \n"
            + "Email\n"
            + "\nSecurity †\n\n"
            + "\n\n\n\n"
            + "\n\nÍndice para Artigos por Robert John Morton\n\n\n\n"
            + "\n\n\n\n"
            + "Navegue pelos títulos e resumos dos artigos ou use a facilidade "
            + "FIND do seu navegador para pesquisar na lista o que deseja. "
            + "Como alternativa, use a meta tag dedicada do site buscador, "
            + "que, especificamente para este site, é muito mais preciso do "
            + "que os principais motores de busca públicos.\n\n"
            + "\n\n\n"
            + "English | \n"
            + "русский | \n"
            + "página inicial\n\n"
            + "\n\n\n\n"
            + "\n\n"
        );
    }


    /* HEADER TEXT FOR THE FILE articles_index_ru.html WHICH LISTS ALL THE
       ARTICLES WRITTEN IN RUSSIAN. Called from one place in artIdx().
       Line breaks that were embedded raw in the literals are written as \n. */
    private static void RussianHeader() throws IOException {
        fslist_ru.write(
            "\n" + "\n" + "\n" + "\n"
            + "Указатель к книгам и эссе Роберта Джона Мортона\n"
            + "\n" + "\n" + "\n"
            + "\n\n" + "\n\n" + "\n\n"
            + "\n\n\n"
            + "\n\nУказатель к эссе Роберт Джон Мортон\n\n\n\n"
            + "\n\n\n\n"
            + "Просмотрите заголовки и резюме эссе или используйте функцию "
            + "НАЙТИ вашего браузера, чтобы найти в списке то, что вы хотите. "
            + "В качестве альтернативы используйте специальный метатег этого "
            + "веб-сайта.поисковый движок, который, особенно для этого "
            + "веб-сайта, намного точнее, чем основные общедоступные "
            + "поисковые системы.\n"
            + "\n\n\n"
            + "English | "
            + "Portugês | \n"
            + "Португальская версия\n\n"
            + "\n\n\n\n"
            + "\n\n"
        );
    }


    /* HEADER TEXT FOR THE FILE articles_noindex.html WHICH LISTS ALL THE
       NON-INDEXABLE FILES. Called from only one place in artIdx(). */
    private static void Reject() throws IOException {
        fsreject.write(
            "" + "Articles with noindex Meta Tag" + "\n\n"
        );
    }


    /* COMPILE A LIST OF THE PDF ARTICLE FILE NAMES
       Fills PDF[] / PDM[] with the name and last-modified time of every
       .pdf or .png file in <bd>/articles-pdf and counts them in 'pdfs'.
       Its only call site, in main(), is currently commented out. */
    private static void listPDFs() throws IOException {
        String d = bd + "/articles-pdf";
        String D[] = new File(d).list();   // null if the directory is missing/unreadable
        if (D == null) return;

        // Never write past the end of either of the two parallel arrays.
        int capacity = Math.min(PDF.length, PDM.length);

        for (int i = 0; i < D.length && pdfs < capacity; i++) {
            String dd = D[i];              // name of next entry
            String fp = d + "/" + dd;      // its full path name
            File fs = new File(fp);

            if (fs.isFile() && (fp.endsWith(".pdf") || fp.endsWith(".png"))) {
                PDF[pdfs] = dd;
                PDM[pdfs++] = fs.lastModified();
            }
        }
    }


    /* CHECK TO SEE IF THE CAPTURED TAG IS A DESCRIPTION META TAG.
       'Tag' is the text between '<' and '>'. If it starts with "meta",
       contains "description" and has a non-empty quoted value after
       "content", that value is stored in Descr and GotDescription is set.
       Only the first description found per file is kept.
       Called from only one place in HTMLtitle(). */
    static void isDescriptionTag(String Tag) {
        if (GotDescription) return;

        if (Tag.indexOf("meta") != 0 || Tag.indexOf("description") == -1)
            return;

        int x = Tag.indexOf("content");
        if (x == -1) return;

        // x -> first character after the opening quote (0 if no quote found),
        // y -> the closing quote (-1 if none). y > x demands real text between.
        x = Tag.indexOf('\"', x + 7) + 1;
        int y = Tag.indexOf('\"', x);
        if (y > x) {
            Descr = Tag.substring(x, y);
            GotDescription = true;
        }
    }


    /* CHECK TO SEE IF THE CAPTURED TAG IS A noindex META TAG
       Sets GotTheNoIndexTag when the tag text starts with "meta" and
       contains both "robots" and "noindex".
       Called from only one place in HTMLtitle(). */
    static void isNoIndexTag(String Tag) {
        if (GotTheNoIndexTag) return;   // already verified that it is

        if (Tag.indexOf("meta") == 0
                && Tag.indexOf("robots") != -1
                && Tag.indexOf("noindex") != -1)
            GotTheNoIndexTag = true;
    }


    /* EXAMINE THE CONTENTS OF THE HTML FILE
       Reads the file 'fp' character by character and returns the text found
       between the title tags ("" if there is none or the file cannot be
       read). As side effects it resets and then fills Descr, GotDescription
       and GotTheNoIndexTag for this file. One leading and one trailing
       newline are removed from the title.
       NOTE(review): the file is read with the platform default charset, as
       before; confirm that this matches the encoding of the HTML files.
       Called from only one place in scan(). */
    static String HTMLtitle(String fp) {
        StringBuffer tag = new StringBuffer();     // raw text of the current tag
        StringBuffer title = new StringBuffer();   // title text gathered so far
        boolean inTag = false;     // true while between '<' and '>'
        boolean inTitle = false;   // true while between the title tags

        Descr = "";                // clear the per-file results
        GotDescription = false;
        GotTheNoIndexTag = false;

        Reader fr = null;
        try {
            fr = new BufferedReader(new FileReader(fp));
            int x;
            while ((x = fr.read()) != -1) {
                char c = (char) x;

                if (c == '<') {
                    inTag = true;
                } else if (c == '>') {
                    String t = tag.toString();

                    if (t.equals("title"))
                        inTitle = true;
                    else if (t.equals("/title"))
                        inTitle = false;
                    else if (t.equals("/head")) {
                        // End of the head section without a description.
                        if (Descr.equals("")) Descr = "No Description";
                    } else {
                        isDescriptionTag(t);
                        isNoIndexTag(t);
                    }
                    tag.setLength(0);   // clear for the next tag
                    inTag = false;
                } else if (inTag) {
                    tag.append(c);
                } else if (inTitle) {
                    title.append(c);
                }
            }
        } catch (IOException e) {
            // Best effort, as before: an unreadable file gets an empty title,
            // but the failure is now reported instead of being hidden.
            System.err.println("Cannot read " + fp + ": " + e);
            title.setLength(0);
        } finally {
            closeQuietly(fr);
        }

        // Cut out a possible leading and trailing '\n'. startsWith/endsWith
        // are safe for "" and for titles with interior newlines.
        String Title = title.toString();
        if (Title.startsWith("\n")) Title = Title.substring(1);
        if (Title.endsWith("\n"))   Title = Title.substring(0, Title.length() - 1);
        return Title;
    }


    /* Close a stream during clean-up, ignoring any failure to do so. */
    private static void closeQuietly(Closeable c) {
        if (c == null) return;
        try {
            c.close();
        } catch (IOException ignored) {
            // nothing useful can be done while already cleaning up
        }
    }


    /* This method is re-entrant. It calls itself. It lists the entries of
       directory 'd' and examines each one.

       A directory is entered (recursively) if it is one of FOLDERS when we
       are at the top level, or any directory below that, provided its name
       does not contain "images", "applets", "C-programs" or "java_progs".

       A file below the top level whose name ends in ".html" is recorded in
       the parallel arrays T (title), A (relative filespec), Q (description),
       DM (last modified), B (noindex flag), P (Portuguese) and R (Russian),
       unless its relative filespec contains "index" or "contents" or it is
       an error404 page. The 'relative' filespec is the path+filename seen
       from the base directory.

       Called only by itself and from one place in main(). */
    private static void scan(String d) throws IOException {
        String D[] = new File(d).list();   // null if 'd' cannot be listed
        if (D == null) return;

        for (int i = 0; i < D.length; i++) {
            String dd = D[i];              // name of next sub-directory or file
            String fp = d + "/" + dd;      // its full path name
            File fs = new File(fp);

            if (fs.isDirectory()) {
                // Below the top level every directory qualifies; at the top
                // level only those named in FOLDERS do.
                boolean wanted = (dir_level > 0);
                for (int j = 0; !wanted && j < FOLDERS.length; j++)
                    wanted = dd.equals(FOLDERS[j]);

                if (wanted
                        && dd.indexOf("images") == -1
                        && dd.indexOf("applets") == -1
                        && dd.indexOf("C-programs") == -1
                        && dd.indexOf("java_progs") == -1) {
                    dir_level++;
                    try {
                        scan(fp);
                    } finally {
                        dir_level--;       // keep the level right even on error
                    }
                }
                continue;
            }

            if (dir_level == 0 || !fs.isFile() || !fp.endsWith(".html"))
                continue;

            // Test the path relative to the site root, so that the name of the
            // base directory itself can never exclude every file. ("noindex"
            // contains "index", so one test covers both.)
            String rfs = fp.substring(dl);
            if (rfs.indexOf("index") != -1
                    || rfs.indexOf("contents") != -1
                    || rfs.endsWith("error404.html")
                    || rfs.endsWith("error404_br.html"))
                continue;

            if (arts >= A.length)
                throw new IllegalStateException(
                    "More than " + A.length + " articles found; enlarge the arrays.");

            // Find the longest HTML file.
            if ((fl = fs.length()) > FL) FL = fl;

            T[arts]  = HTMLtitle(fp);        // also sets Descr and GotTheNoIndexTag
            B[arts]  = GotTheNoIndexTag;     // true = keep out of index and sitemap
            DM[arts] = fs.lastModified();
            Q[arts]  = Descr;
            A[arts]  = rfs;

            // Language is decided by the file name suffix.
            P[arts] = dd.endsWith("_br.html");
            R[arts] = !P[arts] && dd.endsWith("_ru.html");
            if (P[arts])      PT[pt++] = rfs;
            else if (R[arts]) RU[ru++] = rfs;

            arts++;
        }
    }


    /* Build one definition-list entry: the list number x padded to three
       digits plus the hyperlinked title L in a <dt>, and the description D
       in a <dd>. Called from only one place in artIdx(). */
    static String mhld(String L, String D, int x) {
        String a = "";
        if (x < 10) a = "00";
        else if (x < 100) a = "0";

        return "<dt>" + a + x + " " + L + "</dt>\n"
             + "<dd><div>" + D + "</div></dd>\n";
    }


    /* Make filespec s into a hyperlink whose text is the title T, or the
       filespec itself if the title is empty.
       Called from artIdx() and shrtlst(). */
    static String mhl(String s, String T) {
        if (T.equals("")) T = s;
        return "<a href=\"" + s + "\">" + T + "</a>";
    }


    /* Exchange article entries i and j in ALL the parallel per-article
       arrays, so that an article's title, filespec, description, flags and
       date always stay together whichever key is being sorted on. */
    private static void swapEntries(int i, int j) {
        String s = T[i]; T[i] = T[j]; T[j] = s;
        s = A[i]; A[i] = A[j]; A[j] = s;
        s = Q[i]; Q[i] = Q[j]; Q[j] = s;
        boolean b = B[i]; B[i] = B[j]; B[j] = b;
        b = P[i]; P[i] = P[j]; P[j] = b;
        b = R[i]; R[i] = R[j]; R[j] = b;
        long dm = DM[i]; DM[i] = DM[j]; DM[j] = dm;
    }


    /* C A R HOARE'S QUICK SORT: sort article entries LO..HI (inclusive) by
       title T[], carrying the other per-article arrays along.
       It is re-entrant: it calls itself for each partition.
       Called by itself and from one place in main(). */
    static void qs(int LO, int HI) throws IOException {
        if (HI <= LO) return;               // nothing to sort

        int lo = LO;
        int hi = HI;
        String mid = T[(LO + HI) >> 1];     // pivot: the middle element

        while (lo <= hi) {                  // until the indices cross
            while (lo < HI && T[lo].compareTo(mid) < 0) lo++;
            while (hi > LO && T[hi].compareTo(mid) > 0) hi--;
            if (lo <= hi) {
                swapEntries(lo, hi);
                lo++;
                hi--;
            }
        }
        if (LO < hi) qs(LO, hi);            // sort lower partition
        if (lo < HI) qs(lo, HI);            // sort upper partition
    }


    /* C A R HOARE'S QUICK SORT: sort article entries LO..HI (inclusive) by
       filespec A[], carrying ALL the other per-article arrays along
       (previously Q[] and R[] were left behind, detaching descriptions and
       Russian flags from their articles).
       Called only by itself and from one place in siteMap(). */
    static void qs2(int LO, int HI) throws IOException {
        if (HI <= LO) return;               // nothing to sort

        int lo = LO;
        int hi = HI;
        String mid = A[(LO + HI) >> 1];     // pivot: the middle element

        while (lo <= hi) {                  // until the indices cross
            while (lo < HI && A[lo].compareTo(mid) < 0) lo++;
            while (hi > LO && A[hi].compareTo(mid) > 0) hi--;
            if (lo <= hi) {
                swapEntries(lo, hi);
                lo++;
                hi--;
            }
        }
        if (LO < hi) qs2(LO, hi);           // sort lower partition
        if (lo < HI) qs2(lo, HI);           // sort upper partition
    }


    /* C A R HOARE'S QUICK SORT: sort elements LO..HI (inclusive) of the
       Portuguese filespec array PT[]. It works on PT[] only, so it must not
       be used for RU[]. No longer called from within this class (list_pt()
       and list_ru() use Arrays.sort); kept so the method remains available. */
    static void qs3(int LO, int HI) throws IOException {
        if (HI <= LO) return;               // nothing to sort

        int lo = LO;
        int hi = HI;
        String mid = PT[(LO + HI) >> 1];    // pivot: the middle element

        while (lo <= hi) {                  // until the indices cross
            while (lo < HI && PT[lo].compareTo(mid) < 0) lo++;
            while (hi > LO && PT[hi].compareTo(mid) > 0) hi--;
            if (lo <= hi) {
                String x = PT[lo];
                PT[lo] = PT[hi];
                PT[hi] = x;
                lo++;
                hi--;
            }
        }
        if (LO < hi) qs3(LO, hi);           // sort lower partition
        if (lo < HI) qs3(lo, HI);           // sort upper partition
    }


    /* REMOVE "The " OR "A " FROM THE START OF EACH FILE TITLE FOR INDEXING.
       Called from only one place in main(). */
    static void killThe() {
        for (int i = 0; i < arts; i++) {
            String s = T[i];
            if (s.startsWith("The "))    T[i] = s.substring(4);
            else if (s.startsWith("A ")) T[i] = s.substring(2);
        }
    }


    /* Date converter: milliseconds since the epoch to "yyyy-MM-dd" in the
       default time zone, e.g. 2009-07-22. Uses a date formatter instead of
       picking fields out of Date.toString() by character position; Locale.US
       keeps the digits and calendar the same as that text form used.
       Called from siteMap() and now(). */
    static String ISO8601(long dm) {
        return new SimpleDateFormat(DATE_FORMAT_NOW, Locale.US).format(new Date(dm));
    }


    // GET CURRENT DATE as "yyyy-MM-dd". Called from siteMap().
    static String now() {
        return ISO8601(System.currentTimeMillis());
    }


    // GENERATE THE 'robots.txt' FILE. Called from only one place in main().
    private static void robotsTxt() throws IOException {
        robots = new FileWriter(bd + "/robots.txt");
        try {
            robots.write(
                "Sitemap: " + siteURL + "sitemap.xml\n"
                + "User-agent: * #allow all robots\n\n"
            );
        } finally {
            robots.close();
        }
    }


    /* GENERATE THE 'sitemap.xml' FILE. Called from only one place in main().
       Re-sorts the article entries by filespec, then writes one <url> entry
       for the site root, the top-level index.html and the three article
       index pages (all dated today), followed by one entry per article that
       does not carry a noindex flag, dated by the file's last-modified time.
       PDF articles are not listed (listPDFs() is not called from main()). */
    private static void siteMap() throws IOException {
        qs2(0, arts - 1);                   // sort sitemap links by filespec

        String h  = "\t<url>\n\t\t<loc>" + siteURL;
        String lq = "</loc>\n\t\t<lastmod>";
        String lp = "</lastmod>\n\t</url>\n";
        String lr = lq + now() + lp;        // tail of an entry dated today

        sitemap = new FileWriter(bd + "/sitemap.xml");
        try {
            sitemap.write(
                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
                + "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n"
            );

            sitemap.write(
                h + lr
                + h + "index.html" + lr
                + h + "articles-index/articles_index.html" + lr
                + h + "articles-index/articles_index_br.html" + lr
                + h + "articles-index/articles_index_ru.html" + lr
            );

            for (int i = 0; i < arts; i++) {
                if (!B[i])                  // skip files with a noindex meta tag
                    sitemap.write(h + A[i] + lq + ISO8601(DM[i]) + lp);
            }

            sitemap.write("</urlset>\n");   // final tag in sitemap.xml
        } finally {
            sitemap.close();
        }
    }


    /* CREATE LIST OF ARTICLES WRITTEN IN PORTUGUESE
       Writes the sorted filespecs to articles-index/portuguese.txt.
       Called from only one place in main(). */
    private static void list_pt() throws IOException {
        Arrays.sort(PT, 0, pt);             // sort the filespecs in use
        Writer F = new FileWriter(bd + "/articles-index/portuguese.txt");
        try {
            F.write("#Files containing articles written in Portuguese.\n#\n");
            for (int i = 0; i < pt; i++)
                F.write(PT[i] + "\n");
        } finally {
            F.close();
        }
    }


    /* CREATE LIST OF ARTICLES WRITTEN IN RUSSIAN
       Writes the sorted filespecs to articles-index/russian.txt.
       The sort is applied to RU[] itself; it used to be applied to PT[],
       which left RU[] unsorted and failed when ru exceeded pt.
       Called from only one place in main(). */
    private static void list_ru() throws IOException {
        Arrays.sort(RU, 0, ru);             // sort the filespecs in use
        Writer F = new FileWriter(bd + "/articles-index/russian.txt");
        try {
            F.write("#Files containing articles written in Russian.\n#\n");
            for (int i = 0; i < ru; i++)
                F.write(RU[i] + "\n");
        } finally {
            F.close();
        }
    }


    /* Generate the articles shortlist (articles_shortlist.htm): one
       hyperlinked title per article that has no noindex flag and whose
       filespec contains neither "book/" nor "poems/". Entries appear in the
       current array order, which is filespec order after siteMap().
       Called from one place in main(). */
    private static void shrtlst() throws IOException {
        fsshort = new FileWriter(bd + "/articles-index/articles_shortlist.htm");
        try {
            for (int i = 0; i < arts; i++) {
                String L = A[i];
                if (!B[i] && L.indexOf("book/") == -1 && L.indexOf("poems/") == -1)
                    fsshort.write(mhl(L, T[i]) + "<br>\n");
            }
        } finally {
            fsshort.close();
        }
    }


    /* GENERATE THE ENGLISH, PORTUGUESE & RUSSIAN ARTICLES INDEX FILES PLUS
       AN INDEX OF ARTICLES THAT ARE NOT TO BE INCLUDED IN ANY OF THOSE.
       Each article goes to exactly one file: the noindex file if flagged,
       otherwise the Portuguese, Russian or English index, numbered
       separately per file. Called from only one place in main(). */
    private static void artIdx() throws IOException {
        String s = bd + "/articles-index/articles_";

        fslist_en = null;
        fslist_pt = null;
        fslist_ru = null;
        fsreject  = null;
        try {
            fslist_en = new FileWriter(s + "index.html");
            fslist_pt = new FileWriter(s + "index_br.html");
            fslist_ru = new FileWriter(s + "index_ru.html");
            fsreject  = new FileWriter(s + "noindex.html");

            EnglishHeader();
            PortugueseHeader();
            RussianHeader();
            Reject();

            for (int i = 0; i < arts; i++) {
                String L = mhl("../" + A[i], T[i]);   // hyperlinked title

                if (B[i])
                    fsreject.write(mhld(L, Q[i], ++rejects) + "<br>\n");
                else if (P[i])
                    fslist_pt.write(mhld(L, Q[i], ++accepts_pt) + "<br>\n");
                else if (R[i])
                    fslist_ru.write(mhld(L, Q[i], ++accepts_ru) + "<br>\n");
                else
                    fslist_en.write(mhld(L, Q[i], ++accepts_en) + "<br>\n");
            }

            // Final end tags for the four files.
            fslist_en.write("</dl>\n</div>\n</body>\n</html>\n");
            fslist_pt.write("</dl>\n</div>\n</body>\n</html>\n");
            fslist_ru.write("</dl>\n</div>\n</body>\n</html>\n");
            fsreject.write("</dl>\n</body>\n</html>\n");

            // Close normally so that a failure to flush is reported.
            fslist_en.close();
            fslist_pt.close();
            fslist_ru.close();
            fsreject.close();
        } finally {
            // Releases whatever is still open after an error; closing an
            // already closed writer has no effect.
            closeQuietly(fslist_en);
            closeQuietly(fslist_pt);
            closeQuietly(fslist_ru);
            closeQuietly(fsreject);
        }
    }


    /* Entry point. args[0], if given, is the base directory of the website;
       otherwise "../.." is used. */
    public static void main(String args[]) throws IOException {
        bd = "../..";                       // to get up to the website root level
        if (args.length > 0)
            bd = args[0];

        dl = bd.length() + 1;               // length of base path + separator

        if (!new File(bd).isDirectory()) {
            System.out.println(bd + " is not a directory.");
            return;
        }

        scan(bd);            // create the file list arrays
        killThe();           // remove "The" or "A" from beginnings of titles
        qs(0, arts - 1);     // sort the entries by title
        artIdx();            // generate the four articles index files
        robotsTxt();         // generate the robots.txt file
        siteMap();           // generate the sitemap.xml file (re-sorts by filespec)
        list_pt();           // create list of files written in Portuguese
        list_ru();           // create list of files written in Russian
        shrtlst();           // generate shortlist of articles
        // listPDFs();       // generate list of PDF article files
        System.out.println("Longest HTML file: " + FL + " bytes.");
    }
}