/** * Search Engine Applet 1.0.1 * @author Robert John Morton * @version 22 October 1999 modified 10 July 2009, 10 April 2012 * @copyright Robert John Morton (all rights reserved) */ /* This applet requires a specialised index contained in a data file called index.dat located in the /website directory of the web site on the server. It requires no server-side executables or scripts. It can search on the up to 16 entered keywords. */ import java.applet.*; // all the gubbins to make the applet work import javax.swing.*; // Swing GUI import java.awt.*; // for graphics operations (GUI) import java.awt.event.*; // for the new-fangled 1.1 event handling import java.net.*; // for downloading data from the remote server import java.io.*; // for stream handling for the above public class search extends JApplet implements Runnable { /* Declare two fonts for the lettering used on this panel. The first is for the KEYWORDS: label and the second is for the text entry field. */ private static final Font pf = new Font("Sans",Font.BOLD,14), bf = new Font("Sans",Font.BOLD,16); private static final Color bg = new Color(244,244,244), // panels background colour BG = new Color(238,238,238); // applet background colour static Image IMG0, // main background image IMG1, // background for the title + summary panel IMG2, // background for the message/url panel IMG3; // background for the summary number panel private imgldr il; // instance reference to the image loader private worldmap wm; // panel for background image private boolean // THESE FLAGS BECOME 'TRUE' WHEN THE Init2started = false, //construction of the GUI has been initiated Init2finished = false; //GUI has now been built boolean // true means get the: JarredImages = true, // image files from the jar file JarredSearch = true; // search data files from the jar file private int N, // total number of keywords in index a, // byte number at which keyword text stream starts b, // byte number at which filespec reference pointers start c, // byte number at which filespec reference numbers start d, // byte number at which filespec text pointers start e, // byte number at which filespec text stream starts keywords = 0, // current number of keywords typed into Text field RANK[], // holds order of rank for web pages in shortlist SIGF[], // significance rating of each HTML file in shortlist fsp = 0, // points to ref num of first filespec for primary keyword fse = 0, // number of filespecs pertaining to primary keyword fsi = 0, // filespec index for given keyword set lp = 0, // indicates current download phase len = 0, // length of the remote item being loaded suc = 0, // number of bytes of the above successfully downloaded fh, space, // font height and width of a space Ty = 0; // vertical position of current line of text private byte INDEX[]; // large byte array to hold the downloaded index data private String res, // name of resource being downloaded E = "", // Exception during downloading + method where it occurred Title = "", // for HTML Title of current document Descr = "", // for HTML Description of current document cb, // code base URL - where this applet's class file came from KeyWs = ""; // entered keywords (global so can be displayed in message) private Container cp; // instance reference of the JApplet's pane private AppletContext ac; // context of HTML page this applet is runs in private URL url; // url of current HTML file private volatile Thread TH; // reference for Internet Data Transfer thread private JLabel KL; // label for keyword entry field private JTextField KF; // keyword entry field on the applet panel private img1panel img1; // instance references for titl & summary panel private img2panel img2; // instance references for message panel private img3panel img3; // instance references for summary number panel // Input stream for downloading the index file or the current HTML file private InputStream I; // References for the Search, Clear, Next, Prev, View buttons private JButton CB, SB, NB, PB, VB; public void init() { // INITIALISE THE APPLET /* Get the 'context' of HTML document in which this applet is running, and the URL path to the server directory that contains the applet's class files. */ ac = getAppletContext(); cb = getCodeBase().toString(); /* The following is a work-around regarding the Sun Hotjava browser for Mircosoft intranet machine names where the getCodeBase() method wrongly seems to return the document base (cira Nov 1999). */ if(!cb.endsWith("/")) { int x = cb.lastIndexOf('/'); if(x != -1) cb = cb.substring(0,x + 1); } /* Invoke Phase 1 of the GUI construction process on the Event Despatching Thread. */ try { javax.swing.SwingUtilities.invokeAndWait( new Runnable() { public void run() { init1(); } } ); } catch(Exception e) { showStatus("couldn't create Swing GUI"); } il = new imgldr(this,img1,img2,img3); // Create & start image loader TH = new Thread(this); // Create a program thread for } // downloading data from the server. /* INITIALISATION: Since this involves the creation and set-up of Swing widgets, it must be done on the Event Dispatching thread. It is invoked from the applet's init() method above. */ private void init1() { cp = getContentPane(); // get the content pane of the JApplet cp.setLayout(null); // allow control panels to be laid out manually setFont(pf); // set font for Swing widgets // Create the "KEYWORDS" LABELS and add it to the applet panel. KL = new JLabel("KEYWORDS:"); KL.setFont(bf); cp.add(KL); KL.setBounds(12,12,117,30); // Create the keywords entry text field and add it to the applet panel. KF = new JTextField(); KF.setFont(pf); cp.add(KF); KF.setBounds(129,12,240,30); /* Create the 'Clear', 'Search', 'NEXT', 'PREV' and "View Docu- ment" buttons then add and position each on the applet panel. */ CB = new JButton(); CB.setText("Clear"); cp.add(CB); CB.setBounds(463,12,70,30); CB.setMargin(new Insets(3,3,3,3)); SB = new JButton(); SB.setText("Search"); cp.add(SB); SB.setBounds(381,12,70,0); SB.setMargin(new Insets(3,3,3,3)); NB = new JButton(); NB.setText("Next"); cp.add(NB); NB.setBounds(264,253,60,30); NB.setMargin(new Insets(3,3,3,3)); PB = new JButton(); PB.setText("Prev"); cp.add(PB); PB.setBounds(336,253,60,30); PB.setMargin(new Insets(3,3,3,3)); VB = new JButton(); VB.setText("View Document"); cp.add(VB); VB.setBounds(408,253,125,30); VB.setMargin(new Insets(3,3,3,3)); /* Set up event listeners to capture the occurrence of the follow- ing events: a carrage-return RTRN from the text field, the Clear, Search, NEXT, PREV and "View Document" buttons being pressed. */ KF.addActionListener(new normlbut(normlbut.TEXT,this)); // RTRN CB.addActionListener(new normlbut(normlbut.WIPE,this)); // Clear SB.addActionListener(new normlbut(normlbut.SRCH,this)); // Search NB.addActionListener(new normlbut(normlbut.NEXT,this)); // NEXT PB.addActionListener(new normlbut(normlbut.PREV,this)); // PREV VB.addActionListener(new normlbut(normlbut.VIEW,this)); // View Doc int // create the message display panel W = 521, H = 30; img2 = new img2panel(W,H,bg,this); cp.add(img2); img2.setBounds(12,213,W,H); } /* SECOND PHASE OF INITIALISATION: This can only be done after the background image for the applet has finished downloading from the server. Since it involves the creation of a Swing JPanel, it must be executed by the Event Dispatching thread and not the local run() thread from where the process must be invoked. */ private void scheduleInit2() { Init2started = true; // Indicate that phase 2 of the initialisation // process has been scheduled. try { // Try to create the search display panel javax.swing.SwingUtilities.invokeAndWait( new Runnable() { public void run() {init2();} } ); } catch(Exception e) { showStatus("createGUI phase 2 didn't successfully complete"); } } private void init2() { /* Create the main title and summary display panel, add it to the applet content pane and set its dimensions */ int W = 521, H = 151; img1 = new img1panel(W,H,bg,this); cp.add(img1); img1.setBounds(12,52,W,H); /* Create the summary number display panel, add it to the applet content pane and set its dimensions. */ W = 100; H = 30; img3 = new img3panel(W,H,bg,this); cp.add(img3); img3.setBounds(12,253,W,H); /* Create panel for the applet' background image, add it to the applet's content pane and set its size to the full size of the applet window. */ wm = new worldmap(this); cp.add(wm); wm.setBounds(0,0,545,295); /* 'repaint()' is invoked here to cause everything else in the GUI to be displayed on top of the back- ground in the right layer sequence. */ repaint(); lp = 1; // initiates the index loading process Init2finished = true; // phase 2 of the initialization has finished } /* THE FOLLOWING 4 METHODS HANDLE THE CONTROL OF THE DOWNLOAD- ING PROCESSES FOR THE INDEX FILE AND THE HTML FILE SUMMARIES */ // Called immediately init() has finished when the applet is first loaded. public void start() { TH.start(); } /* MANAGE INTERNET DATA TRANSFERS ON A SEPARATE THREAD: This method is the one called by the Internet data transfer thread. It handles the downloading of data from the remote server in two stages. Firstly it establishes a con- nection. Then it downloads the data in chunks. The size of a chunk is the amount of data it can read from the connection until read() becomes blocked (ie it returns -1). Upon experiencing blocking, the download process sleeps and then comes back to try again. */ public void run() { if(il.awaitingImages()) return; while(TH.isAlive()) { runLoop(); try { Thread.currentThread().sleep(50); } catch (InterruptedException e) {} } } private void runLoop() { if(!il.imagesLoaded()) // exit if media tracker not yet terminated return; if(!Init2finished) { // If 2nd phase of initialization not finished if(!Init2started) // if 2nd phase of initialisation not yet started scheduleInit2(); // request for it to be done on event dispatching return; // thread and exit here until Phase 2 initializa- } // tion has completed. switch(lp) { // THE 4 DOWNLOADING PHASES case 1: indexConnect(); break; // connect to index file on server case 2: indexLoad(); break; // download its content case 4: fileConnect(); break; // connect to HTML file on server case 5: fileLoad(); break; // download its summary } } public void stop() { TH = null; } // terminates execution of thread TH // Called after stop() before applet is removed from memory. public void destroy() { if(TH != null) // If Internet data transfer thread still exists TH = null; // jettison it into limbo to be collected as garbage. } /* The following two methods handle the initial connection to, and down loading of, the keyword index from the server. */ // 1) CONNECT TO THE INDEX RESOURCE ON THE SERVER private void indexConnect() { img2.showMsg("Opening connection to index...",false); try { /* If the parameter [or command line argument] flag JarredSearch is TRUE, load the index file from the applet's JAR file, otherwise load it from the remote server. */ if(JarredSearch) { /* Create stream to load file from the applet's jar file. Use an estimated maximum content length because it cannot readily access its current file length. */ I = getClass().getResourceAsStream("index.dat"); len = 131072; } /* Else, create a stream to load file from the remote server: Form the url of the remote index file, open a connection to it, get its length and create an input stream object to access the HTML file. */ else { url = new URL(cb + "index.dat"); URLConnection u = url.openConnection(); len = u.getContentLength(); I = u.getInputStream(); } INDEX = new byte[len]; // Create gigantic buffer to receive index, suc = 0; // initialise number of bytes so far received to zero, lp = 2; // advance to second phase of index loading process. } catch(Exception e){ // Catch any exception that may occur img2.showMsg( // during this phase. "indexConnect() " // note where the exception occurred + + e, true // the type of exception that occurred ); lp = 0; // set the process status to 'unrecoverable error' } } /* NOTE: in the following method, for testing the index loading pro- cess, in place of the message: "Index loaded." the following was sub- stituted: "Ready: len=" + len + " suc=" + suc + " Keywords: " + N */ // 2) DOWNLOAD THE INDEX private void indexLoad() { img2.showMsg("Loading index from server...",false); int k; // current byte being read() try { // if read() hasn't hit the current end of input stream /* If loading from the JAR file then read in as much of the file as possible (i.e. until the end of the currently available input has been reached) into the INDEX byte array. Else, we are loading from the remote server, so while the number of bytes received so far is less than the total content length, read in as much of the file as possible (i.e. until the end of the currently available input has been reached) into the INDEX byte array. */ if(JarredSearch) while((k = I.read()) != -1) INDEX[suc++] = (byte)k; else while(suc < len && (k = I.read()) != -1) INDEX[suc++] = (byte)k; // if the whole of the index has now been successfully downloaded if(JarredSearch || suc >= len) { a = getInt( 0); // get start byte of the keyword text stream N = (a >> 2) - 1; // and the total number of keywords in the index /* Skip the first 20 bytes holding a, b, c, d, e so that 'a' points to the start byte of the keyword text stream. Then note the start bytes of the: */ a += 20; b = getInt( 4) + 20; // filespec reference pointers, c = getInt( 8) + 20; // reference numbers, d = getInt(12) + 20; // text pointers, e = getInt(16) + 20; // filespec text stream. I.close(); // close the URL Connection's input stream lp = 3; // indicate thatindex loading completed OK img2.showMsg("Index loaded.",false); KF.requestFocus(); // set the cursor to the keyword text field } } /* If an error is caught during the above phase, note the type of exception and where it occurred. */ catch(Exception e) { img2.showMsg("indexLoad() " + e + " " + len + " " + suc, true); lp = 0; // set error condition } } /* The following method handles the connection to the currently selected HTML file. */ private void fileConnect() { img2.showMsg("Opening connection to HTML file...",false); try { /* Form the url of the HTML file to be summarised and then open a connection to it and get length of file. */ url = new URL(cb + getFS(fsi)); URLConnection u = url.openConnection(); len = u.getContentLength(); I = u.getInputStream(); // create an input stream from the HTML file suc = 0; // number of last byte successfully downloaded lp = 5; // set to 'index remote connection open' phase /* If an error is caught during the above phase, then note the type of exception and where it occurred. */ } catch(Exception e){ img2.showMsg("fileConnect() " + e,true);} lp = 0; // set error condition } /* The following two methods help parse out the content of the and <meta name=description content=""> tags */ // 1) Cut out possible leading and trailing '\n' in the HTML title. private void checkTitle() { if(!Title.equals("")) { if(Title.indexOf('\n') == 0) Title = Title.substring(1,Title.length()); if(Title.indexOf('\n') == Title.length() - 1) Title = Title.substring(0,Title.length() - 1); } } // 2) Find the <meta name=description content="blah blah blah"> tag. private boolean isDescriptionTag(String Tag) { // Convert the tag to lower-case-only for finding tags. String s = Tag.toLowerCase(); int p, q; // index pointers /* If it is not a META tag or it contains no name attribute or '=' sign or it is not a 'description' tag or it has no 'content' attribute or it has no content '=' sign or it has no opening quote mark or it has no closing quote mark, then return false. */ if(((p = s.indexOf("meta")) == -1) || ((p = s.indexOf("name",p + 4)) == -1) || ((p = s.indexOf('=',p + 4)) == -1) || ((p = s.indexOf("description", p + 1)) == -1) || ((p = s.indexOf("content", p + 8)) == -1) || ((p = s.indexOf('=',p + 7)) == -1) || ((p = s.indexOf('\"',p + 1)) == -1) || ((q = s.indexOf('\"',p + 1)) == -1)) return false; // Extract the 'description' text from the tag content and exit true. Descr = Tag.substring(p + 1, q); return true; } /* Data arrives across the Internet more slowly than the read() method can read bytes from the Input Stream. Whenever it starts reading, it therefore rapidly catches up with the end of the data materialising at the end of the input stream. It therefore hits what it thinks is the end of the input stream prematurely and gives up before all the data has come into the input stream from the server. When read() returns a -1, we must not regarded it as necessarily the end of the data, but merely an indication that it has read in all the data that has so far arrived from the server. At this point, fileLoad() therefore checks to see whether the number of bytes required from the HTML file has yet arrived. If not it returns to run() for a short sleep(). Otherwise it goes ahead on the basis that the completed file summary has arrived. DOWNLOAD THE HTML FILE'S <TITLE> AND <DESCRIPTION> */ private void fileLoad() { img2.showMsg("Loading HTML file summary...", false); boolean // THE FOLLOWING FLAGS ARE TRUE WHEN: inTag = false, // getting characters that are part of a tag name inTitle = false, // getting characters that are part of the title finished = false; // both title and description have been acquired String Tag = ""; // create and clear the tag string Title = ""; // clear title accumulator string Descr = ""; // clear description accumulator char z; // to hold current byte BEING read from input stream. try { // Enable the local capture of I/O exceptions /* While the whole HTML file is not yet downloaded and not yet hit end of what is currently available within the input stream. */ while(suc < len && (z = (char)I.read()) != -1) { suc++; // increment the count of successfully-received bytes if(z == '<') // If current character an initial tag-delimiter '<' inTag = true; // we are from now on currently inside a tag. /* Else, if the current character is the end of tag-delimiter '>' then convert the tag text to lower case for comparison. */ else if(z == '>') { String tag = Tag.toLowerCase(); /* If it is an initial title tag <title> indicate that we are now receiving title characters. Else, if it is a terminating title tag indicate that we are no longer reading in the title. Otherwise, if whole of the description meta tag has been received, indicate that downloading has finished and break out of while-loop. */ if(tag.equals("title")) inTitle = true; else if(tag.equals("/title")) inTitle = false; else if(isDescriptionTag(Tag)) { finished = true; break; } /* Else, if we've hit the tag without encountering a description meta tag, we won't find it by looking in the of the HTML file, so exit the while loop. */ else if(tag.equals("/head")) { Descr = "[No Description]"; finished = true; break; } Tag = ""; // Clear and reset for the next tag to be encountered. inTag = false; } else if(inTag) // Else, if inside a tag, Tag += z; // add current character to the tag name, else if(inTitle) // or if inside title, Title += z; // add current character to title text. } if(finished // if HTML summary has now been || suc >= len) { // completely downloaded, I.close(); // close the URL Connection's input stream checkTitle(); // remove any spurious new-line chars from title text /* Display the HTML file's TITLE and DESCRIPTION and URL on the applet panel and also the sequence number of the HTML document within those listed. */ img1.showSummary(Title,Descr); img2.showURL(url); img3.showNumber("" + (fsi + 1) + " of " + fse); lp = 6; // set phase number of this process to 6 = 'completed' } /* Otherwise, clear the applet's display areas and show the error message that downloading failed. */ else { img1.showSummary("", ""); img2.showMsg("This HTML file failed to load properly.", true); img3.showNumber(""); lp = 0; // set phase number of this process to 0 = 'failed' } } /* If an error is caught during the above phase, set error con- dition then note the type of exception and where it occurred. */ catch(Exception e) { lp = 0; img2.showMsg("fileLoad() " + e + len + " " + suc, true); } } /* The following method returns the index number of the given keyword or -1 if the keyword cannot be found in the index. */ private int find(String s) { // accepts the given keyword int x; // 3-state string comparison variable int n = N >> 1; // start with the keyword half way up the index int j = n; // jump size /* The following WHILE loop does the binary slice search. It starts with the middle keyword. If this is > the one sought, then the one being sought must be lower down the index. It therefore looks at the keyword half way between the middle one and the bottom of the index. If this is greater than the one sought, it looks at the one half way between this one and the bottom of the index. And so on. It halves how far it jumps each time until the size of the jump reaches zero. Note that each time the jump is halved, the result of the division is rounded down. This ensures that it will never look outside the limits of the index. However, it does mean that the jump can reach zero at the element next to the keyword we are looking for. So, though present, the keyword may not be found... */ while((j >>= 1) > 0) { // While jump size, having been halved, be > zero, // if s > retrieved keyword, we are too low down the index, if((x = s.compareTo(getKeyword(n))) > 0) n += j; // so split the partition // else if s < retrieved keyword, we must be too far up the index else if(x < 0) n -= j; // so split the partition else // otherwise return n; // return if s = the retrieved keyword } /* ...That is the reason for this following WHILE loop. This one examines the keywords jumping only one index element at a time. It compares the current keyword with the one sought and, if it does not match, moves up or down the index one keyword accordingly. It repeats this until it finds the keyword being sought, or until the sense of the comparison reverses. This marks absolutely the end of the search. */ boolean u = false; // true indicates going up! boolean d = false; // true indicates going down! while(!(u && d)) { // while not yet reversed direction along index // if s > retrieved keyword, we are too low down the index if((x = s.compareTo(getKeyword(n))) > 0) { if(++n >= N) // if moving up overshoots end of index, return -1; // return error code u = true; // indicate that we have moved up the index } else if(x < 0) { // if s < retrieved keyword, we're too far up index if(--n < 0) // if moving down overshoots start of index, return -1; // return error code d = true; // indicate that we have moved down the index } else return n; // Else, submitted keyword word matched retrieved key- } // word so return index number of retrieved keyword. return -1; // keyword cannot be found so return error code number } private int getInt(int i) { // convert a 4-byte train to an interger /* For each of the 4 bytes: 1) Promote each byte to an int, 2) Chop off propagated 1s in case high bit set, 3) Shift it into its correct position in the int. */ int x0 = INDEX[i ]; x0 &= 0xff; x0 <<= 24; int x1 = INDEX[i + 1]; x1 &= 0xff; x1 <<= 16; int x2 = INDEX[i + 2]; x2 &= 0xff; x2 <<= 8; int x3 = INDEX[i + 3]; x3 &= 0xff; return x0 | x1 | x2 | x3; // 'or' them all together to } // return the complete integer // GET THE nth KEYWORD IN THE KEYWORD INDEX private String getKeyword(int n) { int p = getInt(20 + (n << 2)); // start pointer of the keyword's text int q = getInt(20 + (++n << 2)); // start pointer of next keyword's text String s = ""; // Add the characters, one at a time, to while(p < q) // string 's' from the (p)th character s += (char)INDEX[a + p++]; // upto and including the (q-1)th. return s; // The keyword is then in string 's'. } //RETRIEVE THE HTML FILE'S FILESPEC FROM THE INDEX private String getFS(int fsi) { /* Get the reference of the fsi(th) filespec in DESCENDING order of rank and form the byte reference of the required filespec. */ int o = RANK[fse - fsi - 1], p = fsp + (o << 1); int x2 = INDEX[p++]; // Make the two adjacent bytes x2 &= 0xff; // form a 16-bit short integer (short)rn x2 <<= 8; int x3 = INDEX[p]; x3 &= 0xff; int rn = x2 | x3; int // Form the start and end pointers of the filespec text. P = e + getInt(d + (rn << 2)), Q = e + getInt(d + (++rn << 2)); String s = ""; // Add the characters from the (P)th while(P < Q) // to the (Q-1)th to the string 's' s += (char)INDEX[P++]; return s; } /* The following 4 methods are the event handlers for the buttons and the text field. They are called by the public actionPerformed() method in normlbut.class which appears at the end of this listing. */ void wipe() { KF.setText(""); // wipe the text field KF.requestFocus(); // Request that the cursor be set } // to the keyword text field. void seek() { // SEARCHES ON UP TO 16 ENTERED KEYWORDS // Provided at least one of the entered keywords is found in the index, if((keywords = getKeyWords()) > 0) { fsi = 0; // start with the first HTML document in the shortlist lp = 4; // and initiate loading of this first HTML file } else { // otherwise (if keyword not found) fse = 0; // set the number of the filespecs to zero lp = 6; // set to post retrieval state img2.showMsg("No references found for: " + KeyWs,false); } } void next() { // THE 'NEXT' BUTTON WAS PRESSED if(fse > 0) { // provided there're some filespecs in shortlist if(fsi < (fse - 1)) // if not already at last filespec in shortlist fsi++; // increment to the next filespec else // else fsi = 0; // loop back to the beginning of the shortlist lp = 4; // initiate the loading of the next HTML file } } void prev() { // THE 'PREV' BUTTON WAS PRESSED if(fse > 0) { // provided there are some filespecs in the shortlist if(fsi > 0) // if not already at first filespec in shortlist fsi--; // decrement to the previous filespec else // else fsi = fse - 1; // loop back to the end of the shortlist lp = 4; // initiate the loading of the next HTML file } } /* For this to work, it is necessary that the browser allow pop-ups and the execution of JavaScript. Currently (April 2012), Firefox has a bug that causes it to take upto 5 minutes to execute the showDocument() instruction. */ void view() { // try to link to currently summarised document img2.showMsg("Document will appear in a new browser tab.",false); /* Check that the rigorous security functionality of modern browsers doesn't unconditionally block the "_blank" target option. */ ac.showDocument(url,"_blank"); } /* This method takes the text typed into the search field of the applet and splits it up into individual words. It looks up each word, as it goes, in the main keywords index to get its main index reference number. It then uses this to look up the 'start position' and 'extent' of the references to HTML files that contain this keyword and puts them in the FSP[] and FSE[] arrays respectively. There's no need to save the keyword itself or its index refer- ence. This method allows for the entered keywords to be separated by single or multiple or combinations of: space-chars, colons, semi-colons, commas, full-stops. */ private int getKeyWords() { /* Get typed-in keywords from Text entry field, trim off any leading or trailing 'spaces' and convert all letters to lower case. The 'space' added at the end is to make 'k' return correctly in all circumstances. */ KeyWs = removeAccents(KF.getText().trim().toLowerCase()) + ' '; int L = KeyWs.length(); // length of the typed-in keywords string if(L < 1) return 0; // return "no keywords entered" int // FOR EACH ENTERED KEYWORD: FSP[] = new int[L], // pointer to start of its index entries FSE[] = new int[L], // extent of index its entries k = 0, // number of keywords received from Text field x = 0; // index number within main index of current keyword char z; // current character taken from the typed-in keywords string String w = ""; // string for accumulating the current keyword boolean // THE FOLLOWING FLAGS ARE 'TRUE' WHEN: sp = false, // previous character was a space-character vc = false; // valid characters have already been encountered // For each character in the entered string of keywords for(int i = 0; i < L; i++) { z = KeyWs.charAt(i); // get next character of keywords string if((((z) == ' ') // if it is a space || (z == ':') // or a colon || (z == ',') // or a comma || (z == ';') // or a semi-colon || (z == '.')) // or a full stop && vc // AND valid chars have already been received && !sp) { // AND the previous character was not a space if((x = find(w)) != -1) { // then if the keyword 'w' appears // in the main keyword index, then /* Calculate position 'y', within the filespecs index, of the first filespec pertaining to the keyword 'w'. NOTE: b and c are class-wide variables: not locals. */ int y = c + getInt(b + (x << 2)); // Compute the number of filespecs pertaining to keyword 'w'. FSE[k] = (c + getInt(b + (++x << 2)) - y) >> 1; /* Store the position 'y', within the filespecs index, of the first filespec pertaining to the keyword 'w' in the array of pointers for the entered keywords. */ FSP[k++] = y; /* break out of the FOR-loop if we have reached the limit of 16 entered keywords. */ if(k > 15) break; } /* Clear the keyword string ready to accumulate next keyword and note that this character was a 'space'. */ w = ""; sp = true; /* Else if the character is between 0 and 9 inclusive or between A and Z inclusive or between a and z inclusive . */ } else if(((z >= '0') && (z <= '9')) || ((z >= 'A') && (z <='Z')) || ((z >= 'a') && (z <= 'z'))) { w += z; // add new character to the current keyword string vc = true; // say that a valid character has been encountered sp = false; // clear 'space' flag for when next keyword extracted } } /* This method continues by sorting the references of the HTML docu- ments containing the first keyword into an order of ranking according to how many of the other entered keywords each document contains. */ fsp = FSP[0]; /* Set the HTML document reference pointer to the start of the HTML document references pertaining to the first keyword. */ fse = FSE[0]; // number of HTML documents containing the first keyword RANK = new int[fse]; /* Create a ranking array to contain the OFFSET for each HTML document in order of rank. */ /* First make the order of rank of the HTML documents the same as their order of occurrence within the main index. */ for(int i = 0; i < fse; i++) RANK[i] = i; if(k < 2) // return if only one keyword was entered return k; SIGF = new int[fse]; /* Create an array to contain the significance ranking of each HTML document containing the current keyword. */ for(int i = 1; i < k; i++) { // for the 2nd keyword onwards... int J = FSE[i]; // number of index entries for the keyword /* for each of the HTML documents containing the first keyword, get its filespec reference number. */ for(int h = 0; h < fse; h++) { int r = getFSref(fsp,h); /* then, for each HTML document containing the i(th) keyword, if it is the same HTML document as contains the first key- word, increment the significance for this HTML document. */ for(int j = 0; j < J; j++) if(r == getFSref(FSP[i],j)) ++SIGF[h]; } } qs(0, fse - 1); // Sort the RANK[] index into order of significance of return k; // HTML documents and return number of keywords entered } /* UTF-8 uses 2 bytes for accented characters so the length of a string within Java can be different from the number of bytes put out in UTF-8. I have decreed that all accents shall be removed for the purpose of keyword searching. This ensures that all characters occupy only one byte in UTF-8 and I can therefore make the index about half the size it would have to be in Unicode. NOTE: simply zeroing the upper byte of the Java Unicode char does not work. It changes the actual letter. */ private static String removeAccents(String s){ s = s.replaceAll("[èéêë]","e"); s = s.replaceAll("[ûùúü]","u"); s = s.replaceAll("[ïîíì]","i"); s = s.replaceAll("[àáâã]","a"); s = s.replaceAll("[ôóòõ]","o"); s = s.replaceAll("[ç]","c"); return s; } /* NOTE THAT A FILESPEC'S POINTER VALUE IS UNIQUE TO IT ALONE. RETRIEVE THE HTML FILE'S FILESPEC REFERENCE FROM THE INDEX */ private int getFSref(int ptr, int ext) { // form the byte reference of the required filespec int p = ptr + (ext << 1); // make the two adjacent bytes form a 16-bit integer (short)rn int x2 = INDEX[p++]; x2 &= 0xff; x2 <<= 8; int x3 = INDEX[p]; x3 &= 0xff; int rn = x2 | x3; // return the start pointer of the filespec text return e + getInt(d + (rn << 2)); } /* The following method embodies C A R Hoare's Quick Sort algorithm. Note that it is a highly re-entrant method: it calls itself indefinitely. */ private void qs(int LO, int HI) { int lo = LO; // set moving lo to LO end of partition int hi = HI; // set moving hi to HI end of partition if (HI > LO) { // if the partition contains anything // get the content of the mid element of the partition int mid = SIGF[(LO + HI) >> 1]; while(lo <= hi) { // loop through array until indices cross /* While current lowest significance < midway significance, push lower sort boundary up by one element and while current highest keyword > midway keyword, pull upper sort boundary down by one element */ while(lo < HI && SIGF[lo] < mid) lo++; while(hi > LO && SIGF[hi] > mid) hi--; if(lo <= hi) { // IF LOW INDEX <= HIGH INDEX SWAP THEIR 'CONTENTS' /* Sort by significance: get the index (offset-extent-filenum) of the lo element, then put the index of the hi element i the lo ele- ment and the index of the lo element in the hi element. */ int x = SIGF[lo]; SIGF[lo] = SIGF[hi]; SIGF[hi] = x; /* Shift the rank order along with it: get the index (offset-extent- filenum) of the lo element, put the index of the hi element in the lo element and put the index of the lo ele ment in the hi element.*/ x = RANK[lo]; RANK[lo] = RANK[hi]; RANK[hi] = x; /* Push lower sort boundary up by one element and pull upper sort boundary down by one element. */ lo++; hi--; } } if(LO < hi) // If hi not yet reached start of file qs(LO, hi); // sort lower partition, and if(lo < HI) // if lo not yet reached end of file qs(lo, HI); // sort upper partition. } } } // end of search.class /* The following class creates instances of ActionListener. It is used by this applet to create and service an action event for each of the con- trol buttons plus one for the text field. The underlying system calls the actionPerformed() method below every time a button is pushed or carriage return is hit from the keyword entry text field. The method is invoked upon the instance of the following class corresponding to that particular button. The value of the variable 'id' at the time is there- fore that which corresponds to the button concerned. This causes the appropriate case statement to be executed, thus passing control to the appropriate handling method above. */ class normlbut implements ActionListener { static final int TEXT = 0; // 'carriage-return hit in text field' event static final int SRCH = 1; // 'search button pressed' event static final int WIPE = 2; // 'clear button pressed' event static final int NEXT = 3; // 'next button pressed' event static final int PREV = 4; // 'prev button pressed' event static final int VIEW = 5; // 'view button pressed' event int id; // one of the above search ap; // the application that called: always the above applet! public normlbut(int id, search ap) { // constructor for a new command this.id = id; this.ap = ap; } // CALLED BY THE JAVA SYSTEM WHEN ONE OF THE BUTTONS HAS BEEN CLICKED public void actionPerformed(ActionEvent e) { switch(id) { // id of this instance of ActionEvent case TEXT: ap.seek(); break; // carriage-return from the text field case SRCH: ap.seek(); break; // the 'search' button case WIPE: ap.wipe(); break; // the 'clear' button case NEXT: ap.next(); break; // the 'next' button case PREV: ap.prev(); break; // the 'prev' button case VIEW: ap.view(); break; // the 'view document' button } } }