/** * Search Engine Applet 1.0.1 * @author Robert John Morton YE572246C * @version 22 October 1999 modified 10 July 2009, 10 April 2012 Modified to JFrame operation Sat 04 Mar 2017 10:47:52 BRT * @copyright Robert John Morton (all rights reserved) */ /* This applet requires a specialised index contained in a data file called index.dat located in the /website directory of the web site on the server. It requires no server-side executables or scripts. It can search on the up to 16 entered keywords. */ import javax.swing.*; // Swing GUI import java.awt.*; // for graphics operations (GUI) import java.awt.event.*; // for the new-fangled 1.1 event handling import java.awt.image.BufferedImage; import java.net.*; // for downloading data from the remote server import java.io.*; // for stream handling for the above public class search extends JPanel implements Runnable { private static final Font // for KEYWORDS: label and text entry field pf = new Font("Sans",Font.BOLD,14), bf = new Font("Sans",Font.BOLD,16); private static final Color bg = new Color(244,244,244), // panels background colour BG = new Color(238,238,238); // applet background colour static BufferedImage IMG0, // main background image IMG1, // background for the title + summary panel IMG2, // background for the message/url panel IMG3; // background for the summary number panel private imgldr il; // instance reference to the image loader private worldmap wm; // panel for background image private boolean Init2started = false, Init2finished = false; // true when the GUI has been built private int XE = 549, // Horizontal extent of window and JFrame. 545 YE = 320, // Vertical extent of window and JFrame. 295 ls = 0, // load switch N, // total number of keywords in index a, // byte number at which keyword text stream starts b, // byte number at which filespec reference pointers start c, // byte number at which filespec reference numbers start d, // byte number at which filespec text pointers start e, // byte number at which filespec text stream starts keywords = 0, // current number of keywords typed into Text field RANK[], // holds order of rank for web pages in shortlist SIGF[], // significance rating of each HTML file in shortlist fsp = 0, // points to ref num of first filespec for primary keyword fse = 0, // number of filespecs pertaining to primary keyword fsi = 0, // filespec index for given keyword set lp = 0, // indicates current download phase len = 0, // length of the remote item being loaded suc = 0, // number of bytes of the above successfully downloaded fh, space, // font height and width of a space Ty = 0; // vertical position of current line of text private byte INDEX[]; // gigantic byte array to hold the downloaded index data private String res, // name of resource being downloaded E = "", // Exception during downloading + method where it occurred Title = "", // HTML Title of current document Descr = "", // HTML Description of current document cb = "", // code base URL - where this applet's class file came from db = "", // document base URL for retrieved HTML files wp = "", // full web page address including protocol KeyWs = ""; /* entered keywords (global so that it can be displayed in message) */ private URL url; // url of current HTML file private volatile Thread TH; // ref for a separate run() thread private InputStream I; // input stream for downloading private JLabel KL; // label for keyword entry field private JTextField KF; // keyword entry field on app panel private JButton CB, SB, NB, PB, VB; // for Search, Clear, Next, Prev, View buttons private img1panel img1; // instance references for titl & summary panel private img2panel img2; // instance references for message panel private img3panel img3; // instance references for summary number panel search(int XE, int YE, int ls, String cb, String db) { // CONSTRUCTOR this.XE = XE; this.YE = YE; this.ls = ls; this.cb = cb; this.db = db; setLayout(null); // to allow the control panels to be laid out manually setFont(pf); // set font for Swing widgets // CREATE THE REQUIRED SWING WIDGETS KL = new JLabel("KEYWORDS:"); KL.setFont(bf); KF = new JTextField(); KF.setFont(pf); CB = new JButton(); CB.setText("Clear"); SB = new JButton(); SB.setText("Search"); NB = new JButton(); NB.setText("Next"); PB = new JButton(); PB.setText("Prev"); VB = new JButton(); VB.setText( "View Document"); // ADD THEM TO THIS JPANEL AND SET THEIR DIMENSIONS add(KL); KL.setBounds(12,12,117,30); add(KF); KF.setBounds(129,12,240,30); add(CB); CB.setBounds(463,12,70,30); add(SB); SB.setBounds(381,12,70,30); add(NB); NB.setBounds(264,253,60,30); add(PB); PB.setBounds(336,253,60,30); add(VB); VB.setBounds(408,253,125,30); // SET THEIR MARGINS WHERE APPROPRIATE CB.setMargin(new Insets(3,3,3,3)); SB.setMargin(new Insets(3,3,3,3)); NB.setMargin(new Insets(3,3,3,3)); PB.setMargin(new Insets(3,3,3,3)); VB.setMargin(new Insets(3,3,3,3)); // CREATE ACTION LISTENERS FOR THEM KF.addActionListener(new normlbut(normlbut.TEXT,this)); //c/r text field CB.addActionListener(new normlbut(normlbut.WIPE,this)); //'Clear' button SB.addActionListener(new normlbut(normlbut.SRCH,this)); //'Search' button NB.addActionListener(new normlbut(normlbut.NEXT,this)); //'Next' button PB.addActionListener(new normlbut(normlbut.PREV,this)); //'Prev' button VB.addActionListener(new normlbut(normlbut.VIEW,this)); //'View Document' int // create the message display panel W = 521, H = 30; img2 = new img2panel(W,H,bg,this); add(img2); img2.setBounds(12,213,W,H); il = new imgldr(this,img2,ls,cb); // create and start image loader TH = new Thread(this); // thread for downloading data from server TH.start(); } /* SECOND PHASE OF INITIALISATION: This can only be done after the background image for the applet has finished downloading from the server. Since it involves the creation of a Swing JPanel, it must be executed by the Event Dispatching thread and not the local run() thread from where the process must be invoked. */ private void scheduleInit2() { Init2started = true; // phase 2 initialisation has been scheduled try { javax.swing.SwingUtilities.invokeAndWait( new Runnable() { public void run(){init2();} } ); } catch(Exception e) { System.out.println("createGUI phase 2 didn't successfully complete"); } } private void init2() { // create the map and nav information panels int // create the main title and summary display panel W = 521, H = 151; img1 = new img1panel(W,H,bg,this); add(img1); img1.setBounds(12,52,W,H); // create the summary number display panel W = 100; H = 30; img3 = new img3panel(W,H,bg,this); add(img3); img3.setBounds(12,253,W,H); // create panel for the applet' background image wm = new worldmap(this); add(wm); // add it to this JPanel wm.setBounds(0,0,545,295); // set its size to full size of JPanel System.out.println("Images installed."); repaint(); // to display everything on top of background image lp = 1; // start the index loading process Init2finished = true; // phase 2 of the initialization has finished } /* THE FOLLOWING 4 METHODS HANDLE THE CONTROL OF THE DOWNLOADING PROCESSES FOR THE INDEX FILE AND THE HTML FILE SUMMARIES */ /* The following method is the one called by the Internet data transfer thread [the main run() thread]. It handles the downloading of data from the remote server in two stages. Firstly it establishes a connection. Then it downloads the data in chunks. The size of a chunk is the amount of data it can read from the connection until read() becomes blocked (ie it returns -1). Upon experiencing blocking, the download process sleeps and then comes back to try again. */ public void run() { // MANAGE INTERNET DATA TRANSFERS while(TH != null) { // while this thread exists runloop(); try{Thread.sleep(50);} // sleep before trying again catch(InterruptedException e){} // catch any external interrupt } } private void runloop() { if(il.awaitingImages()) // if images are still loading return; // exit if(!il.imagesLoaded()) // if media tracker not yet terminated return; // exit if(!Init2finished) { // if phase 2 init not yet finished if(!Init2started) // If phase 2 init not yet started scheduleInit2(); // do on event-dispatching thread return; // exit here until Phase 2 init done } switch(lp) { // THE 4 DOWNLOADING PHASES case 1: indexConnect(); break; // connect to index file on server case 2: indexLoad(); break; // download its content case 4: fileConnect(); break; // connect to HTML file on server case 5: fileLoad(); break; // download its summary } } /* The following two methods handle the initial connection to, and downloading of, the keyword index from the server. */ private void indexConnect() { // CONNECT TO INDEX RESOURCE ON THE SERVER img2.showMsg("Opening connection to index...", false); String fn = "index.dat"; try { switch(ls) { /* Load from index.dat file located in local the local dev- elopment directory. The index.dat file is thus in the same directory as the app was started from; so a further path extension within the web site is not necessary. */ case 0: len = (int)(new File(fn)).length(); I = new FileInputStream(fn); break; /* Create an input stream to load file from jar. Note that wherever the jar file may be [at the web site's top level or somewhere in the software sub directory] it is local to the class files; so a further path extension within the web site is not necessary. */ case 1: len = 131072; // generous maximum length for index.dat I = getClass().getResourceAsStream(fn); break; /* Create an input stream to load file from server. Note that the index file is located at the top level of the web site; so the file's name only need be prefixed with the URL of the web site. No further path extension within the web site is necessary. */ case 2: URLConnection u = new URL(cb + fn).openConnection(); I = u.getInputStream(); len = u.getContentLength(); } INDEX = new byte[len]; // create the gigantic buffer for the index suc = 0; // number of bytes so far downloaded lp = 2; // advance to the index loading phase } catch(Exception e) { lp = 0; // set unrecoverable error status img2.showMsg( "indexConnect() " // note where the exception occurred + e,true // and the kind of exception it was ); } } /* NOTE: in the following method, for testing the index loading process, in place of the message: "Index loaded." the following was substituted: "Ready: len=" + len + " suc=" + suc + " Keywords: " + N */ private void indexLoad() { // DOWNLOAD THE INDEX img2.showMsg("Loading index from server...", false); int k; // current byte being read() try { /* while read() hasn't yet hit the current end of input stream and the entire index has not yet been downloaded, add its new byte into the big byte array. */ if(ls == 1) { // len is known for server connections while((k = I.read()) != -1) INDEX[suc++] = (byte)k; len = suc; } else while(suc < len && (k = I.read()) != -1) INDEX[suc++] = (byte)k; // if the whole of the index has now been downloaded if(ls < 2 || suc >= len) { a = getInt( 0); // start byte of the keyword text stream N = (a >> 2) - 1; // total number of keywords in the index a += 20; // bias over first 20 bytes holding a,b,c,d,e b = getInt( 4) + 20; // start byte of filespec reference pointers c = getInt( 8) + 20; // start byte of filespec reference numbers d = getInt(12) + 20; // start byte of filespec text pointers e = getInt(16) + 20; // start byte of filespec text stream I.close(); // close the URL Connection's input stream lp = 3; // = index loading completed successfully img2.showMsg("Index loaded.",false); KF.requestFocus(); // request cursor be set to keyword text field } } catch(Exception e) { img2.showMsg( "indexLoad() " // note where the exception occurred + e + " " // and what kind of exception it was + len // total length of the data to be downloaded + " " + suc, // and how much of it was actually downloaded true ); lp = 0; // set the unrecoverable error condition } } /* The following method handles the connection to the currently selected HTML file. */ private void fileConnect() { // CONNECT TO REQUIRED html FILE ON SERVER img2.showMsg("Opening connection to HTML file...", false); /* Form the url of the file to be summarised, open a con- nection to the remote HTML file, create an input stream object to access it and get its content length. */ try { wp = db + getFS(fsi); // full URL of web page System.out.println(wp); URLConnection u = (new URL(wp)).openConnection(); I = u.getInputStream(); len = u.getContentLength(); suc = 0; // number of the latest byte downloaded lp = 5; // set 'index remote connection open' phase } catch(Exception e) { img2.showMsg( "fileConnect() " // note where the exception occurred + e, true // and what type of exception it was ); lp = 0; // set the loader's error condition } } /* The following two methods help parse out the content of the and <meta name=description content=""> tags. The first method cuts out a possible leading and trailing '\n' in HTML title. The second validates the content of the required HTML tags. */ private void checkTitle() { if(!Title.equals("")) { if(Title.indexOf('\n') == 0) Title = Title.substring(1,Title.length()); if(Title.indexOf('\n') == Title.length() - 1) Title = Title.substring(0,Title.length() - 1); } } private boolean isDescriptionTag(String Tag) { //receives <meta name=description content="blah blah blah"> String s = Tag.toLowerCase(); // Convert tag text to lower-case only int p, q; /* If it isn't a meta tag or a name tag or has no name '=' sign, description parameter, content parameter, content '=' sign or opening or closing quote marks, then it is invalid. */ if(((p = s.indexOf("meta")) == -1) || ((p = s.indexOf("name",p+4)) == -1) || ((p = s.indexOf('=',p+4)) == -1) || ((p = s.indexOf("description",p+1)) == -1) || ((p = s.indexOf("content",p+8)) == -1) || ((p = s.indexOf('=',p+7)) == -1) || ((p = s.indexOf('\"',p+1)) == -1) || ((q = s.indexOf('\"',p+1)) == -1)) return false; Descr = Tag.substring(p+1,q); // extract the tag content return true; // exit true } /* Data arrives across the Internet more slowly than the read() method can read bytes from the Input Stream. Whenever it starts reading, it therefore rapidly catches up with the end of the data materialising at the end of the input stream. It therefore hits what it thinks is the end of the input stream prematurely and gives up before all the data has come into the input stream from the server. When read() returns a -1, we must not regarded it as necessarily the end of the data, but merely an indication that it has read in all the data that has so far arrived from the server. At this point, fileLoad() therefore checks to see whether the number of bytes re- quired from the HTML file has yet arrived. If not it returns to run() for a short sleep(). Otherwise it goes ahead on the basis that the completed file summary has arrived. */ private void fileLoad() { // DOWNLOAD HTML FILE'S <TITLE> & <DESCRIPTION> img2.showMsg("Loading HTML file summary...", false); boolean inTag = false, // true = getting chars that are part of a tag name inTitle = false, // true = getting chars that are part of file title finished = false; // true = title + description have been acquired String Tag = ""; // clear the tag string Title = ""; // clear the title string Descr = ""; // clear the description string char z; // holds current byte read from input stream try { /* While the whole HTML file not yet downloaded and not yet hit current end of input stream... */ while(suc < len && (z = (char)I.read()) != -1) { suc++; // increment the count of successfully-received bytes if(z == '<') // if initial tag-delimiter encountered, inTag = true; // we are inside a tag else if(z == '>') { // if terminating tag-delimiter encountered String tag = Tag.toLowerCase(); // rationalise tag text to lower // case ready for comparison if(tag.equals("title")) // if it's an initial title tag <title>, inTitle = true; // set flag to show that we are now // receiving title characters. /* otherwise if it is a terminating title tag , set flag to show that we are no linger reading in the title. */ else if(tag.equals("/title")) inTitle = false; /* else if the whole meta tag containg the description has been received, set flag to indicate that download- ing has finished and terminate file input while(loop). */ else if(isDescriptionTag(Tag)) { finished = true; break; } /* We've hit the tag without encountering a description meta tag but we won't find it by looking in the of the HTML file, so set "finished" flag and exit the while-loop. */ else if(tag.equals("/head")) { Descr = "[No Description]"; finished = true; break; } Tag = ""; // clear the tag and reset the inTag flag inTag = false; // ready for the next tag to be encountered } else if(inTag) // Otherwise, if we are still inside a tag, Tag += z; // add the current character to tag's name. */ else if(inTitle) // if inside the title itself, Title += z; // add current character to title text } // end of the while() loop if(finished // If HTML summary has now been || suc >= len) { // completely downloaded, I.close(); // close the URL Connection's input stream checkTitle(); // remove any spurious new-line characters // from the title text /* Display the HTML page's title and description in the app's text window, its URL in the URL display field and that this HTML page is page number x of X in the set of pages the search engine has found as pertaining to the entered search expression. */ img1.showSummary(Title, Descr); img2.showMsg(wp, false); img3.showNumber("" + (fsi + 1) + " of " + fse); lp = 6; // indicates that the HTML file summary } // has been successfully loaded. /* Else, blank the app's text window, display a message in the message field to the effect that the search engine was unable to download any pages pertaining to the entered search expression and clear the page x of X field. */ else { img1.showSummary("", ""); img2.showMsg( "This HTML file summary failed to load completely.", true ); img3.showNumber(""); lp = 0; // Set download indicator back to the inactive state. } } catch(Exception e) { // if an exception occurred within foregoing code img2.showMsg( "fileLoad() " // say where the exception occurred + e // what kind of exception it was + len // the total length of the content + " " + suc, // and how much of it was actually downloaded true // before the exception ccurred ); lp = 0; // set download indicator back to the inactive state } } /* The following method returns the index number of the given keyword or -1 if the keyword cannot be found in the index. */ private int find(String s) { // accepts the given keyword 's' int x; // 3-state string comparison variable int n = N >> 1; // start with the keyword half way up the index int j = n; // jump size /* The following WHILE loop does the binary slice search. It starts with the middle keyword. If this is > the one sought, then the one being sought must be lower down the index. It therefore looks at the keyword half way between the middle one and the bottom of the index. If this is greater than the one sought, it looks at the one half way between this one and the bottom of the index. And so on. It halves how far it jumps each time until the size of the jump reaches 0. Note that each time the jump is halved, the result of the division is rounded down. This ensures that it will never look outside the limits of the index. However, it does mean that the jump can reach 0 at the element next not be found. */ // While the jump size, having been halved, be greater than zero... while((j >>= 1) > 0) { /* if 's' is greater than the retrieved keyword then we are too low down the index, so split the partition. */ if((x = s.compareTo(getKeyword(n))) > 0) n += j; /* otherwise, 's' is less than retrieved keyword so we are too far up the index, so split the partition. */ else if(x < 0) n -= j; else return n; // else return if 's' is the retrieved keyword } /* ...That is the reason for this following WHILE loop. This one examines the keywords jumping only one index element at a time. It compares the current keyword with the one sought and, if it does not match, moves up or down the index one keyword accordingly. It repeats this until it finds the keyword being sought, or until the sense of the comparison reverses. This marks absolutely the end of the search. */ boolean u = false; // true indicates going up! boolean d = false; // true indicates going down! while(!(u && d)) { // while not yet reversed direction along index /* if 's' be greater than the retrieved keyword, we are too low down the index, so we must move up. */ if((x = s.compareTo(getKeyword(n))) > 0) { if(++n >= N) // However, if moving up overshoots end return -1; // of index, return the error code. u = true; // But if it doesn't, set the 'up' flag to } // show that we have moved up the index. else if(x < 0) { // Else, if 's' be less than retrieved keyword, we're // too far up the index, so we must move down. if(--n < 0) // However, if moving down overshoots the return -1; // end of the index, return the error code. d = true; // But if it doesn't, set the 'down' flag to } // show that we have moved down the index. else // Otherwise, the submitted keyword must have matched return n; // the retrieved keyword, so return its index number. } return -1; // keyword could not be found so return error code } /* CONVERT A 4-BYTE TRAIN TO AN INTEGER: 1. Promote each byte to an int 2. Chop off propagated 1s in case high bit set 3. Shift it into its correct position in the int 4. 'or' them all together to form the complete integer */ private int getInt(int i) { int x0 = INDEX[i ]; x0 &= 0xff; x0 <<= 24; int x1 = INDEX[i + 1]; x1 &= 0xff; x1 <<= 16; int x2 = INDEX[i + 2]; x2 &= 0xff; x2 <<= 8; int x3 = INDEX[i + 3]; x3 &= 0xff; return x0 | x1 | x2 | x3; } /* GET THE Nth KEYWORD IN THE KEYWORD INDEX: Get the start pointer of the keyword's text, get the start pointer of the next keyword's text, return the keyword as a string. */ private String getKeyword(int n) { int p = getInt(20 + ( n << 2)); int q = getInt(20 + (++n << 2)); String s = ""; while(p < q) s += (char)INDEX[a + p++]; return s; } // RETRIEVE THE HTML FILE'S FILESPEC FROM THE INDEX private String getFS(int fsi) { // get the reference of fsi(th) filespec in DESCENDING order of rank int o = RANK[fse - fsi - 1]; // form the byte reference of the required filespec int p = fsp + (o << 1); // make the two adjacent bytes form a 16-bit integer (short)rn int x2 = INDEX[p++]; x2 &= 0xff; x2 <<= 8; int x3 = INDEX[p]; x3 &= 0xff; int rn = x2 | x3; // form the start pointer of the filespec text int P = e + getInt(d + ( rn << 2)); // form the 'end' pointer of the filespec text int Q = e + getInt(d + (++rn << 2)); String s = ""; while(P < Q) s += (char)INDEX[P++]; return s; } /* The following 4 methods are the event handlers for the buttons and the text field. They are called by the public actionPerformed() method in normlbut.class which appears at the end of this listing. */ void wipe() { KF.setText(""); // wipe the text field KF.requestFocus(); // request that the cursor be set } // to the keyword text field void seek() { // SEARCHES ON UP TO 16 ENTERED KEYWORDS // provided at least one of the entered keywords is found in the index if((keywords = getKeyWords()) > 0) { fsi = 0; // start with the first HTML document in the shortlist lp = 4; // initiate loading of the first HTML file } else { fse = 0; // set number of filespecs zero if keyword not found lp = 6; // set to post retrieval state img2.showMsg("No references found for: " + KeyWs,false); } } void next() { // THE 'NEXT' BUTTON WAS PRESSED if(fse > 0) { // provided filespecs exist if(fsi < (fse - 1)) // then if we are not already at the last one fsi++; // increment to the next filespec else fsi = 0; // else loop back to start of list lp = 4; // initiate loading of the next HTML file } } void prev() { // THE 'PREV' BUTTON WAS PRESSED if(fse > 0) { // provided filespecs exist if(fsi > 0 ) // then if we are not already at the first one fsi--; // decrement to the previous filespec else fsi = fse - 1; // else loop back to the end of the list lp = 4; // initiate loading of the previous HTML file } } /* For this to work, it is necessary that the browser allow pop-ups and the execution of JavaScript. Currently (April 2012), Firefox has a bug that causes it to take upto 5 minutes to execute the showDocument() instruction. */ void view() { // try to link to currently summarised document img2.showMsg("The page should appear in a new browser tab.", false); /* Applet Version: check that the rigorous security functionality of modern browsers doesn't unconditionally block the "_blank" target. ac.showDocument(url, "_blank"); JFrame application version: I am assured that this works in MS Windows. It certainly works in Linux with XFCE or Gnome desktops. */ try { if(Desktop.isDesktopSupported()) { Desktop.getDesktop().browse(new URI(wp)); } } catch(Exception e) { System.out.println("" + e); // display error message in terminal img2.showMsg("" + e,false); // display it also in message field. } } /* This method takes the text typed into the search field of the applet and splits it up into individual words. It looks up each word, as it goes, in the main keywords index to get its main index reference number. It then uses this to look up the 'start position' and 'extent' of the references to HTML files that contain this keyword and puts them in the FSP[] and FSE[] arrays respectively. There's no need to save the keyword itself or its index reference. This method allows for the entered keywords to be separated by single or multiple or combinations of: space-characters, colons, semi-colons, commas, full-stops. */ private int getKeyWords() { /* Get the typed-in keywords from the Text entry field. The 'space' is to make k return correctly in all circumstances. */ KeyWs = removeAccents(KF.getText().trim().toLowerCase()) + ' '; int L = KeyWs.length(); // length of the typed-in keywords string if(L < 1) return 0; // return "no keywords entered" int FSP[] = new int[L], // pointer to start of index entries // for each entered keyword FSE[] = new int[L], // extent of index entries // for each entered keyword k = 0, // number of keywords received from Text field x = 0; // index number within main index of current keyword char z; // current character taken from the typed-in keywords string String w = ""; // string for accumulating the current keyword boolean sp = false, // true if previous character was a space-character vc = false; // true if valid characters have already been encountered // for each character in the entered keyword string for(int i = 0; i < L; i++) { /* If the current character 'z' is one of the permitted keyword delimiter characters AND valid keyword characters have already been encountered AND the previous character was not a 'space': */ z = KeyWs.charAt(i); if((((z == ' ') || (z == ':') || (z == ',') || (z == ';') || (z == '.')) && vc && !sp)) { // NOTE: b and c are class-wide variables: not locals /* if the keyword w appears in the main keywords index, construct the pointer to reference number of the first filespec for this keyword and the number of filespec entries in the index that pertain to this keyword. Bail out if or when we reach the limit of 16 entered keywords. */ if((x = find(w)) != -1) { int y = c + getInt(b + (x << 2)); FSE[k] = (c + getInt(b + (++x << 2)) - y) >> 1; FSP[k++] = y; if(k > 15) break; } w = ""; // clear keyword string ready to accumulate next keyword sp = true; // and note that this character was a space } else if(((z >= '0') && (z <= '9')) || // is between 0 and 9 inclusive ((z >= 'A') && (z <='Z')) || // or between A and Z inclusive ((z >= 'a') && (z <= 'z')) // or between a and z inclusive ) { w += z; // add new character to the current keyword string vc = true; // indicate that a valid character has been encountered sp = false; // ready for when the next keyword will be extracted } } /* This method continues by sorting the references of the HTML documents containing the first keyword into an order of ranking according to how many of the other entered keywords each document contains. */ fsp = FSP[0]; // pointer to start of HTML document refs for first keyword fse = FSE[0]; // number of HTML documents containing the first keyword RANK = new int[fse]; // offset for each HTML document in order of rank for(int i = 0; i < fse; i++) // First, make the order of rank = the RANK[i] = i; // order of occurrence in the main index if(k < 2) return k; // return if only one keyword entered SIGF = new int[fse]; // for significance ranking for each HTML document for(int i = 1; i < k; i++) { // for the 2nd keyword onwards int J = FSE[i]; // number of index entries for the keyword // for each of the HTML documents containing the first keyword for(int h = 0; h < fse; h++) { int r = getFSref(fsp,h); // get its filespec reference number /* for each HTML document containing the i(th) keyword, if it is the same HTML document as contains the first keyword, increment the significance for this HTML document. */ for(int j = 0; j < J; j++) if(r == getFSref(FSP[i], j)) ++SIGF[h]; } } qs(0,fse - 1); // sort RANK[] index into the order of // significance of the HTML documents return k; // return the number of keywords entered } /* REMOVE ACCENTS FROM CHARACTERS WITHIN ENTERED KEYWORDS: UTF-8 uses 2 bytes for accented characters so the length of a string within Java can be different from the number of bytes put out in UTF-8. I have decreed that all accents shall be removed for the purpose of keyword searching. This ensures that all characters occupy only one byte in UTF-8 and I can therefore make the index about half the size it would have to be in Unicode. NOTE: simply zeroing the upper byte of the Java Unicode char does not work. It changes the actual letter. */ private static String removeAccents(String s){ s = s.replaceAll("[èéêë]","e"); s = s.replaceAll("[ûùúü]","u"); s = s.replaceAll("[ïîíì]","i"); s = s.replaceAll("[àáâã]","a"); s = s.replaceAll("[ôóòõ]","o"); s = s.replaceAll("[ç]","c"); return s; } /* NOTE THAT A FILESPEC'S POINTER VALUE IS UNIQUE TO IT ALONE RETRIEVE THE HTML FILE'S FILESPEC REFERENCE FROM INDEX */ private int getFSref(int ptr, int ext) { int p, x2, x3, rn; p = ptr + (ext << 1); // form the byte reference x2 = INDEX[p++]; // then make the two adjacent bytes x2 &= 0xff; // form a 16-bit integer (short)rn x2 <<= 8; x3 = INDEX[p]; x3 &= 0xff; rn = x2 | x3; // return the start pointer of the filespec text return e + getInt(d + (rn << 2)); } /* The following method embodies C A R Hoare's Quick Sort algorithm. Note that it is a highly re-entrant method: it calls itself indefinitely. */ private void qs(int LO, int HI) { int lo = LO, // set moving lo to LO end of partition hi = HI; // set moving hi to HI end of partition if(HI > LO) { // if the partition contains anything int mid = SIGF[(LO + HI) >> 1]; // get the content of the mid // element of the partition while(lo <= hi) { // loop through the array // until the indices cross /* While current lowest significance < midway significance push lower sort boundary up by one element and while current highest keyword > midway keyword pull upper sort boundary down by one element. */ while(lo < HI && SIGF[lo] < mid) lo++; while(hi > LO && SIGF[hi] > mid) hi--; if(lo <= hi) { // IF LOW INDEX <= HIGH INDEX SWAP THEIR 'CONTENTS' /* Sort by significance: get index (offset-extent-filenum) of lo element, put index of hi element in lo element put index of lo element in hi element */ int x = SIGF[lo]; SIGF[lo] = SIGF[hi]; SIGF[hi] = x; /* Shift along with it the rank order: get index (offset- extent-filenum) of lo element,put index of hi element in lo element, put index of lo element in hi element */ x = RANK[lo]; RANK[lo] = RANK[hi]; RANK[hi] = x; lo++; // push lower sort boundary up by one element hi--; // pull upper sort boundary down by one element } } /* if hi not yet reached start of file, sort lower partition and if lo not yet reached end of file, sort upper partition. */ if(LO < hi) qs(LO,hi); if(lo < HI) qs(lo,HI); } } } // end of search.class /* The following class creates instances of ActionListener. It is used by this applet to create and service an action event for each of the control buttons plus one for the text field. The underlying system calls the actionPerformed() method below every time a button is pushed or carriage return is hit from the keyword entry text field. The method is invoked upon the instance of the following class corresponding to that particular button. The value of the variable id at the time is therefore that which corresponds to the button concerned. This causes the appropriate case statement to be executed, thus passing control to the appropriate handling method above. */ class normlbut implements ActionListener { static final int TEXT = 0, // 'carriage-return hit in text field' event SRCH = 1, // 'search button pressed' event WIPE = 2, // 'clear button pressed' event NEXT = 3, // 'next button pressed' event PREV = 4, // 'prev button pressed' event VIEW = 5; // 'view button pressed' event int id; // one of the above search ap; // the app that called: always the above app! public normlbut(int id, search ap) { // constructor for a new command this.id = id; this.ap = ap; } // one of the buttons has been clicked public void actionPerformed(ActionEvent e) { switch(id) { // id of this instance of ActionEvent case TEXT: ap.seek(); break; // carriage-return from text field case SRCH: ap.seek(); break; // the 'search' button case WIPE: ap.wipe(); break; // the 'clear' button case NEXT: ap.next(); break; // the 'next' button case PREV: ap.prev(); break; // the 'prev' button case VIEW: ap.view(); // the 'view' button } } }