/**
* Search Engine Applet 1.0.1
* @author Robert John Morton YE572246C
* @version 22 October 1999 modified 10 July 2009, 10 April 2012
Modified to JFrame operation Sat 04 Mar 2017 10:47:52 BRT
* @copyright Robert John Morton (all rights reserved) */
/* This applet requires a specialised index contained in a data file called
index.dat located in the /website directory of the web site on the server.
It requires no server-side executables or scripts. It can search on the
up to 16 entered keywords. */
import javax.swing.*; // Swing GUI
import java.awt.*; // for graphics operations (GUI)
import java.awt.event.*; // for the new-fangled 1.1 event handling
import java.awt.image.BufferedImage;
import java.net.*; // for downloading data from the remote server
import java.io.*; // for stream handling for the above
public class search extends JPanel implements Runnable {
private static final Font // for KEYWORDS: label and text entry field
pf = new Font("Sans",Font.BOLD,14),
bf = new Font("Sans",Font.BOLD,16);
private static final Color
bg = new Color(244,244,244), // panels background colour
BG = new Color(238,238,238); // applet background colour
static BufferedImage
IMG0, // main background image
IMG1, // background for the title + summary panel
IMG2, // background for the message/url panel
IMG3; // background for the summary number panel
private imgldr il; // instance reference to the image loader
private worldmap wm; // panel for background image
private boolean
Init2started = false,
Init2finished = false; // true when the GUI has been built
private int
XE = 549, // Horizontal extent of window and JFrame. 545
YE = 320, // Vertical extent of window and JFrame. 295
ls = 0, // load switch
N, // total number of keywords in index
a, // byte number at which keyword text stream starts
b, // byte number at which filespec reference pointers start
c, // byte number at which filespec reference numbers start
d, // byte number at which filespec text pointers start
e, // byte number at which filespec text stream starts
keywords = 0, // current number of keywords typed into Text field
RANK[], // holds order of rank for web pages in shortlist
SIGF[], // significance rating of each HTML file in shortlist
fsp = 0, // points to ref num of first filespec for primary keyword
fse = 0, // number of filespecs pertaining to primary keyword
fsi = 0, // filespec index for given keyword set
lp = 0, // indicates current download phase
len = 0, // length of the remote item being loaded
suc = 0, // number of bytes of the above successfully downloaded
fh, space, // font height and width of a space
Ty = 0; // vertical position of current line of text
private byte
INDEX[]; // gigantic byte array to hold the downloaded index data
private String
res, // name of resource being downloaded
E = "", // Exception during downloading + method where it occurred
Title = "", // HTML Title of current document
Descr = "", // HTML Description of current document
cb = "", // code base URL - where this applet's class file came from
db = "", // document base URL for retrieved HTML files
wp = "", // full web page address including protocol
KeyWs = ""; /* entered keywords (global so that it can be
displayed in message) */
private URL url; // url of current HTML file
private volatile Thread TH; // ref for a separate run() thread
private InputStream I; // input stream for downloading
private JLabel KL; // label for keyword entry field
private JTextField KF; // keyword entry field on app panel
private JButton
CB, SB, NB, PB, VB; // for Search, Clear, Next, Prev, View buttons
private img1panel img1; // instance references for titl & summary panel
private img2panel img2; // instance references for message panel
private img3panel img3; // instance references for summary number panel
search(int XE, int YE, int ls, String cb, String db) { // CONSTRUCTOR
this.XE = XE;
this.YE = YE;
this.ls = ls;
this.cb = cb;
this.db = db;
setLayout(null); // to allow the control panels to be laid out manually
setFont(pf); // set font for Swing widgets
// CREATE THE REQUIRED SWING WIDGETS
KL = new JLabel("KEYWORDS:"); KL.setFont(bf);
KF = new JTextField(); KF.setFont(pf);
CB = new JButton(); CB.setText("Clear");
SB = new JButton(); SB.setText("Search");
NB = new JButton(); NB.setText("Next");
PB = new JButton(); PB.setText("Prev");
VB = new JButton(); VB.setText( "View Document");
// ADD THEM TO THIS JPANEL AND SET THEIR DIMENSIONS
add(KL); KL.setBounds(12,12,117,30);
add(KF); KF.setBounds(129,12,240,30);
add(CB); CB.setBounds(463,12,70,30);
add(SB); SB.setBounds(381,12,70,30);
add(NB); NB.setBounds(264,253,60,30);
add(PB); PB.setBounds(336,253,60,30);
add(VB); VB.setBounds(408,253,125,30);
// SET THEIR MARGINS WHERE APPROPRIATE
CB.setMargin(new Insets(3,3,3,3));
SB.setMargin(new Insets(3,3,3,3));
NB.setMargin(new Insets(3,3,3,3));
PB.setMargin(new Insets(3,3,3,3));
VB.setMargin(new Insets(3,3,3,3));
// CREATE ACTION LISTENERS FOR THEM
KF.addActionListener(new normlbut(normlbut.TEXT,this)); //c/r text field
CB.addActionListener(new normlbut(normlbut.WIPE,this)); //'Clear' button
SB.addActionListener(new normlbut(normlbut.SRCH,this)); //'Search' button
NB.addActionListener(new normlbut(normlbut.NEXT,this)); //'Next' button
PB.addActionListener(new normlbut(normlbut.PREV,this)); //'Prev' button
VB.addActionListener(new normlbut(normlbut.VIEW,this)); //'View Document'
int // create the message display panel
W = 521,
H = 30;
img2 = new img2panel(W,H,bg,this);
add(img2);
img2.setBounds(12,213,W,H);
il = new imgldr(this,img2,ls,cb); // create and start image loader
TH = new Thread(this); // thread for downloading data from server
TH.start();
}
/* SECOND PHASE OF INITIALISATION: This can only be done after the
background image for the applet has finished downloading from the
server. Since it involves the creation of a Swing JPanel, it must
be executed by the Event Dispatching thread and not the local run()
thread from where the process must be invoked. */
private void scheduleInit2() {
Init2started = true; // phase 2 initialisation has been scheduled
try {
javax.swing.SwingUtilities.invokeAndWait(
new Runnable() {
public void run(){init2();}
}
);
} catch(Exception e) {
System.out.println("createGUI phase 2 didn't successfully complete");
}
}
private void init2() { // create the map and nav information panels
int // create the main title and summary display panel
W = 521,
H = 151;
img1 = new img1panel(W,H,bg,this);
add(img1);
img1.setBounds(12,52,W,H);
// create the summary number display panel
W = 100; H = 30;
img3 = new img3panel(W,H,bg,this);
add(img3);
img3.setBounds(12,253,W,H);
// create panel for the applet' background image
wm = new worldmap(this);
add(wm); // add it to this JPanel
wm.setBounds(0,0,545,295); // set its size to full size of JPanel
System.out.println("Images installed.");
repaint(); // to display everything on top of background image
lp = 1; // start the index loading process
Init2finished = true; // phase 2 of the initialization has finished
}
/* THE FOLLOWING 4 METHODS HANDLE THE CONTROL OF THE DOWNLOADING
PROCESSES FOR THE INDEX FILE AND THE HTML FILE SUMMARIES */
/* The following method is the one called by the Internet data transfer
thread [the main run() thread]. It handles the downloading of data from
the remote server in two stages. Firstly it establishes a connection.
Then it downloads the data in chunks. The size of a chunk is the amount
of data it can read from the connection until read() becomes blocked (ie
it returns -1). Upon experiencing blocking, the download process sleeps
and then comes back to try again. */
public void run() { // MANAGE INTERNET DATA TRANSFERS
while(TH != null) { // while this thread exists
runloop();
try{Thread.sleep(50);} // sleep before trying again
catch(InterruptedException e){} // catch any external interrupt
}
}
private void runloop() {
if(il.awaitingImages()) // if images are still loading
return; // exit
if(!il.imagesLoaded()) // if media tracker not yet terminated
return; // exit
if(!Init2finished) { // if phase 2 init not yet finished
if(!Init2started) // If phase 2 init not yet started
scheduleInit2(); // do on event-dispatching thread
return; // exit here until Phase 2 init done
}
switch(lp) { // THE 4 DOWNLOADING PHASES
case 1: indexConnect(); break; // connect to index file on server
case 2: indexLoad(); break; // download its content
case 4: fileConnect(); break; // connect to HTML file on server
case 5: fileLoad(); break; // download its summary
}
}
/* The following two methods handle the initial connection
to, and downloading of, the keyword index from the server. */
private void indexConnect() { // CONNECT TO INDEX RESOURCE ON THE SERVER
img2.showMsg("Opening connection to index...", false);
String fn = "index.dat";
try {
switch(ls) {
/* Load from index.dat file located in local the local dev-
elopment directory. The index.dat file is thus in the same
directory as the app was started from; so a further path
extension within the web site is not necessary. */
case 0:
len = (int)(new File(fn)).length();
I = new FileInputStream(fn);
break;
/* Create an input stream to load file from jar. Note that wherever
the jar file may be [at the web site's top level or somewhere in
the software sub directory] it is local to the class files; so a
further path extension within the web site is not necessary. */
case 1:
len = 131072; // generous maximum length for index.dat
I = getClass().getResourceAsStream(fn);
break;
/* Create an input stream to load file from server. Note that the
index file is located at the top level of the web site; so the
file's name only need be prefixed with the URL of the web site.
No further path extension within the web site is necessary. */
case 2:
URLConnection u = new URL(cb + fn).openConnection();
I = u.getInputStream();
len = u.getContentLength();
}
INDEX = new byte[len]; // create the gigantic buffer for the index
suc = 0; // number of bytes so far downloaded
lp = 2; // advance to the index loading phase
} catch(Exception e) {
lp = 0; // set unrecoverable error status
img2.showMsg(
"indexConnect() " // note where the exception occurred
+ e,true // and the kind of exception it was
);
}
}
/* NOTE: in the following method, for testing the index loading process,
in place of the message: "Index loaded." the following was substituted:
"Ready: len=" + len + " suc=" + suc + " Keywords: " + N */
private void indexLoad() { // DOWNLOAD THE INDEX
img2.showMsg("Loading index from server...", false);
int k; // current byte being read()
try {
/* while read() hasn't yet hit the current end of input
stream and the entire index has not yet been downloaded,
add its new byte into the big byte array. */
if(ls == 1) { // len is known for server connections
while((k = I.read()) != -1)
INDEX[suc++] = (byte)k;
len = suc;
} else
while(suc < len && (k = I.read()) != -1)
INDEX[suc++] = (byte)k;
// if the whole of the index has now been downloaded
if(ls < 2 || suc >= len) {
a = getInt( 0); // start byte of the keyword text stream
N = (a >> 2) - 1; // total number of keywords in the index
a += 20; // bias over first 20 bytes holding a,b,c,d,e
b = getInt( 4) + 20; // start byte of filespec reference pointers
c = getInt( 8) + 20; // start byte of filespec reference numbers
d = getInt(12) + 20; // start byte of filespec text pointers
e = getInt(16) + 20; // start byte of filespec text stream
I.close(); // close the URL Connection's input stream
lp = 3; // = index loading completed successfully
img2.showMsg("Index loaded.",false);
KF.requestFocus(); // request cursor be set to keyword text field
}
}
catch(Exception e) {
img2.showMsg(
"indexLoad() " // note where the exception occurred
+ e + " " // and what kind of exception it was
+ len // total length of the data to be downloaded
+ " " + suc, // and how much of it was actually downloaded
true
);
lp = 0; // set the unrecoverable error condition
}
}
/* The following method handles the connection
to the currently selected HTML file. */
private void fileConnect() { // CONNECT TO REQUIRED html FILE ON SERVER
img2.showMsg("Opening connection to HTML file...", false);
/* Form the url of the file to be summarised, open a con-
nection to the remote HTML file, create an input stream
object to access it and get its content length. */
try {
wp = db + getFS(fsi); // full URL of web page
System.out.println(wp);
URLConnection u = (new URL(wp)).openConnection();
I = u.getInputStream();
len = u.getContentLength();
suc = 0; // number of the latest byte downloaded
lp = 5; // set 'index remote connection open' phase
}
catch(Exception e) {
img2.showMsg(
"fileConnect() " // note where the exception occurred
+ e, true // and what type of exception it was
);
lp = 0; // set the loader's error condition
}
}
/* The following two methods help parse out the content of
the
and tags.
The first method cuts out a possible leading and trailing '\n' in HTML
title. The second validates the content of the required HTML tags. */
private void checkTitle() {
if(!Title.equals("")) {
if(Title.indexOf('\n') == 0)
Title = Title.substring(1,Title.length());
if(Title.indexOf('\n') == Title.length() - 1)
Title = Title.substring(0,Title.length() - 1);
}
}
private boolean isDescriptionTag(String Tag) {
//receives
String s = Tag.toLowerCase(); // Convert tag text to lower-case only
int p, q;
/* If it isn't a meta tag or a name tag or has no name '=' sign,
description parameter, content parameter, content '=' sign or
opening or closing quote marks, then it is invalid. */
if(((p = s.indexOf("meta")) == -1)
|| ((p = s.indexOf("name",p+4)) == -1)
|| ((p = s.indexOf('=',p+4)) == -1)
|| ((p = s.indexOf("description",p+1)) == -1)
|| ((p = s.indexOf("content",p+8)) == -1)
|| ((p = s.indexOf('=',p+7)) == -1)
|| ((p = s.indexOf('\"',p+1)) == -1)
|| ((q = s.indexOf('\"',p+1)) == -1))
return false;
Descr = Tag.substring(p+1,q); // extract the tag content
return true; // exit true
}
/* Data arrives across the Internet more slowly than the read() method can
read bytes from the Input Stream. Whenever it starts reading, it therefore
rapidly catches up with the end of the data materialising at the end of the
input stream. It therefore hits what it thinks is the end of the input
stream prematurely and gives up before all the data has come into the input
stream from the server. When read() returns a -1, we must not regarded it
as necessarily the end of the data, but merely an indication that it has
read in all the data that has so far arrived from the server. At this
point, fileLoad() therefore checks to see whether the number of bytes re-
quired from the HTML file has yet arrived. If not it returns to run() for a
short sleep(). Otherwise it goes ahead on the basis that the completed file
summary has arrived. */
private void fileLoad() { // DOWNLOAD HTML FILE'S &
img2.showMsg("Loading HTML file summary...", false);
boolean
inTag = false, // true = getting chars that are part of a tag name
inTitle = false, // true = getting chars that are part of file title
finished = false; // true = title + description have been acquired
String
Tag = ""; // clear the tag string
Title = ""; // clear the title string
Descr = ""; // clear the description string
char z; // holds current byte read from input stream
try {
/* While the whole HTML file not yet downloaded
and not yet hit current end of input stream... */
while(suc < len && (z = (char)I.read()) != -1) {
suc++; // increment the count of successfully-received bytes
if(z == '<') // if initial tag-delimiter encountered,
inTag = true; // we are inside a tag
else if(z == '>') { // if terminating tag-delimiter encountered
String tag = Tag.toLowerCase(); // rationalise tag text to lower
// case ready for comparison
if(tag.equals("title")) // if it's an initial title tag ,
inTitle = true; // set flag to show that we are now
// receiving title characters.
/* otherwise if it is a terminating title tag , set
flag to show that we are no linger reading in the title. */
else if(tag.equals("/title"))
inTitle = false;
/* else if the whole meta tag containg the description
has been received, set flag to indicate that download-
ing has finished and terminate file input while(loop). */
else if(isDescriptionTag(Tag)) {
finished = true;
break;
}
/* We've hit the tag without encountering a description
meta tag but we won't find it by looking in the of the
HTML file, so set "finished" flag and exit the while-loop. */
else if(tag.equals("/head")) {
Descr = "[No Description]";
finished = true;
break;
}
Tag = ""; // clear the tag and reset the inTag flag
inTag = false; // ready for the next tag to be encountered
}
else if(inTag) // Otherwise, if we are still inside a tag,
Tag += z; // add the current character to tag's name. */
else if(inTitle) // if inside the title itself,
Title += z; // add current character to title text
} // end of the while() loop
if(finished // If HTML summary has now been
|| suc >= len) { // completely downloaded,
I.close(); // close the URL Connection's input stream
checkTitle(); // remove any spurious new-line characters
// from the title text
/* Display the HTML page's title and description in the app's
text window, its URL in the URL display field and that this HTML
page is page number x of X in the set of pages the search engine
has found as pertaining to the entered search expression. */
img1.showSummary(Title, Descr);
img2.showMsg(wp, false);
img3.showNumber("" + (fsi + 1) + " of " + fse);
lp = 6; // indicates that the HTML file summary
} // has been successfully loaded.
/* Else, blank the app's text window, display a message in
the message field to the effect that the search engine was
unable to download any pages pertaining to the entered
search expression and clear the page x of X field. */
else {
img1.showSummary("", "");
img2.showMsg(
"This HTML file summary failed to load completely.", true
);
img3.showNumber("");
lp = 0; // Set download indicator back to the inactive state.
}
}
catch(Exception e) { // if an exception occurred within foregoing code
img2.showMsg(
"fileLoad() " // say where the exception occurred
+ e // what kind of exception it was
+ len // the total length of the content
+ " " + suc, // and how much of it was actually downloaded
true // before the exception ccurred
);
lp = 0; // set download indicator back to the inactive state
}
}
/* The following method returns the index number of the given keyword
or -1 if the keyword cannot be found in the index. */
private int find(String s) { // accepts the given keyword 's'
int x; // 3-state string comparison variable
int n = N >> 1; // start with the keyword half way up the index
int j = n; // jump size
/* The following WHILE loop does the binary slice search. It starts with
the middle keyword. If this is > the one sought, then the one being
sought must be lower down the index. It therefore looks at the keyword
half way between the middle one and the bottom of the index. If this is
greater than the one sought, it looks at the one half way between this
one and the bottom of the index. And so on. It halves how far it jumps
each time until the size of the jump reaches 0. Note that each time the
jump is halved, the result of the division is rounded down. This ensures
that it will never look outside the limits of the index. However, it does
mean that the jump can reach 0 at the element next not be found. */
// While the jump size, having been halved, be greater than zero...
while((j >>= 1) > 0) {
/* if 's' is greater than the retrieved keyword then we
are too low down the index, so split the partition. */
if((x = s.compareTo(getKeyword(n))) > 0)
n += j;
/* otherwise, 's' is less than retrieved keyword so we
are too far up the index, so split the partition. */
else if(x < 0)
n -= j;
else return n; // else return if 's' is the retrieved keyword
}
/* ...That is the reason for this following WHILE loop. This one examines
the keywords jumping only one index element at a time. It compares the
current keyword with the one sought and, if it does not match, moves up
or down the index one keyword accordingly. It repeats this until it
finds the keyword being sought, or until the sense of the comparison
reverses. This marks absolutely the end of the search. */
boolean u = false; // true indicates going up!
boolean d = false; // true indicates going down!
while(!(u && d)) { // while not yet reversed direction along index
/* if 's' be greater than the retrieved keyword, we
are too low down the index, so we must move up. */
if((x = s.compareTo(getKeyword(n))) > 0) {
if(++n >= N) // However, if moving up overshoots end
return -1; // of index, return the error code.
u = true; // But if it doesn't, set the 'up' flag to
} // show that we have moved up the index.
else if(x < 0) { // Else, if 's' be less than retrieved keyword, we're
// too far up the index, so we must move down.
if(--n < 0) // However, if moving down overshoots the
return -1; // end of the index, return the error code.
d = true; // But if it doesn't, set the 'down' flag to
} // show that we have moved down the index.
else // Otherwise, the submitted keyword must have matched
return n; // the retrieved keyword, so return its index number.
}
return -1; // keyword could not be found so return error code
}
/* CONVERT A 4-BYTE TRAIN TO AN INTEGER:
1. Promote each byte to an int
2. Chop off propagated 1s in case high bit set
3. Shift it into its correct position in the int
4. 'or' them all together to form the complete integer */
private int getInt(int i) {
int x0 = INDEX[i ]; x0 &= 0xff; x0 <<= 24;
int x1 = INDEX[i + 1]; x1 &= 0xff; x1 <<= 16;
int x2 = INDEX[i + 2]; x2 &= 0xff; x2 <<= 8;
int x3 = INDEX[i + 3]; x3 &= 0xff;
return x0 | x1 | x2 | x3;
}
/* GET THE Nth KEYWORD IN THE KEYWORD INDEX: Get the start pointer of
the keyword's text, get the start pointer of the next keyword's text,
return the keyword as a string. */
private String getKeyword(int n) {
int p = getInt(20 + ( n << 2));
int q = getInt(20 + (++n << 2));
String s = "";
while(p < q) s += (char)INDEX[a + p++];
return s;
}
// RETRIEVE THE HTML FILE'S FILESPEC FROM THE INDEX
private String getFS(int fsi) {
// get the reference of fsi(th) filespec in DESCENDING order of rank
int o = RANK[fse - fsi - 1];
// form the byte reference of the required filespec
int p = fsp + (o << 1);
// make the two adjacent bytes form a 16-bit integer (short)rn
int x2 = INDEX[p++]; x2 &= 0xff; x2 <<= 8;
int x3 = INDEX[p]; x3 &= 0xff;
int rn = x2 | x3;
// form the start pointer of the filespec text
int P = e + getInt(d + ( rn << 2));
// form the 'end' pointer of the filespec text
int Q = e + getInt(d + (++rn << 2));
String s = "";
while(P < Q) s += (char)INDEX[P++];
return s;
}
/* The following 4 methods are the event handlers for the buttons and the
text field. They are called by the public actionPerformed() method in
normlbut.class which appears at the end of this listing. */
void wipe() {
KF.setText(""); // wipe the text field
KF.requestFocus(); // request that the cursor be set
} // to the keyword text field
void seek() { // SEARCHES ON UP TO 16 ENTERED KEYWORDS
// provided at least one of the entered keywords is found in the index
if((keywords = getKeyWords()) > 0) {
fsi = 0; // start with the first HTML document in the shortlist
lp = 4; // initiate loading of the first HTML file
} else {
fse = 0; // set number of filespecs zero if keyword not found
lp = 6; // set to post retrieval state
img2.showMsg("No references found for: " + KeyWs,false);
}
}
void next() { // THE 'NEXT' BUTTON WAS PRESSED
if(fse > 0) { // provided filespecs exist
if(fsi < (fse - 1)) // then if we are not already at the last one
fsi++; // increment to the next filespec
else
fsi = 0; // else loop back to start of list
lp = 4; // initiate loading of the next HTML file
}
}
void prev() { // THE 'PREV' BUTTON WAS PRESSED
if(fse > 0) { // provided filespecs exist
if(fsi > 0 ) // then if we are not already at the first one
fsi--; // decrement to the previous filespec
else
fsi = fse - 1; // else loop back to the end of the list
lp = 4; // initiate loading of the previous HTML file
}
}
/* For this to work, it is necessary that the browser allow pop-ups and
the execution of JavaScript. Currently (April 2012), Firefox has a bug
that causes it to take upto 5 minutes to execute the showDocument()
instruction. */
void view() { // try to link to currently summarised document
img2.showMsg("The page should appear in a new browser tab.", false);
/* Applet Version: check that the rigorous security functionality of
modern browsers doesn't unconditionally block the "_blank" target.
ac.showDocument(url, "_blank");
JFrame application version: I am assured that this works in MS Windows.
It certainly works in Linux with XFCE or Gnome desktops. */
try {
if(Desktop.isDesktopSupported()) {
Desktop.getDesktop().browse(new URI(wp));
}
} catch(Exception e) {
System.out.println("" + e); // display error message in terminal
img2.showMsg("" + e,false); // display it also in message field.
}
}
/* This method takes the text typed into the search field of the applet and
splits it up into individual words. It looks up each word, as it goes, in
the main keywords index to get its main index reference number. It then
uses this to look up the 'start position' and 'extent' of the references
to HTML files that contain this keyword and puts them in the FSP[] and
FSE[] arrays respectively. There's no need to save the keyword itself or
its index reference. This method allows for the entered keywords to be
separated by single or multiple or combinations of: space-characters,
colons, semi-colons, commas, full-stops. */
private int getKeyWords() {
/* Get the typed-in keywords from the Text entry field. The
'space' is to make k return correctly in all circumstances. */
KeyWs = removeAccents(KF.getText().trim().toLowerCase()) + ' ';
int L = KeyWs.length(); // length of the typed-in keywords string
if(L < 1) return 0; // return "no keywords entered"
int
FSP[] = new int[L], // pointer to start of index entries
// for each entered keyword
FSE[] = new int[L], // extent of index entries
// for each entered keyword
k = 0, // number of keywords received from Text field
x = 0; // index number within main index of current keyword
char z; // current character taken from the typed-in keywords string
String w = ""; // string for accumulating the current keyword
boolean
sp = false, // true if previous character was a space-character
vc = false; // true if valid characters have already been encountered
// for each character in the entered keyword string
for(int i = 0; i < L; i++) {
/* If the current character 'z' is one of the permitted keyword
delimiter characters AND valid keyword characters have already
been encountered AND the previous character was not a 'space': */
z = KeyWs.charAt(i);
if((((z == ' ') || (z == ':') || (z == ',') || (z == ';') || (z == '.'))
&& vc
&& !sp)) {
// NOTE: b and c are class-wide variables: not locals
/* if the keyword w appears in the main keywords index, construct
the pointer to reference number of the first filespec for this
keyword and the number of filespec entries in the index that
pertain to this keyword. Bail out if or when we reach the limit
of 16 entered keywords. */
if((x = find(w)) != -1) {
int y = c + getInt(b + (x << 2));
FSE[k] = (c + getInt(b + (++x << 2)) - y) >> 1;
FSP[k++] = y;
if(k > 15) break;
}
w = ""; // clear keyword string ready to accumulate next keyword
sp = true; // and note that this character was a space
}
else
if(((z >= '0') && (z <= '9')) || // is between 0 and 9 inclusive
((z >= 'A') && (z <='Z')) || // or between A and Z inclusive
((z >= 'a') && (z <= 'z')) // or between a and z inclusive
) {
w += z; // add new character to the current keyword string
vc = true; // indicate that a valid character has been encountered
sp = false; // ready for when the next keyword will be extracted
}
}
/* This method continues by sorting the references of the HTML documents
containing the first keyword into an order of ranking according to how
many of the other entered keywords each document contains. */
fsp = FSP[0]; // pointer to start of HTML document refs for first keyword
fse = FSE[0]; // number of HTML documents containing the first keyword
RANK = new int[fse]; // offset for each HTML document in order of rank
for(int i = 0; i < fse; i++) // First, make the order of rank = the
RANK[i] = i; // order of occurrence in the main index
if(k < 2) return k; // return if only one keyword entered
SIGF = new int[fse]; // for significance ranking for each HTML document
for(int i = 1; i < k; i++) { // for the 2nd keyword onwards
int J = FSE[i]; // number of index entries for the keyword
// for each of the HTML documents containing the first keyword
for(int h = 0; h < fse; h++) {
int r = getFSref(fsp,h); // get its filespec reference number
/* for each HTML document containing the i(th) keyword, if it is the
same HTML document as contains the first keyword, increment the
significance for this HTML document. */
for(int j = 0; j < J; j++)
if(r == getFSref(FSP[i], j))
++SIGF[h];
}
}
qs(0,fse - 1); // sort RANK[] index into the order of
// significance of the HTML documents
return k; // return the number of keywords entered
}
/* REMOVE ACCENTS FROM CHARACTERS WITHIN ENTERED KEYWORDS: UTF-8 uses 2
bytes for accented characters so the length of a string within Java can
be different from the number of bytes put out in UTF-8. I have decreed
that all accents shall be removed for the purpose of keyword searching.
This ensures that all characters occupy only one byte in UTF-8 and I
can therefore make the index about half the size it would have to be in
Unicode. NOTE: simply zeroing the upper byte of the Java Unicode char
does not work. It changes the actual letter. */
private static String removeAccents(String s){
s = s.replaceAll("[èéêë]","e");
s = s.replaceAll("[ûùúü]","u");
s = s.replaceAll("[ïîíì]","i");
s = s.replaceAll("[àáâã]","a");
s = s.replaceAll("[ôóòõ]","o");
s = s.replaceAll("[ç]","c");
return s;
}
/* NOTE THAT A FILESPEC'S POINTER VALUE IS UNIQUE TO IT ALONE
RETRIEVE THE HTML FILE'S FILESPEC REFERENCE FROM INDEX */
private int getFSref(int ptr, int ext) {
int p, x2, x3, rn;
p = ptr + (ext << 1); // form the byte reference
x2 = INDEX[p++]; // then make the two adjacent bytes
x2 &= 0xff; // form a 16-bit integer (short)rn
x2 <<= 8;
x3 = INDEX[p];
x3 &= 0xff;
rn = x2 | x3;
// return the start pointer of the filespec text
return e + getInt(d + (rn << 2));
}
/* The following method embodies C A R Hoare's Quick Sort algorithm.
Note that it is a highly re-entrant method: it calls itself indefinitely. */
private void qs(int LO, int HI) {
int
lo = LO, // set moving lo to LO end of partition
hi = HI; // set moving hi to HI end of partition
if(HI > LO) { // if the partition contains anything
int mid = SIGF[(LO + HI) >> 1]; // get the content of the mid
// element of the partition
while(lo <= hi) { // loop through the array
// until the indices cross
/* While current lowest significance < midway significance
push lower sort boundary up by one element and while current
highest keyword > midway keyword pull upper sort boundary
down by one element. */
while(lo < HI && SIGF[lo] < mid)
lo++;
while(hi > LO && SIGF[hi] > mid)
hi--;
if(lo <= hi) { // IF LOW INDEX <= HIGH INDEX SWAP THEIR 'CONTENTS'
/* Sort by significance: get index (offset-extent-filenum)
of lo element, put index of hi element in lo element
put index of lo element in hi element */
int x = SIGF[lo];
SIGF[lo] = SIGF[hi];
SIGF[hi] = x;
/* Shift along with it the rank order: get index (offset-
extent-filenum) of lo element,put index of hi element
in lo element, put index of lo element in hi element */
x = RANK[lo];
RANK[lo] = RANK[hi];
RANK[hi] = x;
lo++; // push lower sort boundary up by one element
hi--; // pull upper sort boundary down by one element
}
}
/* if hi not yet reached start of file, sort lower partition
and if lo not yet reached end of file, sort upper partition. */
if(LO < hi) qs(LO,hi);
if(lo < HI) qs(lo,HI);
}
}
} // end of search.class
/* The following class creates instances of ActionListener. It is used by
this applet to create and service an action event for each of the control
buttons plus one for the text field. The underlying system calls the
actionPerformed() method below every time a button is pushed or carriage
return is hit from the keyword entry text field. The method is invoked
upon the instance of the following class corresponding to that particular
button. The value of the variable id at the time is therefore that which
corresponds to the button concerned. This causes the appropriate case
statement to be executed, thus passing control to the appropriate
handling method above. */
class normlbut implements ActionListener {
static final int
TEXT = 0, // 'carriage-return hit in text field' event
SRCH = 1, // 'search button pressed' event
WIPE = 2, // 'clear button pressed' event
NEXT = 3, // 'next button pressed' event
PREV = 4, // 'prev button pressed' event
VIEW = 5; // 'view button pressed' event
int id; // one of the above
search ap; // the app that called: always the above app!
public normlbut(int id, search ap) { // constructor for a new command
this.id = id;
this.ap = ap;
}
// one of the buttons has been clicked
public void actionPerformed(ActionEvent e) {
switch(id) { // id of this instance of ActionEvent
case TEXT: ap.seek(); break; // carriage-return from text field
case SRCH: ap.seek(); break; // the 'search' button
case WIPE: ap.wipe(); break; // the 'clear' button
case NEXT: ap.next(); break; // the 'next' button
case PREV: ap.prev(); break; // the 'prev' button
case VIEW: ap.view(); // the 'view' button
}
}
}