about summary refs log tree commit diff
path: root/src/com/benlinskey
diff options
context:
space:
mode:
Diffstat (limited to 'src/com/benlinskey')
-rw-r--r-- src/com/benlinskey/grdbc/GreekTextParser.java 181
-rw-r--r-- src/com/benlinskey/grdbc/LexiconCreator.java 356
-rw-r--r-- src/com/benlinskey/grdbc/LexiconParser.java 193
-rw-r--r-- src/com/benlinskey/grdbc/SyntaxCreator.java 316
-rw-r--r-- src/com/benlinskey/grdbc/SyntaxParser.java 52
5 files changed, 562 insertions, 536 deletions
diff --git a/src/com/benlinskey/grdbc/GreekTextParser.java b/src/com/benlinskey/grdbc/GreekTextParser.java
index 81cc4cb..e78e1cd 100644
--- a/src/com/benlinskey/grdbc/GreekTextParser.java
+++ b/src/com/benlinskey/grdbc/GreekTextParser.java
@@ -39,93 +39,102 @@ import org.xml.sax.SAXException;
import edu.unc.epidoc.transcoder.TransCoder;
/**
+ * An abstract class for parsing Greek text encoded in an XML document.
+ *
* @author Ben Linskey
- *
+ *
*/
public abstract class GreekTextParser {
- protected Document doc;
- protected TransCoder transcoder;
-
- /**
- * Class constructor.
- * @param xml the XML to parse
- * @throws ParserConfigurationException
- * @throws SAXException
- * @throws IOException
- */
- protected GreekTextParser(String xml)
- throws ParserConfigurationException, SAXException, IOException {
- // Parse the XML and create a Document.
- DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
- DocumentBuilder db = dbf.newDocumentBuilder();
- InputSource is = new InputSource(new StringReader(xml));
- doc = db.parse(is);
-
- // Create a TransCoder for converting Beta Code to Greek characters.
- try {
- transcoder = new TransCoder("BetaCode", "UnicodeC");
- } catch (Exception e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Transcodes beta code to Greek in elements with the given name.
- * @param element the name of the element to search for
- */
- protected void transcodeInElements(String element) {
- NodeList nodeList = doc.getElementsByTagName(element);
- for (int i = 0; i < nodeList.getLength(); i++) {
- Node elementNode = nodeList.item(i);
- Node langAttr = elementNode.getAttributes().getNamedItem("lang");
- if (langAttr != null) {
- String lang = langAttr.getTextContent();
- if (lang.equals("greek")) {
- String greek = betaToGreek(elementNode.getTextContent());
- elementNode.setTextContent(greek);
- }
- }
- }
- }
-
- /**
- * Returns a string containing an XML representation of the document in its
- * current state.
- * @return a string containing an XML representation of the document in its
- * current state
- */
- protected String getUpdatedXML() {
- StringWriter writer = new StringWriter();
- try {
- TransformerFactory tf = TransformerFactory.newInstance();
- Transformer transformer = tf.newTransformer();
- DOMSource source = new DOMSource(doc);
- StreamResult result = new StreamResult(writer);
- transformer.transform(source, result);
- } catch (TransformerConfigurationException e) {
- e.printStackTrace();
- System.exit(1);
- } catch (TransformerException e) {
- e.printStackTrace();
- System.exit(1);
- }
- return writer.toString();
- }
-
- /**
- * Converts Beta Code to Greek characters.
- * @param beta the Beta Code to transcode
- * @return the Greek equivalent of the specified Beta Code
- */
- protected String betaToGreek(String beta) {
- String greek = null;
- try {
- greek = transcoder.getString(beta);
- } catch (UnsupportedEncodingException e) {
- e.printStackTrace();
- System.exit(1);
- }
- return greek;
- }
+ protected Document doc;
+ protected TransCoder transcoder;
+
+ /**
+ * Class constructor.
+ *
+ * @param xml
+ * the XML to parse
+ * @throws ParserConfigurationException
+ * @throws SAXException
+ * @throws IOException
+ */
+ protected GreekTextParser(String xml) throws ParserConfigurationException,
+ SAXException, IOException {
+ // Parse the XML and create a Document.
+ DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+ DocumentBuilder db = dbf.newDocumentBuilder();
+ InputSource is = new InputSource(new StringReader(xml));
+ doc = db.parse(is);
+
+ // Create a TransCoder for converting Beta Code to Greek characters.
+ try {
+ transcoder = new TransCoder("BetaCode", "UnicodeC");
+ } catch (Exception e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Transcodes beta code to Greek in elements with the given name.
+ *
+ * @param element
+ * the name of the element to search for
+ */
+ protected void transcodeInElements(String element) {
+ NodeList nodeList = doc.getElementsByTagName(element);
+ for (int i = 0; i < nodeList.getLength(); i++) {
+ Node elementNode = nodeList.item(i);
+ Node langAttr = elementNode.getAttributes().getNamedItem("lang");
+ if (langAttr != null) {
+ String lang = langAttr.getTextContent();
+ if (lang.equals("greek")) {
+ String greek = betaToGreek(elementNode.getTextContent());
+ elementNode.setTextContent(greek);
+ }
+ }
+ }
+ }
+
+ /**
+ * Returns a string containing an XML representation of the document in its
+ * current state.
+ *
+ * @return a string containing an XML representation of the document in its
+ * current state
+ */
+ protected String getUpdatedXML() {
+ StringWriter writer = new StringWriter();
+ try {
+ TransformerFactory tf = TransformerFactory.newInstance();
+ Transformer transformer = tf.newTransformer();
+ DOMSource source = new DOMSource(doc);
+ StreamResult result = new StreamResult(writer);
+ transformer.transform(source, result);
+ } catch (TransformerConfigurationException e) {
+ e.printStackTrace();
+ System.exit(1);
+ } catch (TransformerException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ return writer.toString();
+ }
+
+ /**
+ * Converts Beta Code to Greek characters.
+ *
+ * @param beta
+ * the Beta Code to transcode
+ * @return the Greek equivalent of the specified Beta Code
+ */
+ protected String betaToGreek(String beta) {
+ String greek = null;
+ try {
+ greek = transcoder.getString(beta);
+ } catch (UnsupportedEncodingException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ return greek;
+ }
}
diff --git a/src/com/benlinskey/grdbc/LexiconCreator.java b/src/com/benlinskey/grdbc/LexiconCreator.java
index 6dca163..02d5865 100644
--- a/src/com/benlinskey/grdbc/LexiconCreator.java
+++ b/src/com/benlinskey/grdbc/LexiconCreator.java
@@ -30,185 +30,187 @@ import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
/**
- * Reads in an XML file containing a Greek lexicon and stores entries in an
+ * Reads in an XML file containing a Greek lexicon and stores entries in an
* SQLite database.
+ *
* @author Ben Linskey
*/
public class LexiconCreator {
- private final static String FILE = "../xml/Perseus_text_1999.04.0058.xml";
- private final static String DB = "lexicon.db";
- private final static String TABLE_NAME = "lexicon";
- private Connection connection;
- private PreparedStatement insertStatement;
-
- /**
- * Class constructor.
- */
- public LexiconCreator() {
- // Load driver.
- try {
- Class.forName("org.sqlite.JDBC");
- } catch (ClassNotFoundException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- // Connect to database.
- try {
- connection = DriverManager.getConnection("jdbc:sqlite:" + DB);
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- // Use batch inserts for speed.
- try {
- connection.setAutoCommit(false);
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- createDatabase();
-
- // Create a prepared statement to use when inserting entries.
- try {
- insertStatement = connection.prepareStatement("INSERT INTO "
- + TABLE_NAME + " VALUES (NULL, ?, ?, ?, ?, ?, ?)");
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Creates the lexicon database.
- */
- public void run() {
- addEntries();
- createIndex();
- try {
- insertStatement.close();
- connection.close();
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
- System.out.println("Done.");
- }
-
- /**
- * Resets the database if it already exists and creates a new, empty
- * database.
- */
- private void createDatabase() {
- System.out.println("Creating lexicon database...");
- try {
- String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME;
- String createTable = "CREATE TABLE " + TABLE_NAME + " (" +
- "_ID INTEGER PRIMARY KEY, " +
- "betaNoSymbols VARCHAR(100), " +
- "betaSymbols VARCHAR(100), " +
- "greekFullWord VARCHAR(100), " +
- "greekNoSymbols VARCHAR(100), " +
- "greekLowercase VARCHAR(100), " +
- "entry TEXT)";
- Statement statement = connection.createStatement();
- statement.executeUpdate(dropTable);
- statement.executeUpdate(createTable);
- connection.commit();
- statement.close();
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Parses the XML file, modifies the lexicon entries, and inserts the
- * modified entries into the database.
- */
- private void addEntries() {
- System.out.println("Inserting entries...");
-
- try {
- BufferedReader in = new BufferedReader(new FileReader(FILE));
- StringBuilder xml = new StringBuilder();
-
- // Extract the XML for each lexicon entry, then process it.
- while (in.ready()) {
- String line = in.readLine();
- if (line.startsWith("<entry ")) {
- xml.delete(0, xml.length()); // Reset XML.
- xml.append(line); // Add this line to new chunk of XML.
- } else if (line.startsWith("</entry>")) {
- xml.append(line);
- processEntry(xml.toString());
- } else {
- xml.append(line);
- }
- }
- in.close();
-
- insertStatement.executeBatch();
- connection.commit();
- } catch (FileNotFoundException e) {
- System.err.println("Error: Lexicon file not found.");
- System.exit(1);
- } catch (IOException e) {
- e.printStackTrace();
- System.exit(1);
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Modifies the specified entry and inserts it into the database.
- * @param xml the XML containing the entry to process
- */
- private void processEntry(String xml) {
- try {
- LexiconParser parser = new LexiconParser(xml);
- insertStatement.setString(1, parser.getBetaNoSymbols());
- insertStatement.setString(2, parser.getBetaSymbols());
- insertStatement.setString(3, parser.getGreekFullWord());
- insertStatement.setString(4, parser.getGreekNoSymbols());
- insertStatement.setString(5, parser.getGreekLowercase());
- insertStatement.setString(6, parser.getEntry());
- insertStatement.addBatch();
- } catch (ParserConfigurationException e) {
- e.printStackTrace();
- System.exit(1);
- } catch (SAXException e) {
- e.printStackTrace();
- System.exit(1);
- } catch (IOException e) {
- e.printStackTrace();
- System.exit(1);
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Creates an index on the database to speed up searches.
- */
- private void createIndex() {
- System.out.println("Creating index...");
-
- // Create an index on the three columns matched against search queries.
- String sql = "CREATE INDEX searchIndex ON " + TABLE_NAME +
- " (betaNoSymbols, betaSymbols, greekNoSymbols)";
- try {
- Statement statement = connection.createStatement();
- statement.executeUpdate(sql);
- statement.close();
- connection.commit();
- } catch (SQLException e) {
- e.printStackTrace();
- }
- }
+ private final static String FILE = "../xml/Perseus_text_1999.04.0058.xml";
+ private final static String DB = "lexicon.db";
+ private final static String TABLE_NAME = "lexicon";
+ private Connection connection;
+ private PreparedStatement insertStatement;
+
+ /**
+ * Class constructor.
+ */
+ public LexiconCreator() {
+ // Load driver.
+ try {
+ Class.forName("org.sqlite.JDBC");
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+
+ // Connect to database.
+ try {
+ connection = DriverManager.getConnection("jdbc:sqlite:" + DB);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+
+ // Use batch inserts for speed.
+ try {
+ connection.setAutoCommit(false);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+
+ createDatabase();
+
+ // Create a prepared statement to use when inserting entries.
+ try {
+ insertStatement = connection.prepareStatement("INSERT INTO "
+ + TABLE_NAME + " VALUES (NULL, ?, ?, ?, ?, ?, ?)");
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Creates the lexicon database.
+ */
+ public void run() {
+ addEntries();
+ createIndex();
+ try {
+ insertStatement.close();
+ connection.close();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ System.out.println("Done.");
+ }
+
+ /**
+ * Resets the database if it already exists and creates a new, empty
+ * database.
+ */
+ private void createDatabase() {
+ System.out.println("Creating lexicon database...");
+ try {
+ String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME;
+ String createTable = "CREATE TABLE " + TABLE_NAME + " ("
+ + "_ID INTEGER PRIMARY KEY, "
+ + "betaNoSymbols VARCHAR(100), "
+ + "betaSymbols VARCHAR(100), "
+ + "greekFullWord VARCHAR(100), "
+ + "greekNoSymbols VARCHAR(100), "
+ + "greekLowercase VARCHAR(100), " + "entry TEXT)";
+ Statement statement = connection.createStatement();
+ statement.executeUpdate(dropTable);
+ statement.executeUpdate(createTable);
+ connection.commit();
+ statement.close();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Parses the XML file, modifies the lexicon entries, and inserts the
+ * modified entries into the database.
+ */
+ private void addEntries() {
+ System.out.println("Inserting entries...");
+
+ try {
+ BufferedReader in = new BufferedReader(new FileReader(FILE));
+ StringBuilder xml = new StringBuilder();
+
+ // Extract the XML for each lexicon entry, then process it.
+ while (in.ready()) {
+ String line = in.readLine();
+ if (line.startsWith("<entry ")) {
+ xml.delete(0, xml.length()); // Reset XML.
+ xml.append(line); // Add this line to new chunk of XML.
+ } else if (line.startsWith("</entry>")) {
+ xml.append(line);
+ processEntry(xml.toString());
+ } else {
+ xml.append(line);
+ }
+ }
+ in.close();
+
+ insertStatement.executeBatch();
+ connection.commit();
+ } catch (FileNotFoundException e) {
+ System.err.println("Error: Lexicon file not found.");
+ System.exit(1);
+ } catch (IOException e) {
+ e.printStackTrace();
+ System.exit(1);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Modifies the specified entry and inserts it into the database.
+ *
+ * @param xml
+ * the XML containing the entry to process
+ */
+ private void processEntry(String xml) {
+ try {
+ LexiconParser parser = new LexiconParser(xml);
+ insertStatement.setString(1, parser.getBetaNoSymbols());
+ insertStatement.setString(2, parser.getBetaSymbols());
+ insertStatement.setString(3, parser.getGreekFullWord());
+ insertStatement.setString(4, parser.getGreekNoSymbols());
+ insertStatement.setString(5, parser.getGreekLowercase());
+ insertStatement.setString(6, parser.getEntry());
+ insertStatement.addBatch();
+ } catch (ParserConfigurationException e) {
+ e.printStackTrace();
+ System.exit(1);
+ } catch (SAXException e) {
+ e.printStackTrace();
+ System.exit(1);
+ } catch (IOException e) {
+ e.printStackTrace();
+ System.exit(1);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Creates an index on the database to speed up searches.
+ */
+ private void createIndex() {
+ System.out.println("Creating index...");
+
+ // Create an index on the three columns matched against search queries.
+ String sql = "CREATE INDEX searchIndex ON " + TABLE_NAME
+ + " (betaNoSymbols, betaSymbols, greekNoSymbols)";
+ try {
+ Statement statement = connection.createStatement();
+ statement.executeUpdate(sql);
+ statement.close();
+ connection.commit();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ }
}
diff --git a/src/com/benlinskey/grdbc/LexiconParser.java b/src/com/benlinskey/grdbc/LexiconParser.java
index 8e9e46b..9f3e3b1 100644
--- a/src/com/benlinskey/grdbc/LexiconParser.java
+++ b/src/com/benlinskey/grdbc/LexiconParser.java
@@ -24,99 +24,108 @@ import org.xml.sax.SAXException;
/**
* This class provides methods to parse a chunk of XML containing a lexicon
- * entry, modify the data contained therein, and return data to be inserted
- * into the database.
+ * entry, modify the data contained therein, and return data to be inserted into
+ * the database.
+ *
* @author Ben Linskey
*/
public class LexiconParser extends GreekTextParser {
- /**
- * Class constructor.
- * @param xml the XML to parse
- * @throws ParserConfigurationException
- * @throws SAXException
- * @throws IOException
- */
- public LexiconParser(String xml)
- throws ParserConfigurationException, SAXException, IOException {
- super(xml);
- }
-
- /**
- * Returns a Beta Code representation of this entry's word, stripped of
- * all diacritics.
- * @return this entry's word in Beta Code without diacritics
- */
- public String getBetaNoSymbols() {
- // Get the word and replace all symbols with an empty string.
- return getBetaSymbols().replaceAll("[^a-zA-Z]", "");
- }
-
- /**
- * Returns a Beta Code representation of this entry's word.
- * @return this entry's word in Beta Code
- */
- public String getBetaSymbols() {
- // We just need the "key" attribute from the "entry" element.
- Node entry = doc.getElementsByTagName("entry").item(0);
- return entry.getAttributes().getNamedItem("key").getTextContent();
- }
-
- /**
- * Returns this entry's word in Greek characters.
- * @return this entry's word in Greek characters
- */
- public String getGreekFullWord() {
- // Use the transcoder to convert the beta code to Greek.
- return betaToGreek(getBetaSymbols());
- }
-
- /**
- * Returns this entry's word in Greek characters, stripped of all
- * diacritics.
- * @return this entry's word in Greek characters without diacritics
- */
- public String getGreekNoSymbols() {
- // Get beta code with no symbols other than the capital letter marker.
- String beta = getBetaSymbols().replaceAll("[^a-zA-Z\\*]", "");
-
- // Use the transcoder to convert the beta code to Greek.
- return betaToGreek(beta);
- }
-
- /**
- * Returns this entry's word in all lowercase Greek characters, stripped
- * of all diacritics.
- * @return this entry's word in lowercase Greek characters without
- * diacritics
- */
- public String getGreekLowercase() {
- return getGreekNoSymbols().toLowerCase();
- }
-
- /**
- * Returns the XML for this entry, with all Beta Code converted to Greek
- * characters.
- * @return the XML for this entry with all Beta Code converted to Greek
- * characters
- */
- public String getEntry() {
- transcodeEntryKey();
- transcodeInElements("orth");
- transcodeInElements("ref");
- transcodeInElements("foreign");
- transcodeInElements("note");
- return getUpdatedXML();
- }
-
- /**
- * Converts the value of the entry element's "key" attribute from Beta
- * Code to Greek.
- */
- private void transcodeEntryKey() {
- Node entryNode = doc.getElementsByTagName("entry").item(0);
- Node keyAttr = entryNode.getAttributes().getNamedItem("key");
- String beta = keyAttr.getTextContent();
- String greek = betaToGreek(beta);
- keyAttr.setTextContent(greek);
- }
+ /**
+ * Class constructor.
+ *
+ * @param xml
+ * the XML to parse
+ * @throws ParserConfigurationException
+ * @throws SAXException
+ * @throws IOException
+ */
+ public LexiconParser(String xml) throws ParserConfigurationException,
+ SAXException, IOException {
+ super(xml);
+ }
+
+ /**
+ * Returns a Beta Code representation of this entry's word, stripped of all
+ * diacritics.
+ *
+ * @return this entry's word in Beta Code without diacritics
+ */
+ public String getBetaNoSymbols() {
+ // Get the word and replace all symbols with an empty string.
+ return getBetaSymbols().replaceAll("[^a-zA-Z]", "");
+ }
+
+ /**
+ * Returns a Beta Code representation of this entry's word.
+ *
+ * @return this entry's word in Beta Code
+ */
+ public String getBetaSymbols() {
+ // We just need the "key" attribute from the "entry" element.
+ Node entry = doc.getElementsByTagName("entry").item(0);
+ return entry.getAttributes().getNamedItem("key").getTextContent();
+ }
+
+ /**
+ * Returns this entry's word in Greek characters.
+ *
+ * @return this entry's word in Greek characters
+ */
+ public String getGreekFullWord() {
+ // Use the transcoder to convert the beta code to Greek.
+ return betaToGreek(getBetaSymbols());
+ }
+
+ /**
+ * Returns this entry's word in Greek characters, stripped of all
+ * diacritics.
+ *
+ * @return this entry's word in Greek characters without diacritics
+ */
+ public String getGreekNoSymbols() {
+ // Get beta code with no symbols other than the capital letter marker.
+ String beta = getBetaSymbols().replaceAll("[^a-zA-Z\\*]", "");
+
+ // Use the transcoder to convert the beta code to Greek.
+ return betaToGreek(beta);
+ }
+
+ /**
+ * Returns this entry's word in all lowercase Greek characters, stripped of
+ * all diacritics.
+ *
+ * @return this entry's word in lowercase Greek characters without
+ * diacritics
+ */
+ public String getGreekLowercase() {
+ return getGreekNoSymbols().toLowerCase();
+ }
+
+ /**
+ * Returns the XML for this entry, with all Beta Code converted to Greek
+ * characters.
+ *
+ * @return the XML for this entry with all Beta Code converted to Greek
+ * characters
+ */
+ public String getEntry() {
+ transcodeEntryKey();
+ transcodeInElements("orth");
+ transcodeInElements("ref");
+ transcodeInElements("foreign");
+ transcodeInElements("note");
+ return getUpdatedXML();
+ }
+
+ /**
+ * Converts the value of the entry element's "key" attribute from Beta Code
+ * to Greek.
+ */
+ private void transcodeEntryKey() {
+ Node entryNode = doc.getElementsByTagName("entry").item(0);
+ Node keyAttr = entryNode.getAttributes().getNamedItem("key");
+ String beta = keyAttr.getTextContent();
+ String greek = betaToGreek(beta);
+ keyAttr.setTextContent(greek);
+ }
}
diff --git a/src/com/benlinskey/grdbc/SyntaxCreator.java b/src/com/benlinskey/grdbc/SyntaxCreator.java
index 172b53e..e5ccb17 100644
--- a/src/com/benlinskey/grdbc/SyntaxCreator.java
+++ b/src/com/benlinskey/grdbc/SyntaxCreator.java
@@ -31,169 +31,171 @@ import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
/**
- * Reads in an XML file containing the Overview of Greek Syntax text and stores
+ * Reads in an XML file containing the Overview of Greek Syntax text and stores
* sections of the text in an SQLite database.
* <p>
* Note that the Sources Cited section is omitted, as it is on Perseus.
+ *
* @author Ben Linskey
- *
+ *
*/
public class SyntaxCreator {
- private final static String FILE = "../xml/Perseus_text_1999.04.0052.xml";
- private final static String DB = "syntax.db";
- private final static String TABLE_NAME = "syntax";
- private Connection connection;
- private PreparedStatement insertStatement;
-
- /**
- * Class constructor.
- */
- public SyntaxCreator() {
- // Load driver.
- try {
- Class.forName("org.sqlite.JDBC");
- } catch (ClassNotFoundException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- // Connect to database.
- try {
- connection = DriverManager.getConnection("jdbc:sqlite:" + DB);
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- // Use batch inserts for speed.
- try {
- connection.setAutoCommit(false);
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
-
- createDatabase();
-
- // Create a prepared statement to use when inserting entries.
- try {
- insertStatement = connection.prepareStatement("INSERT INTO "
- + TABLE_NAME + " VALUES (NULL, ?, ?, ?)");
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Creates the Overview of Greek Syntax database.
- */
- public void run() {
- addSections();
- try {
- insertStatement.close();
- connection.close();
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
- System.out.println("Done.");
- }
-
- /**
- * Resets the database if it already exists and creates a new, empty
- * database.
- */
- private void createDatabase() {
- System.out.println("Creating lexicon database...");
- try {
- String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME;
- String createTable = "CREATE TABLE " + TABLE_NAME + " (" +
- "_ID INTEGER PRIMARY KEY, " +
- "chapter VARCHAR(100), " +
- "section VARCHAR(100), " +
- "xml TEXT)";
- Statement statement = connection.createStatement();
- statement.executeUpdate(dropTable);
- statement.executeUpdate(createTable);
- connection.commit();
- statement.close();
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Parses the XML file, modifies the sections, and inserts the modified
- * data into the database.
- */
- private void addSections() {
- System.out.println("Inserting data...");
-
- String chapter = null;
+ private final static String FILE = "../xml/Perseus_text_1999.04.0052.xml";
+ private final static String DB = "syntax.db";
+ private final static String TABLE_NAME = "syntax";
+ private Connection connection;
+ private PreparedStatement insertStatement;
+
+ /**
+ * Class constructor.
+ */
+ public SyntaxCreator() {
+ // Load driver.
+ try {
+ Class.forName("org.sqlite.JDBC");
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+
+ // Connect to database.
+ try {
+ connection = DriverManager.getConnection("jdbc:sqlite:" + DB);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+
+ // Use batch inserts for speed.
+ try {
+ connection.setAutoCommit(false);
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+
+ createDatabase();
+
+ // Create a prepared statement to use when inserting entries.
+ try {
+ insertStatement = connection.prepareStatement("INSERT INTO "
+ + TABLE_NAME + " VALUES (NULL, ?, ?, ?)");
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Creates the Overview of Greek Syntax database.
+ */
+ public void run() {
+ addSections();
+ try {
+ insertStatement.close();
+ connection.close();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ System.out.println("Done.");
+ }
+
+ /**
+ * Resets the database if it already exists and creates a new, empty
+ * database.
+ */
+ private void createDatabase() {
+ System.out.println("Creating lexicon database...");
+ try {
+ String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME;
+ String createTable = "CREATE TABLE " + TABLE_NAME + " ("
+ + "_ID INTEGER PRIMARY KEY, "
+ + "chapter VARCHAR(100), " + "section VARCHAR(100), "
+ + "xml TEXT)";
+ Statement statement = connection.createStatement();
+ statement.executeUpdate(dropTable);
+ statement.executeUpdate(createTable);
+ connection.commit();
+ statement.close();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Parses the XML file, modifies the sections, and inserts the modified data
+ * into the database.
+ */
+ private void addSections() {
+ System.out.println("Inserting data...");
+
+ String chapter = null;
String section = null;
StringBuilder xml = new StringBuilder();
- Pattern pattern = Pattern.compile("<head>(.*?)</head>");
-
- try {
- BufferedReader in = new BufferedReader(new FileReader(FILE));
- while (in.ready()) {
- String line = in.readLine();
- if (line.startsWith("<div1")) {
- // Get chapter title.
- line = in.readLine(); // Next line is "head" element with title.
- Matcher matcher = pattern.matcher(line);
- matcher.find();
- chapter = matcher.group(1);
- } else if (line.startsWith("<div2")) {
- // Get section title.
- line = in.readLine(); // Next line is "head" element with title.
- Matcher matcher = pattern.matcher(line);
- matcher.find();
- section = matcher.group(1);
-
- // Reset XML and add "head" element.
- xml.delete(0, xml.length());
- xml.append("<section>");
- xml.append(line);
- } else if (line.contains("</div2>")) {
- // Get any XML before the "</div2>" tag.
- String[] split = line.split("</div2>");
- xml.append(split[0]);
-
- // Add closing root tag.
- xml.append("</section>");
-
- SyntaxParser parser = new SyntaxParser(xml.toString());
- String transcodedXml = parser.transcode();
-
- // Add data to database.
- insertStatement.setString(1, chapter);
- insertStatement.setString(2, section);
- insertStatement.setString(3, transcodedXml);
- insertStatement.addBatch();
- } else {
- // Get next line of XML.
- xml.append(line);
- }
- }
- in.close();
-
- insertStatement.executeBatch();
- connection.commit();
- } catch (SQLException e) {
- e.printStackTrace();
- System.exit(1);
- } catch (IOException e) {
- e.printStackTrace();
- System.exit(1);
- } catch (ParserConfigurationException e) {
- e.printStackTrace();
- System.exit(1);
- } catch (SAXException e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
+ Pattern pattern = Pattern.compile("<head>(.*?)</head>");
+
+ try {
+ BufferedReader in = new BufferedReader(new FileReader(FILE));
+ while (in.ready()) {
+ String line = in.readLine();
+ if (line.startsWith("<div1")) {
+ // Get chapter title.
+ line = in.readLine(); // Next line is "head" element with
+ // title.
+ Matcher matcher = pattern.matcher(line);
+ matcher.find();
+ chapter = matcher.group(1);
+ } else if (line.startsWith("<div2")) {
+ // Get section title.
+ line = in.readLine(); // Next line is "head" element with
+ // title.
+ Matcher matcher = pattern.matcher(line);
+ matcher.find();
+ section = matcher.group(1);
+
+ // Reset XML and add "head" element.
+ xml.delete(0, xml.length());
+ xml.append("<section>");
+ xml.append(line);
+ } else if (line.contains("</div2>")) {
+ // Get any XML before the "</div2>" tag.
+ String[] split = line.split("</div2>");
+ xml.append(split[0]);
+
+ // Add closing root tag.
+ xml.append("</section>");
+
+ SyntaxParser parser = new SyntaxParser(xml.toString());
+ String transcodedXml = parser.transcode();
+
+ // Add data to database.
+ insertStatement.setString(1, chapter);
+ insertStatement.setString(2, section);
+ insertStatement.setString(3, transcodedXml);
+ insertStatement.addBatch();
+ } else {
+ // Get next line of XML.
+ xml.append(line);
+ }
+ }
+ in.close();
+
+ insertStatement.executeBatch();
+ connection.commit();
+ } catch (SQLException e) {
+ e.printStackTrace();
+ System.exit(1);
+ } catch (IOException e) {
+ e.printStackTrace();
+ System.exit(1);
+ } catch (ParserConfigurationException e) {
+ e.printStackTrace();
+ System.exit(1);
+ } catch (SAXException e) {
+ e.printStackTrace();
+ System.exit(1);
+ }
+ }
}
diff --git a/src/com/benlinskey/grdbc/SyntaxParser.java b/src/com/benlinskey/grdbc/SyntaxParser.java
index 811ca2f..9745af3 100644
--- a/src/com/benlinskey/grdbc/SyntaxParser.java
+++ b/src/com/benlinskey/grdbc/SyntaxParser.java
@@ -24,31 +24,35 @@ import org.xml.sax.SAXException;
/**
* Parses XML from the Overview of Greek Syntax text and converts Beta Code to
* Greek characters.
+ *
* @author Ben Linskey
- *
+ *
*/
public class SyntaxParser extends GreekTextParser {
- /**
- * Class constructor.
- * @param xml the XML to parse
- * @throws ParserConfigurationException
- * @throws SAXException
- * @throws IOException
- */
- public SyntaxParser(String xml)
- throws ParserConfigurationException, SAXException, IOException {
- super(xml);
- }
-
- /**
- * Returns the XML for this section, with all Beta Code converted to Greek
- * characters.
- * @return the XML for this section with all Beta Code converted to Greek
- * characters
- */
- public String transcode() {
- transcodeInElements("quote");
- transcodeInElements("foreign");
- return getUpdatedXML();
- }
+ /**
+ * Class constructor.
+ *
+ * @param xml
+ * the XML to parse
+ * @throws ParserConfigurationException
+ * @throws SAXException
+ * @throws IOException
+ */
+ public SyntaxParser(String xml) throws ParserConfigurationException,
+ SAXException, IOException {
+ super(xml);
+ }
+
+ /**
+ * Returns the XML for this section, with all Beta Code converted to Greek
+ * characters.
+ *
+ * @return the XML for this section with all Beta Code converted to Greek
+ * characters
+ */
+ public String transcode() {
+ transcodeInElements("quote");
+ transcodeInElements("foreign");
+ return getUpdatedXML();
+ }
}