diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/com/benlinskey/grdbc/GreekTextParser.java | 181 | ||||
| -rw-r--r-- | src/com/benlinskey/grdbc/LexiconCreator.java | 356 | ||||
| -rw-r--r-- | src/com/benlinskey/grdbc/LexiconParser.java | 193 | ||||
| -rw-r--r-- | src/com/benlinskey/grdbc/SyntaxCreator.java | 171 | ||||
| -rw-r--r-- | src/com/benlinskey/grdbc/SyntaxParser.java | 52 |
5 files changed, 572 insertions, 381 deletions
diff --git a/src/com/benlinskey/grdbc/GreekTextParser.java b/src/com/benlinskey/grdbc/GreekTextParser.java index 81cc4cb..e78e1cd 100644 --- a/src/com/benlinskey/grdbc/GreekTextParser.java +++ b/src/com/benlinskey/grdbc/GreekTextParser.java @@ -39,93 +39,102 @@ import org.xml.sax.SAXException; import edu.unc.epidoc.transcoder.TransCoder; /** + * An abstract class for parsing Greek text encoded in an XML document. + * * @author Ben Linskey - * + * */ public abstract class GreekTextParser { - protected Document doc; - protected TransCoder transcoder; - - /** - * Class constructor. - * @param xml the XML to parse - * @throws ParserConfigurationException - * @throws SAXException - * @throws IOException - */ - protected GreekTextParser(String xml) - throws ParserConfigurationException, SAXException, IOException { - // Parse the XML and create a Document. - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - DocumentBuilder db = dbf.newDocumentBuilder(); - InputSource is = new InputSource(new StringReader(xml)); - doc = db.parse(is); - - // Create a TransCoder for converting Beta Code to Greek characters. - try { - transcoder = new TransCoder("BetaCode", "UnicodeC"); - } catch (Exception e) { - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Transcodes beta code to Greek in elements with the given name. - * @param element the name of the element to search for - */ - protected void transcodeInElements(String element) { - NodeList nodeList = doc.getElementsByTagName(element); - for (int i = 0; i < nodeList.getLength(); i++) { - Node elementNode = nodeList.item(i); - Node langAttr = elementNode.getAttributes().getNamedItem("lang"); - if (langAttr != null) { - String lang = langAttr.getTextContent(); - if (lang.equals("greek")) { - String greek = betaToGreek(elementNode.getTextContent()); - elementNode.setTextContent(greek); - } - } - } - } - - /** - * Returns a string containing an XML representation of the document in its - * current state. - * @return a string containing an XML representation of the document in its - * current state - */ - protected String getUpdatedXML() { - StringWriter writer = new StringWriter(); - try { - TransformerFactory tf = TransformerFactory.newInstance(); - Transformer transformer = tf.newTransformer(); - DOMSource source = new DOMSource(doc); - StreamResult result = new StreamResult(writer); - transformer.transform(source, result); - } catch (TransformerConfigurationException e) { - e.printStackTrace(); - System.exit(1); - } catch (TransformerException e) { - e.printStackTrace(); - System.exit(1); - } - return writer.toString(); - } - - /** - * Converts Beta Code to Greek characters. - * @param beta the Beta Code to transcode - * @return the Greek equivalent of the specified Beta Code - */ - protected String betaToGreek(String beta) { - String greek = null; - try { - greek = transcoder.getString(beta); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - System.exit(1); - } - return greek; - } + protected Document doc; + protected TransCoder transcoder; + + /** + * Class constructor. + * + * @param xml + * the XML to parse + * @throws ParserConfigurationException + * @throws SAXException + * @throws IOException + */ + protected GreekTextParser(String xml) throws ParserConfigurationException, + SAXException, IOException { + // Parse the XML and create a Document. + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilder db = dbf.newDocumentBuilder(); + InputSource is = new InputSource(new StringReader(xml)); + doc = db.parse(is); + + // Create a TransCoder for converting Beta Code to Greek characters. + try { + transcoder = new TransCoder("BetaCode", "UnicodeC"); + } catch (Exception e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Transcodes beta code to Greek in elements with the given name. + * + * @param element + * the name of the element to search for + */ + protected void transcodeInElements(String element) { + NodeList nodeList = doc.getElementsByTagName(element); + for (int i = 0; i < nodeList.getLength(); i++) { + Node elementNode = nodeList.item(i); + Node langAttr = elementNode.getAttributes().getNamedItem("lang"); + if (langAttr != null) { + String lang = langAttr.getTextContent(); + if (lang.equals("greek")) { + String greek = betaToGreek(elementNode.getTextContent()); + elementNode.setTextContent(greek); + } + } + } + } + + /** + * Returns a string containing an XML representation of the document in its + * current state. + * + * @return a string containing an XML representation of the document in its + * current state + */ + protected String getUpdatedXML() { + StringWriter writer = new StringWriter(); + try { + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer transformer = tf.newTransformer(); + DOMSource source = new DOMSource(doc); + StreamResult result = new StreamResult(writer); + transformer.transform(source, result); + } catch (TransformerConfigurationException e) { + e.printStackTrace(); + System.exit(1); + } catch (TransformerException e) { + e.printStackTrace(); + System.exit(1); + } + return writer.toString(); + } + + /** + * Converts Beta Code to Greek characters. + * + * @param beta + * the Beta Code to transcode + * @return the Greek equivalent of the specified Beta Code + */ + protected String betaToGreek(String beta) { + String greek = null; + try { + greek = transcoder.getString(beta); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + System.exit(1); + } + return greek; + } } diff --git a/src/com/benlinskey/grdbc/LexiconCreator.java b/src/com/benlinskey/grdbc/LexiconCreator.java index ec3a99a..3a242ff 100644 --- a/src/com/benlinskey/grdbc/LexiconCreator.java +++ b/src/com/benlinskey/grdbc/LexiconCreator.java @@ -30,185 +30,187 @@ import javax.xml.parsers.ParserConfigurationException; import org.xml.sax.SAXException; /** - * Reads in an XML file containing a Greek lexicon and stores entries in an + * Reads in an XML file containing a Greek lexicon and stores entries in an * SQLite database. + * * @author Ben Linskey */ public class LexiconCreator { - private final static String FILE = "../xml/Perseus_text_1999.04.0058.xml"; - private final static String DB = "lexicon.db"; - private final static String TABLE_NAME = "lexicon"; - private Connection connection; - private PreparedStatement insertStatement; - - /** - * Class constructor. - */ - public LexiconCreator() { - // Load driver. - try { - Class.forName("org.sqlite.JDBC"); - } catch (ClassNotFoundException e) { - e.printStackTrace(); - System.exit(1); - } - - // Connect to database. - try { - connection = DriverManager.getConnection("jdbc:sqlite:" + DB); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - - // Use batch inserts for speed. - try { - connection.setAutoCommit(false); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - - createDatabase(); - - // Create a prepared statement to use when inserting entries. - try { - insertStatement = connection.prepareStatement("INSERT INTO " - + TABLE_NAME + " VALUES (NULL, ?, ?, ?, ?, ?, ?)"); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Creates the lexicon database. - */ - public void run() { - addEntries(); - createIndex(); - try { - insertStatement.close(); - connection.close(); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - System.out.println("Done."); - } - - /** - * Resets the database if it already exists and creates a new, empty - * database. - */ - private void createDatabase() { - System.out.println("Creating lexicon database..."); - try { - String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME; - String createTable = "CREATE TABLE " + TABLE_NAME + " (" + - "_id INTEGER PRIMARY KEY, " + - "betaNoSymbols VARCHAR(100), " + - "betaSymbols VARCHAR(100), " + - "greekFullWord VARCHAR(100), " + - "greekNoSymbols VARCHAR(100), " + - "greekLowercase VARCHAR(100), " + - "entry TEXT)"; - Statement statement = connection.createStatement(); - statement.executeUpdate(dropTable); - statement.executeUpdate(createTable); - connection.commit(); - statement.close(); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Parses the XML file, modifies the lexicon entries, and inserts the - * modified entries into the database. - */ - private void addEntries() { - System.out.println("Inserting entries..."); - - try { - BufferedReader in = new BufferedReader(new FileReader(FILE)); - StringBuilder xml = new StringBuilder(); - - // Extract the XML for each lexicon entry, then process it. - while (in.ready()) { - String line = in.readLine(); - if (line.startsWith("<entry ")) { - xml.delete(0, xml.length()); // Reset XML. - xml.append(line); // Add this line to new chunk of XML. - } else if (line.startsWith("</entry>")) { - xml.append(line); - processEntry(xml.toString()); - } else { - xml.append(line); - } - } - in.close(); - - insertStatement.executeBatch(); - connection.commit(); - } catch (FileNotFoundException e) { - System.err.println("Error: Lexicon file not found."); - System.exit(1); - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Modifies the specified entry and inserts it into the database. - * @param xml the XML containing the entry to process - */ - private void processEntry(String xml) { - try { - LexiconParser parser = new LexiconParser(xml); - insertStatement.setString(1, parser.getBetaNoSymbols()); - insertStatement.setString(2, parser.getBetaSymbols()); - insertStatement.setString(3, parser.getGreekFullWord()); - insertStatement.setString(4, parser.getGreekNoSymbols()); - insertStatement.setString(5, parser.getGreekLowercase()); - insertStatement.setString(6, parser.getEntry()); - insertStatement.addBatch(); - } catch (ParserConfigurationException e) { - e.printStackTrace(); - System.exit(1); - } catch (SAXException e) { - e.printStackTrace(); - System.exit(1); - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Creates an index on the database to speed up searches. - */ - private void createIndex() { - System.out.println("Creating index..."); - - // Create an index on the three columns matched against search queries. - String sql = "CREATE INDEX searchIndex ON " + TABLE_NAME + - " (betaNoSymbols, betaSymbols, greekNoSymbols)"; - try { - Statement statement = connection.createStatement(); - statement.executeUpdate(sql); - statement.close(); - connection.commit(); - } catch (SQLException e) { - e.printStackTrace(); - } - } + private final static String FILE = "../xml/Perseus_text_1999.04.0058.xml"; + private final static String DB = "lexicon.db"; + private final static String TABLE_NAME = "lexicon"; + private Connection connection; + private PreparedStatement insertStatement; + + /** + * Class constructor. + */ + public LexiconCreator() { + // Load driver. + try { + Class.forName("org.sqlite.JDBC"); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + System.exit(1); + } + + // Connect to database. + try { + connection = DriverManager.getConnection("jdbc:sqlite:" + DB); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + + // Use batch inserts for speed. + try { + connection.setAutoCommit(false); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + + createDatabase(); + + // Create a prepared statement to use when inserting entries. + try { + insertStatement = connection.prepareStatement("INSERT INTO " + + TABLE_NAME + " VALUES (NULL, ?, ?, ?, ?, ?, ?)"); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Creates the lexicon database. + */ + public void run() { + addEntries(); + createIndex(); + try { + insertStatement.close(); + connection.close(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + System.out.println("Done."); + } + + /** + * Resets the database if it already exists and creates a new, empty + * database. + */ + private void createDatabase() { + System.out.println("Creating lexicon database..."); + try { + String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME; + String createTable = "CREATE TABLE " + TABLE_NAME + " (" + + "_id INTEGER PRIMARY KEY, " + + "betaNoSymbols VARCHAR(100), " + + "betaSymbols VARCHAR(100), " + + "greekFullWord VARCHAR(100), " + + "greekNoSymbols VARCHAR(100), " + + "greekLowercase VARCHAR(100), " + "entry TEXT)"; + Statement statement = connection.createStatement(); + statement.executeUpdate(dropTable); + statement.executeUpdate(createTable); + connection.commit(); + statement.close(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Parses the XML file, modifies the lexicon entries, and inserts the + * modified entries into the database. + */ + private void addEntries() { + System.out.println("Inserting entries..."); + + try { + BufferedReader in = new BufferedReader(new FileReader(FILE)); + StringBuilder xml = new StringBuilder(); + + // Extract the XML for each lexicon entry, then process it. + while (in.ready()) { + String line = in.readLine(); + if (line.startsWith("<entry ")) { + xml.delete(0, xml.length()); // Reset XML. + xml.append(line); // Add this line to new chunk of XML. + } else if (line.startsWith("</entry>")) { + xml.append(line); + processEntry(xml.toString()); + } else { + xml.append(line); + } + } + in.close(); + + insertStatement.executeBatch(); + connection.commit(); + } catch (FileNotFoundException e) { + System.err.println("Error: Lexicon file not found."); + System.exit(1); + } catch (IOException e) { + e.printStackTrace(); + System.exit(1); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Modifies the specified entry and inserts it into the database. + * + * @param xml + * the XML containing the entry to process + */ + private void processEntry(String xml) { + try { + LexiconParser parser = new LexiconParser(xml); + insertStatement.setString(1, parser.getBetaNoSymbols()); + insertStatement.setString(2, parser.getBetaSymbols()); + insertStatement.setString(3, parser.getGreekFullWord()); + insertStatement.setString(4, parser.getGreekNoSymbols()); + insertStatement.setString(5, parser.getGreekLowercase()); + insertStatement.setString(6, parser.getEntry()); + insertStatement.addBatch(); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + System.exit(1); + } catch (SAXException e) { + e.printStackTrace(); + System.exit(1); + } catch (IOException e) { + e.printStackTrace(); + System.exit(1); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Creates an index on the database to speed up searches. + */ + private void createIndex() { + System.out.println("Creating index..."); + + // Create an index on the three columns matched against search queries. + String sql = "CREATE INDEX searchIndex ON " + TABLE_NAME + + " (betaNoSymbols, betaSymbols, greekNoSymbols)"; + try { + Statement statement = connection.createStatement(); + statement.executeUpdate(sql); + statement.close(); + connection.commit(); + } catch (SQLException e) { + e.printStackTrace(); + } + } } diff --git a/src/com/benlinskey/grdbc/LexiconParser.java b/src/com/benlinskey/grdbc/LexiconParser.java index 8e9e46b..9f3e3b1 100644 --- a/src/com/benlinskey/grdbc/LexiconParser.java +++ b/src/com/benlinskey/grdbc/LexiconParser.java @@ -24,99 +24,108 @@ import org.xml.sax.SAXException; /** * This class provides methods to parse a chunk of XML containing a lexicon - * entry, modify the data contained therein, and return data to be inserted - * into the database. + * entry, modify the data contained therein, and return data to be inserted into + * the database. + * * @author Ben Linskey */ public class LexiconParser extends GreekTextParser { - /** - * Class constructor. - * @param xml the XML to parse - * @throws ParserConfigurationException - * @throws SAXException - * @throws IOException - */ - public LexiconParser(String xml) - throws ParserConfigurationException, SAXException, IOException { - super(xml); - } - - /** - * Returns a Beta Code representation of this entry's word, stripped of - * all diacritics. - * @return this entry's word in Beta Code without diacritics - */ - public String getBetaNoSymbols() { - // Get the word and replace all symbols with an empty string. - return getBetaSymbols().replaceAll("[^a-zA-Z]", ""); - } - - /** - * Returns a Beta Code representation of this entry's word. - * @return this entry's word in Beta Code - */ - public String getBetaSymbols() { - // We just need the "key" attribute from the "entry" element. - Node entry = doc.getElementsByTagName("entry").item(0); - return entry.getAttributes().getNamedItem("key").getTextContent(); - } - - /** - * Returns this entry's word in Greek characters. - * @return this entry's word in Greek characters - */ - public String getGreekFullWord() { - // Use the transcoder to convert the beta code to Greek. - return betaToGreek(getBetaSymbols()); - } - - /** - * Returns this entry's word in Greek characters, stripped of all - * diacritics. - * @return this entry's word in Greek characters without diacritics - */ - public String getGreekNoSymbols() { - // Get beta code with no symbols other than the capital letter marker. - String beta = getBetaSymbols().replaceAll("[^a-zA-Z\\*]", ""); - - // Use the transcoder to convert the beta code to Greek. - return betaToGreek(beta); - } - - /** - * Returns this entry's word in all lowercase Greek characters, stripped - * of all diacritics. - * @return this entry's word in lowercase Greek characters without - * diacritics - */ - public String getGreekLowercase() { - return getGreekNoSymbols().toLowerCase(); - } - - /** - * Returns the XML for this entry, with all Beta Code converted to Greek - * characters. - * @return the XML for this entry with all Beta Code converted to Greek - * characters - */ - public String getEntry() { - transcodeEntryKey(); - transcodeInElements("orth"); - transcodeInElements("ref"); - transcodeInElements("foreign"); - transcodeInElements("note"); - return getUpdatedXML(); - } - - /** - * Converts the value of the entry element's "key" attribute from Beta - * Code to Greek. - */ - private void transcodeEntryKey() { - Node entryNode = doc.getElementsByTagName("entry").item(0); - Node keyAttr = entryNode.getAttributes().getNamedItem("key"); - String beta = keyAttr.getTextContent(); - String greek = betaToGreek(beta); - keyAttr.setTextContent(greek); - } + /** + * Class constructor. + * + * @param xml + * the XML to parse + * @throws ParserConfigurationException + * @throws SAXException + * @throws IOException + */ + public LexiconParser(String xml) throws ParserConfigurationException, + SAXException, IOException { + super(xml); + } + + /** + * Returns a Beta Code representation of this entry's word, stripped of all + * diacritics. + * + * @return this entry's word in Beta Code without diacritics + */ + public String getBetaNoSymbols() { + // Get the word and replace all symbols with an empty string. + return getBetaSymbols().replaceAll("[^a-zA-Z]", ""); + } + + /** + * Returns a Beta Code representation of this entry's word. + * + * @return this entry's word in Beta Code + */ + public String getBetaSymbols() { + // We just need the "key" attribute from the "entry" element. + Node entry = doc.getElementsByTagName("entry").item(0); + return entry.getAttributes().getNamedItem("key").getTextContent(); + } + + /** + * Returns this entry's word in Greek characters. + * + * @return this entry's word in Greek characters + */ + public String getGreekFullWord() { + // Use the transcoder to convert the beta code to Greek. + return betaToGreek(getBetaSymbols()); + } + + /** + * Returns this entry's word in Greek characters, stripped of all + * diacritics. + * + * @return this entry's word in Greek characters without diacritics + */ + public String getGreekNoSymbols() { + // Get beta code with no symbols other than the capital letter marker. + String beta = getBetaSymbols().replaceAll("[^a-zA-Z\\*]", ""); + + // Use the transcoder to convert the beta code to Greek. + return betaToGreek(beta); + } + + /** + * Returns this entry's word in all lowercase Greek characters, stripped of + * all diacritics. + * + * @return this entry's word in lowercase Greek characters without + * diacritics + */ + public String getGreekLowercase() { + return getGreekNoSymbols().toLowerCase(); + } + + /** + * Returns the XML for this entry, with all Beta Code converted to Greek + * characters. + * + * @return the XML for this entry with all Beta Code converted to Greek + * characters + */ + public String getEntry() { + transcodeEntryKey(); + transcodeInElements("orth"); + transcodeInElements("ref"); + transcodeInElements("foreign"); + transcodeInElements("note"); + return getUpdatedXML(); + } + + /** + * Converts the value of the entry element's "key" attribute from Beta Code + * to Greek. + */ + private void transcodeEntryKey() { + Node entryNode = doc.getElementsByTagName("entry").item(0); + Node keyAttr = entryNode.getAttributes().getNamedItem("key"); + String beta = keyAttr.getTextContent(); + String greek = betaToGreek(beta); + keyAttr.setTextContent(greek); + } } diff --git a/src/com/benlinskey/grdbc/SyntaxCreator.java b/src/com/benlinskey/grdbc/SyntaxCreator.java index 10637aa..014b7ca 100644 --- a/src/com/benlinskey/grdbc/SyntaxCreator.java +++ b/src/com/benlinskey/grdbc/SyntaxCreator.java @@ -31,12 +31,178 @@ import javax.xml.parsers.ParserConfigurationException; import org.xml.sax.SAXException; /** - * Reads in an XML file containing the Overview of Greek Syntax text and stores + * Reads in an XML file containing the Overview of Greek Syntax text and stores * sections of the text in an SQLite database. +<<<<<<< HEAD + * <p> + * Note that the Sources Cited section is omitted, as it is on Perseus. + * +======= +>>>>>>> master * @author Ben Linskey - * + * */ public class SyntaxCreator { +<<<<<<< HEAD + private final static String FILE = "../xml/Perseus_text_1999.04.0052.xml"; + private final static String DB = "syntax.db"; + private final static String TABLE_NAME = "syntax"; + private Connection connection; + private PreparedStatement insertStatement; + + /** + * Class constructor. + */ + public SyntaxCreator() { + // Load driver. + try { + Class.forName("org.sqlite.JDBC"); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + System.exit(1); + } + + // Connect to database. + try { + connection = DriverManager.getConnection("jdbc:sqlite:" + DB); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + + // Use batch inserts for speed. + try { + connection.setAutoCommit(false); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + + createDatabase(); + + // Create a prepared statement to use when inserting entries. + try { + insertStatement = connection.prepareStatement("INSERT INTO " + + TABLE_NAME + " VALUES (NULL, ?, ?, ?)"); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Creates the Overview of Greek Syntax database. + */ + public void run() { + addSections(); + try { + insertStatement.close(); + connection.close(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + System.out.println("Done."); + } + + /** + * Resets the database if it already exists and creates a new, empty + * database. + */ + private void createDatabase() { + System.out.println("Creating lexicon database..."); + try { + String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME; + String createTable = "CREATE TABLE " + TABLE_NAME + " (" + + "_ID INTEGER PRIMARY KEY, " + + "chapter VARCHAR(100), " + "section VARCHAR(100), " + + "xml TEXT)"; + Statement statement = connection.createStatement(); + statement.executeUpdate(dropTable); + statement.executeUpdate(createTable); + connection.commit(); + statement.close(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Parses the XML file, modifies the sections, and inserts the modified data + * into the database. + */ + private void addSections() { + System.out.println("Inserting data..."); + + String chapter = null; + String section = null; + StringBuilder xml = new StringBuilder(); + Pattern pattern = Pattern.compile("<head>(.*?)</head>"); + + try { + BufferedReader in = new BufferedReader(new FileReader(FILE)); + while (in.ready()) { + String line = in.readLine(); + if (line.startsWith("<div1")) { + // Get chapter title. + line = in.readLine(); // Next line is "head" element with + // title. + Matcher matcher = pattern.matcher(line); + matcher.find(); + chapter = matcher.group(1); + } else if (line.startsWith("<div2")) { + // Get section title. + line = in.readLine(); // Next line is "head" element with + // title. + Matcher matcher = pattern.matcher(line); + matcher.find(); + section = matcher.group(1); + + // Reset XML and add "head" element. + xml.delete(0, xml.length()); + xml.append("<section>"); + xml.append(line); + } else if (line.contains("</div2>")) { + // Get any XML before the "</div2>" tag. + String[] split = line.split("</div2>"); + xml.append(split[0]); + + // Add closing root tag. + xml.append("</section>"); + + SyntaxParser parser = new SyntaxParser(xml.toString()); + String transcodedXml = parser.transcode(); + + // Add data to database. + insertStatement.setString(1, chapter); + insertStatement.setString(2, section); + insertStatement.setString(3, transcodedXml); + insertStatement.addBatch(); + } else { + // Get next line of XML. + xml.append(line); + } + } + in.close(); + + insertStatement.executeBatch(); + connection.commit(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } catch (IOException e) { + e.printStackTrace(); + System.exit(1); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + System.exit(1); + } catch (SAXException e) { + e.printStackTrace(); + System.exit(1); + } + } +======= private final static String FILE = "../xml/Perseus_text_1999.04.0052.xml"; private final static String DB = "syntax.db"; private final static String TABLE_NAME = "syntax"; @@ -214,4 +380,5 @@ public class SyntaxCreator { System.exit(1); } } +>>>>>>> master } diff --git a/src/com/benlinskey/grdbc/SyntaxParser.java b/src/com/benlinskey/grdbc/SyntaxParser.java index 811ca2f..9745af3 100644 --- a/src/com/benlinskey/grdbc/SyntaxParser.java +++ b/src/com/benlinskey/grdbc/SyntaxParser.java @@ -24,31 +24,35 @@ import org.xml.sax.SAXException; /** * Parses XML from the Overview of Greek Syntax text and converts Beta Code to * Greek characters. + * * @author Ben Linskey - * + * */ public class SyntaxParser extends GreekTextParser { - /** - * Class constructor. - * @param xml the XML to parse - * @throws ParserConfigurationException - * @throws SAXException - * @throws IOException - */ - public SyntaxParser(String xml) - throws ParserConfigurationException, SAXException, IOException { - super(xml); - } - - /** - * Returns the XML for this section, with all Beta Code converted to Greek - * characters. - * @return the XML for this section with all Beta Code converted to Greek - * characters - */ - public String transcode() { - transcodeInElements("quote"); - transcodeInElements("foreign"); - return getUpdatedXML(); - } + /** + * Class constructor. + * + * @param xml + * the XML to parse + * @throws ParserConfigurationException + * @throws SAXException + * @throws IOException + */ + public SyntaxParser(String xml) throws ParserConfigurationException, + SAXException, IOException { + super(xml); + } + + /** + * Returns the XML for this section, with all Beta Code converted to Greek + * characters. + * + * @return the XML for this section with all Beta Code converted to Greek + * characters + */ + public String transcode() { + transcodeInElements("quote"); + transcodeInElements("foreign"); + return getUpdatedXML(); + } } |