diff options
| author | Ben Linskey | 2014-02-02 01:31:48 -0500 |
|---|---|---|
| committer | Ben Linskey | 2014-02-02 01:31:48 -0500 |
| commit | 0c56b5162aa0ae4d153b733ee473432d547e027d (patch) | |
| tree | 4e57fa8eef97d8510298d6f5a6186dff468003c4 /src/com/benlinskey/grdbc | |
| parent | fa7224732c4a10e104d1dd58715482125f7ca9ad (diff) | |
| download | greek-reference-database-creator-0c56b5162aa0ae4d153b733ee473432d547e027d.tar.gz | |
Finish merge.
Diffstat (limited to 'src/com/benlinskey/grdbc')
| -rw-r--r-- | src/com/benlinskey/grdbc/SyntaxCreator.java | 208 |
1 file changed, 22 insertions, 186 deletions
diff --git a/src/com/benlinskey/grdbc/SyntaxCreator.java b/src/com/benlinskey/grdbc/SyntaxCreator.java index 014b7ca..9c8ad36 100644 --- a/src/com/benlinskey/grdbc/SyntaxCreator.java +++ b/src/com/benlinskey/grdbc/SyntaxCreator.java @@ -33,17 +33,11 @@ import org.xml.sax.SAXException; /** * Reads in an XML file containing the Overview of Greek Syntax text and stores * sections of the text in an SQLite database. -<<<<<<< HEAD - * <p> - * Note that the Sources Cited section is omitted, as it is on Perseus. * -======= ->>>>>>> master * @author Ben Linskey * */ public class SyntaxCreator { -<<<<<<< HEAD private final static String FILE = "../xml/Perseus_text_1999.04.0052.xml"; private final static String DB = "syntax.db"; private final static String TABLE_NAME = "syntax"; @@ -114,7 +108,7 @@ public class SyntaxCreator { try { String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME; String createTable = "CREATE TABLE " + TABLE_NAME + " (" - + "_ID INTEGER PRIMARY KEY, " + + "_id INTEGER PRIMARY KEY, " + "chapter VARCHAR(100), " + "section VARCHAR(100), " + "xml TEXT)"; Statement statement = connection.createStatement(); @@ -151,6 +145,13 @@ public class SyntaxCreator { Matcher matcher = pattern.matcher(line); matcher.find(); chapter = matcher.group(1); + + if (chapter.equals("Sources Cited")) { + section = chapter; + xml.delete(0, xml.length()); + xml.append("<section>"); + xml.append("<head>Sources Cited</head>"); + } } else if (line.startsWith("<div2")) { // Get section title. line = in.readLine(); // Next line is "head" element with @@ -179,6 +180,20 @@ public class SyntaxCreator { insertStatement.setString(2, section); insertStatement.setString(3, transcodedXml); insertStatement.addBatch(); + } else if (line.contains("</div1>") + && chapter.equals("Sources Cited")) { + // Get any XML before the "</div2>" tag. + String[] split = line.split("</div1>"); + xml.append(split[0]); + + // Add closing root tag. + xml.append("</section>"); + + // Add data to database. 
+ insertStatement.setString(1, chapter); + insertStatement.setString(2, section); + insertStatement.setString(3, xml.toString()); + insertStatement.addBatch(); } else { // Get next line of XML. xml.append(line); @@ -202,183 +217,4 @@ public class SyntaxCreator { System.exit(1); } } -======= - private final static String FILE = "../xml/Perseus_text_1999.04.0052.xml"; - private final static String DB = "syntax.db"; - private final static String TABLE_NAME = "syntax"; - private Connection connection; - private PreparedStatement insertStatement; - - /** - * Class constructor. - */ - public SyntaxCreator() { - // Load driver. - try { - Class.forName("org.sqlite.JDBC"); - } catch (ClassNotFoundException e) { - e.printStackTrace(); - System.exit(1); - } - - // Connect to database. - try { - connection = DriverManager.getConnection("jdbc:sqlite:" + DB); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - - // Use batch inserts for speed. - try { - connection.setAutoCommit(false); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - - createDatabase(); - - // Create a prepared statement to use when inserting entries. - try { - insertStatement = connection.prepareStatement("INSERT INTO " - + TABLE_NAME + " VALUES (NULL, ?, ?, ?)"); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Creates the Overview of Greek Syntax database. - */ - public void run() { - addSections(); - try { - insertStatement.close(); - connection.close(); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - System.out.println("Done."); - } - - /** - * Resets the database if it already exists and creates a new, empty - * database. 
- */ - private void createDatabase() { - System.out.println("Creating lexicon database..."); - try { - String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME; - String createTable = "CREATE TABLE " + TABLE_NAME + " (" + - "_id INTEGER PRIMARY KEY, " + - "chapter VARCHAR(100), " + - "section VARCHAR(100), " + - "xml TEXT)"; - Statement statement = connection.createStatement(); - statement.executeUpdate(dropTable); - statement.executeUpdate(createTable); - connection.commit(); - statement.close(); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } - } - - /** - * Parses the XML file, modifies the sections, and inserts the modified - * data into the database. - */ - private void addSections() { - System.out.println("Inserting data..."); - - String chapter = null; - String section = null; - StringBuilder xml = new StringBuilder(); - Pattern pattern = Pattern.compile("<head>(.*?)</head>"); - - try { - BufferedReader in = new BufferedReader(new FileReader(FILE)); - while (in.ready()) { - String line = in.readLine(); - if (line.startsWith("<div1")) { - // Get chapter title. - line = in.readLine(); // Next line is "head" element with title. - Matcher matcher = pattern.matcher(line); - matcher.find(); - chapter = matcher.group(1); - - if (chapter.equals("Sources Cited")) { - section = chapter; - xml.delete(0, xml.length()); - xml.append("<section>"); - xml.append("<head>Sources Cited</head>"); - } - } else if (line.startsWith("<div2")) { - // Get section title. - line = in.readLine(); // Next line is "head" element with title. - Matcher matcher = pattern.matcher(line); - matcher.find(); - section = matcher.group(1); - - // Reset XML and add "head" element. - xml.delete(0, xml.length()); - xml.append("<section>"); - xml.append(line); - } else if (line.contains("</div2>")) { - // Get any XML before the "</div2>" tag. - String[] split = line.split("</div2>"); - xml.append(split[0]); - - // Add closing root tag. 
- xml.append("</section>"); - - SyntaxParser parser = new SyntaxParser(xml.toString()); - String transcodedXml = parser.transcode(); - - // Add data to database. - insertStatement.setString(1, chapter); - insertStatement.setString(2, section); - insertStatement.setString(3, transcodedXml); - insertStatement.addBatch(); - } else if (line.contains("</div1>") && chapter.equals("Sources Cited")) { - // Get any XML before the "</div2>" tag. - String[] split = line.split("</div1>"); - xml.append(split[0]); - - // Add closing root tag. - xml.append("</section>"); - - // Add data to database. - insertStatement.setString(1, chapter); - insertStatement.setString(2, section); - insertStatement.setString(3, xml.toString()); - insertStatement.addBatch(); - } else { - // Get next line of XML. - xml.append(line); - } - } - in.close(); - - insertStatement.executeBatch(); - connection.commit(); - } catch (SQLException e) { - e.printStackTrace(); - System.exit(1); - } catch (IOException e) { - e.printStackTrace(); - System.exit(1); - } catch (ParserConfigurationException e) { - e.printStackTrace(); - System.exit(1); - } catch (SAXException e) { - e.printStackTrace(); - System.exit(1); - } - } ->>>>>>> master } |