From 8d3f5bd0cfb9124ded8008af47fc5ea531d11b0e Mon Sep 17 00:00:00 2001 From: Ben Linskey Date: Mon, 16 Dec 2013 20:00:18 -0500 Subject: Add grammar database creation code. --- src/com/benlinskey/grdbc/GRDBC.java | 5 +- src/com/benlinskey/grdbc/LexiconParser.java | 3 +- src/com/benlinskey/grdbc/SyntaxCreator.java | 199 ++++++++++++++++++++++++++++ src/com/benlinskey/grdbc/SyntaxParser.java | 138 +++++++++++++++++++ 4 files changed, 342 insertions(+), 3 deletions(-) create mode 100644 src/com/benlinskey/grdbc/SyntaxCreator.java create mode 100644 src/com/benlinskey/grdbc/SyntaxParser.java (limited to 'src/com/benlinskey') diff --git a/src/com/benlinskey/grdbc/GRDBC.java b/src/com/benlinskey/grdbc/GRDBC.java index 9afcfe8..d8cd6ae 100644 --- a/src/com/benlinskey/grdbc/GRDBC.java +++ b/src/com/benlinskey/grdbc/GRDBC.java @@ -28,11 +28,12 @@ public class GRDBC { String opt = args[0]; if (opt.equals("-a")) { - // TODO + (new LexiconCreator()).run(); + (new SyntaxCreator()).run(); } else if (opt.equals("-l")) { (new LexiconCreator()).run(); } else if (opt.equals("-g")) { - // TODO + (new SyntaxCreator()).run(); } else { displayUsage(); } diff --git a/src/com/benlinskey/grdbc/LexiconParser.java b/src/com/benlinskey/grdbc/LexiconParser.java index 46dcad5..69d474b 100644 --- a/src/com/benlinskey/grdbc/LexiconParser.java +++ b/src/com/benlinskey/grdbc/LexiconParser.java @@ -55,7 +55,8 @@ public class LexiconParser { * @throws IOException * @throws SAXException */ - public LexiconParser(String xml) throws ParserConfigurationException, SAXException, IOException { + public LexiconParser(String xml) + throws ParserConfigurationException, SAXException, IOException { // Parse the XML and create a Document. DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); diff --git a/src/com/benlinskey/grdbc/SyntaxCreator.java b/src/com/benlinskey/grdbc/SyntaxCreator.java new file mode 100644 index 0000000..cce594d --- /dev/null +++ b/src/com/benlinskey/grdbc/SyntaxCreator.java @@ -0,0 +1,199 @@ +/* Copyright 2013 Benjamin Linskey + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.benlinskey.grdbc; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.ParserConfigurationException; + +import org.xml.sax.SAXException; + +/** + * Reads in an XML file containing the Overview of Greek Syntax text and stores + * sections of the text in an SQLite database. + *
+ * Note that the Sources Cited section is omitted, as it is on Perseus. + * @author Ben Linskey + * + */ +public class SyntaxCreator { + private final static String FILE = "../xml/Perseus_text_1999.04.0052.xml"; + private final static String DB = "syntax.db"; + private final static String TABLE_NAME = "syntax"; + private Connection connection; + private PreparedStatement insertStatement; + + /** + * Class constructor. + */ + public SyntaxCreator() { + // Load driver. + try { + Class.forName("org.sqlite.JDBC"); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + System.exit(1); + } + + // Connect to database. + try { + connection = DriverManager.getConnection("jdbc:sqlite:" + DB); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + + // Use batch inserts for speed. + try { + connection.setAutoCommit(false); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + + createDatabase(); + + // Create a prepared statement to use when inserting entries. + try { + insertStatement = connection.prepareStatement("INSERT INTO " + + TABLE_NAME + " VALUES (NULL, ?, ?, ?)"); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Creates the Overview of Greek Syntax database. + */ + public void run() { + addSections(); + try { + insertStatement.close(); + connection.close(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + System.out.println("Done."); + } + + /** + * Resets the database if it already exists and creates a new, empty + * database. + */ + private void createDatabase() { + System.out.println("Creating lexicon database..."); + try { + String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME; + String createTable = "CREATE TABLE " + TABLE_NAME + " (" + + "_ID INT PRIMARY KEY, " + + "chapter VARCHAR(100), " + + "section VARCHAR(100), " + + "xml TEXT)"; + Statement statement = connection.createStatement(); + statement.executeUpdate(dropTable); + statement.executeUpdate(createTable); + connection.commit(); + statement.close(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Parses the XML file, modifies the sections, and inserts the modified + * data into the database. + */ + private void addSections() { + System.out.println("Inserting data..."); + + String chapter = null; + String section = null; + StringBuilder xml = new StringBuilder(); + Pattern pattern = Pattern.compile("
(.*?)"); + + try { + BufferedReader in = new BufferedReader(new FileReader(FILE)); + while (in.ready()) { + String line = in.readLine(); + if (line.startsWith("