diff options
| author | Ben Linskey | 2014-02-03 22:36:09 -0500 |
|---|---|---|
| committer | Ben Linskey | 2014-02-03 22:36:09 -0500 |
| commit | f7489fcb0d365d0263b1fc86c93529e94d9eba0c (patch) | |
| tree | b0b8ad08c37522e85a9ddd5bb233e14e9d4984e7 | |
| parent | 2dea8082e3777c040ec5e391b259743355f472ef (diff) | |
| parent | 2e5458bcc606ee558bb98a47b064efdb219e93ca (diff) | |
| download | greek-reference-database-creator-021922cb44605f1582db6dbc4091181491097c42.tar.gz | |
Merge branch 'dev'v1.2.0
| -rw-r--r-- | src/com/benlinskey/grdbc/LexiconCreator.java | 118 |
1 files changed, 99 insertions, 19 deletions
diff --git a/src/com/benlinskey/grdbc/LexiconCreator.java b/src/com/benlinskey/grdbc/LexiconCreator.java index f33007b..5da0e6c 100644 --- a/src/com/benlinskey/grdbc/LexiconCreator.java +++ b/src/com/benlinskey/grdbc/LexiconCreator.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; +import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; @@ -32,15 +33,21 @@ import org.xml.sax.SAXException; /** * Reads in an XML file containing a Greek lexicon and stores entries in an * SQLite database. + * <p> + * The entries in the XML file are not properly alphabetized, so we first read + * them into a temporary table and then alphabetize them and copy them into the + * final table in alphabetical order. * * @author Ben Linskey */ public class LexiconCreator { private final static String FILE = "../xml/Perseus_text_1999.04.0058.xml"; private final static String DB = "lexicon.db"; - private final static String TABLE_NAME = "lexicon"; + private final static String TEMP_TABLE_NAME = "temp"; + private final static String FINAL_TABLE_NAME = "lexicon"; private Connection connection; - private PreparedStatement insertStatement; + private PreparedStatement tempInsertStatement; + private PreparedStatement finalInsertStatement; /** * Class constructor. @@ -74,8 +81,10 @@ public class LexiconCreator { // Create a prepared statement to use when inserting entries. try { - insertStatement = connection.prepareStatement("INSERT INTO " - + TABLE_NAME + " VALUES (NULL, ?, ?, ?, ?, ?, ?)"); + tempInsertStatement = connection.prepareStatement("INSERT INTO " + + TEMP_TABLE_NAME + " VALUES (?, ?, ?, ?, ?, ?)"); + finalInsertStatement = connection.prepareStatement("INSERT INTO " + + FINAL_TABLE_NAME + " VALUES (NULL, ?, ?, ?, ?, ?, ?)"); } catch (SQLException e) { e.printStackTrace(); System.exit(1); @@ -87,9 +96,11 @@ public class LexiconCreator { */ public void run() { addEntries(); + alphabetize(); + dropTempTable(); createIndex(); + vacuum(); try { - insertStatement.close(); connection.close(); } catch (SQLException e) { e.printStackTrace(); @@ -105,17 +116,26 @@ public class LexiconCreator { private void createDatabase() { System.out.println("Creating lexicon database..."); try { - String dropTable = "DROP TABLE IF EXISTS " + TABLE_NAME; - String createTable = "CREATE TABLE " + TABLE_NAME + " (" - + "_id INTEGER PRIMARY KEY, " + String dropTempTable = "DROP TABLE IF EXISTS " + TEMP_TABLE_NAME; + String createTempTable = "CREATE TABLE " + TEMP_TABLE_NAME + " (" + "betaNoSymbols VARCHAR(100), " + "betaSymbols VARCHAR(100), " + "greekFullWord VARCHAR(100), " + "greekNoSymbols VARCHAR(100), " + "greekLowercase VARCHAR(100), " + "entry TEXT)"; + String dropFinalTable = "DROP TABLE IF EXISTS " + FINAL_TABLE_NAME; + String createFinalTable = "CREATE TABLE " + FINAL_TABLE_NAME + " (" + + "_id INTEGER PRIMARY KEY, " + + "betaNoSymbols VARCHAR(100), " + + "betaSymbols VARCHAR(100), " + + "greekFullWord VARCHAR(100), " + + "greekNoSymbols VARCHAR(100), " + + "greekLowercase VARCHAR(100), " + "entry TEXT)"; Statement statement = connection.createStatement(); - statement.executeUpdate(dropTable); - statement.executeUpdate(createTable); + statement.executeUpdate(dropTempTable); + statement.executeUpdate(createTempTable); + statement.executeUpdate(dropFinalTable); + statement.executeUpdate(createFinalTable); connection.commit(); statement.close(); } catch (SQLException e) { @@ -150,8 +170,10 @@ public class LexiconCreator { } in.close(); - insertStatement.executeBatch(); + tempInsertStatement.executeBatch(); connection.commit(); + + tempInsertStatement.close(); } catch (FileNotFoundException e) { System.err.println("Error: Lexicon file not found."); System.exit(1); @@ -173,13 +195,13 @@ public class LexiconCreator { private void processEntry(String xml) { try { LexiconParser parser = new LexiconParser(xml); - insertStatement.setString(1, parser.getBetaNoSymbols()); - insertStatement.setString(2, parser.getBetaSymbols()); - insertStatement.setString(3, parser.getGreekFullWord()); - insertStatement.setString(4, parser.getGreekNoSymbols()); - insertStatement.setString(5, parser.getGreekLowercase()); - insertStatement.setString(6, parser.getEntry()); - insertStatement.addBatch(); + tempInsertStatement.setString(1, parser.getBetaNoSymbols()); + tempInsertStatement.setString(2, parser.getBetaSymbols()); + tempInsertStatement.setString(3, parser.getGreekFullWord()); + tempInsertStatement.setString(4, parser.getGreekNoSymbols()); + tempInsertStatement.setString(5, parser.getGreekLowercase()); + tempInsertStatement.setString(6, parser.getEntry()); + tempInsertStatement.addBatch(); } catch (ParserConfigurationException e) { e.printStackTrace(); System.exit(1); @@ -196,13 +218,56 @@ public class LexiconCreator { } /** + * Copies the entries from the temporary table into the final table in + * alphabetical order. + */ + private void alphabetize() { + try { + String query = "SELECT * FROM " + TEMP_TABLE_NAME + + " ORDER BY greekLowercase ASC"; + Statement statement = connection.createStatement(); + ResultSet rs = statement.executeQuery(query); + while (rs.next()) { + finalInsertStatement.setString(1, rs.getString(1)); + finalInsertStatement.setString(2, rs.getString(2)); + finalInsertStatement.setString(3, rs.getString(3)); + finalInsertStatement.setString(4, rs.getString(4)); + finalInsertStatement.setString(5, rs.getString(5)); + finalInsertStatement.setString(6, rs.getString(6)); + finalInsertStatement.addBatch(); + } + + finalInsertStatement.executeBatch(); + connection.commit(); + + statement.close(); + finalInsertStatement.close(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + + /** + * Drops the temporary table. + */ + private void dropTempTable() { + String sql = "DROP TABLE " + TEMP_TABLE_NAME; + try { + Statement statement = connection.createStatement(); + statement.executeUpdate(sql); + } catch (SQLException e) { + e.printStackTrace(); + } + } + + /** * Creates an index on the database to speed up searches. */ private void createIndex() { System.out.println("Creating index..."); // Create an index on the three columns matched against search queries. - String sql = "CREATE INDEX searchIndex ON " + TABLE_NAME + String sql = "CREATE INDEX searchIndex ON " + FINAL_TABLE_NAME + " (betaNoSymbols, betaSymbols, greekNoSymbols, " + "greekLowercase)"; try { @@ -214,4 +279,19 @@ public class LexiconCreator { e.printStackTrace(); } } + + /** + * Rebuilds the database in order to reduce its size. + */ + private void vacuum() { + System.out.println("Rebuilding database..."); + try { + connection.setAutoCommit(true); + Statement statement = connection.createStatement(); + statement.executeUpdate("VACUUM"); + statement.close(); + } catch (SQLException e) { + e.printStackTrace(); + } + } } |