diff options
Diffstat (limited to 'src/com/benlinskey/grdbc/LexiconParser.java')
| -rw-r--r-- | src/com/benlinskey/grdbc/LexiconParser.java | 193 |
1 files changed, 101 insertions, 92 deletions
diff --git a/src/com/benlinskey/grdbc/LexiconParser.java b/src/com/benlinskey/grdbc/LexiconParser.java index 8e9e46b..9f3e3b1 100644 --- a/src/com/benlinskey/grdbc/LexiconParser.java +++ b/src/com/benlinskey/grdbc/LexiconParser.java @@ -24,99 +24,108 @@ import org.xml.sax.SAXException; /** * This class provides methods to parse a chunk of XML containing a lexicon - * entry, modify the data contained therein, and return data to be inserted - * into the database. + * entry, modify the data contained therein, and return data to be inserted into + * the database. + * * @author Ben Linskey */ public class LexiconParser extends GreekTextParser { - /** - * Class constructor. - * @param xml the XML to parse - * @throws ParserConfigurationException - * @throws SAXException - * @throws IOException - */ - public LexiconParser(String xml) - throws ParserConfigurationException, SAXException, IOException { - super(xml); - } - - /** - * Returns a Beta Code representation of this entry's word, stripped of - * all diacritics. - * @return this entry's word in Beta Code without diacritics - */ - public String getBetaNoSymbols() { - // Get the word and replace all symbols with an empty string. - return getBetaSymbols().replaceAll("[^a-zA-Z]", ""); - } - - /** - * Returns a Beta Code representation of this entry's word. - * @return this entry's word in Beta Code - */ - public String getBetaSymbols() { - // We just need the "key" attribute from the "entry" element. - Node entry = doc.getElementsByTagName("entry").item(0); - return entry.getAttributes().getNamedItem("key").getTextContent(); - } - - /** - * Returns this entry's word in Greek characters. - * @return this entry's word in Greek characters - */ - public String getGreekFullWord() { - // Use the transcoder to convert the beta code to Greek. - return betaToGreek(getBetaSymbols()); - } - - /** - * Returns this entry's word in Greek characters, stripped of all - * diacritics. - * @return this entry's word in Greek characters without diacritics - */ - public String getGreekNoSymbols() { - // Get beta code with no symbols other than the capital letter marker. - String beta = getBetaSymbols().replaceAll("[^a-zA-Z\\*]", ""); - - // Use the transcoder to convert the beta code to Greek. - return betaToGreek(beta); - } - - /** - * Returns this entry's word in all lowercase Greek characters, stripped - * of all diacritics. - * @return this entry's word in lowercase Greek characters without - * diacritics - */ - public String getGreekLowercase() { - return getGreekNoSymbols().toLowerCase(); - } - - /** - * Returns the XML for this entry, with all Beta Code converted to Greek - * characters. - * @return the XML for this entry with all Beta Code converted to Greek - * characters - */ - public String getEntry() { - transcodeEntryKey(); - transcodeInElements("orth"); - transcodeInElements("ref"); - transcodeInElements("foreign"); - transcodeInElements("note"); - return getUpdatedXML(); - } - - /** - * Converts the value of the entry element's "key" attribute from Beta - * Code to Greek. - */ - private void transcodeEntryKey() { - Node entryNode = doc.getElementsByTagName("entry").item(0); - Node keyAttr = entryNode.getAttributes().getNamedItem("key"); - String beta = keyAttr.getTextContent(); - String greek = betaToGreek(beta); - keyAttr.setTextContent(greek); - } + /** + * Class constructor. + * + * @param xml + * the XML to parse + * @throws ParserConfigurationException + * @throws SAXException + * @throws IOException + */ + public LexiconParser(String xml) throws ParserConfigurationException, + SAXException, IOException { + super(xml); + } + + /** + * Returns a Beta Code representation of this entry's word, stripped of all + * diacritics. + * + * @return this entry's word in Beta Code without diacritics + */ + public String getBetaNoSymbols() { + // Get the word and replace all symbols with an empty string. + return getBetaSymbols().replaceAll("[^a-zA-Z]", ""); + } + + /** + * Returns a Beta Code representation of this entry's word. + * + * @return this entry's word in Beta Code + */ + public String getBetaSymbols() { + // We just need the "key" attribute from the "entry" element. + Node entry = doc.getElementsByTagName("entry").item(0); + return entry.getAttributes().getNamedItem("key").getTextContent(); + } + + /** + * Returns this entry's word in Greek characters. + * + * @return this entry's word in Greek characters + */ + public String getGreekFullWord() { + // Use the transcoder to convert the beta code to Greek. + return betaToGreek(getBetaSymbols()); + } + + /** + * Returns this entry's word in Greek characters, stripped of all + * diacritics. + * + * @return this entry's word in Greek characters without diacritics + */ + public String getGreekNoSymbols() { + // Get beta code with no symbols other than the capital letter marker. + String beta = getBetaSymbols().replaceAll("[^a-zA-Z\\*]", ""); + + // Use the transcoder to convert the beta code to Greek. + return betaToGreek(beta); + } + + /** + * Returns this entry's word in all lowercase Greek characters, stripped of + * all diacritics. + * + * @return this entry's word in lowercase Greek characters without + * diacritics + */ + public String getGreekLowercase() { + return getGreekNoSymbols().toLowerCase(); + } + + /** + * Returns the XML for this entry, with all Beta Code converted to Greek + * characters. + * + * @return the XML for this entry with all Beta Code converted to Greek + * characters + */ + public String getEntry() { + transcodeEntryKey(); + transcodeInElements("orth"); + transcodeInElements("ref"); + transcodeInElements("foreign"); + transcodeInElements("note"); + return getUpdatedXML(); + } + + /** + * Converts the value of the entry element's "key" attribute from Beta Code + * to Greek. + */ + private void transcodeEntryKey() { + Node entryNode = doc.getElementsByTagName("entry").item(0); + Node keyAttr = entryNode.getAttributes().getNamedItem("key"); + String beta = keyAttr.getTextContent(); + String greek = betaToGreek(beta); + keyAttr.setTextContent(greek); + } } |