From bcf134e1a71252e6d86c8e9587029f919396663a Mon Sep 17 00:00:00 2001 From: Ben Linskey Date: Mon, 16 Dec 2013 20:08:36 -0500 Subject: Adds abstract class GreekTextParser. This change eliminates the duplication of code in the LexiconParser and SyntaxParser classes. --- src/com/benlinskey/grdbc/LexiconParser.java | 100 ++-------------------------- 1 file changed, 5 insertions(+), 95 deletions(-) (limited to 'src/com/benlinskey/grdbc/LexiconParser.java') diff --git a/src/com/benlinskey/grdbc/LexiconParser.java b/src/com/benlinskey/grdbc/LexiconParser.java index 69d474b..af16c51 100644 --- a/src/com/benlinskey/grdbc/LexiconParser.java +++ b/src/com/benlinskey/grdbc/LexiconParser.java @@ -16,60 +16,29 @@ package com.benlinskey.grdbc; import java.io.IOException; -import java.io.StringReader; -import java.io.StringWriter; -import java.io.UnsupportedEncodingException; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; -import org.w3c.dom.Document; import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.xml.sax.InputSource; import org.xml.sax.SAXException; -import edu.unc.epidoc.transcoder.TransCoder; - /** * This class provides methods to parse a chunk of XML containing a lexicon * entry, modify the data contained therein, and return data to be inserted * into the database. * @author Ben Linskey */ -public class LexiconParser { - private Document doc; - private TransCoder transcoder; - +public class LexiconParser extends GreekTextParser { /** * Class constructor. * @param xml the XML to parse - * @throws ParserConfigurationException - * @throws IOException - * @throws SAXException + * @throws ParserConfigurationException + * @throws SAXException + * @throws IOException */ public LexiconParser(String xml) throws ParserConfigurationException, SAXException, IOException { - // Parse the XML and create a Document. - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - DocumentBuilder db = dbf.newDocumentBuilder(); - InputSource is = new InputSource(new StringReader(xml)); - doc = db.parse(is); - - // Create a TransCoder for converting Beta Code to Greek characters. - try { - transcoder = new TransCoder("BetaCode", "UnicodeC"); - } catch (Exception e) { - e.printStackTrace(); - System.exit(1); - } + super(xml); } /** @@ -136,63 +105,4 @@ public class LexiconParser { transcodeInElements("foreign"); return getUpdatedXML(); } - - /** - * Transcodes beta code to Greek in elements with the given name. - * @param element the name of the element to search for - */ - private void transcodeInElements(String element) { - NodeList nodeList = doc.getElementsByTagName(element); - for (int i = 0; i < nodeList.getLength(); i++) { - Node elementNode = nodeList.item(i); - Node langAttr = elementNode.getAttributes().getNamedItem("lang"); - if (langAttr != null) { - String lang = langAttr.getTextContent(); - if (lang.equals("greek")) { - String greek = betaToGreek(elementNode.getTextContent()); - langAttr.setTextContent(greek); - } - } - } - } - - /** - * Returns a string containing an XML representation of the document in its - * current state. - * @return a string containing an XML representation of the document in its - * current state - */ - private String getUpdatedXML() { - StringWriter writer = new StringWriter(); - try { - TransformerFactory tf = TransformerFactory.newInstance(); - Transformer transformer = tf.newTransformer(); - DOMSource source = new DOMSource(doc); - StreamResult result = new StreamResult(writer); - transformer.transform(source, result); - } catch (TransformerConfigurationException e) { - e.printStackTrace(); - System.exit(1); - } catch (TransformerException e) { - e.printStackTrace(); - System.exit(1); - } - return writer.toString(); - } - - /** - * Converts Beta Code to Greek characters. - * @param beta the Beta Code to transcode - * @return the Greek equivalent of the specified Beta Code - */ - private String betaToGreek(String beta) { - String greek = null; - try { - greek = transcoder.getString(beta); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - System.exit(1); - } - return greek; - } } -- cgit v1.2.3