1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
/* Copyright 2013 Benjamin Linskey
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.benlinskey.grdbc;
import java.io.IOException;
import java.util.Locale;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
/**
 * Parses a chunk of XML containing a single lexicon entry and exposes the
 * entry's headword in several forms (Beta Code and Greek, with and without
 * diacritics), as well as the entry XML with its Beta Code converted to Greek.
 *
 * <p>
 * The headword is read from the {@code key} attribute of the first
 * {@code entry} element. Because {@link #getEntry()} rewrites that attribute
 * in the parsed document, the original Beta Code value is cached the first
 * time it is read so that every getter returns the same result regardless of
 * the order in which the methods are called.
 *
 * @author Ben Linskey
 */
public class LexiconParser extends GreekTextParser {

    /** Matches every character that is not an ASCII letter. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^a-zA-Z]");

    /**
     * Matches every character that is neither an ASCII letter nor the
     * asterisk, which is kept because it marks a capital letter.
     */
    private static final Pattern NON_LETTERS_OR_CAPITAL_MARKER = Pattern
            .compile("[^a-zA-Z\\*]");

    /**
     * The entry's original Beta Code key, cached before {@link #getEntry()}
     * can overwrite the attribute; {@code null} until first read.
     */
    private String cachedBetaKey;

    /** Whether the document has already been converted by getEntry(). */
    private boolean entryTranscoded;

    /**
     * Class constructor. The XML is handed unchanged to the superclass
     * constructor, which is the source of all declared exceptions.
     *
     * @param xml
     *            the XML to parse
     * @throws ParserConfigurationException
     *             if thrown by the superclass constructor
     * @throws SAXException
     *             if thrown by the superclass constructor
     * @throws IOException
     *             if thrown by the superclass constructor
     */
    public LexiconParser(String xml) throws ParserConfigurationException,
            SAXException, IOException {
        super(xml);
    }

    /**
     * Returns a Beta Code representation of this entry's word, stripped of all
     * diacritics.
     *
     * @return this entry's word in Beta Code with every character other than
     *         the ASCII letters removed
     * @throws IllegalStateException
     *             if the document has no entry element or the entry has no key
     *             attribute
     */
    public String getBetaNoSymbols() {
        return NON_LETTERS.matcher(loadBetaKey()).replaceAll("");
    }

    /**
     * Returns a Beta Code representation of this entry's word.
     *
     * @return the original value of the entry element's key attribute
     * @throws IllegalStateException
     *             if the document has no entry element or the entry has no key
     *             attribute
     */
    public String getBetaSymbols() {
        return loadBetaKey();
    }

    /**
     * Returns this entry's word in Greek characters.
     *
     * @return the result of passing the Beta Code key through the inherited
     *         Beta Code to Greek converter
     * @throws IllegalStateException
     *             if the document has no entry element or the entry has no key
     *             attribute
     */
    public String getGreekFullWord() {
        return betaToGreek(loadBetaKey());
    }

    /**
     * Returns this entry's word in Greek characters, stripped of all
     * diacritics.
     *
     * @return this entry's word in Greek characters without diacritics
     * @throws IllegalStateException
     *             if the document has no entry element or the entry has no key
     *             attribute
     */
    public String getGreekNoSymbols() {
        // Keep only the letters and the capital letter marker, then convert.
        String beta = NON_LETTERS_OR_CAPITAL_MARKER.matcher(loadBetaKey())
                .replaceAll("");
        return betaToGreek(beta);
    }

    /**
     * Returns this entry's word in all lowercase Greek characters, stripped of
     * all diacritics.
     *
     * @return this entry's word in lowercase Greek characters without
     *         diacritics
     * @throws IllegalStateException
     *             if the document has no entry element or the entry has no key
     *             attribute
     */
    public String getGreekLowercase() {
        // Locale.ROOT keeps the result independent of the JVM's default
        // locale.
        return getGreekNoSymbols().toLowerCase(Locale.ROOT);
    }

    /**
     * Returns the XML for this entry, with all Beta Code converted to Greek
     * characters.
     *
     * <p>
     * The conversion modifies the parsed document in place, so it is performed
     * only on the first call; running it again would feed already-converted
     * text back through the converter. Every call returns the current
     * serialization produced by the inherited {@code getUpdatedXML()}.
     *
     * @return the XML for this entry with all Beta Code converted to Greek
     *         characters
     * @throws IllegalStateException
     *             if the document has no entry element or the entry has no key
     *             attribute
     */
    public String getEntry() {
        if (!entryTranscoded) {
            transcodeEntryKey();
            transcodeInElements("orth");
            transcodeInElements("ref");
            transcodeInElements("foreign");
            transcodeInElements("note");
            entryTranscoded = true;
        }
        return getUpdatedXML();
    }

    /**
     * Converts the value of the entry element's "key" attribute from Beta Code
     * to Greek.
     */
    private void transcodeEntryKey() {
        // Read through the cache so the Beta Code value is preserved before
        // the attribute is overwritten below.
        String greek = betaToGreek(loadBetaKey());
        findKeyAttribute().setTextContent(greek);
    }

    /**
     * Returns the entry's original Beta Code key, reading it from the
     * document on first use and from the cache afterwards.
     */
    private String loadBetaKey() {
        if (cachedBetaKey == null) {
            cachedBetaKey = findKeyAttribute().getTextContent();
        }
        return cachedBetaKey;
    }

    /**
     * Locates the "key" attribute of the first "entry" element.
     *
     * @return the attribute node, never {@code null}
     * @throws IllegalStateException
     *             if the document has no entry element or the entry has no key
     *             attribute
     */
    private Node findKeyAttribute() {
        Node entryNode = doc.getElementsByTagName("entry").item(0);
        if (entryNode == null) {
            throw new IllegalStateException(
                    "Lexicon XML contains no \"entry\" element");
        }

        Node keyAttr = entryNode.getAttributes().getNamedItem("key");
        if (keyAttr == null) {
            throw new IllegalStateException(
                    "The \"entry\" element has no \"key\" attribute");
        }
        return keyAttr;
    }
}
|