/ libxml2 / result / intsubset2.xml.sax2
intsubset2.xml.sax2
  1  SAX.setDocumentLocator()
  2  SAX.startDocument()
  3  SAX.internalSubset(kanjidic2, , )
  4  SAX.comment( Version 1.3
  5  	This is the DTD of the XML-format kanji file combining information from
  6  	the KANJIDIC and KANJD212 files. It is intended to be largely self-
  7  	documenting, with each field being accompanied by an explanatory
  8  	comment.
  9  
 10  	The file covers the following kanji:
 11  	(a) the 6,355 kanji from JIS X 0208;
 12  	(b) the 5,801 kanji from JIS X 0212;
 13  	(c) the 3,625 kanji from JIS X 0213 as follows:
 14  		(i) the 2,741 kanji which are also in JIS X 0212 have
 15  		JIS X 0213 code-points (kuten) added to the existing entry;
 16  		(ii) the 884 "new" kanji have new entries.
 17  
 18  	At the end of the explanation for a number of fields there is a tag
 19  	with the format [N]. This indicates the leading letter(s) of the
 20  	equivalent field in the KANJIDIC and KANJD212 files.
 21  
 22  	The KANJIDIC documentation should also be read for additional 
 23  	information about the information in the file.
 24  	)
 25  SAX.elementDecl(kanjidic2, 4, ...)
 26  SAX.elementDecl(header, 4, ...)
 27  SAX.comment(
 28  	The single header element will contain identification information
 29  	about the version of the file 
 30  	)
 31  SAX.elementDecl(file_version, 3, ...)
 32  SAX.comment(
 33  	This field denotes the version of kanjidic2 structure, as more
 34  	than one version may exist.
 35  	)
 36  SAX.elementDecl(database_version, 3, ...)
 37  SAX.comment(
 38  	The version of the file, in the format YYYY-NN, where NN will be
 39  	a number starting with 01 for the first version released in a
 40  	calendar year, then increasing for each version in that year.
 41  	)
 42  SAX.elementDecl(date_of_creation, 3, ...)
 43  SAX.comment(
 44  	The date the file was created in international format (YYYY-MM-DD).
 45  	)
 46  SAX.elementDecl(character, 4, ...)
 47  SAX.elementDecl(literal, 3, ...)
 48  SAX.comment(
 49  	The character itself in UTF8 coding.
 50  	)
 51  SAX.elementDecl(codepoint, 4, ...)
 52  SAX.comment( 
 53  	The codepoint element states the code of the character in the various
 54  	character set standards.
 55  	)
 56  SAX.elementDecl(cp_value, 3, ...)
 57  SAX.comment( 
 58  	The cp_value contains the codepoint of the character in a particular
 59  	standard. The standard will be identified in the cp_type attribute.
 60  	)
 61  SAX.attributeDecl(cp_value, cp_type, 1, 2, NULL, ...)
 62  SAX.comment( 
 63  	The cp_type attribute states the coding standard applying to the
 64  	element. The values assigned so far are:
 65  		jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
 66  		jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
 67  		jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
 68  		ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
 69  	)
 70  SAX.elementDecl(radical, 4, ...)
 71  SAX.elementDecl(rad_value, 3, ...)
 72  SAX.comment( 
 73  	The radical number, in the range 1 to 214. The particular
 74  	classification type is stated in the rad_type attribute.
 75  	)
 76  SAX.attributeDecl(rad_value, rad_type, 1, 2, NULL, ...)
 77  SAX.comment( 
 78  	The rad_type attribute states the type of radical classification.
 79  		classical - as recorded in the KangXi Zidian.
 80  		nelson - as used in the Nelson "Modern Japanese-English 
 81  		Character Dictionary" (i.e. the Classic, not the New Nelson).
 82  		This will only be used where Nelson reclassified the kanji.
 83  	)
 84  SAX.elementDecl(misc, 4, ...)
 85  SAX.elementDecl(grade, 3, ...)
 86  SAX.comment( 
 87  	The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
 88  	the kanji is taught in Japanese schools. 8 indicates it is one of the
 89  	remaining Jouyou Kanji to be learned in junior high school, and 9 
 90  	indicates it is a Jinmeiyou (for use in names) kanji. [G]
 91  	)
 92  SAX.elementDecl(stroke_count, 3, ...)
 93  SAX.comment( 
 94  	The stroke count of the kanji, including the radical. If more than 
 95  	one, the first is considered the accepted count, while subsequent ones 
 96  	are common miscounts. (See Appendix E. of the KANJIDIC documentation
 97  	for some of the rules applied when counting strokes in some of the 
 98  	radicals.) [S]
 99  	)
100  SAX.elementDecl(variant, 3, ...)
101  SAX.comment( 
102  	A cross-reference code to another kanji, usually regarded as a variant.
103  	The type of cross-reference is given in the var_type attribute.
104  	)
105  SAX.attributeDecl(variant, var_type, 1, 2, NULL, ...)
106  SAX.comment( 
107  	The var_type attribute indicates the type of variant code. The current
108  	values are: 
109  		jis208 - in JIS X 0208 - kuten coding
110  		jis212 - in JIS X 0212 - kuten coding
111  		jis213 - in JIS X 0213 - kuten coding
112  		deroo - De Roo number - numeric
113  		njecd - Halpern NJECD index number - numeric
114  		s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
115  		nelson - "Classic" Nelson - numeric
116  		oneill - Japanese Names (O'Neill) - numeric
117  	)
118  SAX.elementDecl(freq, 3, ...)
119  SAX.comment( 
120  	A frequency-of-use ranking. The 2,500 most-used characters have a 
121  	ranking; those characters that lack this field are not ranked. The 
122  	frequency is a number from 1 to 2,500 that expresses the relative 
123  	frequency of occurrence of a character in modern Japanese. This is
124  	based on a survey in newspapers, so it is biassed towards kanji
125  	used in newspaper articles. The discrimination between the less
126  	frequently used kanji is not strong.
127  	)
128  SAX.elementDecl(rad_name, 3, ...)
129  SAX.comment( 
130  	When the kanji is itself a radical and has a name, this element
131  	contains the name (in hiragana.) [T2]
132  	)
133  SAX.elementDecl(dic_number, 4, ...)
134  SAX.comment( 
135  	This element contains the index numbers and similar unstructured
136  	information such as page numbers in a number of published dictionaries,
137  	and instructional books on kanji.
138  	)
139  SAX.elementDecl(dic_ref, 3, ...)
140  SAX.comment( 
141  	Each dic_ref contains an index number. The particular dictionary,
142  	etc. is defined by the dr_type attribute.
143  	)
144  SAX.attributeDecl(dic_ref, dr_type, 1, 2, NULL, ...)
145  SAX.comment( 
146  	The dr_type defines the dictionary or reference book, etc. to which
147  	dic_ref element applies. The initial allocation is:
148  	  nelson_c - "Modern Reader's Japanese-English Character Dictionary",  
149  	  	edited by Andrew Nelson (now published as the "Classic" 
150  	  	Nelson).
151  	  nelson_n - "The New Nelson Japanese-English Character Dictionary", 
152  	  	edited by John Haig.
153  	  halpern_njecd - "New Japanese-English Character Dictionary", 
154  	  	edited by Jack Halpern.
155  	  halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by 
156  	  	Jack Halpern.
157  	  heisig - "Remembering The  Kanji"  by  James Heisig.
158  	  gakken - "A  New Dictionary of Kanji Usage" (Gakken)
159  	  oneill_names - "Japanese Names", by P.G. O'Neill. 
160  	  oneill_kk - "Essential Kanji" by P.G. O'Neill.
161  	  moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
162  	  	additional attributes are used: m_vol:  the volume of the
163  	  	dictionary in which the kanji is found, and m_page: the page
164  	  	number in the volume.
165  	  henshall - "A Guide To Remembering Japanese Characters" by
166  	  	Kenneth G.  Henshall.
167  	  sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
168  	  sakade - "A Guide To Reading and Writing Japanese" edited by
169  	  	Florence Sakade.
170  	  henshall3 - "A Guide To Reading and Writing Japanese" 3rd
171  		edition, edited by Henshall, Seeley and De Groot.
172  	  tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
173  	  crowley - "The Kanji Way to Japanese Language Power" by
174  	  	Dale Crowley.
175  	  kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
176  	  busy_people - "Japanese For Busy People" vols I-III, published
177  		by the AJLT. The codes are the volume.chapter.
178  	  kodansha_compact - the "Kodansha Compact Kanji Guide".
179  	)
180  SAX.attributeDecl(dic_ref, m_vol, 1, 3, NULL, ...)
181  SAX.comment( 
182  	See above under "moro".
183  	)
184  SAX.attributeDecl(dic_ref, m_page, 1, 3, NULL, ...)
185  SAX.comment( 
186  	See above under "moro".
187  	)
188  SAX.elementDecl(query_code, 4, ...)
189  SAX.comment( 
190  	These codes contain information relating to the glyph, and can be used
191  	for finding a required kanji. The type of code is defined by the
192  	qc_type attribute.
193  	)
194  SAX.elementDecl(q_code, 3, ...)
195  SAX.comment(
196  	The q_code contains the actual query-code value, according to the
197  	qc_type attribute.
198  	)
199  SAX.attributeDecl(q_code, qc_type, 1, 2, NULL, ...)
200  SAX.comment( 
201  	The q_code attribute defines the type of query code. The current values
202  	are:
203  	  skip -  Halpern's SKIP (System  of  Kanji  Indexing  by  Patterns) 
204  	  	code. The  format is n-nn-nn.  See the KANJIDIC  documentation 
205  	  	for  a description of the code and restrictions on  the 
206  	  	commercial  use  of this data. [P]
207  
208  	  sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle 
209  	  	1996) by Spahn and Hadamitzky. They are in the form nxnn.n,  
210  	  	e.g.  3k11.2, where the  kanji has 3 strokes in the 
211  	  	identifying radical, it is radical "k" in the SH 
212  	  	classification system, there are 11 other strokes, and it is 
213  	  	the 2nd kanji in the 3k11 sequence. (I am very grateful to 
214  	  	Mark Spahn for providing the list of these descriptor codes 
215  	  	for the kanji in this file.) [I]
216  	  four_corner - the "Four Corner" code for the kanji. This is a code 
217  	  	invented by Wang Chen in 1928. See the KANJIDIC documentation 
218  	  	for  an overview of  the Four Corner System. [Q]
219  
220  	  deroo - the codes developed by the late Father Joseph De Roo, and 
221  	  	published in  his book "2001 Kanji" (Bojinsha). Fr De Roo 
222  	  	gave his permission for these codes to be included. [DR]
223  	  misclass - a possible misclassification of the kanji according
224  		to one of the code types. (See the "Z" codes in the KANJIDIC
225  		documentation for more details.)
226  	  
227  	)
228  SAX.elementDecl(reading_meaning, 4, ...)
229  SAX.comment( 
230  	The readings for the kanji in several languages, and the meanings, also
231  	in several languages. The readings and meanings are grouped to enable
232  	the handling of the situation where the meaning is differentiated by 
233  	reading. [T1]
234  	)
235  SAX.elementDecl(nanori, 3, ...)
236  SAX.comment( 
237  	Japanese readings that are now only associated with names.
238  	)
239  SAX.elementDecl(rmgroup, 4, ...)
240  SAX.elementDecl(reading, 3, ...)
241  SAX.comment( 
242  	The reading element contains the reading or pronunciation
243  	of the kanji.
244  	)
245  SAX.attributeDecl(reading, r_type, 1, 2, NULL, ...)
246  SAX.comment( 
247  	The r_type attribute defines the type of reading in the reading
248  	element. The current values are:
249  	  pinyin - the modern PinYin romanization of the Chinese reading 
250  	  	of the kanji. The tones are represented by a concluding 
251  	  	digit. [Y]
252  	  korean_r - the romanized form of the Korean reading(s) of the 
253  	  	kanji.  The readings are in the (Republic of Korea) Ministry 
254  	  	of Education style of romanization. [W]
255  	  korean_h - the Korean reading(s) of the kanji in hangul.
256  	  ja_on - the "on" Japanese reading of the kanji, in katakana. A
257  	  	second attribute r_status, if present, will indicate with
258  	  	a value of "jy" whether the reading is approved for a
259  	  	"Jouyou kanji".
260  	  ja_kun - the "kun" Japanese reading of the kanji, in hiragana. 
261  	  	Where relevant the okurigana is also included separated by a 
262  	  	".". Readings associated with prefixes and suffixes are 
263  	  	marked with a "-". A second attribute r_status, if present, 
264  	  	will indicate with a value of "jy" whether the reading is 
265  	  	approved for a "Jouyou kanji".
266  	)
267  SAX.attributeDecl(reading, r_status, 1, 3, NULL, ...)
268  SAX.comment( 
269  	See under ja_on and ja_kun above.
270  	)
271  SAX.elementDecl(meaning, 3, ...)
272  SAX.comment( 
273  	The meaning associated with the kanji.
274  	)
275  SAX.attributeDecl(meaning, m_lang, 1, 3, NULL, ...)
276  SAX.comment( 
277  	The m_lang attribute defines the target language of the meaning. It 
278  	will be coded using the two-letter language code from the ISO 639 
279  	standard. When absent, the value "en" (i.e. English) is implied. [{}]
280  	)
281  SAX.externalSubset(kanjidic2, , )
282  SAX.startElementNs(kanjidic2, NULL, NULL, 0, 0, 0)
283  SAX.characters(
284  , 1)
285  SAX.endElementNs(kanjidic2, NULL, NULL)
286  SAX.endDocument()