摘要:本文介绍了如何使用SAX解析XML文档。
环境
Windows 10 企业版 LTSC 21H2
Java 1.8
1 定义
SAX(Simple API for XML)是一种基于事件驱动的XML解析方式。SAX不需要将整个XML文档加载到内存中,而是通过顺序读取文档并触发相应的事件来解析内容。
2 基本原理
SAX解析器的核心工作流程:
- 顺序读取:以流的方式从头到尾依次读取XML文档内容(并非按行读取)。
- 事件触发:在遇到特定XML结构时触发相应的事件。
- 回调处理:通过实现特定接口的方法来处理这些事件。
- 流式处理:边读边解析,不保存整个文档结构。
关键特性:
- 事件驱动:通过回调方法处理XML内容。
- 顺序访问:只能从前到后顺序解析,不能随机访问。
- 内存高效:不需要将整个文档加载到内存。
- 只读解析:SAX只负责读取,本身不提供修改文档的能力;如需修改,只能在解析的同时自行重新生成文档。
3 文档示例
以一个简单的XML文档为例:
school.xml1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
| <?xml version="1.0" encoding="UTF-8"?> <school address="北京市海淀区"> <name>阳光小学</name> <teachers> <teacher id="teacher_1"> <name>李明</name> <subject>语文</subject> </teacher> <teacher id="teacher_2"> <name>赵强</name> <subject>数学</subject> </teacher> </teachers> <students> <student id="student_1"> <name>张婷</name> <gender>女</gender> <age>13</age> <hobbies> <hobby>画画</hobby> <hobby>弹琴</hobby> </hobbies> </student> <student id="student_2"> <name>王浩</name> <gender>男</gender> <age>14</age> <hobbies> <hobby>跑步</hobby> <hobby>游泳</hobby> </hobbies> </student> </students> </school>
|
4 核心组件
4.1 工厂类
4.1.1 SAXParserFactory
常用方法:
java1 2 3 4 5 6 7 8
| public static SAXParserFactory newInstance();
public SAXParser newSAXParser();
public void setNamespaceAware(boolean awareness);
public void setValidating(boolean validating);
|
4.1.2 SAXParser
常用方法:
java1 2 3
| public void parse(File f, DefaultHandler dh); public void parse(InputStream is, DefaultHandler dh); public void parse(InputSource is, DefaultHandler dh);
|
4.2 处理器类
4.2.1 DefaultHandler
常用方法:
java1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
| public void startDocument();
public void endDocument();
public void startElement(String uri, String localName, String qName, Attributes attributes);
public void endElement(String uri, String localName, String qName);
public void characters(char[] ch, int start, int length);
public void processingInstruction(String target, String data);
public void ignorableWhitespace(char[] ch, int start, int length);
public void warning(SAXParseException e);
public void error(SAXParseException e);
public void fatalError(SAXParseException e);
|
4.2.2 Attributes
常用方法:
java1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
| public abstract int getLength();
public abstract String getURI (int index);
public abstract String getQName(int index);
public abstract String getLocalName(int index);
public abstract String getValue(int index);
public abstract String getValue(String qName);
public abstract String getValue (String uri, String localName);
public abstract String getType(int index);
public abstract String getType(String qName);
public abstract String getType (String uri, String localName);
|
5 实际应用
5.1 基础使用
使用SAX解析XML文档需要先创建自定义处理器,通过处理器的回调方法解析需要的数据。
创建自定义处理器:
java1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
| class SchoolHandler extends DefaultHandler { private StringBuilder currentValue = new StringBuilder(); private Student currentStudent; private List<Student> students = new ArrayList<>();
@Override public void startDocument() { System.out.println("解析文档开始"); students.clear(); }
@Override public void endDocument() { System.out.println("解析文档结束"); System.out.println("共找到 " + students.size() + " 个学生"); }
@Override public void startElement(String uri, String localName, String qName, Attributes attributes) { currentValue.setLength(0); if ("student".equals(qName)) { currentStudent = new Student(); currentStudent.setId(attributes.getValue("id")); } }
@Override public void endElement(String uri, String localName, String qName) { if ("student".equals(qName)) { students.add(currentStudent); currentStudent = null; } else if (currentStudent != null) { switch (qName) { case "name": currentStudent.setName(currentValue.toString()); break; case "gender": currentStudent.setGender(currentValue.toString()); break; case "age": currentStudent.setAge(Integer.parseInt(currentValue.toString())); break; case "hobby": currentStudent.addHobby(currentValue.toString()); break; } } }
@Override public void characters(char[] ch, int start, int length) { currentValue.append(ch, start, length); }
public List<Student> getStudents() { return students; } }
|
创建学生类:
java1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
/**
 * Mutable data holder for one {@code <student>} entry: id, name, gender, age and a list
 * of hobbies.
 */
class Student {
    private final List<String> hobbies = new ArrayList<>();
    private String id;
    private String name;
    private String gender;
    private int age;

    /** Creates a student with no values set and an empty hobby list. */
    public Student() {
    }

    /** @return the student's id, or {@code null} if none was set */
    public String getId() {
        return this.id;
    }

    /** @param id the id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the student's name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the student's gender, or {@code null} if none was set */
    public String getGender() {
        return this.gender;
    }

    /** @param gender the gender to store */
    public void setGender(String gender) {
        this.gender = gender;
    }

    /** @return the student's age; {@code 0} until one is set */
    public int getAge() {
        return this.age;
    }

    /** @param age the age to store */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the internal hobby list, in insertion order */
    public List<String> getHobbies() {
        return this.hobbies;
    }

    /** @param hobby the hobby to append to the list */
    public void addHobby(String hobby) {
        hobbies.add(hobby);
    }

    /** Renders all fields in a single-line, human-readable form. */
    @Override
    public String toString() {
        final String template = "Student{id='%s', name='%s', gender='%s', age=%d, hobbies=%s}";
        return String.format(template, id, name, gender, age, hobbies);
    }
}
|
主方法:
java1 2 3 4 5 6
| public static void main(String[] args) { SAX sax = new SAX(); List<Student> students = sax.parse("src/main/resources/school.xml"); students.forEach(System.out::println); }
|
解析文档:
java1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
| public List<Student> parse(String filePath) { try { SAXParserFactory factory = SAXParserFactory.newInstance(); SAXParser parser = factory.newSAXParser(); SchoolHandler handler = new SchoolHandler(); parser.parse(new File(filePath), handler); return handler.getStudents(); } catch (Exception e) { e.printStackTrace(); return Collections.emptyList(); } }
|
5.2 高级特性
5.2.1 统计处理器
统计文档中的各种信息:
java1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
/**
 * SAX handler that gathers simple statistics about a document: number of elements,
 * attributes and non-blank text nodes, plus a per-name element count. The summary is
 * printed when the document ends.
 */
public class StatisticsHandler extends DefaultHandler {
    private int elementCount = 0;
    private int attributeCount = 0;
    private int textCount = 0;
    /** Number of occurrences per element qualified name. */
    private final Map<String, Integer> elementStats = new HashMap<>();
    /**
     * True once non-whitespace character data has been seen since the last tag. A parser
     * may deliver one text run through several {@code characters} calls, so the run is
     * only counted when the next tag (or the end of the document) closes it.
     */
    private boolean textPending = false;

    /** Counts the element and its attributes, closing any text run that preceded it. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) {
        flushPendingText();
        elementCount++;
        elementStats.merge(qName, 1, Integer::sum);
        attributeCount += attributes.getLength();
    }

    /** Closes the text run, if any, that ended with this end tag. */
    @Override
    public void endElement(String uri, String localName, String qName) {
        flushPendingText();
    }

    /**
     * Notes whether the chunk contains anything besides whitespace. The text itself is
     * not stored, so memory use does not grow with document size.
     */
    @Override
    public void characters(char[] ch, int start, int length) {
        if (textPending) {
            return;
        }
        int end = start + length;
        for (int i = start; i < end; i++) {
            // Same notion of whitespace as String.trim(): code points up to U+0020.
            if (ch[i] > ' ') {
                textPending = true;
                return;
            }
        }
    }

    /** Prints the collected statistics to standard output. */
    @Override
    public void endDocument() {
        flushPendingText();
        System.out.println("文档统计结果:");
        System.out.println("元素总数: " + elementCount);
        System.out.println("属性总数: " + attributeCount);
        System.out.println("文本节点数: " + textCount);
        System.out.println("各元素出现次数:");
        elementStats.forEach((name, count) -> System.out.println(" " + name + ": " + count));
    }

    /** Counts the current text run once and marks it as handled. */
    private void flushPendingText() {
        if (textPending) {
            textCount++;
            textPending = false;
        }
    }
}
|
5.2.2 验证处理
启用验证:
java1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/**
 * SAX handler that prints every warning, error and fatal error reported by the parser
 * to standard output, each with a severity prefix, without throwing from the callback.
 */
public class ValidatingHandler extends DefaultHandler {

    /** Prints the warning's message. */
    @Override
    public void warning(SAXParseException e) {
        report("警告: ", e);
    }

    /** Prints the error's message. */
    @Override
    public void error(SAXParseException e) {
        report("错误: ", e);
    }

    /** Prints the fatal error's message. */
    @Override
    public void fatalError(SAXParseException e) {
        report("致命错误: ", e);
    }

    /** Writes one line consisting of the severity prefix followed by the message. */
    private static void report(String prefix, SAXParseException problem) {
        System.out.println(prefix + problem.getMessage());
    }
}
/**
 * Parses the XML file at {@code filePath} with a validating, namespace-aware parser
 * whose schema language is set to W3C XML Schema. Problems reported by the parser are
 * passed to a {@link ValidatingHandler}; any exception is printed and otherwise ignored.
 *
 * @param filePath path of the XML file to read
 */
public void parseWithValidation(String filePath) {
    final String schemaLanguageProperty = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
    final String w3cXmlSchema = "http://www.w3.org/2001/XMLSchema";
    try {
        SAXParserFactory validatingFactory = SAXParserFactory.newInstance();
        validatingFactory.setNamespaceAware(true);
        validatingFactory.setValidating(true);

        SAXParser saxParser = validatingFactory.newSAXParser();
        saxParser.setProperty(schemaLanguageProperty, w3cXmlSchema);

        ValidatingHandler reporter = new ValidatingHandler();
        saxParser.parse(new File(filePath), reporter);
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}
|
条