DOM解析

说明:这里主要分析hadoop的DOM解析

DOM 的工作方式是:

  • 首先一次性将XML文档加入内存
  • 然后在内存创建一个“树形结构”,也就是对象模型
  • 然后使用对象提供的接口访问文档,进而操作文档

    处理步骤

    1. 获得用于创建DOM解析器的工厂对象
      1
      DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
    2. 可以设置一下参数[可选]
      1
      2
      3
      4
      5
      6
      7
      8
      docBuilderFactory.setIgnoringComments(true);
      docBuilderFactory.setNamespaceAware(true);
      boolean useXInclude = !wrapper.isParserRestricted();
      try {
      docBuilderFactory.setXIncludeAware(useXInclude);
      } catch (UnsupportedOperationException var28) {
      LOG.error("Failed to set setXIncludeAware(" + useXInclude + ") for parser " + docBuilderFactory, var28);
      }
    3. 获得解析XML的DocumentBuilder对象
      1
      DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
    4. 获取根节点下的所有子节点(root 是解析得到的 Document 通过 getDocumentElement() 取得的根元素)
      1
      NodeList props = root.getChildNodes();
    5. 遍历节点
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      for(int i = 0; i < props.getLength(); ++i) {
      //获取节点
      Node propNode = props.item(i);
      if (propNode instanceof Element) {
      Element prop = (Element)propNode;
      //prop.getTagName()获取节点的标签名(而不是节点内的值)
      if ("configuration".equals(prop.getTagName())) {
      this.loadResource(toAddTo, new Configuration.Resource(prop, name, wrapper.isParserRestricted()), quiet);
      } else {
      if (!"property".equals(prop.getTagName())) {
      ...
      }

hadoop配置文件解析的特别说明

对DocumentBuilderFactory做的处理

  • 忽略XML文档中的注释
    1
    docBuilderFactory.setIgnoringComments(true);
  • 支持XML命名空间
    1
    docBuilderFactory.setNamespaceAware(true);
  • 支持XML包含机制
    1
    2
    3
    4
    5
    try {
    docBuilderFactory.setXIncludeAware(useXInclude);
    } catch (UnsupportedOperationException var28) {
    LOG.error("Failed to set setXIncludeAware(" + useXInclude + ")for parser " + docBuilderFactory, var28);
    }

    XInclude机制允许将XML文档分解为多个可管理的块,然后将一个或多个较小的文档组装成一个大型文档。

hadoop 配置文件解析完整代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157

/**
 * Loads a single configuration resource into {@code properties}.
 *
 * <p>The object returned by {@code wrapper.getResource()} decides how the resource is read:
 * <ul>
 *   <li>{@code URL}, {@code String}, {@code Path}, {@code InputStream}: parsed into a DOM
 *       document whose root element is then walked;</li>
 *   <li>{@code Properties}: overlaid directly onto {@code properties}, no XML involved;</li>
 *   <li>{@code Element}: used as the root element as-is (this is how nested
 *       {@code <configuration>} elements are handled recursively).</li>
 * </ul>
 *
 * <p>Every {@code <property>} child of the root is handed to {@code loadProperty}; a property
 * whose name is listed in the deprecated-key map is loaded once per replacement key instead.
 *
 * @param properties the properties object that receives the loaded values
 * @param wrapper    the resource to load, together with its name and parser restrictions
 * @param quiet      if {@code true}, a resource that yields no document makes the method
 *                   return {@code null} instead of throwing
 * @return a new {@code Configuration.Resource} wrapping the freshly parsed properties when the
 *         resource was an {@code InputStream}; {@code null} in every other case
 * @throws RuntimeException if the resource cannot be found (and {@code quiet} is false), if a
 *         restricted resource contains an XInclude element, or wrapping any
 *         {@code IOException}, {@code DOMException}, {@code SAXException} or
 *         {@code ParserConfigurationException} raised while parsing
 */
private Configuration.Resource loadResource(Properties properties, Configuration.Resource wrapper, boolean quiet) {
    String name = "Unknown";

    try {
        Object resource = wrapper.getResource();
        name = wrapper.getName();

        // Configure the DOM parser factory: drop comments, honour namespaces, and enable
        // XInclude only for resources that are not parser-restricted.
        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
        docBuilderFactory.setIgnoringComments(true);
        docBuilderFactory.setNamespaceAware(true);
        boolean useXInclude = !wrapper.isParserRestricted();

        try {
            docBuilderFactory.setXIncludeAware(useXInclude);
        } catch (UnsupportedOperationException e) {
            // Best effort: a parser without XInclude support is still usable.
            LOG.error("Failed to set setXIncludeAware(" + useXInclude + ") for parser " + docBuilderFactory, e);
        }

        if (wrapper.isParserRestricted()) {
            // Restricted resources must not declare a DOCTYPE.
            docBuilderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        }

        DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
        Document doc = null;
        Element root = null;
        boolean returnCachedProperties = false;

        if (resource instanceof URL) {
            doc = this.parse(builder, (URL) resource);
        } else if (resource instanceof String) {
            URL url = this.getResource((String) resource);
            doc = this.parse(builder, url);
        } else if (resource instanceof Path) {
            File file = (new File(((Path) resource).toUri().getPath())).getAbsoluteFile();
            if (file.exists()) {
                if (!quiet) {
                    LOG.debug("parsing File " + file);
                }

                doc = this.parse(builder, new BufferedInputStream(new FileInputStream(file)), ((Path) resource).toString());
            }
        } else if (resource instanceof InputStream) {
            doc = this.parse(builder, (InputStream) resource, (String) null);
            // The parsed result is handed back to the caller as a Properties-backed resource.
            returnCachedProperties = true;
        } else if (resource instanceof Properties) {
            // Nothing to parse: the properties are merged directly and the work is done.
            // Returning here avoids falling through to the "not found" check below, which
            // previously threw for a successfully applied Properties resource when !quiet.
            this.overlay(properties, (Properties) resource);
            return null;
        } else if (resource instanceof Element) {
            root = (Element) resource;
        }

        if (root == null) {
            if (doc == null) {
                if (quiet) {
                    return null;
                }

                throw new RuntimeException(resource + " not found");
            }

            root = doc.getDocumentElement();
        }

        // For InputStream resources, collect into a fresh Properties so it can be returned.
        Properties toAddTo = properties;
        if (returnCachedProperties) {
            toAddTo = new Properties();
        }

        if (!"configuration".equals(root.getTagName())) {
            // Only logged; processing continues with whatever children the root has.
            LOG.fatal("bad conf file: top-level element not <configuration>");
        }

        NodeList props = root.getChildNodes();
        Configuration.DeprecationContext deprecations = (Configuration.DeprecationContext) deprecationContext.get();

        for (int i = 0; i < props.getLength(); ++i) {
            Node propNode = props.item(i);
            if (!(propNode instanceof Element)) {
                continue;
            }

            Element prop = (Element) propNode;
            if ("configuration".equals(prop.getTagName())) {
                // Nested <configuration>: load it recursively with the same restrictions.
                this.loadResource(toAddTo, new Configuration.Resource(prop, name, wrapper.isParserRestricted()), quiet);
                continue;
            }

            if (!"property".equals(prop.getTagName())) {
                if (wrapper.isParserRestricted() && "http://www.w3.org/2001/XInclude".equals(prop.getNamespaceURI())) {
                    throw new RuntimeException("Error parsing resource " + wrapper + ": XInclude is not supported for restricted resources");
                }

                // Unexpected tags are only warned about; their children are still scanned below.
                LOG.warn("Unexpected tag in conf file " + wrapper + ": expected <property> but found <" + prop.getTagName() + ">");
            }

            NodeList fields = prop.getChildNodes();
            String attr = null;
            String value = null;
            boolean finalParameter = false;
            LinkedList<String> source = new LinkedList<String>();

            // Walk the child elements and pick up the name, value, final flag and sources.
            for (int j = 0; j < fields.getLength(); ++j) {
                Node fieldNode = fields.item(j);
                if (!(fieldNode instanceof Element)) {
                    continue;
                }

                Element field = (Element) fieldNode;
                if (!field.hasChildNodes()) {
                    continue;
                }

                String tag = field.getTagName();
                if ("name".equals(tag)) {
                    attr = StringInterner.weakIntern(((Text) field.getFirstChild()).getData().trim());
                } else if ("value".equals(tag)) {
                    value = StringInterner.weakIntern(((Text) field.getFirstChild()).getData());
                } else if ("final".equals(tag)) {
                    finalParameter = "true".equals(((Text) field.getFirstChild()).getData());
                } else if ("source".equals(tag)) {
                    source.add(StringInterner.weakIntern(((Text) field.getFirstChild()).getData()));
                }
            }

            // The resource currently being loaded is always the last recorded source.
            source.add(name);
            if (attr == null) {
                continue;
            }

            if (deprecations.getDeprecatedKeyMap().containsKey(attr)) {
                // Deprecated key: store the value under each of its replacement keys.
                Configuration.DeprecatedKeyInfo keyInfo = (Configuration.DeprecatedKeyInfo) deprecations.getDeprecatedKeyMap().get(attr);
                keyInfo.clearAccessed();

                for (String key : keyInfo.newKeys) {
                    this.loadProperty(toAddTo, name, key, value, finalParameter, source.toArray(new String[source.size()]));
                }
            } else {
                this.loadProperty(toAddTo, name, attr, value, finalParameter, source.toArray(new String[source.size()]));
            }
        }

        if (returnCachedProperties) {
            this.overlay(properties, toAddTo);
            return new Configuration.Resource(toAddTo, name, wrapper.isParserRestricted());
        } else {
            return null;
        }
    } catch (IOException e) {
        LOG.fatal("error parsing conf " + name, e);
        throw new RuntimeException(e);
    } catch (DOMException e) {
        LOG.fatal("error parsing conf " + name, e);
        throw new RuntimeException(e);
    } catch (SAXException e) {
        LOG.fatal("error parsing conf " + name, e);
        throw new RuntimeException(e);
    } catch (ParserConfigurationException e) {
        LOG.fatal("error parsing conf " + name, e);
        throw new RuntimeException(e);
    }
}

Comments

Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×