NekoHTML 怎么解析 HTML 字符串？
发布网友
发布时间:2022-05-11 22:52
我来回答
共1个回答
热心网友
时间:2023-07-30 03:53
一、用 NekoHTML 的 DOMParser 解析 HTML
// Parse an HTML fragment held in memory with the DOM parser and pull out its plain text.
DOMParser parser = new DOMParser();

// Wrap the fragment in a minimal <html>/<body> skeleton before handing it to the parser.
String wrappedHtml = "<html><head></head><body>" + sugg.getSuggContent() + "</body></html>";
StringReader htmlReader = new StringReader(wrappedHtml);

// Arguments are (publicId, systemId, baseSystemId, reader, encoding); the markup comes
// from the reader, so the identifiers are passed as null / "".
XMLInputSource source = new XMLInputSource(null, "", null, htmlReader, "utf-8");

try {
    parser.parse(source);
} catch (XNIException e) {
    // Parse failures are only printed; execution continues to getDocument() below.
    e.printStackTrace();
} catch (IOException e) {
    e.printStackTrace();
}

// Text content of the root element: the text of the whole parsed tree.
Document document = parser.getDocument();
String ss = document.getDocumentElement().getTextContent();
二、去除文本中多余的换行
// Remove blank (empty or whitespace-only) lines from ss and collect the rest into fstr.
//
// The text is split directly instead of being encoded to UTF-8 bytes and decoded again
// through ByteArrayInputStream / InputStreamReader / BufferedReader. That round-trip added
// nothing for an in-memory String, left the reader null (and BufferedReader throwing
// NullPointerException) if the charset lookup failed, and never closed its streams.
// StringBuilder is used because the buffer is local and needs no synchronization.
StringBuilder sb = new StringBuilder();

// Same line terminators BufferedReader.readLine() recognises: "\r\n", "\r" and "\n".
// "\r\n" is listed first so a Windows line ending is consumed as one terminator.
for (String line : ss.split("\r\n|\r|\n")) {
    // trim() removes leading/trailing characters <= U+0020, so whitespace-only lines are dropped.
    if (!line.trim().isEmpty()) {
        // NOTE(review): each kept line is terminated with a bare carriage return, exactly as
        // before; if a normal newline was intended this should be '\n' -- confirm with callers.
        sb.append(line).append('\r');
    }
}
String fstr = sb.toString();