java如何去掉html标签 - 行业资讯 - 肥雀云_南京肥雀信息技术有限公司

　　介绍

这篇文章给大家分享的是有关java如何去掉html标签的内容。小编觉得挺实用的,因此分享给大家做个参考,一起跟随小编过来看看吧。

java去掉html标签的方法:1、通过纯正则方法去掉html标签;2、使用“javax.swing.text.html.HTMLEditorKit”去掉html标签;3、通过使用Jsoup框架去掉html标签等等。

一、背景

业务开发中可能需要将html的标签全部去掉,本文将多种方法综合在这里,供大家参考。

二、方法

2.1纯正则方法
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based helper that strips HTML markup from a string.
 *
 * <p>This is a lightweight, best-effort approach: it does not parse HTML, so
 * malformed markup (for example a {@code >} inside an attribute value) may not
 * be removed cleanly.
 */
public class HTMLSpirit {

    /** Matches a whole {@code <script>...</script>} element, including its body. */
    private static final Pattern SCRIPT_ELEMENT =
            Pattern.compile("<script[^>]*?>[\\s\\S]*?</script>", Pattern.CASE_INSENSITIVE);

    /** Matches a whole {@code <style>...</style>} element, including its body. */
    private static final Pattern STYLE_ELEMENT =
            Pattern.compile("<style[^>]*?>[\\s\\S]*?</style>", Pattern.CASE_INSENSITIVE);

    /** Matches any single remaining tag such as {@code <p>} or {@code </div>}. */
    private static final Pattern ANY_TAG =
            Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);

    /**
     * Removes script elements, style elements and all remaining tags from the input.
     *
     * @param htmlStr the HTML text to clean; may be {@code null}
     * @return the text with markup removed and leading/trailing whitespace trimmed;
     *         an empty string when {@code htmlStr} is {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null) {
            return "";
        }

        // Script and style elements go first so that their bodies are dropped
        // together with the tags instead of being left behind as plain text.
        Matcher scriptMatcher = SCRIPT_ELEMENT.matcher(htmlStr);
        String text = scriptMatcher.replaceAll("");

        Matcher styleMatcher = STYLE_ELEMENT.matcher(text);
        text = styleMatcher.replaceAll("");

        Matcher tagMatcher = ANY_TAG.matcher(text);
        text = tagMatcher.replaceAll("");

        return text.trim();
    }
}
2.2使用javax.swing.text.html.HTMLEditorKit

import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * Extracts the text content of an HTML document using the HTML parser that
 * ships with Swing ({@code javax.swing.text.html}).
 */
public class HTMLUtils {

    /** Utility class; not meant to be instantiated. */
    private HTMLUtils() {}

    /**
     * Parses the HTML supplied by {@code reader} and collects every text run
     * reported by the parser.
     *
     * @param reader source of the HTML document; the caller is responsible for closing it
     * @return the text runs in the order the parser reported them
     * @throws IOException if reading from {@code reader} fails
     */
    public static List<String> extractText(Reader reader) throws IOException {
        final List<String> texts = new ArrayList<>();

        // Only text callbacks matter here; ParserCallback's other handlers
        // (start/end/simple tags, comments, errors) are no-ops by default.
        ParserCallback callback = new ParserCallback() {
            @Override
            public void handleText(final char[] data, final int pos) {
                texts.add(new String(data));
            }
        };

        // The last argument is ParserDelegator.parse's ignoreCharSet flag.
        new ParserDelegator().parse(reader, callback, true);
        return texts;
    }

    /**
     * Demo entry point: prints each text run found in {@code java-new.html}.
     *
     * @param args unused
     * @throws Exception if the file cannot be read
     */
    public final static void main(String[] args) throws Exception {
        // try-with-resources closes the file even when parsing fails.
        try (FileReader reader = new FileReader("java-new.html")) {
            List<String> lines = HTMLUtils.extractText(reader);
            for (String line : lines) {
                System.out.println(line);
            }
        }
    }
}
java如何去掉html标签