java > htmlparser
sample code
import static java.util.Collections.emptyList; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.SocketTimeoutException; import java.net.URL; import java.net.URLEncoder; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.htmlparser.Node; import org.htmlparser.NodeFilter; import org.htmlparser.Parser; import org.htmlparser.filters.TagNameFilter; import org.htmlparser.filters.HasAttributeFilter; import org.htmlparser.filters.AndFilter; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; public class htmlparser { public static void main(String[] args){ NodeFilter FILTER = new TagNameFilter ("span"); HasAttributeFilter GOODFILTER = new HasAttributeFilter("class","goodtag1"); HasAttributeFilter BADFILTER = new HasAttributeFilter("class","badtag1"); NodeList spanList = null; NodeList goodList = null; NodeList badList = null; AndFilter andgoodfilter = new AndFilter(FILTER,GOODFILTER); AndFilter andbadfilter = new AndFilter(FILTER,BADFILTER); Parser parser = new Parser (); String testhtml = "<span class=\"goodtag1\">いい</span><span class=\"badtag1\">欲しい</span><span>欲しい</span>"; try{ parser.setResource(testhtml); //spanList = parser.parse(FILTER); goodList = parser.parse(andgoodfilter); badList = parser.parse(andbadfilter); } catch (ParserException e){ e.printStackTrace(); } Node[] nodes = goodList.toNodeArray(); List<String> outList= new ArrayList<String>(); for(int i=0;i< nodes.length; i++){ outList.add(nodes[i].getFirstChild().getText()); } System.out.println(outList); } }