Scanning the links of a website by selector level with Jsoup (Java)
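I am trying to scan the links in a web page according to a CSS selector for each level. I have given my code below.
In this code the selectors are read in a fixed, hard-coded way. I want to read them in a loop or recursively instead, so that the code is more flexible, because in the future the number of selector levels may be greater than 2.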
public static void main(string[] args) { string website = website("http://www.java2s.com/"); system.out.println(website); } private static string website(string url) { string lstlink = ""; try { string level[] = {"div.col-md-9 li a", "div#sidebar ul li a"}; //level 1 document connect = jsoup.connect(url).get(); elements selectlevel1 = connect.select(level[0]); (element level1 : selectlevel1) { lstlink += level1.attr("href") + "\n"; //level2 document connect2 = jsoup.connect(level1.attr("href")).get(); elements selectlevel2 = connect2.select(level[1]); (element level2 : selectlevel2) { lstlink += level2.attr("href") + "\n"; } } } catch (ioexception ex) { logger.getlogger(awebsite.class.getname()).log(level.severe, null, ex); } return lstlink; }
Please check the following recursive version:
static string levels[] = {"div.col-md-9 li a", "div#sidebar ul li a"}; private static string getrecursive(string href, int level) { string links = ""; if (level > levels.length-1) { return ""; } document doc; try { doc = jsoup.connect(href).get(); elements elements = doc.select(levels[level]); level++; (element element : elements) { if(!element.attr("href").isempty()) { links += element.attr("abs:href") + "\n"; links += getrecursive(element.attr("abs:href"), level); } } } catch (ioexception e1) { e1.printstacktrace(); } return links; } public static void main(string[] args) { string website = getrecursive("http://www.java2s.com/", 0); system.out.println(website); }
Comments
Post a Comment