Java - Google search with Jsoup
I am trying to search Google using Jsoup. The problem I have is that the variable `query` does not produce the URL I want when I start searching. Also, how does Jsoup search? Is it looking at the title, the URL, or something else?
public class start {
public static void main(string[] args) { try { new google().searching("möbel beck gmbh & co.kg"); } catch (exception e) { system.out.println(e.getmessage()); } } } public class google implements serializable { private static final long serialversionuid = 1l; private static pattern patterndomainname; private matcher matcher; private static final string domain_name_pattern = "([a-za-z0-9]([a-za-z0-9\\-]{0,61}[a-za-z0-9])?\\.)+[a-za-z]{2,6}"; static { patterndomainname = pattern.compile(domain_name_pattern); } public void searching(string searchstring) throws ioexception { google obj = new google(); set<string> result = obj.getdatafromgoogle(searchstring); (string temp : result) { if (temp.contains(searchstring)) { system.out.println(temp + " ----> contains"); } else { system.out.println(temp); } } system.out.println(result.size()); } public string getdomainname(string url) { string domainname = ""; matcher = patterndomainname.matcher(url); if (matcher.find()) { domainname = matcher.group(0).tolowercase().trim(); } return domainname; } private set<string> getdatafromgoogle(string query) { set<string> result = new hashset<string>(); string request = "https://www.google.com/search?q=" + query; system.out.println("sending request..." + request); try { // need http protocol, set google bot agent :) document doc = jsoup.connect(request) .useragent("mozilla/5.0 (compatible; googlebot/2.1; +http://www.google.com/bot.html)").timeout(6000) .get(); // links elements links = doc.select("a[href]"); (element link : links) { string temp = link.attr("href"); if (temp.startswith("/url?q=")) { // use regex domain name result.add(getdomainname(temp)); } } } catch (ioexception e) { e.printstacktrace(); } return result; }
}
Parsing Google's result pages directly is not a good idea. You can try the Google API instead: https://developers.google.com/web-search/docs/#java-access
Comments
Post a Comment