java - Google Search with JSoup -


I am trying to search Google with Jsoup. The problem I have is that the variable query does not produce the URL I want when I start searching. Also, how does Jsoup search? Is it looking at the title, the URL, or something else?

public class start {

public static void main(string[] args) {     try {         new google().searching("möbel beck gmbh & co.kg");     } catch (exception e) {         system.out.println(e.getmessage());     } }  }  public class google implements serializable {  private static final long serialversionuid = 1l;  private static pattern patterndomainname; private matcher matcher; private static final string domain_name_pattern = "([a-za-z0-9]([a-za-z0-9\\-]{0,61}[a-za-z0-9])?\\.)+[a-za-z]{2,6}"; static {     patterndomainname = pattern.compile(domain_name_pattern); }  public void searching(string searchstring) throws ioexception {      google obj = new google();     set<string> result = obj.getdatafromgoogle(searchstring);     (string temp : result) {          if (temp.contains(searchstring)) {             system.out.println(temp + " ----> contains");         } else {             system.out.println(temp);         }     }     system.out.println(result.size());  }  public string getdomainname(string url) {      string domainname = "";     matcher = patterndomainname.matcher(url);     if (matcher.find()) {         domainname = matcher.group(0).tolowercase().trim();     }     return domainname;  }  private set<string> getdatafromgoogle(string query) {      set<string> result = new hashset<string>();     string request = "https://www.google.com/search?q=" + query;     system.out.println("sending request..." 
+ request);      try {          // need http protocol, set google bot agent :)         document doc = jsoup.connect(request)                 .useragent("mozilla/5.0 (compatible; googlebot/2.1; +http://www.google.com/bot.html)").timeout(6000)                 .get();          // links         elements links = doc.select("a[href]");         (element link : links) {              string temp = link.attr("href");             if (temp.startswith("/url?q=")) {                 // use regex domain name                 result.add(getdomainname(temp));             }          }      } catch (ioexception e) {         e.printstacktrace();     }      return result; } 

}

Parsing Google's result pages directly is not a good idea. You can try the Google API instead: https://developers.google.com/web-search/docs/#java-access


Comments

Popular posts from this blog

Command prompt result in label. Python 2.7 -

javascript - How do I use URL parameters to change link href on page? -

amazon web services - AWS Route53 Trying To Get Site To Resolve To www -