Python - Using BeautifulSoup to get listings
I am trying to parse a URL using Python/BeautifulSoup. Below is my code. I am wondering if there's a better way to get a sorted dictionary without using 2 lists? Or is there a more efficient way?
# Scrape the sponsor-video page and print each company's vote count, then
# the (company, votes) pairs sorted by votes in descending order.
# NOTE: this is Python 2 code (dict.iteritems, str-returning .encode()).
import requests
from bs4 import BeautifulSoup
import operator

req = requests.get('https://www.firstchampionship.org/sponsorvideos')
soup = BeautifulSoup(req.text, "lxml")

companies = []
votes = []
all_in_one = {}

# Company titles, in document order; non-ASCII characters are dropped.
for company in soup.find_all("div", {"class": "views-field views-field-title"}):
    companies.append(company.text.encode('ascii', 'ignore'))

# Vote counts, in document order; the count is the first space-separated
# token of the "rate-info" text.
for vote in soup.find_all("div", {"class": "rate-info"}):
    vote_x = vote.text.split(" ")
    votes.append(int(vote_x[0]))

# Pair the i-th company with the i-th vote count. This relies on both
# lists having been collected in the same order.
for i, x in enumerate(companies):
    all_in_one.update({x: votes[i]})

for key, value in all_in_one.iteritems():
    print(key + "->" + str(value))

# Sort the (company, votes) pairs by votes, highest first.
sorted_x = sorted(all_in_one.items(), key=operator.itemgetter(1), reverse=True)
print(sorted_x)
My final output is below, in the form company name -> number of votes:
[(' analog devices ', 7227), (' bechtel ', 6797), (' nvidia ', 436), (' qualcomm incorporated ', 349), (' viasat ', 292), (' bosch ', 201), (' nokia bell labs ', 124), (' walt disney imagineering ', 119), (' google, inc. ', 113), (' ni ', 109), (' fedex ', 100), (' nasa ', 97), (' boeing company ', 86), (' u.s. air force ', 83), (' first ', 74), (' 3m company ', 73), (' twitch ', 73), (' baxter ', 70), (' rockwell automation ', 68), (' booz allen hamilton ', 68), (' nrg ', 66), (' mouser electronics ', 63), (' ibm corporation ', 63), (' john deere ', 63), (' motorola solutions ', 62), (' delphi ', 62), (' boston scientific ', 60), (' texas instruments ', 59), (' dow chemical co. ', 59), (' ptc ', 59), (' xerox ', 58), (' southwest airlines ', 57), (' gm ', 55), (' vulcan spring ', 53), (' rockwell collins ', 52), (' festo ', 52), (' monsanto ', 50), (' lego education ', 39)]
You can take each company title, go to its parent node, find the next element that matches the votes node, append the pair to a list as a dict, and then order the list by company name.
# Scrape the sponsor-video page into a list of {"company", "votes"} dicts
# and print that list sorted by company name.
import requests
from bs4 import BeautifulSoup
from operator import itemgetter

req = requests.get('https://www.firstchampionship.org/sponsorvideos')
soup = BeautifulSoup(req.text, "lxml")

companies = []
for company in soup.find_all("div", {"class": "views-field views-field-title"}):
    # From the title's parent node, the next "rate-info" div in the
    # document holds this company's votes; the count is the first
    # space-separated token of its text (kept as a string).
    rate_info = company.parent.find_next("div", class_="rate-info")
    companies.append({
        "company": company.get_text(strip=True).encode('ascii', 'ignore'),
        "votes": rate_info.get_text(strip=True).split(' ')[0],
    })

print(sorted(companies, key=itemgetter('company')))
output:
[{'company': '3m company', 'votes': u'76'}, {'company': 'analog devices', 'votes': u'7282'}...
Comments
Post a Comment