Facebook
From Muskaan, 7 Months ago, written in Plain Text.
Embed
Download Paste or View Raw
Hits: 459
  1. from bs4 import BeautifulSoup
  2. import json
  3. from time import sleep
  4. import time
  5. import requests
  6. from random import randint
  7. from html.parser import HTMLParser
  8.  
  9. USER_AGENT = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
  10.  
  11. class SearchEngine:
  12.     @staticmethod
  13.     def search(query, sleep=True):
  14.         if sleep:
  15.             time.sleep(randint(10, 100))
  16.         temp_url = '+'.join(query.split())
  17.         url = 'http://www.ask.com/web?q=' + temp_url
  18.         soup = BeautifulSoup(requests.get(url, headers=USER_AGENT).text, "html.parser")
  19.         new_results = SearchEngine.scrape_search_result(soup)
  20.         return new_results
  21.    
  22.     @staticmethod
  23.     def scrape_search_result(soup):
  24.         raw_results = soup.find_all("div",attrs = {"class" : "PartialSearchResults-item-title"})
  25.         results = set()
  26.         for result in raw_results:
  27.             for r in result.find_all("a"):
  28.                 if(len(results) < 10):
  29.                     results.add(r.attrs["href"])  
  30.         final_results = list(results)      
  31.         return final_results
  32.  
  33.  
  34. if __name__ == '__main__':
  35.     f = open("100QueriesSet3.txt", "r")
  36.     file = f.readlines()
  37.  
  38.     dictionary = {}
  39.  
  40.     for i in file:
  41.         key = i
  42.         query = SearchEngine.search(key)
  43.         dictionary[key] = query
  44.  
  45.     with open("hw1.json", "w") as outfile:
  46.         json.dump(dictionary, outfile, indent=4)