Untitled

From Muskaan, 7 Months ago, written in Plain Text.

Embed

Download Paste or View Raw
Hits: 459

from bs4 import BeautifulSoup

import json

from time import sleep

import time

import requests

from random import randint

from html.parser import HTMLParser

# HTTP request headers passed to requests.get() for every search; the
# User-Agent value is a desktop Chrome 61 (Windows 10, x64) browser string.
USER_AGENT = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}

class SearchEngine:
    """Scrape result links for a query from the Ask.com web search page."""

    @staticmethod
    def search(query, sleep=True):
        """Run one web search and return the result URLs.

        Args:
            query: Free-text search query. Runs of whitespace (including a
                trailing newline) are collapsed to single spaces.
            sleep: When true, pause for a random 10-100 seconds before the
                request so consecutive calls are spread out. The name shadows
                ``time.sleep`` imported at file level, which is why the body
                calls ``time.sleep`` explicitly.

        Returns:
            A list of at most 10 unique result URLs in page order.

        Raises:
            requests.RequestException: If the HTTP request fails or exceeds
                the 30-second timeout.
        """
        if sleep:
            time.sleep(randint(10, 100))

        # Let requests build the query string so characters such as '&', '#'
        # or '+' in the query are percent-encoded instead of corrupting the URL.
        response = requests.get(
            'http://www.ask.com/web',
            params={'q': ' '.join(query.split())},
            headers=USER_AGENT,
            timeout=30,  # without a timeout a stalled connection blocks forever
        )
        soup = BeautifulSoup(response.text, "html.parser")
        return SearchEngine.scrape_search_result(soup)

    @staticmethod
    def scrape_search_result(soup, max_results=10):
        """Extract unique result links from a parsed results page.

        Args:
            soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup
                object).
            max_results: Maximum number of links to return.

        Returns:
            A list of unique ``href`` values found inside the
            ``PartialSearchResults-item-title`` divs, in the order they appear
            on the page. Anchors without an ``href`` are skipped.
        """
        links = []
        seen = set()  # membership test only; ``links`` keeps the page order
        titles = soup.find_all(
            "div", attrs={"class": "PartialSearchResults-item-title"}
        )
        for title in titles:
            for anchor in title.find_all("a"):
                if len(links) >= max_results:
                    return links
                href = anchor.attrs.get("href")
                if not href or href in seen:
                    continue
                seen.add(href)
                links.append(href)
        return links

if __name__ == '__main__':
    # Read one query per line, search each one, and write a JSON object that
    # maps every query to its list of result URLs.
    with open("100QueriesSet3.txt", "r") as query_file:
        # Strip the line terminator so JSON keys are the bare query text.
        queries = [line.strip() for line in query_file]

    dictionary = {}
    for key in queries:
        if not key:
            # A blank line would cost a full randomized delay for an empty search.
            continue
        dictionary[key] = SearchEngine.search(key)

    with open("hw1.json", "w") as outfile:
        json.dump(dictionary, outfile, indent=4)

Author

Title

Language

Your paste - Paste your paste here

from bs4 import BeautifulSoup
import json
from time import sleep
import time
import requests
from random import randint
from html.parser import HTMLParser

# HTTP request headers passed to requests.get() for every search; the
# User-Agent value is a desktop Chrome 61 (Windows 10, x64) browser string.
USER_AGENT = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}

class SearchEngine:
    """Scrape result links for a query from the Ask.com web search page."""

    @staticmethod
    def search(query, sleep=True):
        """Run one web search and return the result URLs.

        Args:
            query: Free-text search query. Runs of whitespace (including a
                trailing newline) are collapsed to single spaces.
            sleep: When true, pause for a random 10-100 seconds before the
                request so consecutive calls are spread out. The name shadows
                ``time.sleep`` imported at file level, which is why the body
                calls ``time.sleep`` explicitly.

        Returns:
            A list of at most 10 unique result URLs in page order.

        Raises:
            requests.RequestException: If the HTTP request fails or exceeds
                the 30-second timeout.
        """
        if sleep:
            time.sleep(randint(10, 100))

        # Let requests build the query string so characters such as '&', '#'
        # or '+' in the query are percent-encoded instead of corrupting the URL.
        response = requests.get(
            'http://www.ask.com/web',
            params={'q': ' '.join(query.split())},
            headers=USER_AGENT,
            timeout=30,  # without a timeout a stalled connection blocks forever
        )
        soup = BeautifulSoup(response.text, "html.parser")
        return SearchEngine.scrape_search_result(soup)

    @staticmethod
    def scrape_search_result(soup, max_results=10):
        """Extract unique result links from a parsed results page.

        Args:
            soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup
                object).
            max_results: Maximum number of links to return.

        Returns:
            A list of unique ``href`` values found inside the
            ``PartialSearchResults-item-title`` divs, in the order they appear
            on the page. Anchors without an ``href`` are skipped.
        """
        links = []
        seen = set()  # membership test only; ``links`` keeps the page order
        titles = soup.find_all(
            "div", attrs={"class": "PartialSearchResults-item-title"}
        )
        for title in titles:
            for anchor in title.find_all("a"):
                if len(links) >= max_results:
                    return links
                href = anchor.attrs.get("href")
                if not href or href in seen:
                    continue
                seen.add(href)
                links.append(href)
        return links

if __name__ == '__main__':
    # Read one query per line, search each one, and write a JSON object that
    # maps every query to its list of result URLs. All of this stays under the
    # __main__ guard so importing the module performs no I/O.
    with open("100QueriesSet3.txt", "r") as query_file:
        # Strip the line terminator so JSON keys are the bare query text.
        queries = [line.strip() for line in query_file]

    dictionary = {}
    for key in queries:
        if not key:
            # A blank line would cost a full randomized delay for an empty search.
            continue
        dictionary[key] = SearchEngine.search(key)

    with open("hw1.json", "w") as outfile:
        json.dump(dictionary, outfile, indent=4)

Private - Private pastes aren't shown in recent listings.

Delete After - When should we delete your paste?

Spam protection -

{"html5":"htmlmixed","css":"css","javascript":"javascript","php":"php","python":"python","ruby":"ruby","lua":"text\/x-lua","bash":"text\/x-sh","go":"go","c":"text\/x-csrc","cpp":"text\/x-c++src","diff":"diff","latex":"stex","sql":"sql","xml":"xml","apl":"apl","asterisk":"asterisk","c_loadrunner":"text\/x-csrc","c_mac":"text\/x-csrc","coffeescript":"text\/x-coffeescript","csharp":"text\/x-csharp","d":"d","ecmascript":"javascript","erlang":"erlang","groovy":"text\/x-groovy","haskell":"text\/x-haskell","haxe":"text\/x-haxe","html4strict":"htmlmixed","java":"text\/x-java","java5":"text\/x-java","jquery":"javascript","mirc":"mirc","mysql":"sql","ocaml":"text\/x-ocaml","pascal":"text\/x-pascal","perl":"perl","perl6":"perl","plsql":"sql","properties":"text\/x-properties","q":"text\/x-q","scala":"scala","scheme":"text\/x-scheme","tcl":"text\/x-tcl","vb":"text\/x-vb","verilog":"text\/x-verilog","yaml":"text\/x-yaml","z80":"text\/x-z80"}

Reply to "Untitled"