Facebook
From Dominika, 8 Months ago, written in Python.
This paste is a reply to Lsn from Dominika - view diff
Embed
Download Paste or View Raw
Hits: 83
  1. import argparse
  2. from bs4 import BeautifulSoup
  3. import requests
  4. import re
  5.  
  6. global history, counter, linksTableRoot
  7. history = []
  8. counter = 0
  9. linksTableRoot = []
  10.  
  11. def checkWiki(argument, query):
  12.         if "wikipedia" in str(argument):
  13.             history.append(argument)
  14.             return getLinksTableRoot(argument, query)
  15.         else:
  16.             print("This is not a Wikipedias website!")
  17.  
  18.  
  19. def getLinksTableRoot(argument, query):
  20.     source = requests.get(argument).text
  21.     soup = BeautifulSoup(source, 'lxml')
  22.     links = soup.find("div", class_='mw-parser-output').find_all("a", href=re.compile("(/wiki/)+([A-Za-z0-9_:()])+"))
  23.     for link in links:
  24.  
  25.         if "jpg" in str(link) or "JPG" in str(link) or "commons" in str(link):
  26.             pass
  27.         elif  "https://" in str(link):
  28.             linksTableRoot.append(link)
  29.         else:
  30.             link = link['href']
  31.             string = "https://pl.wikipedia.org" + str(link)
  32.             linksTableRoot.append(string)
  33.     return getSubwebsite(argument, query, counter)
  34.  
  35.  
  36. def getSubwebsite(argument, query, counter):
  37.     source = requests.get(argument).text
  38.     soup = BeautifulSoup(source, 'lxml')
  39.  
  40.     links = soup.find("div", class_='mw-parser-output').find_all("a", href=re.compile("(/wiki/)+([A-Za-z0-9_:()])+"))
  41.  
  42.     linksTable = []
  43.     for link in links:
  44.         if "jpg" in str(link) or "JPG" in str(link) or "commons" in str(link):
  45.             pass
  46.         elif "https://" in str(link):
  47.             linksTable.append(link)
  48.         else:
  49.             link = link['href']
  50.             string = "https://pl.wikipedia.org" + str(link)
  51.             linksTable.append(string)
  52.  
  53.     for link in linksTable:
  54.         if query in link:
  55.             print("Link with 'query': ", link)
  56.             history.append(argument)
  57.             history.append(link)
  58.             print("History of search: ", history)
  59.             exit()
  60.         else:
  61.             pass
  62.  
  63.     if counter == 0:
  64.         print("Do not find links with 'query' in deph: 0 ")
  65.     else:
  66.         pass
  67.  
  68.     print("Do not find 'query' in sublinks of ", argument)
  69.     counter = counter + 1
  70.  
  71.     return getSubwebsite(linksTableRoot[counter - 1], query, counter)
  72.  
  73.  
  74. def Main():
  75.     parser = argparse.ArgumentParser()
  76.     parser.add_argument("--url", help="Page of wikipedia (url)")
  77.     parser.add_argument("--query")
  78.  
  79.     args = parser.parse_args()
  80.     result = checkWiki(args.url, args.query)
  81.  
  82.  
  83. if __name__ == "__main__" :
  84.     Main()