import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
# URL of the page whose outgoing links are scanned.
start_url = 'https://tr.wikipedia.org/wiki/Fred_la_marmotte' # Replace with the URL you want to start from
# Accumulates (URL, title) tuples; visit_links_and_fetch_titles appends to it
# and the end of the script prints its contents.
page_info = []
# Function to fetch and parse a web page
def fetch_and_parse(url, timeout=10):
    """Fetch *url* and return the text of its HTML ``<title>`` element.

    Args:
        url: Absolute URL of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled host.

    Returns:
        The whitespace-stripped title text, ``'No Title'`` when the page has
        no usable ``<title>``, or ``None`` when the request fails (the error
        is printed).
    """
    # Keep the try body to the calls that can actually raise a request error.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # ``soup.title.string`` is None for an empty <title> or one containing
    # nested markup; get_text() handles both, so a successful fetch never
    # returns None and cannot be mistaken for a failed request.
    title = soup.title.get_text(strip=True) if soup.title else ''
    return title or 'No Title'
# Function to visit links and fetch titles
def visit_links_and_fetch_titles(url, timeout=10):
    """Collect the titles of the web.archive.org pages linked from *url*.

    Downloads *url*, inspects every ``<a href>`` on it, and for each distinct
    http(s) link whose host is ``web.archive.org`` fetches the target via
    ``fetch_and_parse`` and appends ``(link_url, title)`` to the module-level
    ``page_info`` list.

    Args:
        url: Page whose links are scanned.
        timeout: Seconds to wait for the download of *url* itself before
            giving up.

    Returns:
        None. Results are appended to ``page_info``. A failure to download
        *url* is printed and nothing is appended.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    seen = set()  # resolved URLs already handled, so repeats are fetched once
    for link in soup.find_all('a', href=True):
        try:
            # Resolve relative and protocol-relative ("//host/...") hrefs
            # against the page URL before inspecting them.
            link_url = urljoin(url, link['href'].strip())
            parts = urlparse(link_url)
            host = parts.hostname
        except ValueError:
            # Malformed href (e.g. unbalanced IPv6 brackets): skip it.
            continue
        if parts.scheme not in ('http', 'https'):
            continue
        # Compare the hostname rather than searching the whole URL, so links
        # that merely mention web.archive.org in a path or query are ignored.
        if host != 'web.archive.org':
            continue
        if link_url in seen:
            continue
        seen.add(link_url)
        title = fetch_and_parse(link_url)
        if title:
            page_info.append((link_url, title))
# Start the process
# Populates page_info as a side effect.
visit_links_and_fetch_titles(start_url)

# Report every collected (URL, title) pair, one blank line after each.
print("URL and Title:")
for record in page_info:
    url, title = record
    print(f"URL: {url}")
    print(f"Title: {title}\n")
{"html5":"htmlmixed","css":"css","javascript":"javascript","php":"php","python":"python","ruby":"ruby","lua":"text\/x-lua","bash":"text\/x-sh","go":"go","c":"text\/x-csrc","cpp":"text\/x-c++src","diff":"diff","latex":"stex","sql":"sql","xml":"xml","apl":"apl","asterisk":"asterisk","c_loadrunner":"text\/x-csrc","c_mac":"text\/x-csrc","coffeescript":"text\/x-coffeescript","csharp":"text\/x-csharp","d":"d","ecmascript":"javascript","erlang":"erlang","groovy":"text\/x-groovy","haskell":"text\/x-haskell","haxe":"text\/x-haxe","html4strict":"htmlmixed","java":"text\/x-java","java5":"text\/x-java","jquery":"javascript","mirc":"mirc","mysql":"sql","ocaml":"text\/x-ocaml","pascal":"text\/x-pascal","perl":"perl","perl6":"perl","plsql":"sql","properties":"text\/x-properties","q":"text\/x-q","scala":"scala","scheme":"text\/x-scheme","tcl":"text\/x-tcl","vb":"text\/x-vb","verilog":"text\/x-verilog","yaml":"text\/x-yaml","z80":"text\/x-z80"}