### Browse through all the 10 pages on the web-site
## https://find-and-update.company-information.service.gov.uk/register-of-disqualifications/A?page=1
## Collect/print all the hyperlinks that lead to each individual profile in terminal/CSV.
## Concatenate the collected urls and go through each profile (you can create
## another program, by just looping through the list).
## Collect the below information (if existent on each profile) and print it in a CSV file:
## name, DOB, nationality, address, start and end date, case reference
## Desired output:
## profileUrl + '!' + name + '!' + dob + '!' + nationality + '!' + address + '!' + startDate + '!' + endDate + '!' + caseReference + '\n'
##
## NOTE: this script only performs the first stage (collecting the profile
## URLs, one per line, into OUTPUT_CSV).

import csv
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# File the collected URLs are written to.
# Please change the path to the correct csv file on your computer.
OUTPUT_CSV = r'C:\Users\Karolina.Gugudis\Desktop\Find and update company info.csv'

# Listing of the register for the letter "A"; individual pages are "?page=N".
REGISTER_URL = 'https://find-and-update.company-information.service.gov.uk/register-of-disqualifications/A'
PAGE_COUNT = 10

# Table rows inspected on every listing page. A full page uses rows 2..51;
# the last page was observed to stop at row 25.
# NOTE(review): row 1 is presumably the table header -- confirm.
FIRST_ROW = 2
LAST_ROW = 51
ROW_LINK_XPATH = '//*[@id="search-container"]/div[1]/table/tbody/tr[{row}]/td[1]/a'

# Fixed pauses (seconds) after navigation, kept from the original flow.
INITIAL_LOAD_SECONDS = 5
PAGE_LOAD_SECONDS = 3


def listing_urls():
    """Return the URLs of all listing pages, in page order."""
    return [f'{REGISTER_URL}?page={page}' for page in range(1, PAGE_COUNT + 1)]


def profile_urls_on_page(driver):
    """Return the profile hrefs found on the page currently loaded in *driver*.

    Rows FIRST_ROW..LAST_ROW are probed in order. Rows that do not exist
    (short pages) or whose link has no href are skipped, so the result
    contains only real URLs and never a placeholder value.
    """
    found = []
    for row in range(FIRST_ROW, LAST_ROW + 1):
        try:
            anchor = driver.find_element(By.XPATH, ROW_LINK_XPATH.format(row=row))
        except NoSuchElementException:
            # Short page: this row is simply absent.
            continue
        href = anchor.get_attribute('href')
        if href:  # get_attribute returns None when the attribute is missing
            found.append(href)
    return found


def main():
    """Visit every listing page and write each profile URL to OUTPUT_CSV."""
    driver = webdriver.Chrome()
    try:
        # 'w' starts a fresh file; it stays open for the whole run instead of
        # being reopened for every single row.
        with open(OUTPUT_CSV, 'w', encoding='utf-8') as f:
            f.write('profilesurls \n')

            # Initial load of the register landing page, kept from the
            # original flow; the loop below navigates to each page itself.
            driver.get(REGISTER_URL)
            driver.maximize_window()
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(INITIAL_LOAD_SECONDS)

            for link in listing_urls():
                driver.get(link)
                time.sleep(PAGE_LOAD_SECONDS)
                for profileurl in profile_urls_on_page(driver):
                    print(profileurl)
                    f.write(profileurl + '\n')
                # Push each finished page to disk so an interrupted run
                # still leaves usable partial output.
                f.flush()
    finally:
        # Always close the browser, even when a page load or write fails.
        driver.quit()
    print('the job is done')


if __name__ == '__main__':
    main()