### Browse through all the 10 pages on the web-site 
## https://find-and-update.company-information.service.gov.uk/register-of-disqualifications/A?page=1
## collect/print all the hyperlinks that lead to each individual profile in terminal/CSV
## Concatenate the collected urls and go through each profile (you can create another program, by just looping through the list)
## Collect the below information (if existent on each profile) and print it in CSV file
## name, DOB, nationality, address, start and end date, case reference

## Desired output:
## profileUrl + '!' + name + '!' + dob + '!' + nationality + '!' + address + '!' + startDate + '!' + endDate + '!' + caseReference + '\n'

from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import csv
from selenium.common.exceptions import NoSuchElementException
 
# Collects the profile hyperlinks from the ten "A" result pages of the
# register of disqualifications and writes them, one per line, to a CSV file.

# Output file. Please change the path to the correct CSV file on your computer.
# The backslashes matter: without them Windows treats the string as a single
# drive-relative file name instead of a file on the Desktop.
OUTPUT_CSV = r'C:\Users\Karolina.Gugudis\Desktop\Find and update company info.csv'

# Landing page of the "A" section; the result pages are addressed via ?page=N.
BASE_URL = 'https://find-and-update.company-information.service.gov.uk/register-of-disqualifications/A'

# The ten result pages to visit (?page=1 ... ?page=10).
urls = [f'{BASE_URL}?page={page}' for page in range(1, 11)]

# Placeholder written when a table row has no profile link, e.g. on a page
# with fewer rows than a full one. Any program that later loops over the
# collected URLs has to skip these lines.
MISSING_LINK = 'no name'

driver = webdriver.Chrome()

try:
    # Open the output file once for the whole run ('w' creates/overwrites it)
    # instead of re-opening it in append mode for every single row.
    with open(OUTPUT_CSV, 'w', encoding="utf-8") as f:
        f.write('profilesurls \n')  # header row

        # Initial visit to the landing page before walking the result pages.
        driver.get(BASE_URL)
        driver.maximize_window()
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        for link in urls:
            driver.get(link)
            time.sleep(3)  # give the page time to load

            # Per the XPaths recorded by the original author, the name links
            # sit in table rows tr[2] .. tr[51]; the last page (#10) ends
            # earlier, at tr[25].
            for i in range(2, 52):
                x = f'//*[@id="search-container"]/div[1]/table/tbody/tr[{i}]/td[1]/a'

                try:
                    profileurl = driver.find_element(By.XPATH, x).get_attribute('href')
                except NoSuchElementException:
                    profileurl = MISSING_LINK

                # get_attribute returns None when the anchor has no href;
                # treat that like a missing link rather than crashing on
                # the string concatenation below.
                if profileurl is None:
                    profileurl = MISSING_LINK

                print(profileurl)
                f.write(profileurl + '\n')
finally:
    # Always close the browser and the chromedriver process, even on errors.
    driver.quit()

print('the job is done')