# Facebook
# From ID:pE5umMzO0, 2 years ago, written in Python.
# This paste is a reply to "scraping 5ch/VIP" from ID:pE5umMzO0 - go back
# Embed
# Viewing differences between "scraping 5ch/VIP" and "Re: scraping 5ch/VIP"
import selenium
from typing import Optional
Optional

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By


By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

from latest_user_agents import get_latest_user_agents


def write_thread(
        driver, server: str, board: str, thread: str,
        body: str, name: Optional[str] = None, mail: Optional[str] = None
) -> "tuple[bool, Optional[str]]":
    """Post a message to a 5ch thread through a Selenium-driven browser.

    Opens the thread page, fills in the posting form, submits it, and
    accepts the cookie confirmation page when it is shown.

    Args:
        driver: Selenium WebDriver instance used to drive the browser.
        server: Sub-domain of ``5ch.net`` hosting the board.
        board: Board identifier used in the thread URL.
        thread: Thread key used in the thread URL.
        body: Text typed into the ``MESSAGE`` textarea.
        name: Text typed into the ``FROM`` input; skipped when ``None``.
        mail: Text typed into the ``mail`` input; skipped when ``None``.

    Returns:
        ``(True, None)`` when the resulting page title is not ``"ERROR!"``;
        otherwise ``(False, message)`` where ``message`` is the text of the
        first bold ``<font>`` element on the error page.
    """
    submit_locator = (By.XPATH, '//input[@name="submit"]')
    url = f"https://{server}.5ch.net/test/read.cgi/{board}/{thread}/l50"

    # TODO: error handling (navigation failures are not caught here)
    driver.get(url)

    # Fill in the form fields.
    driver.find_element(
        By.XPATH, '//textarea[@name="MESSAGE"]'
    ).send_keys(body)
    if name is not None:
        driver.find_element(
            By.XPATH, '//input[@name="FROM"]'
        ).send_keys(name)
    if mail is not None:
        driver.find_element(
            By.XPATH, '//input[@name="mail"]'
        ).send_keys(mail)

    # Press the submit button once it is clickable.
    submit = WebDriverWait(driver, 10).until(
        expected_conditions.element_to_be_clickable(submit_locator)
    )
    # Click through JavaScript instead of WebElement.click(); presumably this
    # sidesteps click exceptions raised by a native click.
    # ref: https://office54.net/python/scraping/selenium-click-exception
    driver.execute_script("arguments[0].click();", submit)

    # When no cookie has been set yet, a confirmation page is shown:
    # press its accept button.
    if "書きこみ&クッキー確認" in driver.page_source:
        confirm = WebDriverWait(driver, 10).until(
            expected_conditions.element_to_be_clickable(submit_locator)
        )
        confirm.click()

    if driver.title == "ERROR!":
        error_elem = driver.find_element(By.XPATH, '/html/body/font[1]/b')
        return False, error_elem.text
    return True, None


def setup_driver():
    """Create a Chrome WebDriver configured for posting.

    The browser is given the first user agent returned by
    ``get_latest_user_agents()`` that mentions both "Windows" and "Chrome".
    When none matches, Chrome keeps its own default user agent instead of
    being handed an empty ``--user-agent=`` value.

    Returns:
        A ``webdriver.Chrome`` instance built from the configured options.
    """
    options = webdriver.ChromeOptions()

    ua = next(
        (
            candidate for candidate in get_latest_user_agents()
            if "Windows" in candidate and "Chrome" in candidate
        ),
        None,
    )
    if ua is not None:
        options.add_argument('--user-agent=' + ua)

    # ref: https://stackoverflow.com/questions/73930313/python-selenium-button-click-causes-browser-console-error
    options.add_argument('--disable-blink-features=AutomationControlled')

    # Do not wait for the full page load.
    # ref: https://stackoverflow.com/questions/46322165/dont-wait-for-a-page-to-load-using-selenium-in-python/46339092#46339092
    # Set on the options object rather than by mutating the shared
    # DesiredCapabilities.CHROME dict and passing it separately.
    options.set_capability('pageLoadStrategy', 'eager')

    return webdriver.Chrome(options=options)


if __name__ == '__main__':
    import time

    # Manual check: post twice in the same browser session so the second
    # post runs after the first one has gone through the cookie page.
    driver = setup_driver()
    try:
        print(write_thread(
            driver, 'mi', 'news4vip', '1671786885',
            'Cookieのテスト1', '>>1', 'mail address',
        ))
        # Pause between the two posts.
        time.sleep(60)
        print(write_thread(
            driver, 'mi', 'news4vip', '1671786885',
            'Cookieのテスト2', '>>1', 'mail address',
        ))
    finally:
        # Always close the browser, even if a post raises.
        driver.quit()

# Replies to "Re: scraping 5ch/VIP" (RSS)
#
# Title | Name | Language | When
# Re: Re: scraping 5ch/VIP | ID:mj/PQAwN0XMAS | python | 2 years ago