Facebook
From asdf アスペルガー, 1 Month ago, written in Python.
This paste is a reply to Re: Re: scraping 5ch/VIP from ID:mj/PQAwN0XMAS - view diff
Embed
Download Paste or View Raw
Hits: 11
  1. from typing import Optional
  2.  
  3. from selenium import webdriver
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.support.ui import WebDriverWait
  6. from selenium.webdriver.support import expected_conditions
  7. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  8.  
  9. from latest_user_agents import get_latest_user_agents
  10.  
  11.  
  12. def write_thread(
  13.         driver, server: str, board: str, thread: str,
  14.         body: str, name: str = None, mail: str = None
  15. ) -> (bool, Optional[str]):
  16.     url = f"https://{server}.5ch.net/test/read.cgi/{board}/{thread}/l50"
  17.  
  18.     # TODO: エラー処理
  19.     driver.get(url)
  20.  
  21.     # 要素に値を詰める
  22.     どうなん
  23.     elem = driver.find_element(By.XPATH, '//textarea[@name="MESSAGE"]')
  24.     elem.send_keys(body)
  25.     if name is not None:
  26.         elem = driver.find_element(By.XPATH, '//input[@name="FROM"]')
  27.         elem.send_keys(name)
  28.     if mail is not None:
  29.         elem = driver.find_element(By.XPATH, '//input[@name="mail"]')
  30.         elem.send_keys(mail)
  31.  
  32.     # 書きこみボタンを押す
  33.     elem = WebDriverWait(driver, 10).until(
  34.         expected_conditions.element_to_be_clickable((
  35.             By.XPATH, '//input[@name="submit"]'
  36.         ))
  37.     )
  38.     # ref: https://office54.net/python/scraping/selenium-click-exception
  39.     driver.execute_script("arguments[0].click();", elem)
  40.  
  41.     # Cookieをセットしてない場合は承諾ボタンを押す
  42.     if "書きこみ&クッキー確認" in driver.page_source:
  43.         elem = WebDriverWait(driver, 10).until(
  44.             expected_conditions.element_to_be_clickable((
  45.                 By.XPATH, '//input[@name="submit"]'
  46.             ))
  47.         )
  48.         elem.click()
  49.  
  50.     if driver.title == "ERROR!":
  51.         elem = driver.find_element(By.XPATH, '/html/body/font[1]/b')
  52.         return False, elem.text
  53.     else:
  54.         driver.get("https://www.google.com")
  55.         return True, None
  56.  
  57.  
  58. def setup_driver():
  59.     ua_list = get_latest_user_agents()
  60.     ua = ""
  61.     for ua_ in ua_list:
  62.         if "Windows" in ua_ and "Chrome" in ua_:
  63.             ua = ua_
  64.             break
  65.  
  66.     options = webdriver.ChromeOptions()
  67.     options.add_argument('--user-agent=' + ua)
  68.     # ref: https://stackoverflow.com/questions/73930313/python-selenium-button-click-causes-browser-console-error
  69.     options.add_argument('--disable-blink-features=AutomationControlled')
  70.     # ref: https://sushiringblog.com/chromedriver-error
  71.     options.add_argument('--headless')
  72.     options.add_argument('--no-sandbox')
  73.     options.add_argument("--disable-setuid-sandbox")
  74.     # ref: https://stackoverflow.com/questions/53902507/unknown-error-session-deleted-because-of-page-crash-from-unknown-error-cannot
  75.     options.add_argument('--disable-dev-shm-usage')
  76.  
  77.     # ref: https://stackoverflow.com/questions/46322165/dont-wait-for-a-page-to-load-using-selenium-in-python/46339092#46339092
  78.     caps = DesiredCapabilities().CHROME
  79.     caps['pageLoadStrategy'] = 'eager'
  80.  
  81.     return webdriver.Chrome(options=options, desired_capabilities=caps)
  82.  
  83.  
  84. if __name__ == '__main__':
  85.     import time
  86.  
  87.     driver = setup_driver()
  88.     print(write_thread(driver, 'mi', 'news4vip', '1671786885', 'Cookieのテスト1', '>>1', 'mail address'))
  89.     time.sleep(60)
  90.     print(write_thread(driver, 'mi', 'news4vip', '1671786885', 'Cookieのテスト2', '>>1', 'mail address'))
  91.  
captcha