Facebook
From ID:pE5umMzO0, 1 Year ago, written in Python.
This paste is a reply to scraping 5ch/VIP from ID:pE5umMzO0 - view diff
Embed
Download Paste or View Raw
Hits: 324
  1. import selenium
  2. from typing import Optional
  3.  
  4. from selenium import webdriver
  5. from selenium.webdriver.common.keys import Keys
  6. from selenium.webdriver.common.by import By
  7. from selenium.webdriver.support.ui import WebDriverWait
  8. from selenium.webdriver.support import expected_conditions
  9. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  10.  
  11. from latest_user_agents import get_latest_user_agents
  12.  
  13.  
  14. def write_thread(
  15.         driver, server: str, board: str, thread: str,
  16.         body: str, name: str = None, mail: str = None
  17. ) -> (bool, Optional[str]):
  18.     url = f"https://{server}.5ch.net/test/read.cgi/{board}/{thread}/l50"
  19.  
  20.     # TODO: エラー処理
  21.     driver.get(url)
  22.  
  23.     # 要素に値を詰める
  24.     elem = driver.find_element(By.XPATH, '//textarea[@name="MESSAGE"]')
  25.     elem.send_keys(body)
  26.     if name is not None:
  27.         elem = driver.find_element(By.XPATH, '//input[@name="FROM"]')
  28.         elem.send_keys(name)
  29.     if mail is not None:
  30.         elem = driver.find_element(By.XPATH, '//input[@name="mail"]')
  31.         elem.send_keys(mail)
  32.  
  33.     # 書きこみボタンを押す
  34.     elem = WebDriverWait(driver, 10).until(
  35.         expected_conditions.element_to_be_clickable((
  36.             By.XPATH, '//input[@name="submit"]'
  37.         ))
  38.     )
  39.     # ref: https://office54.net/python/scraping/selenium-click-exception
  40.     driver.execute_script("arguments[0].click();", elem)
  41.  
  42.     # Cookieをセットしてない場合は承諾ボタンを押す
  43.     if "書きこみ&クッキー確認" in driver.page_source:
  44.         elem = WebDriverWait(driver, 10).until(
  45.             expected_conditions.element_to_be_clickable((
  46.                 By.XPATH, '//input[@name="submit"]'
  47.             ))
  48.         )
  49.         elem.click()
  50.  
  51.     if driver.title == "ERROR!":
  52.         elem = driver.find_element(By.XPATH, '/html/body/font[1]/b')
  53.         return False, elem.text
  54.     else:
  55.         return True, None
  56.  
  57.  
  58. def setup_driver():
  59.     ua_list = get_latest_user_agents()
  60.     ua = ""
  61.     for ua_ in ua_list:
  62.         if "Windows" in ua_ and "Chrome" in ua_:
  63.             ua = ua_
  64.             break
  65.  
  66.     options = webdriver.ChromeOptions()
  67.     options.add_argument('--user-agent=' + ua)
  68.     # ref: https://stackoverflow.com/questions/73930313/python-selenium-button-click-causes-browser-console-error
  69.     options.add_argument('--disable-blink-features=AutomationControlled')
  70.  
  71.     # ref: https://stackoverflow.com/questions/46322165/dont-wait-for-a-page-to-load-using-selenium-in-python/46339092#46339092
  72.     caps = DesiredCapabilities().CHROME
  73.     caps['pageLoadStrategy'] = 'eager'
  74.  
  75.     return webdriver.Chrome(options=options, desired_capabilities=caps)
  76.  
  77.  
  78. if __name__ == '__main__':
  79.     import time
  80.  
  81.     driver = setup_driver()
  82.     print(write_thread(driver, 'mi', 'news4vip', '1671786885', 'Cookieのテスト1', '>>1', 'mail address'))
  83.     time.sleep(60)
  84.     print(write_thread(driver, 'mi', 'news4vip', '1671786885', 'Cookieのテスト2', '>>1', 'mail address'))
  85.  

Replies to Re: scraping 5ch/VIP rss

Title Name Language When
Re: Re: scraping 5ch/VIP ID:mj/PQAwN0XMAS python 1 Year ago.