Facebook
From ID:mj/PQAwN0XMAS, 1 Year ago, written in Python.
This paste is a reply to Re: scraping 5ch/VIP from ID:pE5umMzO0 - view diff
Embed
Download Paste or View Raw
Hits: 524
from typing import Optional, Tuple

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

from latest_user_agents import get_latest_user_agents
  10.  
  11.  
  12. def write_thread(
  13.         driver, server: str, board: str, thread: str,
  14.         body: str, name: str = None, mail: str = None
  15. ) -> (bool, Optional[str]):
  16.     url = f"https://{server}.5ch.net/test/read.cgi/{board}/{thread}/l50"
  17.  
  18.     # TODO: エラー処理
  19.     driver.get(url)
  20.  
  21.     # 要素に値を詰める
  22.     elem = driver.find_element(By.XPATH, '//textarea[@name="MESSAGE"]')
  23.     elem.send_keys(body)
  24.     if name is not None:
  25.         elem = driver.find_element(By.XPATH, '//input[@name="FROM"]')
  26.         elem.send_keys(name)
  27.     if mail is not None:
  28.         elem = driver.find_element(By.XPATH, '//input[@name="mail"]')
  29.         elem.send_keys(mail)
  30.  
  31.     # 書きこみボタンを押す
  32.     elem = WebDriverWait(driver, 10).until(
  33.         expected_conditions.element_to_be_clickable((
  34.             By.XPATH, '//input[@name="submit"]'
  35.         ))
  36.     )
  37.     # ref: https://office54.net/python/scraping/selenium-click-exception
  38.     driver.execute_script("arguments[0].click();", elem)
  39.  
  40.     # Cookieをセットしてない場合は承諾ボタンを押す
  41.     if "書きこみ&クッキー確認" in driver.page_source:
  42.         elem = WebDriverWait(driver, 10).until(
  43.             expected_conditions.element_to_be_clickable((
  44.                 By.XPATH, '//input[@name="submit"]'
  45.             ))
  46.         )
  47.         elem.click()
  48.  
  49.     if driver.title == "ERROR!":
  50.         elem = driver.find_element(By.XPATH, '/html/body/font[1]/b')
  51.         return False, elem.text
  52.     else:
  53.         driver.get("https://www.google.com")
  54.         return True, None
  55.  
  56.  
  57. def setup_driver():
  58.     ua_list = get_latest_user_agents()
  59.     ua = ""
  60.     for ua_ in ua_list:
  61.         if "Windows" in ua_ and "Chrome" in ua_:
  62.             ua = ua_
  63.             break
  64.  
  65.     options = webdriver.ChromeOptions()
  66.     options.add_argument('--user-agent=' + ua)
  67.     # ref: https://stackoverflow.com/questions/73930313/python-selenium-button-click-causes-browser-console-error
  68.     options.add_argument('--disable-blink-features=AutomationControlled')
  69.     # ref: https://sushiringblog.com/chromedriver-error
  70.     options.add_argument('--headless')
  71.     options.add_argument('--no-sandbox')
  72.     options.add_argument("--disable-setuid-sandbox")
  73.     # ref: https://stackoverflow.com/questions/53902507/unknown-error-session-deleted-because-of-page-crash-from-unknown-error-cannot
  74.     options.add_argument('--disable-dev-shm-usage')
  75.  
  76.     # ref: https://stackoverflow.com/questions/46322165/dont-wait-for-a-page-to-load-using-selenium-in-python/46339092#46339092
  77.     caps = DesiredCapabilities().CHROME
  78.     caps['pageLoadStrategy'] = 'eager'
  79.  
  80.     return webdriver.Chrome(options=options, desired_capabilities=caps)
  81.  
  82.  
  83. if __name__ == '__main__':
  84.     import time
  85.  
  86.     driver = setup_driver()
  87.     print(write_thread(driver, 'mi', 'news4vip', '1671786885', 'Cookieのテスト1', '>>1', 'mail address'))
  88.     time.sleep(60)
  89.     print(write_thread(driver, 'mi', 'news4vip', '1671786885', 'Cookieのテスト2', '>>1', 'mail address'))
  90.  

Replies to Re: Re: scraping 5ch/VIP rss

Title Name Language When
Re: Re: Re: scraping 5ch/VIP asdf アスペルガー python 1 Month ago.
Re: Re: Re: scraping 5ch/VIP Tinct Pintail python 1 Year ago.
Re: Re: Re: scraping 5ch/VIP Walloping Moth python 1 Year ago.