Facebook
From Sloppy Hummingbird, 4 years ago, written in LaTeX.
This paste is a reply to Re: amazon from YO - view diff
Embed
Download Paste or View Raw
Hits: 335
  1. {
  2.   "start_urls":
  3.   [
  4.     "https://www.amazon.es/s?k=irrigador+dental&__mk_es_ES=%C3%85M%C3%85%C5%BD%C3%95%C3%91&qid=1575978395&ref=sr_pg_1"
  5.   ],
  6.   "fields": {
  7.    "name": "//h1[@id='title']/span[@id='productTitle']/text()",
  8.    "price": "//span[@id='priceblock_ourprice']/text()|//span[@id='priceblock_saleprice']/text()",
  9.    "price_old": "//span[@class='priceBlockStrikePriceString a-text-strike']/text()",
  10.    "reference": "",
  11.    "image": "//div[@id='imgTagWrapperId']/img/@src",
  12.    "description": "//div[@id='productDescription']//text()",
  13.    "brand": "",
  14.    "brand_text": "//*[@id='bylineInfo_feature_div']/div/a/text()",
  15.    "categories": "//div[@id='wayfinding-breadcrumbs_container']//ul/li/span[@class='a-list-item']/a/text()",
  16.    "availability": "//div[@class='a-section']/div[@id='availability']/span/text()",
  17.    "rating_average": "//span[@class='a-declarative']//span[@data-hook='rating-out-of-text']/text()",
  18.    "rating_count": "//*[@id='reviewsMedley']/div/div[1]/div[1]/div[2]/span/text()",
  19.    "barcode": "",
  20.    "stock": "",
  21.    "seller": "//div[@id='merchant-info']/a[@id='sellerProfileTriggerId']/text()"
  22.  },
  23.  "links": {
  24.    "detail": "//h2/a[contains(@class,'a-text-normal') and not(contains(@href,'pantry'))]|//div[@class='s-item-container' and not(self::*//img[contains(@src,'pantry')])]//h2/parent::a[contains(@class,'a-text-normal')]",
  25.    "next_page": "//a[(contains(.,'Siguiente') or contains(.,'siguiente')) and not(contains(@class,'carousel-goto-nextpage'))]"
  26.  },
  27.  "regex": {
  28.    "rating_average": ["(.*?)de", 1],
  29.    "rating_count": ["(.*?)valoraci", 1]
  30.  }
  31. }