Facebook
From Matheuss, 3 Years ago, written in Plain Text.
This paste is a reply to Re: Re: amazon from Social Lemur - view diff
Embed
Download Paste or View Raw
Hits: 231
  1. {
  2.   "start_urls":
  3.   [
  4.     "https://www.amazon.es/s?k=irrigador+dental&__mk_es_ES=%C3%85M%C3%85%C5%BD%C3%95%C3%91&qid=1575978395&ref=sr_pg_1"
  5.   ],
  6.   "fields": {{
  7.   "start_urls":
  8.   [
  9.     "https://www.amazon.es/s?k=irrigador+dental&__mk_es_ES=%C3%85M%C3%85%C5%BD%C3%95%C3%91&qid=1575978395&ref=sr_pg_1"
  10.   ],
  11.   "Campos": {
  12.     "name": "// h1 [@ id = 'title'] / span [@ id = 'productTitle'] / text ()",
  13.     "price": "// span [@ id = 'priceblock_ourprice'] / text () | // span [@ id = 'priceblock_saleprice'] / text ()",
  14.     "price_old": "// span [@ class = 'priceBlockStrikePriceString a-text-strike'] / text ()",
  15.     "referência": "",
  16.     "imagem": "// div [@ id = 'imgTagWrapperId'] / img / @ src",
  17.     "descrição": "// div [@ id = 'productDescription'] // text ()",
  18.     "marca": "",
  19.     "brand_text": "// * [@ id = 'bylineInfo_feature_div'] / div / a / text ()",
  20.     "categorias": "// div [@ id = 'wayfinding-breadcrumbs_container'] // ul / li / span [@ class = 'a-list-item'] / a / text ()",
  21.     "disponibilidade": "//div[@class='a-section']/div[@id='availability']/span/text()",
  22.     "rating_average": "// span [@ class = 'a-declarative'] // span [@ data-hook = 'rating-out-of-text'] / text ()",
  23.     "rating_count": "// * [@ id = 'reviewsMedley'] / div / div [1] / div [1] / div [2] / span / text ()",
  24.     "código de barras": "",
  25.     "estoque": "",
  26.     "seller": "// div [@ id = 'merchant-info'] / a [@ id = 'sellerProfileTriggerId'] / text ()"
  27.   },
  28.   "links": {
  29.     "detalhe": "//h2/a[contains(@class,'a-text-normal') and not(contains(@href,'pantry'))]|//div[@class='s-item-container' and not(self::*//img[contains(@src,'pantry')])]//h2/parent::a[contains(@class,'a-text-normal')]",
  30.     "next_page": "//a[(contains(.,'Siguiente') or contains(.,'siguiente')) and not(contains(@class,'carousel-goto-nextpage'))]"
  31.   },
  32.   "regex": {
  33.     "rating_average": ["(.*?)de", 1],
  34.     "rating_count": ["(.*?)valoraci", 1]
  35.   }
  36. }
  37.     "name": "//h1[@id='title']/span[@id='productTitle']/text()",
  38.     "price": "//span[@id='priceblock_ourprice']/text()|//span[@id='priceblock_saleprice']/text()",
  39.     "price_old": "//span[@class='priceBlockStrikePriceString a-text-strike']/text()",
  40.     "reference": "",
  41.     "image": "//div[@id='imgTagWrapperId']/img/@src",
  42.     "description": "//div[@id='productDescription']//text()",
  43.     "brand": "",
  44.     "brand_text": "//*[@id='bylineInfo_feature_div']/div/a/text()",
  45.     "categories": "//div[@id='wayfinding-breadcrumbs_container']//ul/li/span[@class='a-list-item']/a/text()",
  46.     "availability": "//div[@class='a-section']/div[@id='availability']/span/text()",
  47.     "rating_average": "//span[@class='a-declarative']//span[@data-hook='rating-out-of-text']/text()",
  48.     "rating_count": "//*[@id='reviewsMedley']/div/div[1]/div[1]/div[2]/span/text()",
  49.     "barcode": "",
  50.     "stock": "",
  51.     "seller": "//div[@id='merchant-info']/a[@id='sellerProfileTriggerId']/text()"
  52.   },
  53.   "links": {
  54.     "detail": "//h2/a[contains(@class,'a-text-normal') and not(contains(@href,'pantry'))]|//div[@class='s-item-container' and not(self::*//img[contains(@src,'pantry')])]//h2/parent::a[contains(@class,'a-text-normal')]",
  55.     "next_page": "//a[(contains(.,'Siguiente') or contains(.,'siguiente')) and not(contains(@class,'carousel-goto-nextpage'))]"
  56.   },
  57.   "regex": {
  58.     "rating_average": ["(.*?)de", 1],
  59.     "rating_count": ["(.*?)valoraci", 1]
  60.   }
  61. }