"""Helpers for building paginated GraphQL request URLs and request cookies."""

from http.cookies import SimpleCookie
from urllib.parse import urlparse, parse_qs, urlencode
import json

# GraphQL "GetCategories" request URL. The `variables` query parameter holds a
# URL-encoded JSON object whose `currentPage` field selects the result page.
URL = (
    "https://guardian.com.my/graphql?query=query+GetCategories%28%24id%3AInt%21%24pageSize%3AInt%21%24currentPage%3AInt%21%24filters%3AProductAttributeFilterInput%21%24sort%3AProductAttributeSortInput%29%7Bcategory%28id%3A%24id%29%7Bid+description+name+product_count+meta_title+meta_keywords+meta_description+__typename%7Dproducts%28pageSize%3A%24pageSize+currentPage%3A%24currentPage+filter%3A%24filters+sort%3A%24sort%29%7Bitems%7Bid+name+sku+price%7BregularPrice%7Bamount%7Bcurrency+value+__typename%7D__typename%7D__typename%7Dprice_range%7Bminimum_price%7Bfinal_price%7Bcurrency+value+__typename%7Ddiscount%7Bamount_off+percent_off+__typename%7D__typename%7Dmaximum_price%7Bfinal_price%7Bcurrency+value+__typename%7Ddiscount%7Bamount_off+percent_off+__typename%7D__typename%7D__typename%7Dpromotion_label+promotion_label_name+sales_icon+small_image%7Burl+__typename%7Dstock_status+url_key+url_suffix+__typename%7Dpage_info%7Btotal_pages+__typename%7Dtotal_count+__typename%7D%7D"
    "&operationName=GetCategories"
    "&variables=%7B%22currentPage%22%3A1%2C%22id%22%3A3047%2C%22filters%22%3A%7B%22category_id%22%3A%7B%22eq%22%3A%223047%22%7D%7D%2C%22pageSize%22%3A60%2C%22sort%22%3A%7B%22position%22%3A%22ASC%22%7D%7D"
)


def cookie_parser() -> dict:
    """Return the hard-coded cookie header as a ``{name: value}`` dict.

    The raw ``Cookie`` header string is parsed with ``SimpleCookie`` and each
    morsel is reduced to its plain string value.
    """
    cookie_string = (
        "private_content_version=75d921dc5d1fc85c97d8d9876d6e58b2; "
        "_fbp=fb.2.1626162049790.1893904607; "
        "_ga=GA1.3.518387377.1626162051; "
        "_gid=GA1.3.151467354.1626162051; "
        "_gcl_au=1.1.203553443.1626162051; "
        "x_axis_main=v_id:017a9ecfb7ba000a4be21b24a20803079001c0710093c$_sn:1$_ss:1$_pn:1%3Bexp-session$_st:1626163851002$ses_id:1626162051002%3Bexp-session"
    )
    cookie = SimpleCookie()
    cookie.load(cookie_string)
    return {key: morsel.value for key, morsel in cookie.items()}


def parse_new_url(URL: str, page_number: int) -> str:
    """Return *URL* with ``currentPage`` in its ``variables`` JSON replaced.

    The ``variables`` query parameter is decoded as JSON, its ``currentPage``
    entry is set to *page_number*, and the query string is re-encoded onto the
    same scheme, host and path as the input URL. The resulting URL is printed
    (as the original script did) and also returned.

    Args:
        URL: Request URL whose query string has a JSON ``variables`` parameter.
        page_number: Value to store under ``currentPage``.

    Returns:
        The rebuilt URL.

    Raises:
        ValueError: If *URL* has no ``variables`` query parameter.
        json.JSONDecodeError: If ``variables`` is not valid JSON.
    """
    url_parsed = urlparse(URL)
    query_string = parse_qs(url_parsed.query)

    raw_variables = query_string.get("variables")
    if not raw_variables:
        raise ValueError("URL has no 'variables' query parameter")

    query_state = json.loads(raw_variables[0])
    query_state["currentPage"] = page_number
    # json.loads produced a detached copy, so the updated state must be written
    # back into the query mapping or the re-encoded URL keeps the old page.
    # Compact separators match the encoding used by the source URL.
    query_string["variables"] = [json.dumps(query_state, separators=(",", ":"))]

    encoded_qs = urlencode(query_string, doseq=True)
    # Rebuild from the parsed input instead of a hard-coded host so the helper
    # works for any endpoint; for the module's URL the result is the same base.
    new_url = url_parsed._replace(query=encoded_qs).geturl()
    print(new_url)
    return new_url


# Script behaviour kept from the original: emit the URL for page 3 on load.
parse_new_url(URL, 3)