Issue
I am trying to use Python to get product information such as name and price. This time it doesn't work, even though I inspected the HTML in the browser's developer tools to find the class name and tried to use that name to select the elements I want.
The result is that I cannot find any items matching `class_="col-xs-2-4 shopee-search-item-result__item"`.
Should I add more header information?
the print result
- print(r.status_code)-->200
- print(r.history)-->[]
- print(r.url)-->https://shopee.tw/shop/1819984/search?shopCollection=9271157
- print(len(items))-->0
import requests
import re
import pandas as pd
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import json
# Shop collection page that the script tries to scrape product tiles from.
url = 'https://shopee.tw/shop/1819984/search?shopCollection=9271157'
# Request headers sent with the page fetch: a fixed Host, a Firefox
# User-Agent string and a hard-coded Cookie header.
# NOTE(review): the cookie looks like it was copied from a browser session -
# it is presumably tied to that session and may expire; confirm before reuse.
headers = {
'Host': 'shopee.tw',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
'Cookie':'SPC_IA=-1; SPC_EC=-; SPC_F=L07IMDECRHjifEKyg7XuNCJ00GNdJGTA; REC_T_ID=246cfcdc-18fa-11ea-b254-f8f21e2be0b8; SPC_T_ID="Fyr1skVDq7FDiJOuTYHBmMfMr2Cw1eZyPbYJhBYoRmf/gvfvkOf5zgjIVXLrYYlg32aSx1PfmhWq7QsQzwM86mdeXG8VU7ERK4N+gfPFd14="; SPC_U=-; SPC_T_IV="/oJN8EB7iQwg7+n5mXd6cw=="; _gcl_au=1.1.788704691.1575727322; _fbp=fb.1.1575727322914.443117835; _ga=GA1.2.1422761069.1575727324; __BWfp=c1575727332595xf5a099d8b; cto_lwid=7ea874b3-f31f-47d7-aef9-60eed0156d33; cto_bundle=0tgQ7V9rU3JlRTU4aWlTc09JNXRaN014Y3ZXa1BtVVcwT2RhOU1UZ0tweUFvWUo2WHRPQjd0JTJCM1duaG5iWXFFRWxpbHZkTFluWUZLSEFudTFreGJueFoxU0EyanhnMWN6ZEVIUVV6cFlhd050emhFMWQ4bmhVelZwVSUyRmwwQUp5c29lOEhPT2ZobE10S1dvT09HYWNhVXV1YWx5R3dSOGw0MHcwZWpiZ2pXU2VHSzdrJTNE; _med=refer; G_ENABLED_IDPS=google; fbm_382498665271383=base_domain=.shopee.tw; SPC_SI=jq6hwq6ju6hig9hfulumcagdqaiopatc; _gid=GA1.2.143857303.1577796150; csrftoken=3Pya3o5WYEvhLOj9FqCqbV3angfwBlko; AMP_TOKEN=%24NOT_FOUND; _dc_gtm_UA-61915057-6=1'
}
# Fetch the page (redirects allowed) and print the status code, the redirect
# history and the final URL as a quick sanity check of the response.
r = requests.get(url,headers=headers,allow_redirects=True)
print(r.status_code)
print(r.history)
print(r.url)
# Parse the returned HTML and collect every <div> carrying the product-tile
# classes. find_all only sees markup present in r.text, so the count printed
# below is 0 whenever the tiles are not part of the HTML the server returned
# (for example when they are added later by client-side JavaScript).
soup = BeautifulSoup(r.text, 'html.parser')
items = soup.find_all("div", class_="col-xs-2-4 shopee-search-item-result__item")
print(len(items))
Solution
This page uses JavaScript to display items, but BeautifulSoup/requests can't run JavaScript.
Using DevTools in Firefox/Chrome (tab "Network") I found the URL used by JavaScript to get data from the server as JSON, so it doesn't even need BeautifulSoup.
To work correctly it needs all of these headers.
Without User-Agent and X-Requested-With it sends empty data.
Without Referer it doesn't send prices.
import requests

# JSON endpoint queried directly instead of the HTML shop page, so no HTML
# parsing is needed: the response is decoded with r.json() below.
url = 'https://shopee.tw/api/v2/search_items/?by=pop&limit=30&match_id=1819984&newest=0&order=desc&page_type=shop&shop_categoryids=9271157&version=2'

# All three headers are sent on purpose. Reportedly the server returns empty
# data without User-Agent and X-Requested-With, and omits prices without
# Referer.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0',
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'https://shopee.tw/shop/1819984/search?shopCollection=9271157',
}

# The timeout stops the script from hanging forever on a stalled connection;
# raise_for_status() reports an HTTP error before we try to decode the body.
r = requests.get(url, headers=headers, timeout=10)
r.raise_for_status()
data = r.json()

#print(data['items'][0].keys())

# A rejected request can come back without usable items, so treat a missing
# or null "items" entry as an empty list instead of crashing on the lookup.
items = data.get('items') or []
if not items:
    print('no items returned - check the request headers')

for item in items:
    print('name:', item['name'])
    print('prince:', item['price'])
    print('sold:', item['historical_sold'])
    print('---')

#print(data['items'][0])  # for test only
Result:
name: 『現貨+預購』 Balea 精華膠囊 7 入
prince: 4900000
sold: 5104
---
name: 💯現貨供應 💯德國 Invisibobble 神奇魔髮圈流線魔髮圈
prince: 7500000
sold: 26
---
BTW: for testing, to see all values you can use json to format the output with indentation:
import json

# Pretty-print the first item with 4-space indentation so that every field
# of the record can be inspected.
first_item = data['items'][0]
print(json.dumps(first_item, indent=4))
Result:
{
"itemid": 1212735748,
"welcome_package_info": null,
"liked": false,
"recommendation_info": null,
"bundle_deal_info": null,
"price_max_before_discount": -1,
"image": "338673ff6f2b23d63514e5af85269d46",
"is_cc_installment_payment_eligible": false,
"shopid": 1819984,
"can_use_wholesale": true,
"group_buy_info": null,
"reference_item_id": "",
"currency": "TWD",
"raw_discount": null,
"show_free_shipping": false,
"video_info_list": [],
"ads_keyword": null,
"collection_id": null,
"images": [
"338673ff6f2b23d63514e5af85269d46"
],
"match_type": null,
"price_before_discount": 0,
"is_category_failed": false,
"show_discount": 0,
"cmt_count": 306,
"view_count": 93,
"display_name": null,
"catid": 67,
"json_data": null,
"upcoming_flash_sale": null,
"is_official_shop": false,
"brand": "Dm Ebelin",
"price_min": 4900000,
"liked_count": 136,
"can_use_bundle_deal": false,
"show_official_shop_label": false,
"coin_earn_label": null,
"price_min_before_discount": -1,
"cb_option": 0,
"sold": 0,
"deduction_info": null,
"stock": 3647,
"status": 1,
"price_max": 4900000,
"add_on_deal_info": null,
"is_group_buy_item": null,
"flash_sale": null,
"price": 4900000,
"shop_location": "\u53f0\u4e2d\u5e02\u6f6d\u5b50\u5340",
"item_rating": {
"rating_star": 4.996732,
"rating_count": [
306,
0,
0,
0,
1,
305
],
"rcount_with_image": 11,
"rcount_with_context": 139
},
"show_official_shop_label_in_title": false,
"tier_variations": [],
"is_adult": null,
"discount": null,
"flag": 65536,
"is_non_cc_installment_payment_eligible": false,
"has_lowest_price_guarantee": false,
"has_group_buy_stock": false,
"preview_info": null,
"welcome_package_type": 0,
"name": "\u300e\u73fe\u8ca8+\u9810\u8cfc\u300f Balea \u7cbe\u83ef\u81a0\u56ca 7 \u5165",
"distance": null,
"adsid": null,
"ctime": 1527866201,
"wholesale_tier_list": [
{
"min_count": 150,
"price": 4700000,
"max_count": 300
},
{
"min_count": 301,
"price": 4600000,
"max_count": 1000
},
{
"min_count": 1001,
"price": 4500000,
"max_count": null
}
],
"show_shopee_verified_label": false,
"campaignid": null,
"show_official_shop_label_in_normal_position": null,
"item_status": "normal",
"shopee_verified": false,
"hidden_price_display": null,
"size_chart": null,
"item_type": 0,
"shipping_icon_type": null,
"campaign_stock": null,
"label_ids": [],
"service_by_shopee_flag": 0,
"badge_icon_type": 0,
"historical_sold": 5104,
"transparent_background_image": ""
}
Answered By - furas
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.