当前位置:网站首页>Crawl the product data of Xiaomi Youpin app

Crawl the product data of Xiaomi Youpin app

2022-04-23 18:00:00 Round programmer

This article is intended for learning and exchange only. Do not use it for any other purpose; otherwise you bear the consequences yourself.
Environment: Linux + PyCharm + Anaconda

import csv
import requests
from lxml import etree
import re
import random
import json
from usere_agent import UA
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


# Single endpoint that every request in this script POSTs to.
url = 'https://youpin.mi.com/app/shopv3/pipe'

# Paste the Cookie header value of your own logged-in browser session here.
COOKIE = ''

# NOTE(review): the hard-coded 'Content-Length' values below look like they
# were copied from a browser capture; requests normally derives this header
# from the actual body -- confirm whether these entries can simply be dropped.

# Headers sent with the category-list request (no cookie).
headers1 = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Length': '130',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Host': 'youpin.mi.com',
    'Origin': 'https://youpin.mi.com',
    'Referer': 'https://youpin.mi.com/',
    'User-Agent': UA,
}

# Headers installed on the session that fetches each category's products;
# the only set that carries the user's cookie.
headers2 = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Length': '145',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Cookie': COOKIE,
    'Origin': 'https://youpin.mi.com',
    'Referer': 'https://youpin.mi.com/',
    'User-Agent': UA,
}

# Static part of the product-detail headers (not referenced elsewhere in this
# script). The detail-page Referer needs a goods id, which is only known per
# product, so it is filled in by detail_headers() instead of being formatted
# here with the builtin function `id`.
headers3 = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Content-Length': '364',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Host': 'youpin.mi.com',
    'Origin': 'https://youpin.mi.com',
    'Referer': 'https://youpin.mi.com/',
    'User-Agent': UA,
}


def detail_headers(gid):
    """Return a copy of headers3 whose Referer is the detail page of goods *gid*."""
    return dict(
        headers3,
        Referer='https://youpin.mi.com/detail?gid={}'.format(gid),
    )


# Form payload: invokes action "GetGroup2ClassInfo" on model "Homepage".
data1 = {
    'mkbl_data': '{"result": {"model": "Homepage", "action": "GetGroup2ClassInfo", "parameters": {}}}',
}

# verify=False skips TLS certificate verification (the matching warning is
# disabled next to the imports). The timeout keeps a stalled connection from
# hanging the script forever, and raise_for_status() turns an HTTP error page
# into a clear exception instead of a confusing JSON/KeyError further down.
class_resp = requests.post(
    url=url, headers=headers1, data=data1, verify=False, timeout=10,
)
class_resp.raise_for_status()
req = class_resp.json()
groups = req['result']['result']['mkbl_data']['groups']

# Parallel lists: c_name[n] is the name of the class whose ucid is c_id[n].
c_name = []
c_id = []
for group in groups:
    for entry in group:
        # Each entry also has a 'sub_class' list; it is not read because
        # nothing in this script uses sub-class names or ids.
        c_name.append(entry['class']['name'])
        c_id.append(entry['class']['ucid'])

# Directory the per-category CSV files are appended to; it must already exist.
OUTPUT_DIR = '/media/liu/_dde_data/project/spider/ supplier /xmyp/'

# One session for the whole crawl: headers2 is installed once, connections are
# reused across categories, and the session is closed when the crawl ends.
with requests.Session() as s:
    s.headers.update(headers2)

    for i, class_id in zip(c_name, c_id):
        # json.dumps produces the same text the hand-built string did, but
        # stays valid JSON whatever characters the id contains.
        data2 = {
            'mkbl_data': json.dumps({
                'uClassList': {
                    'model': 'Homepage',
                    'action': 'BuildHome',
                    'parameters': {'id': str(class_id)},
                },
            }),
        }
        page = s.post(url=url, data=data2, verify=False, timeout=10)
        page.raise_for_status()
        respon = page.json()
        itemdata = respon['result']['uClassList']['mkbl_data']

        # Open the category's CSV once instead of once per row. newline='' is
        # what the csv module requires to avoid extra blank lines, and an
        # explicit encoding keeps non-ASCII product text platform-independent.
        with open(OUTPUT_DIR + i + '.csv', 'a', newline='', encoding='utf-8') as f:
            f_csv = csv.writer(f)
            for section in itemdata:
                # Only sections that carry a 'content' entry hold products.
                if 'content' not in section:
                    continue
                for k in section.get('mkbl_data', []):
                    try:
                        gid = k['gid']  # goods ID
                        name = k['name']  # product name
                        summary = k['summary']  # product introduction
                        pic_url = k['pic_url']  # product image
                        price_min = int(k['price_min']) / 100  # price (raw value divided by 100)
                        itemurl = k['url']  # product link
                    except (KeyError, TypeError, ValueError):
                        # Skip entries that are missing a field or have a
                        # non-numeric price; I/O errors are no longer hidden.
                        continue
                    print(i, name, summary, pic_url, price_min, itemurl)
                    f_csv.writerow((i, name, summary, pic_url, price_min, itemurl))



版权声明
本文为[Round programmer]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/04/202204230545316057.html