栏目分类:
子分类:
返回
终身学习网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
终身学习网 > IT > 软件开发 > 后端开发 > Python

Python 爬虫:逆向解析央视频

Python 更新时间:发布时间: 百科书网 趣学号
import asyncio
import binascii
import ctypes
import json
import random
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from datetime import datetime
from urllib.parse import urlparse, parse_qs, urlencode

import aiohttp
import execjs
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad

# Compiled JavaScript context exposing createGUID(), which returns the current
# millisecond timestamp in base 36, an underscore, and a base-36 rendering of
# Math.random() with its leading "0" and the following character removed.
javascript_file = execjs.compile('''
function createGUID() {
    var e = (new Date).getTime().toString(36)
      , t = Math.random().toString(36).replace(/^0./, "");
    return "".concat(e, "_").concat(t)
}
''')

# AES key and initialisation vector used by aes_encrypt(); each is 32 hex
# digits, i.e. 16 raw bytes after decoding.
KEY = binascii.a2b_hex("4E2918885FD98109869D14E0231A0BF4")
IV = binascii.a2b_hex("16B17E519DDD0CE5B79D7A63A4DD801C")


def aes_encrypt(data_string):
    """Encrypt ``data_string`` with AES-CBC and return the result as hex text.

    The text is UTF-8 encoded and padded to a 16-byte boundary before being
    encrypted with the module-level ``KEY`` and ``IV``.

    :param data_string: plain text to encrypt.
    :return: lowercase hexadecimal string of the ciphertext.
    """
    plaintext = pad(data_string.encode('utf-8'), 16)
    cipher = AES.new(key=KEY, mode=AES.MODE_CBC, iv=IV)
    ciphertext = cipher.encrypt(plaintext)
    return ciphertext.hex()


def create_qa(data_string):
    """Return a signed 32-bit rolling hash of ``data_string``.

    Each character updates the accumulator as ``acc * 31 + ord(ch)`` (written
    as ``(acc << 5) - acc``), wrapping at 32 bits. The final value is
    reinterpreted as a signed 32-bit integer.

    The accumulator is masked on every step so it never grows beyond 32 bits;
    the original ``a &= a`` was a no-op in Python (presumably carried over
    from JavaScript, where it truncates to int32) and let the integer grow
    with the input length until the final cast.

    :param data_string: text to hash; an empty string hashes to 0.
    :return: integer in the range [-2**31, 2**31 - 1].
    """
    acc = 0
    for ch in data_string:
        acc = ((acc << 5) - acc + ord(ch)) & 0xFFFFFFFF
    # Reinterpret the unsigned 32-bit value as two's-complement signed.
    if acc & 0x80000000:
        return acc - 0x100000000
    return acc


def create_ckey(vid, _rnd, app_ver, guid, platform):
    """Build the ``cKey`` request parameter for a video.

    :param vid: video id.
    :param _rnd: timestamp string sent with the request.
    :param app_ver: application version string.
    :param guid: client GUID.
    :param platform: platform id string.
    :return: ``"--01"`` followed by the upper-cased hex AES ciphertext.
    """
    # Step 1: assemble the pipe-delimited payload. The empty first field
    # makes the joined string start with "|".
    fields = (
        "",
        vid,
        _rnd,
        "mg3c3b04ba",
        app_ver,
        guid,
        platform,
        "https://w.yangshipin.cn/|mozilla/5.0 (macintosh; ||Mozilla|Netscape|MacIntel|",
    )
    payload = "|".join(fields)

    # Step 2: prefix the payload with "|" and its create_qa() hash.
    checksum = create_qa(payload)
    to_encrypt = "|" + str(checksum) + payload

    # Step 3: AES-encrypt and add the fixed "--01" prefix.
    return "--01" + aes_encrypt(to_encrypt).upper()


async def get_vkey(session, guid, flowid, platform, app_ver, _rnd, vid, ckey):
    """Request play info for a video and extract its file name and vkey.

    Sends a GET to the ``playvinfo`` endpoint, unwraps the JSONP response and
    hands the decoded payload to ``txplayerJsonpCallBack_getinfo_389028``.

    The response body is parsed with ``json.loads`` rather than ``eval``:
    evaluating text received from the network would execute arbitrary code,
    and would also fail on JSON literals such as ``true``/``false``/``null``.

    :param session: aiohttp client session used for the request.
    :param guid: client GUID.
    :param flowid: flow id sent with the request.
    :param platform: platform id string.
    :param app_ver: application version string.
    :param _rnd: timestamp string (also used when building ``ckey``).
    :param vid: video id.
    :param ckey: value produced by ``create_ckey``.
    :return: ``(fn, vkey)`` tuple from the callback.
    :raises ValueError: if the response is not a ``callback(...)`` wrapper or
        its payload is not valid JSON.
    """
    params = {
        "callback": "txplayerJsonpCallBack_getinfo_389028",
        "charge": "0",
        "defaultfmt": "auto",
        "otype": "json",
        "guid": guid,
        "flowid": flowid,
        "platform": platform,
        "sdtfrom": "v7007",
        "defnpayver": "0",
        "appVer": app_ver,
        "host": "m.yangshipin.cn",
        "ehost": "https://m.yangshipin.cn/video",
        "refer": "m.yangshipin.cn",
        "sphttps": "1",
        "sphls": "1",
        "_rnd": _rnd,
        "spwm": "4",
        "vid": vid,
        "defn": "auto",
        "fhdswitch": "",
        "show1080p": "false",
        "dtype": "3",
        "clip": "",
        "defnsrc": "",
        "fmt": "auto",
        "defsrc": "1",
        "encryptVer": "8.1",
        "cKey": ckey
    }

    # NOTE(review): "Adroid" looks like a typo for "Android" (the other
    # user-agent strings in this file spell it out); left unchanged because
    # it is part of what is sent to the server - confirm before editing.
    headers = {
        "user-agent": "Mozilla/5.0 (Linux; Adroid 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Mobile Safari/537.36",
        "referer": "https://m.yangshipin.cn/"
    }
    playvinfo_url = "https://playvv.yangshipin.cn/playvinfo"
    async with session.get(url=playvinfo_url, params=params, headers=headers) as res:
        text = await res.text()

    # The body is expected to look like `<callback>({...json...})`; take what
    # lies between the first "(" and the last ")".
    start = text.find("(")
    end = text.rfind(")")
    if start == -1 or end <= start:
        raise ValueError("unexpected playvinfo response: {!r}".format(text[:200]))
    payload = json.loads(text[start + 1:end])
    return txplayerJsonpCallBack_getinfo_389028(payload)


def txplayerJsonpCallBack_getinfo_389028(dict):
    """Extract the file name and vkey from a decoded play-info payload.

    :param dict: mapping with a ``"vl"`` entry whose ``"vi"`` list holds the
        video info; only the first list element is read.
    :return: ``(fn, fvkey)`` taken from that first element; a missing key
        yields ``None`` in its place.
    """
    first_video = dict.get("vl").get("vi")[0]
    return first_video.get("fn"), first_video.get("fvkey")


async def play_video(session, guid, vkey, fn, vid, pid, app_ver, platform):
    """POST a play report for a video to the ``kvcollect`` endpoint.

    A download URL is built from ``fn``, ``guid`` and ``vkey`` and sent as the
    ``vurl`` form field together with the other report fields. The response
    body is printed.

    :param session: aiohttp client session used for the request.
    :param guid: client GUID.
    :param vkey: key returned by ``get_vkey``.
    :param fn: video file name returned by ``get_vkey``.
    :param vid: video id.
    :param pid: play id sent in the report.
    :param app_ver: application version string, sent as ``version``.
    :param platform: platform id string, sent in the report body.
    :return: None
    """
    # The download URL always carries platform "2", independent of the
    # ``platform`` argument.
    query_string = urlencode({
        "sdtfrom": "v7007",
        "guid": guid,
        "vkey": vkey,
        "platform": "2"
    })
    video_download_url = "https://mp4playcloud-cdn.ysp.cctv.cn/{}?{}".format(fn, query_string)

    # Values that appear in more than one report field.
    page_url = "https://m.yangshipin.cn/video?type=0&vid={}".format(vid)
    report_ua = "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML  like Gecko) Chrome/94.0.4606.61 Mobile Safari/537.36"

    query = {
        "BossId": "2865",
        "Pwd": "1698957057",
        "_dc": random.random()
    }

    form = {
        "uin": "",
        "vid": vid,
        "coverid": "",
        "pid": pid,
        "guid": guid,
        "unid": "",
        "vt": "0",
        "type": "3",
        "url": page_url,
        "bi": "0",
        "bt": "0",
        "version": app_ver,
        "platform": platform,
        "defn": "0",
        "ctime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "ptag": "w_yangshipin_cn",
        "isvip": "-1",
        "tpid": "18",
        "pversion": "h5",
        "hc_uin": "",
        "hc_vuserid": "",
        "hc_openid": "",
        "hc_appid": "",
        "hc_pvid": "0",
        "hc_ssid": "",
        "hc_qq": "",
        "hh_ua": report_ua,
        "ua": report_ua,
        "ckey": "",
        "iformat": "0",
        "hh_ref": page_url,
        "vuid": "",
        "vsession": "",
        "format_ua": "other",
        "common_rcd_info": "",
        "common_ext_info": "",
        "v_idx": "0",
        "rcd_info": "",
        "extrainfo": "",
        "c_channel": "",
        "vurl": video_download_url,
        "step": "6",
        "val": "640",
        "val1": "1",
        "val2": "1",
        "idx": "0",
        "c_info": "",
        "isfocustab": "0",
        "isvisible": "0",
        "fact1": "",
        "fact2": "",
        "fact3": "",
        "fact4": "",
        "fact5": "",
        "cpay": "0",
        "tpay": "0",
        "dltype": "1",
    }

    request_headers = {
        "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Mobile Safari/537.36",
        "referer": "https://m.yangshipin.cn/"
    }

    async with session.post(
        url="https://btrace.yangshipin.cn/kvcollect",
        params=query,
        data=form,
        headers=request_headers,
    ) as res:
        body = await res.text()
        print(body)


async def handle(url):
    """Run one full sequence for the video referenced by ``url``.

    Opens its own aiohttp session, reads ``vid`` from the URL's query string,
    builds the ckey, fetches the file name and vkey, then sends the play
    report.

    :param url: page URL whose query string contains a ``vid`` parameter.
    :return: None
    """
    async with aiohttp.ClientSession() as session:
        query = parse_qs(urlparse(url).query)
        vid = query['vid'][0]

        platform = "4330701"
        app_ver = "1.3.5"

        guid = javascript_file.call('createGUID')
        # A second, separate createGUID() call; the original author notes
        # that its value differs from guid.
        pid = javascript_file.call('createGUID')
        flowid = "_".join((pid, platform))
        _rnd = str(int(time.time()))

        # Step 1: compute the ckey.
        ckey = create_ckey(vid, _rnd, app_ver, guid, platform)

        # Step 2: use the ckey to request playvinfo and obtain fn and vkey.
        fn, vkey = await get_vkey(
            session, guid, flowid, platform, app_ver, _rnd, vid, ckey
        )

        # Step 3: send the play report.
        await play_video(session, guid, vkey, fn, vid, pid, app_ver, platform)


async def engine(url, per_request_count):
    """Run ``per_request_count`` concurrent ``handle(url)`` tasks and wait for all.

    :param url: page URL passed to every ``handle`` call.
    :param per_request_count: number of tasks to start; zero or a negative
        number starts nothing.
    :return: None
    """
    # asyncio.wait() raises ValueError when given an empty collection, so
    # return early when there is nothing to schedule.
    if per_request_count <= 0:
        return

    task_list = [
        asyncio.create_task(handle(url)) for _ in range(per_request_count)
    ]
    await asyncio.wait(task_list)


def task(url, per_request_count):
    """Synchronous entry point: run ``engine`` in a fresh event loop.

    :param url: page URL forwarded to ``engine``.
    :param per_request_count: number of concurrent tasks forwarded to ``engine``.
    :return: None
    """
    batch = engine(url, per_request_count)
    asyncio.run(batch)


def run():
    """Split the total request count into batches and run them on a thread pool.

    Each submitted job calls ``task`` with one batch size. The remainder
    batch is submitted only when it is non-zero: a zero-sized batch does no
    work, and submitting it previously raised inside the worker, where the
    error was silently discarded with the future.

    :return: None
    """
    url = "https://w.yangshipin.cn/video?type=0&vid=n000094fgki"
    # Total number of requests to make.
    total_count = 100

    # Number of concurrent requests per batch (one event loop per batch).
    per_request_count = 3

    full_batches, remainder = divmod(total_count, per_request_count)
    batch_sizes = [per_request_count] * full_batches
    if remainder:
        batch_sizes.append(remainder)

    # Leaving the with-block waits for all submitted jobs, matching the
    # original pool.shutdown() call.
    with ThreadPoolExecutor(5) as pool:
        for size in batch_sizes:
            pool.submit(task, url, size)


# Start the batch run only when this file is executed as a script.
if __name__ == '__main__':
    run()

转载请注明:文章转载自 www.051e.com
本文地址:http://www.051e.com/it/293732.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 ©2023-2025 051e.com

ICP备案号:京ICP备12030808号