分类 python 下的文章

前提是百度网盘服务器上已经存有内容相同的文件(即“秒传”),好处在于不用打开浏览器。

# -*- coding: utf-8 -*-

import os
import sys
import zlib
import json
import hashlib
import requests
from argparse import ArgumentParser
# The value stored under "BDUSS" here is the XDUSS value found in the
# web page's source code (placeholder shown; fill in your own).
cookies = {"BDUSS":"pansec_***"}
# Request headers sent with the rapid-upload call below.
ua_1 = {
    "X-Requested-With":"XMLHttpRequest",
    "User-Agent":"netdisk;2.0.0.4;PC;PC-Windows;10.0.17763;uploadplugin",
    "Content-Type":"application/x-www-form-urlencoded",
    "Accept":"*/*",
    "Accept-Language":"zh-CN,zh;q=0.8",
    "Accept-Charset":"GBK,utf-8;q=0.7,*;q=0.3",
    }

def crc(path):
    """Return the CRC-32 of the file at *path* as an unsigned 32-bit integer.

    The file is read in fixed-size binary chunks, so memory use stays bounded
    even when the data contains no newline bytes, and the file handle is
    closed when reading finishes or fails.
    """
    checksum = 0
    with open(path, "rb") as f:
        # iter() with a b"" sentinel stops at end of file.
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            checksum = zlib.crc32(chunk, checksum)
    # Mask so the result is always in the unsigned 32-bit range.
    return checksum & 0xFFFFFFFF
    
def slice_md5(path):
    """Return the hex MD5 digest of the first 256 KiB of the file at *path*.

    Files shorter than 256 KiB are hashed in full.
    """
    head_size = 256 * 1024
    with open(path, "rb") as fh:
        head = fh.read(head_size)
    return hashlib.md5(head).hexdigest()
    
def info(path):
    """Compute and print the hashes of the file at *path*.

    Returns a tuple ``(crc32, part_md5, md5_list, md5, size)`` where:
      * crc32    -- unsigned CRC-32 of the whole file (from ``crc``)
      * part_md5 -- hex MD5 of the first 256 KiB (from ``slice_md5``)
      * md5_list -- hex MD5 of each consecutive 4 MiB block of the file
      * md5      -- hex MD5 of the whole file
      * size     -- file size in bytes as reported by ``os.path.getsize``
    """
    block_size = 4194304  # 4 MiB per block
    crc32 = crc(path)
    part_md5 = slice_md5(path)
    size = os.path.getsize(path)
    hash_obj = hashlib.md5()
    md5_list = []
    # The context manager closes the file even if a read fails; reading
    # until EOF avoids hashing empty blocks if the file shrinks meanwhile.
    with open(path, "rb") as f:
        for bin_obj in iter(lambda: f.read(block_size), b""):
            hash_obj.update(bin_obj)
            md5_list.append(hashlib.md5(bin_obj).hexdigest())
    md5 = hash_obj.hexdigest()
    print("crc32", crc32)
    print("前256K的md5", part_md5)
    print("分片md5", md5_list)
    print("文件md5", md5)
    return crc32, part_md5, md5_list, md5, size
    
def rapidupload(remote_fullpath, md5, part_md5, crc32, size):
    """Ask the netdisk API to create *remote_fullpath* from hashes alone.

    Parameters:
      remote_fullpath -- destination path on the netdisk, including file name
      md5             -- hex MD5 of the whole file
      part_md5        -- hex MD5 of the first 256 KiB of the file
      crc32           -- CRC-32 of the file as a decimal integer
      size            -- file size in bytes

    Returns True when the reply's ``errno`` is 0 or -8 (-8 is reported here
    as "a file with the same name already exists"), otherwise False.

    Raises ``requests`` exceptions on network failure or when the server
    does not answer within the timeout.
    """
    # NOTE(review): the request, including the session cookie, goes over
    # plain HTTP -- consider switching to https once confirmed to work.
    url = "http://pan.baidu.com/api/rapidupload"
    query = {
        "clienttype":"6",
        "version":"2.0.0.4",
        }
    data = {
        "path":remote_fullpath,
        "content-md5":md5,
        "slice-md5":part_md5,  # MD5 of the first 256 KiB
        "content-crc32":crc32,  # decimal
        "content-length":size,
    }
    # Per the original author's note: an errno of 404 in the reply means
    # your own netdisk does not have this file.
    # A timeout keeps the script from hanging forever on a stalled server.
    rapid_res = requests.post(url, data=data, headers=ua_1, cookies=cookies,
                              params=query, timeout=30)
    rapid_resd = json.loads(rapid_res.content.decode("utf-8"))
    print(rapid_resd)
    errno = rapid_resd.get("errno")
    if errno == -8:
        print("已存在同名文件")
        return True
    if errno == 0:
        # .get() so a success reply without "info" does not raise KeyError.
        print(rapid_resd.get("info"))
        return True
    print(errno)
    return False
        
def main():
    """Command-line entry point: try to rapid-upload one local file.

    Options:
      -fp / --fpath  local file path (required)
      -bp / --bpath  destination directory on the netdisk (required)

    On success the result is printed and the process ends with status 0.
    On failure the message goes to stderr and the exit status is 1.
    """
    # argparse ships with the standard library (Python 2.7+ / 3.2+).
    parser = ArgumentParser(description="尝试秒传文件至百度网盘")
    # Both options are required: without them the path handling below
    # would fail with an unhelpful TypeError on None.
    parser.add_argument("-fp", "--fpath", required=True, help="本地文件路径")
    parser.add_argument("-bp", "--bpath", required=True, help="网盘路径")
    args = parser.parse_args()
    name = os.path.basename(args.fpath)
    # Strip trailing slashes so "/dir/" or "/" does not yield "//name".
    remote_fullpath = args.bpath.rstrip("/") + "/" + name
    crc32, part_md5, _md5_list, md5, size = info(args.fpath)
    if rapidupload(remote_fullpath, md5, part_md5, crc32, size):
        # sys.exit(<str>) would exit with status 1, so print instead and
        # return normally to report success with status 0.
        print("{} 已秒传 位于 {}".format(name, remote_fullpath))
        return
    sys.exit("{} 秒传失败".format(name))
    
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()

项目地址https://github.com/ilanschnell/bitarray
其中有一个大小端的转换
简单地讲,1 字节有 8 比特;大小端相互转换时,就是把每个字节的 8 位二进制各自“翻转”(位序反转),从而得到转换结果。

>>> from bitarray import bitarray
>>> bit_array = bitarray(endian="big")
>>> bit_array
bitarray()
>>> bit_array.frombytes(b"fdsp")
>>> bit_array
bitarray('01100110011001000111001101110000')
>>> bitarray(bit_array, endian="little")
bitarray('01100110001001101100111000001110')

在md5计算中,在填1补0这一步之后,所有操作都是基于小端的。

爱奇艺的弹幕文件形如:

https://cmts.iqiyi.com/bullet/59/00/1307555900_300_1.z?rn=0.7268306364207229&business=danmu&is_iqiyi=true&is_video_page=true&tvid=1307555900&albumid=214500601&categoryid=2&qypid=01010021010000000000

实际可简化为

https://cmts.iqiyi.com/bullet/59/00/1307555900_300_1.z

链接组成:

https://cmts.iqiyi.com/bullet/tvid倒数4位的前两位/tvid最后两位/tvid_300_x.z
x 为分包序号(从 1 开始),其最大值为片子总时长除以 300 秒向上取整,即按每 5 分钟一个包。

转换方法:
二进制读取文件,转换为字节数组,用zlib库解包,以utf-8解码即可。
python实现代码:

import zlib
import requests
# Read the saved danmaku package as raw bytes; the context manager closes
# the file handle instead of leaving it open.
with open('1307555900_300_1.z', 'rb') as f:
    zread = f.read()
zarray = bytearray(zread)
# wbits = 15 + 32: largest window size, with automatic detection of a
# zlib or gzip header.
xml=zlib.decompress(zarray, 15+32).decode('utf-8')
with open('qiyi.xml','w',encoding='utf-8') as f:
    f.write(xml)

如果不是保存的文件,直接读取弹幕链接的response

import zlib
import requests
# url is the danmaku link (left out here on purpose).
zget = requests.get(url).content
# Decompress the bytes just downloaded. The original used an unrelated
# name (zread) here, which fails or silently reuses stale data.
zarray = bytearray(zget)
# wbits = 15 + 32: largest window size, with automatic detection of a
# zlib or gzip header.
xml=zlib.decompress(zarray, 15+32).decode('utf-8')

结果:
QQ截图20181023012046.png
QQ截图20181023012131.png