分类 python 下的文章

表单提交带有文件时,请求头里的 Content-Type 通常长这样:
multipart/form-data; boundary=----WebKitFormBoundaryV1xLBsu5DDg4628C
那么boundary对应值的后半部分是怎么来的呢,搜了一下chromium的代码,找到了答案。
下面是用 Python 照着写的实现。之所以自己生成,主要是因为 Python 的 requests 模块生成的 boundary 样子和浏览器的不一样,强迫症~

from random import randint
# 64-entry lookup table of character codes: A-Z, a-z, 0-9, followed by "A" and
# "B" once more so that the table holds exactly 64 entries.
kAlphaNumericEncodingMap = [
    ord(ch)
    for ch in (
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "AB"
    )
]

# Draw 16 random table entries (indices 0..63 inclusive) and turn them into text.
random_chars = "".join(
    map(chr, (kAlphaNumericEncodingMap[randint(0, 63)] for _ in range(16)))
)

# Same prefix a WebKit-style multipart boundary carries, then the random tail.
boundary = "----WebKitFormBoundary{}".format(random_chars)
print(boundary)

下面的脚本用于尝试把本地文件“秒传”到百度网盘。前提是网盘有这个文件,好处在于不用开浏览器...

# -*- coding: utf-8 -*-

import os
import sys
import zlib
import json
import hashlib
import requests
from argparse import ArgumentParser
# NOTE: the BDUSS value to use here is the one that appears as XDUSS in the
# web page source.
cookies = dict(BDUSS="pansec_***")

# Headers passed along with the rapid-upload POST request.
ua_1 = {
    "X-Requested-With": "XMLHttpRequest",
    "User-Agent": "netdisk;2.0.0.4;PC;PC-Windows;10.0.17763;uploadplugin",
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Accept-Charset": "GBK,utf-8;q=0.7,*;q=0.3",
}

def crc(path):
    """Return the CRC-32 of the file at *path* as an unsigned 32-bit integer.

    The file is read in fixed-size binary chunks inside a ``with`` block, so
    the handle is always closed and memory use stays bounded even when the
    file contains no newline bytes at all.

    Args:
        path: Path of the file to checksum.

    Returns:
        The CRC-32 value masked to the range 0..0xFFFFFFFF.
    """
    chunk_size = 1024 * 1024
    checksum = 0
    with open(path, "rb") as f:
        # iter() with a sentinel keeps reading until read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            checksum = zlib.crc32(chunk, checksum)
    # Mask so the result is unsigned regardless of how crc32 reports it.
    return checksum & 0xFFFFFFFF
    
def slice_md5(path):
    """Return the hex MD5 digest of the first 256 KiB of the file at *path*.

    If the file is shorter than 256 KiB, whatever bytes it has are hashed.
    """
    head_size = 256 * 1024
    with open(path, "rb") as handle:
        head = handle.read(head_size)
    return hashlib.md5(head).hexdigest()
    
def info(path):
    """Compute and print the checksums and size of the file at *path*.

    The file is hashed in 4 MiB slices: every slice gets its own MD5, and all
    slices are also fed into one running MD5 covering the whole file. The
    file is opened in a ``with`` block so the handle is closed even if a
    read fails part-way through.

    Args:
        path: Path of the local file to inspect.

    Returns:
        A tuple ``(crc32, part_md5, md5_list, md5, size)`` where ``crc32`` is
        the value from ``crc(path)``, ``part_md5`` the value from
        ``slice_md5(path)``, ``md5_list`` the hex MD5 of each 4 MiB slice
        (empty for an empty file), ``md5`` the hex MD5 of the whole file and
        ``size`` the file size in bytes as reported by ``os.path.getsize``.
    """
    slice_size = 4194304  # 4 MiB per slice

    crc32 = crc(path)
    part_md5 = slice_md5(path)
    size = os.path.getsize(path)

    hash_obj = hashlib.md5()
    md5_list = []
    with open(path, "rb") as f:
        # Read exactly the number of bytes reported by getsize(), one slice at
        # a time; the final slice may be shorter than slice_size.
        remaining = size
        while remaining > 0:
            bin_obj = f.read(min(remaining, slice_size))
            hash_obj.update(bin_obj)
            md5_list.append(hashlib.md5(bin_obj).hexdigest())
            remaining -= slice_size
    md5 = hash_obj.hexdigest()

    print("crc32", crc32)
    print("前256K的md5", part_md5)
    print("分片md5", md5_list)
    print("文件md5", md5)
    return crc32, part_md5, md5_list, md5, size
    
def rapidupload(remote_fullpath, md5, part_md5, crc32, size):
    """POST the file's checksums to the rapid-upload endpoint and report the outcome.

    Args:
        remote_fullpath: Destination path, sent as the ``path`` form field.
        md5: MD5 of the whole file, sent as ``content-md5``.
        part_md5: MD5 of the first 256K of the file, sent as ``slice-md5``.
        crc32: CRC-32 of the file in decimal, sent as ``content-crc32``.
        size: File size, sent as ``content-length``.

    Returns:
        True when the response's ``errno`` is -8 or 0, False for any other
        value. The decoded response is always printed.
    """
    form = {
        "path": remote_fullpath,
        "content-md5": md5,
        "slice-md5": part_md5,  # MD5 of the first 256K
        "content-crc32": crc32,  # decimal
        "content-length": size,
    }
    client_params = {
        "clienttype": "6",
        "version": "2.0.0.4",
    }

    response = requests.post(
        "http://pan.baidu.com/api/rapidupload",
        data=form,
        headers=ua_1,
        cookies=cookies,
        params=client_params,
    )
    reply = json.loads(response.content.decode("utf-8"))
    print(reply)

    errno = reply.get("errno")
    if errno == -8:
        print("已存在同名文件")
        return True
    if errno == 0:
        print(reply["info"])
        return True

    # Author's note: an errno of 404 in the response means one's own netdisk
    # does not have this file.
    print(errno)
    return False
        
def main():
    """Command-line entry point: try to rapid-upload one local file.

    Parses ``-fp/--fpath`` (local file path) and ``-bp/--bpath`` (netdisk
    directory), computes the file's checksums with ``info`` and submits them
    with ``rapidupload``.

    Both options are required, so a missing one produces an argparse usage
    error instead of a ``TypeError`` further down.

    Raises:
        SystemExit: Always. Status 0 after a successful upload (the message
            is printed to stdout); on failure the message is passed to
            ``sys.exit``, which prints it to stderr and exits with status 1.
    """
    parser = ArgumentParser(description="尝试秒传文件至百度网盘")
    parser.add_argument("-fp", "--fpath", required=True, help="本地文件路径")
    parser.add_argument("-bp", "--bpath", required=True, help="网盘路径")
    args = parser.parse_args()

    name = os.path.basename(args.fpath)
    # Strip a trailing "/" so "/dir/" and "/dir" both yield "/dir/<name>".
    remote_fullpath = args.bpath.rstrip("/") + "/" + name

    crc32, part_md5, md5_list, md5, size = info(args.fpath)
    flag = rapidupload(remote_fullpath, md5, part_md5, crc32, size)
    if flag:
        # sys.exit(<str>) would report status 1, so print and exit 0 instead.
        print("{} 已秒传 位于 {}".format(name, remote_fullpath))
        sys.exit(0)
    sys.exit("{} 秒传失败".format(name))
    
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

项目地址https://github.com/ilanschnell/bitarray
其中有一个大小端的转换
简单地讲,1字节等于8比特;这里大小端相互转换,就是把每个字节的8个比特(也就是8位二进制)各自“翻转”(位序颠倒),从而得到转换结果。

from bitarray import bitarray
>>> bit_array = bitarray(endian="big")
>>> bit_array
bitarray()
>>> bit_array.frombytes(b"fdsp")
>>> bit_array
bitarray('01100110011001000111001101110000')
>>> bitarray(bit_array, endian="little")
bitarray('01100110001001101100111000001110')

在md5计算中,在填1补0这一步之后,所有操作都是基于小端的。