# -*- coding: utf-8 -*-
import json

from selenium import webdriver
def download(driver, target_path):
    """Render the page currently displayed by ``driver`` to ``target_path``.

    Two scripts are sent through the ``executePhantomScript`` command
    (registered below against the ``/phantom/execute`` endpoint): one that
    sets the paper size and one that calls ``render`` on the page.

    Args:
        driver: WebDriver instance whose ``execute`` method and
            ``command_executor._commands`` mapping are used; presumably a
            PhantomJS driver, since the endpoint is PhantomJS-specific.
        target_path: Destination file path handed to the page's ``render``
            call. It is converted with ``str`` and escaped, so paths
            containing backslashes or quotes are passed through intact.
    """
    def execute(script, args):
        driver.execute('executePhantomScript',
                       {'script': script, 'args': args})

    # HACK: the Python bindings do not define this command yet, so register
    # it by hand on the command executor.
    driver.command_executor._commands['executePhantomScript'] = (
        'POST', '/session/$sessionId/phantom/execute')

    # Set the page format; inside the executed script the web page is "this".
    # A3 is used because PDFs printed as A4 came out incomplete.
    page_format = 'this.paperSize = {format: "A3", orientation: "portrait" };'
    execute(page_format, [])

    # Render the current page. json.dumps produces a quoted, fully escaped
    # string literal, so backslashes (e.g. Windows paths) and double quotes
    # in the path cannot corrupt or break out of the generated script.
    render = 'this.render({})'.format(json.dumps(str(target_path)))
    execute(render, [])
if __name__ == '__main__':
    # Download a webpage as a PDF (plus a PNG screenshot) using PhantomJS.
    driver = webdriver.PhantomJS(executable_path=r'D:\phantomjs\bin\phantomjs.exe')
    try:
        url = 'http://helloiamkitty.blog.163.com/blog/static/18967710120115306544362/'
        driver.get(url)
        # An announcement popup is forced on the first visit; click to close it.
        driver.find_element_by_css_selector(".iknow.ztag").click()
        driver.save_screenshot('test.png')
        download(driver, "save_me.pdf")
    finally:
        # Always shut down the browser session, even if a step above fails,
        # so the PhantomJS process is not left running.
        driver.quit()
曾尝试使用pdfkit(wkhtmltopdf)生成PDF:页面需要加载JS,但资源加载失败(原因未查明),因此放弃该方案。报错信息如下:
OSError: wkhtmltopdf reported an error:
Loading pages (1/6)
Warning: Failed to load file://www.google-analytics.com/analytics.js (ignore)
Warning: Failed to load file:///newpage/prettycode/prettify.css (ignore)
Warning: Failed to load file:///newpage/prettycode/prettify.js (ignore)
Error: Failed to load http://widget.wumii.com/ext/relatedItemsWidget.htm, with network status code 202 and http status code 403 - Error downloading http://widget.wumii.com/ext/relatedItemsWidget.htm - server replied: Forbidden
Warning: Failed to load file:///common/showhint/hintbg.png (ignore)
libpng warning: iCCP: known incorrect sRGB profile ] 62%
libpng warning: iCCP: known incorrect sRGB profile ] 64%
Warning: Failed to load file://www.google-analytics.com/analytics.js (ignore)
libpng warning: iCCP: known incorrect sRGB profile ] 71%
Error: Failed to load http://rec.g.163.com/kaolaad/api/smartad/rec.s?type=240x275&location=1&site=netease&affiliate=blog&cat=detail, with network status code 301 and http status code 502 - Error downloading http://rec.g.163.com/kaolaad/api/smartad/rec.s?type=240x275&location=1&site=netease&affiliate=blog&cat=detail - server replied: Bad Gateway
Counting pages (2/6)
Resolving links (4/6)
Loading headers and footers (5/6)
Printing pages (6/6)
Done
Exit with code 1 due to network error: ProtocolUnknownError
另一种方法:使用Chrome无头模式也能将页面另存为PDF(将命令末尾的 url 替换为目标网址):
"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe" --headless --print-to-pdf=test.pdf --disable-gpu url
参考链接:
https://stackoverflow.com/questions/23125557/how-to-run-webpage-code-with-phantomjs-via-ghostdriver-selenium/28269099#28269099
https://segmentfault.com/q/1010000012973252
https://blog.darkthread.net/blog/headless-chrome/