Python实现网页截图(PyQT5)过程解析

yipeiwu_com5年前Python基础

方案说明

功能要求:实现网页加载后将页面截取成长图片

涉及模块:PyQT5 PIL

逻辑说明:

1:完成窗口设置,利用PyQT5 QWebEngineView加载网页地址,待网页加载完成后,调用check_pag;

class MainWindow(QMainWindow):
  def __init__(self, parent=None):
    super(MainWindow, self).__init__(parent)
    self.setWindowTitle('易哈佛')
    self.temp_height = 0
    self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False) # 禁用最大化,最小化
    # self.setWindowFlag(Qt.WindowStaysOnTopHint, True) # 窗口顶置
    self.setWindowFlag(Qt.FramelessWindowHint, True) # 窗口无边框
  def urlScreenShot(self, url):
    self.browser = QWebEngineView()
    self.browser.load(QUrl(url))
    geometry = self.chose_screen()
    self.setGeometry(geometry)
    self.browser.loadFinished.connect(self.check_page)
    self.setCentralWidget(self.browser)
  def get_page_size(self):
    size = self.browser.page().contentsSize()
    self.set_height = size.height()
    self.set_width = size.width()
    return size.width(), size.height()
  def chose_screen(self):
    width, height = 750, 1370
    desktop = QApplication.desktop()
    screen_count = desktop.screenCount()
    for i in range(0, screen_count):
      rect = desktop.availableGeometry(i)
      s_width, s_height = rect.width(), rect.height()
      if s_width > width and s_height > height:
        return QRect(rect.left(), rect.top(), width, height)
    return QRect(0, 0, width, height)
if __name__ == '__main__':
  app = QApplication(sys.argv)
  win = MainWindow()
  win.show()
  app.exit(app.exec_())

2:收集页面高度,并计算分次截屏的次数和余量高度;实例化图片合并工具,设置定时器,超时信号发出后,执行exe_command;

def check_page(self):
    p_width, p_height = self.get_page_size()
    self.page, self.over_flow_size = divmod(p_height, self.height())
    if self.page == 0:
      self.page = 1
    self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
    self.timer = QTimer(self)
    self.timer.timeout.connect(self.exe_command)
    self.timer.setInterval(400)
    self.timer.start()

3:exe_command用来控制截图次数,并在每次截图完成后控制网页向下滑屏幕的高度;所有的页面都已截取时,完成图片合并。

def exe_command(self):
    if self.page > 0:
      self.screen_shot()
      self.run_js()
    elif self.page < 0:
      self.timer.stop()
      self.ssm.image_merge()
      self.close()
    elif self.over_flow_size > 0:
      self.screen_shot()
    self.page -= 1    
  def run_js(self):
    script = """
      var scroll = function (dHeight) {
      var t = document.documentElement.scrollTop
      var h = document.documentElement.scrollHeight
      dHeight = dHeight || 0
      var current = t + dHeight
      if (current > h) {
        window.scrollTo(0, document.documentElement.clientHeight)
       } else {
        window.scrollTo(0, current)
       }
      }
    """
    command = script + '\n scroll({})'.format(self.height())
    self.browser.page().runJavaScript(command)

4:screen_shot在每次截图完成后将图片保存,并将图片对象由图片合并根据保存到列表中。

def screen_shot(self):
    screen = QApplication.primaryScreen()
    winid = self.browser.winId()
    pix = screen.grabWindow(int(winid))
    name = '{}/temp.png'.format(self.ssm.root_path)
    pix.save(name)
    self.ssm.add_im(name)

5:截图合并工具,在每次截图完成后将图片对象保存,完成余量截图的重绘和截图的合并。

class ScreenShotMerge():
  def __init__(self, page, over_flow_size):
    self.im_list = []
    self.page = page
    self.over_flow_size = over_flow_size
    self.get_path()

  def get_path(self):
    self.root_path = Path(__file__).parent.joinpath('temp')
    if not self.root_path.exists():
      self.root_path.mkdir(parents=True)
    self.save_path = self.root_path.joinpath('merge.png')

  def add_im(self, path):
    if len(self.im_list) == self.page:
      im = self.reedit_image(path)
    else:
      im = Image.open(path)
    im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
    self.im_list.append(im)

  def get_new_size(self):
    max_width = 0
    total_height = 0
    # 计算合成后图片的宽度(以最宽的为准)和高度
    for img in self.im_list:
      width, height = img.size
      if width > max_width:
        max_width = width
      total_height += height
    return max_width, total_height

  def image_merge(self, ):
    if len(self.im_list) > 1:
      max_width, total_height = self.get_new_size()
      # 产生一张空白图
      new_img = Image.new('RGB', (max_width - 15, total_height), 255)
      x = y = 0
      for img in self.im_list:
        width, height = img.size
        new_img.paste(img, (x, y))
        y += height
      new_img.save(self.save_path)
      print('截图成功:', self.save_path)
    else:
      obj = self.im_list[0]
      width, height = obj.size
      left, top, right, bottom = 0, 0, width, height
      box = (left, top, right, bottom)
      region = obj.crop(box)
      new_img = Image.new('RGB', (width, height), 255)
      new_img.paste(region, box)
      new_img.save(self.save_path)
      print('截图成功:', self.save_path)

  def reedit_image(self, path):
    obj = Image.open(path)
    width, height = obj.size
    left, top, right, bottom = 0, height - self.over_flow_size, width, height
    box = (left, top, right, bottom)
    region = obj.crop(box)
    return region

截图功能完整代码

#!/usr/bin/env python
# -*- coding:UTF-8 -*-
# Author:Leslie-x
import sys
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PIL import Image
from pathlib import Path
class ScreenShotMerge():
  def __init__(self, page, over_flow_size):
    self.im_list = []
    self.page = page
    self.over_flow_size = over_flow_size
    self.get_path()
  def get_path(self):
    self.root_path = Path(__file__).parent.joinpath('temp')
    if not self.root_path.exists():
      self.root_path.mkdir(parents=True)
    self.save_path = self.root_path.joinpath('merge.png')
  def add_im(self, path):
    if len(self.im_list) == self.page:
      im = self.reedit_image(path)
    else:
      im = Image.open(path)
    im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
    self.im_list.append(im)
  def get_new_size(self):
    max_width = 0
    total_height = 0
    # 计算合成后图片的宽度(以最宽的为准)和高度
    for img in self.im_list:
      width, height = img.size
      if width > max_width:
        max_width = width
      total_height += height
    return max_width, total_height
  def image_merge(self, ):
    if len(self.im_list) > 1:
      max_width, total_height = self.get_new_size()
      # 产生一张空白图
      new_img = Image.new('RGB', (max_width - 15, total_height), 255)
      x = y = 0
      for img in self.im_list:
        width, height = img.size
        new_img.paste(img, (x, y))
        y += height
      new_img.save(self.save_path)
      print('截图成功:', self.save_path)
    else:
      obj = self.im_list[0]
      width, height = obj.size
      left, top, right, bottom = 0, 0, width, height
      box = (left, top, right, bottom)
      region = obj.crop(box)
      new_img = Image.new('RGB', (width, height), 255)
      new_img.paste(region, box)
      new_img.save(self.save_path)
      print('截图成功:', self.save_path)
  def reedit_image(self, path):
    obj = Image.open(path)
    width, height = obj.size
    left, top, right, bottom = 0, height - self.over_flow_size, width, height
    box = (left, top, right, bottom)
    region = obj.crop(box)
    return region
class MainWindow(QMainWindow):
  def __init__(self, parent=None):
    super(MainWindow, self).__init__(parent)
    self.setWindowTitle('易哈佛')
    self.temp_height = 0
    self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False) # 禁用最大化,最小化
    # self.setWindowFlag(Qt.WindowStaysOnTopHint, True) # 窗口顶置
    self.setWindowFlag(Qt.FramelessWindowHint, True) # 窗口无边框
  def urlScreenShot(self, url):
    self.browser = QWebEngineView()
    self.browser.load(QUrl(url))
    geometry = self.chose_screen()
    self.setGeometry(geometry)
    self.browser.loadFinished.connect(self.check_page)
    self.setCentralWidget(self.browser)
  def get_page_size(self):
    size = self.browser.page().contentsSize()
    self.set_height = size.height()
    self.set_width = size.width()
    return size.width(), size.height()
  def chose_screen(self):
    width, height = 750, 1370
    desktop = QApplication.desktop()
    screen_count = desktop.screenCount()
    for i in range(0, screen_count):
      rect = desktop.availableGeometry(i)
      s_width, s_height = rect.width(), rect.height()
      if s_width > width and s_height > height:
        return QRect(rect.left(), rect.top(), width, height)
    return QRect(0, 0, width, height)
  def check_page(self):
    p_width, p_height = self.get_page_size()
    self.page, self.over_flow_size = divmod(p_height, self.height())
    if self.page == 0:
      self.page = 1
    self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
    self.timer = QTimer(self)
    self.timer.timeout.connect(self.exe_command)
    self.timer.setInterval(400)
    self.timer.start()
  def exe_command(self):
    if self.page > 0:
      self.screen_shot()
      self.run_js()

    elif self.page < 0:
      self.timer.stop()
      self.ssm.image_merge()
      self.close()

    elif self.over_flow_size > 0:
      self.screen_shot()
    self.page -= 1

  def run_js(self):
    script = """
      var scroll = function (dHeight) {
      var t = document.documentElement.scrollTop
      var h = document.documentElement.scrollHeight
      dHeight = dHeight || 0
      var current = t + dHeight
      if (current > h) {
        window.scrollTo(0, document.documentElement.clientHeight)
       } else {
        window.scrollTo(0, current)
       }
      }
    """
    command = script + '\n scroll({})'.format(self.height())
    self.browser.page().runJavaScript(command)

  def screen_shot(self):
    screen = QApplication.primaryScreen()
    winid = self.browser.winId()
    pix = screen.grabWindow(int(winid))
    name = '{}/temp.png'.format(self.ssm.root_path)
    pix.save(name)
    self.ssm.add_im(name)

if __name__ == '__main__':
  url = 'http://blog.sina.com.cn/lm/rank/focusbang//'
  app = QApplication(sys.argv)
  win = MainWindow()
  win.urlScreenShot(url)
  win.show()
  app.exit(app.exec_())

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持【听图阁-专注于Python设计】。

相关文章

浅析Git版本控制器使用

浅析Git版本控制器使用

本篇内容通过GitHub仓库创建过程以及创建连接后的上传与下载,详细介绍了Git版本控制器使用情况,来看下。 首先介绍一下什么是Git:git是目前最流行的版本控制系统,属于分布式版本控...

python 遍历目录(包括子目录)下所有文件的实例

如下所示: def list_all_files(rootdir): import os _files = [] list = os.listdir(rootdir) #列出文...

用Python操作字符串之rindex()方法的使用

 rindex()方法返回所在的子str被找到的最后一个索引,可选择限制搜索的字符串string[beg:end] 如果没有这样的索引存在,抛出一个异常。 语法 以下是rind...

解决python 读取 log日志的编码问题

解决python 读取 log日志的编码问题

1.我要读取log日志的”执行成功”的个数,log日志编码格式为GBK 2.显示报错,大致意思是说utf-8的代码不能解析log日志 3.后来想想把log日志用GBK编码读出来,写到...

使用Django连接Mysql数据库步骤

链接mysql步骤 第一步:在终端下载pymysql文件–pip install pymysql 第二步:在gjango项目的__init__文件中添加代码 import pymys...