使用python爬虫实现网络股票信息爬取的demo

yipeiwu_com6年前Python爬虫

实例如下所示:

import requests
from bs4 import BeautifulSoup
import traceback
import re
 
def getHTMLText(url):
 try:
  r = requests.get(url)
  r.raise_for_status()
  r.encoding = r.apparent_encoding
  return r.text
 except:
  return ""
 
def getStockList(lst, stockURL):
 html = getHTMLText(stockURL)
 soup = BeautifulSoup(html, 'html.parser') 
 a = soup.find_all('a')
 for i in a:
  try:
   href = i.attrs['href']
   lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
  except:
   continue
 
def getStockInfo(lst, stockURL, fpath):
 for stock in lst:
  url = stockURL + stock + ".html"
  html = getHTMLText(url)
  try:
   if html=="":
    continue
   infoDict = {}
   soup = BeautifulSoup(html, 'html.parser')
   stockInfo = soup.find('div',attrs={'class':'stock-bets'})
 
   name = stockInfo.find_all(attrs={'class':'bets-name'})[0]
   infoDict.update({'股票名称': name.text.split()[0]})
    
   keyList = stockInfo.find_all('dt')
   valueList = stockInfo.find_all('dd')
   for i in range(len(keyList)):
    key = keyList[i].text
    val = valueList[i].text
    infoDict[key] = val
    
   with open(fpath, 'a', encoding='utf-8') as f:
    f.write( str(infoDict) + '\n' )
  except:
   traceback.print_exc()
   continue
 
def main():
 stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
 stock_info_url = 'https://gupiao.baidu.com/stock/'
 output_file = 'D:/BaiduStockInfo.txt'
 slist=[]
 getStockList(slist, stock_list_url)
 getStockInfo(slist, stock_info_url, output_file)
 
main()

优化并且加入进度条显示

import requests
from bs4 import BeautifulSoup
import traceback
import re
def getHTMLText(url, code="utf-8"):
 try:
  r = requests.get(url)
  r.raise_for_status()
  r.encoding = code
  return r.text
 except:
  return ""
def getStockList(lst, stockURL):
 html = getHTMLText(stockURL, "GB2312")
 soup = BeautifulSoup(html, 'html.parser')
 a = soup.find_all('a')
 for i in a:
  try:
   href = i.attrs['href']
   lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
  except:
   continue
def getStockInfo(lst, stockURL, fpath):
 count = 0
 for stock in lst:
  url = stockURL + stock + ".html"
  html = getHTMLText(url)
  try:
   if html == "":
    continue
   infoDict = {}
   soup = BeautifulSoup(html, 'html.parser')
   stockInfo = soup.find('div', attrs={'class': 'stock-bets'})
   name = stockInfo.find_all(attrs={'class': 'bets-name'})[0]
   infoDict.update({'股票名称': name.text.split()[0]})
   keyList = stockInfo.find_all('dt')
   valueList = stockInfo.find_all('dd')
   for i in range(len(keyList)):
    key = keyList[i].text
    val = valueList[i].text
    infoDict[key] = val
   with open(fpath, 'a', encoding='utf-8') as f:
    f.write(str(infoDict) + '\n')
    count = count + 1
    print("\r当前进度: {:.2f}%".format(count * 100 / len(lst)), end="")
  except:
   count = count + 1
   print("\r当前进度: {:.2f}%".format(count * 100 / len(lst)), end="")
   continue
def main():
 stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
 stock_info_url = 'https://gupiao.baidu.com/stock/'
 output_file = 'BaiduStockInfo.txt'
 slist = []
 getStockList(slist, stock_list_url)
 getStockInfo(slist, stock_info_url, output_file)
main()

以上这篇使用python爬虫实现网络股票信息爬取的demo就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持【听图阁-专注于Python设计】。

相关文章

python访问抓取网页常用命令总结

python访问抓取网页常用命令 简单的抓取网页: import urllib.request url="http://google.cn/" response=urllib....

Python使用Scrapy爬取妹子图

Python Scrapy爬虫,听说妹子图挺火,我整站爬取了,上周一共搞了大概8000多张图片。和大家分享一下。 核心爬虫代码 # -*- coding: utf-8 -*- fro...

详解python selenium 爬取网易云音乐歌单名

详解python selenium 爬取网易云音乐歌单名

目标网站: 首先获取第一页的数据,这里关键要切换到iframe里 打印一下 获取剩下的页数,这里在点击下一页之前需要设置一个延迟,不然会报错。 结果: 一共37页,爬取完...

python爬虫实战之爬取京东商城实例教程

python爬虫实战之爬取京东商城实例教程

前言 本文主要介绍的是利用python爬取京东商城的方法,文中介绍的非常详细,下面话不多说了,来看看详细的介绍吧。 主要工具 scrapy BeautifulSoup r...

Python开发中爬虫使用代理proxy抓取网页的方法示例

本文实例讲述了Python开发中爬虫使用代理proxy抓取网页的方法。分享给大家供大家参考,具体如下: 代理类型(proxy):透明代理 匿名代理 混淆代理和高匿代理. 这里写一些pyt...