使用python爬虫实现网络股票信息爬取的demo

yipeiwu_com6年前Python爬虫

实例如下所示:

import requests
from bs4 import BeautifulSoup
import traceback
import re
 
def getHTMLText(url):
 try:
  r = requests.get(url)
  r.raise_for_status()
  r.encoding = r.apparent_encoding
  return r.text
 except:
  return ""
 
def getStockList(lst, stockURL):
 html = getHTMLText(stockURL)
 soup = BeautifulSoup(html, 'html.parser') 
 a = soup.find_all('a')
 for i in a:
  try:
   href = i.attrs['href']
   lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
  except:
   continue
 
def getStockInfo(lst, stockURL, fpath):
 for stock in lst:
  url = stockURL + stock + ".html"
  html = getHTMLText(url)
  try:
   if html=="":
    continue
   infoDict = {}
   soup = BeautifulSoup(html, 'html.parser')
   stockInfo = soup.find('div',attrs={'class':'stock-bets'})
 
   name = stockInfo.find_all(attrs={'class':'bets-name'})[0]
   infoDict.update({'股票名称': name.text.split()[0]})
    
   keyList = stockInfo.find_all('dt')
   valueList = stockInfo.find_all('dd')
   for i in range(len(keyList)):
    key = keyList[i].text
    val = valueList[i].text
    infoDict[key] = val
    
   with open(fpath, 'a', encoding='utf-8') as f:
    f.write( str(infoDict) + '\n' )
  except:
   traceback.print_exc()
   continue
 
def main():
 stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
 stock_info_url = 'https://gupiao.baidu.com/stock/'
 output_file = 'D:/BaiduStockInfo.txt'
 slist=[]
 getStockList(slist, stock_list_url)
 getStockInfo(slist, stock_info_url, output_file)
 
main()

优化并且加入进度条显示

import requests
from bs4 import BeautifulSoup
import traceback
import re
def getHTMLText(url, code="utf-8"):
 try:
  r = requests.get(url)
  r.raise_for_status()
  r.encoding = code
  return r.text
 except:
  return ""
def getStockList(lst, stockURL):
 html = getHTMLText(stockURL, "GB2312")
 soup = BeautifulSoup(html, 'html.parser')
 a = soup.find_all('a')
 for i in a:
  try:
   href = i.attrs['href']
   lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
  except:
   continue
def getStockInfo(lst, stockURL, fpath):
 count = 0
 for stock in lst:
  url = stockURL + stock + ".html"
  html = getHTMLText(url)
  try:
   if html == "":
    continue
   infoDict = {}
   soup = BeautifulSoup(html, 'html.parser')
   stockInfo = soup.find('div', attrs={'class': 'stock-bets'})
   name = stockInfo.find_all(attrs={'class': 'bets-name'})[0]
   infoDict.update({'股票名称': name.text.split()[0]})
   keyList = stockInfo.find_all('dt')
   valueList = stockInfo.find_all('dd')
   for i in range(len(keyList)):
    key = keyList[i].text
    val = valueList[i].text
    infoDict[key] = val
   with open(fpath, 'a', encoding='utf-8') as f:
    f.write(str(infoDict) + '\n')
    count = count + 1
    print("\r当前进度: {:.2f}%".format(count * 100 / len(lst)), end="")
  except:
   count = count + 1
   print("\r当前进度: {:.2f}%".format(count * 100 / len(lst)), end="")
   continue
def main():
 stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
 stock_info_url = 'https://gupiao.baidu.com/stock/'
 output_file = 'BaiduStockInfo.txt'
 slist = []
 getStockList(slist, stock_list_url)
 getStockInfo(slist, stock_info_url, output_file)
main()

以上这篇使用python爬虫实现网络股票信息爬取的demo就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持【听图阁-专注于Python设计】。

相关文章

python书籍信息爬虫实例

python书籍信息爬虫示例,供大家参考,具体内容如下 背景说明 需要收集一些书籍信息,以豆瓣书籍条目作为源,得到一些有效书籍信息,并保存到本地数据库。 获取书籍分类标签 具体可参考这个...

Python抓取淘宝下拉框关键词的方法

本文实例讲述了Python抓取淘宝下拉框关键词的方法。分享给大家供大家参考。具体如下: import urllib2,re for key in open('key.txt'):...

python实现爬取图书封面

本文实例为大家分享了python实现爬取图书封面的具体代码,供大家参考,具体内容如下 kongfuzi.py 利用更换代理ip,延迟提交数据,设置请求头破解网站的反爬虫机制 impo...

python爬虫系列Selenium定向爬取虎扑篮球图片详解

python爬虫系列Selenium定向爬取虎扑篮球图片详解

前言: 作为一名从小就看篮球的球迷,会经常逛虎扑篮球及湿乎乎等论坛,在论坛里面会存在很多精美图片,包括NBA球队、CBA明星、花边新闻、球鞋美女等等,如果一张张右键另存为的话真是手都点...

Python 爬取携程所有机票的实例代码

Python 爬取携程所有机票的实例代码

打开携程网,查询机票,如广州到成都。 这时网址为:http://flights.ctrip.com/booking/CAN-CTU-day-1.html?DDate1=2018-06-1...