Python 抓取 51job 公司名称、招聘职位以及网址（极速版）
先使用以下命令安装 bs4：
sudo easy_install pip
sudo pip install bs4
# -*- coding: utf8 -*-
# Scrape the 51job Shanghai landing page.
#
# For every <a> carrying a title attribute inside the div with id
# "dataidea_1", print the title and the href, download the linked page, and
# print the text of each <a href=...> inside that page's "redline" div.  The
# text of the page's <p class="txt_font1"> is printed as well when it
# contains "tp" at index 2 or later.
#
# NOTE(review): the published listing had lost its indentation and used
# typographic quotes; the nesting below is a reconstruction -- confirm.
import re
import sys
import time

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 exposes urlopen from urllib.request; alias it so the calls
    # below work unchanged on either interpreter.
    import urllib.request as urllib2

from bs4 import BeautifulSoup

if sys.version_info[0] == 2:
    # Python 2 only: switch the interpreter-wide default encoding to UTF-8
    # (presumably so that non-ASCII titles can be printed -- confirm).
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')


def _print_page_details(page_html):
    """Print the redline link texts and the txt_font1 note of one page."""
    soup1 = BeautifulSoup(page_html, "html.parser")

    div1 = soup1.find("div", class_="redline")
    if div1 is not None:
        for link1 in div1.find_all("a", href=True):
            print(link1.get_text())

    # Look the paragraph up once instead of three times.
    note = soup1.find("p", "txt_font1")
    if note is not None:
        note_text = note.get_text()
        # NOTE(review): "> 1" ignores a match at index 0 or 1 -- confirm
        # that this threshold is intended.
        if note_text.find("tp") > 1:
            print(note_text)


html = urllib2.urlopen("http://www.51job.com/shanghai", timeout=5).read()
# Name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed.
soup = BeautifulSoup(html, "html.parser")
div = soup.find("div", id="dataidea_1")

if div is None:
    # Without this guard a missing container raised AttributeError below.
    sys.stderr.write("div with id 'dataidea_1' not found on the page\n")
else:
    for links in div.find_all("a", title=True):
        href = links.get("href")
        print(links.get("title"))
        print(href)

        html1 = None
        if href:
            try:
                html1 = urllib2.urlopen(href, timeout=5).read()
            except (IOError, ValueError) as err:
                # A timeout, HTTP/URL error or scheme-less URL on a single
                # link should not abort the remaining links.
                sys.stderr.write("skipping %s: %s\n" % (href, err))

        if html1 is not None:
            _print_page_details(html1)

        # Blank-line separator between entries (kept as two prints).
        print("\n")
        print("\n")
先使用以下命令安装 bs4：
sudo easy_install pip
sudo pip install bs4
# -*- coding: utf8 -*-
# Scrape the 51job Shanghai landing page.
#
# For every <a> carrying a title attribute inside the div with id
# "dataidea_1", print the title and the href, download the linked page, and
# print the text of each <a href=...> inside that page's "redline" div.  The
# text of the page's <p class="txt_font1"> is printed as well when it
# contains "tp" at index 2 or later.
#
# NOTE(review): the published listing had lost its indentation and used
# typographic quotes; the nesting below is a reconstruction -- confirm.
import re
import sys
import time

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 exposes urlopen from urllib.request; alias it so the calls
    # below work unchanged on either interpreter.
    import urllib.request as urllib2

from bs4 import BeautifulSoup

if sys.version_info[0] == 2:
    # Python 2 only: switch the interpreter-wide default encoding to UTF-8
    # (presumably so that non-ASCII titles can be printed -- confirm).
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')


def _print_page_details(page_html):
    """Print the redline link texts and the txt_font1 note of one page."""
    soup1 = BeautifulSoup(page_html, "html.parser")

    div1 = soup1.find("div", class_="redline")
    if div1 is not None:
        for link1 in div1.find_all("a", href=True):
            print(link1.get_text())

    # Look the paragraph up once instead of three times.
    note = soup1.find("p", "txt_font1")
    if note is not None:
        note_text = note.get_text()
        # NOTE(review): "> 1" ignores a match at index 0 or 1 -- confirm
        # that this threshold is intended.
        if note_text.find("tp") > 1:
            print(note_text)


html = urllib2.urlopen("http://www.51job.com/shanghai", timeout=5).read()
# Name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed.
soup = BeautifulSoup(html, "html.parser")
div = soup.find("div", id="dataidea_1")

if div is None:
    # Without this guard a missing container raised AttributeError below.
    sys.stderr.write("div with id 'dataidea_1' not found on the page\n")
else:
    for links in div.find_all("a", title=True):
        href = links.get("href")
        print(links.get("title"))
        print(href)

        html1 = None
        if href:
            try:
                html1 = urllib2.urlopen(href, timeout=5).read()
            except (IOError, ValueError) as err:
                # A timeout, HTTP/URL error or scheme-less URL on a single
                # link should not abort the remaining links.
                sys.stderr.write("skipping %s: %s\n" % (href, err))

        if html1 is not None:
            _print_page_details(html1)

        # Blank-line separator between entries (kept as two prints).
        print("\n")
        print("\n")