标题:excel 宏系列-百度抓取PR
-------------------------------------------------------------------------------------------------------------------------------
时间:2012/1/15 19:34:01
-------------------------------------------------------------------------------------------------------------------------------
内容:
Option Explicit
Sub PRmonitor()
Dim tmp() As String, i As Integer, g As Integer, j As Integer, arr() As String, arr1() As String, arr2() As String, arr3() As String, xmlhttp As Object, m&, p, o
'arr放url arr1 放title arr2 放meta
'j 有j-1个关键词需要查排名
j = WorksheetFunction.CountA([a:a])
'每个关键词需要查前p名
p = Cells(1, 1)
For g = 2 To j
Set xmlhttp = CreateObject("Microsoft.XMLHTTP")
With xmlhttp
.Open "get", "http://202.108.22.5/s?wd=" & Cells(g, 1) & "&pn=0&rn=" & p, False
.send
'分列特征字符串为cellpadding="0" cellspacing="0" 空格 但是第一个切出来的不是结果要忽略
tmp = Split(.responsetext, "cellpadding=""0"" cellspacing=""0"" ")
End With
'对切出来的结果进行分组得到url
For i = 1 To p
m = m + 1
ReDim Preserve arr(1 To m)
ReDim Preserve arr1(1 To m)
ReDim Preserve arr2(1 To m)
ReDim Preserve arr2(1 To m)
ReDim Preserve arr3(1 To m)
' arr(m) = Split(Split(tmp(i + 1), "href=""http://")(1), "/")(0)
arr(m) = Split(Split(tmp(i + 1), "href=""http://")(1), """")(0)
'对切出来的结果进行分组得到title
'讲切出来的标签闭合 再用removehtml过滤 arr1 放title
arr1(m) = Split(Split(tmp(i + 1), "<a ")(1), "</a>")(0)
o = "<" & arr1(m)
arr1(m) = RemoveHTML(o)
'对切出来的结果进行分组得到meta arr2 放meta
arr2(m) = Split(Split(tmp(i + 1), "</a>")(1), "<br>")(0)
o = arr2(m)
arr2(m) = RemoveHTML(o)
'对切出来的结果进行分组得到meta arr3 放url
arr3(m) = Split(Split(tmp(i + 1), "href=""http://")(1), """")(0)
Next
'arr1放title arr2放描述 arr放网址
'显示结果title
' Cells(2, g).Resize(UBound(arr), 1) = Application.Transpose(arr1)
Cells(g, 2).Resize(1, UBound(arr)) = arr1
'显示结果meta
Cells(g + j + 1, 2).Resize(1, UBound(arr)) = arr2
Cells(g + j + j + 1, 2).Resize(1, UBound(arr)) = arr
'左上角显示关键词完成进度
Cells(1, 1) = (g - 1) & " of " & j
Erase arr
Set xmlhttp = Nothing
m = 0
Next
'整理第一列
Cells(1, 1) = p
For i = 2 To p + 1
Cells(1, i) = "Position # " & i - 1
Next
End Sub
Sub all()
Call PRmonitor
End Sub
Sub clearsreen()
Dim tmp
tmp = Cells(1, 1)
Cells.Select
Selection.ClearContents
Sheets("sheet2").Select
Cells.Select
Selection.ClearContents
Sheets("sheet1").Select
Columns("A:A").Select
Selection.Borders(xlDiagonalDown).LineStyle = xlNone
Selection.Borders(xlDiagonalUp).LineStyle = xlNone
With Selection.Borders(xlEdgeLeft)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlEdgeTop)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlEdgeBottom)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlEdgeRight)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlInsideVertical)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlInsideHorizontal)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Interior
.Pattern = xlSolid
.PatternColorIndex = xlAutomatic
.Color = 255
.TintAndShade = 0
.PatternTintAndShade = 0
End With
With Selection.Font
.ThemeColor = xlThemeColorDark1
.TintAndShade = 0
End With
Rows("1:1").Select
With Selection.Interior
.Pattern = xlSolid
.PatternColorIndex = xlAutomatic
.Color = 255
.TintAndShade = 0
.PatternTintAndShade = 0
End With
With Selection.Font
.ThemeColor = xlThemeColorDark1
.TintAndShade = 0
End With
Rows("1:24").Select
Selection.RowHeight = 18.75
ActiveWindow.ScrollRow = 5
ActiveWindow.ScrollRow = 4
ActiveWindow.ScrollRow = 3
ActiveWindow.ScrollRow = 2
ActiveWindow.ScrollRow = 1
Columns("A:M").Select
Selection.ColumnWidth = 23.63
Dim a
On Error Resume Next
For Each a In Sheets
a.ChartObjects.Delete
Next
Range("B58").Select
ActiveCell.FormulaR1C1 = " by Stephen Hou,Nov 2011"
Cells(1, 1) = tmp
Range("A2").Select
End Sub
Function RemoveHTML(strText)
Dim nPos1
Dim nPos2
nPos1 = InStr(strText, "<")
Do While nPos1 > 0
nPos2 = InStr(nPos1 + 1, strText, ">")
If nPos2 > 0 Then
strText = Left(strText, nPos1 - 1) & Mid(strText, nPos2 + 1)
Else
Exit Do
End If
nPos1 = InStr(strText, "<")
Loop
RemoveHTML = strText
End Function