标题:Excel 宏系列之抓取ad copy 百度广告位
-------------------------------------------------------------------------------------------------------------------------------
时间:2012/1/15 19:15:22
-------------------------------------------------------------------------------------------------------------------------------
内容:
Sub zhuaqusem()
Dim tmp() As String, p, tmp1() As String, brr() As String, o, r, crr() As String, err() As String, drr() As String, i As Integer, g As Integer, j As Integer, arr() As String, arr1() As String, xmlhttp As Object, m&, n&, z() As String
' tmp 放左边结果 tmp1 放右边结果,brr放左侧title crr左侧放描述 drr放右侧title err放右侧描述
' keywords change from colomn to row
Range("A2").Select
Range(Selection, Selection.End(xlDown)).Select
Selection.Copy
Range("B1").Select
Selection.PasteSpecial Paste:=xlPasteAll, Operation:=xlNone, SkipBlanks:= _
False, Transpose:=True
' 查询有多少关键词需要搜素 放到j里面
j = WorksheetFunction.CountA([a:a])
p = Cells(1, 1)
For g = 1 To j
Set xmlhttp = CreateObject("Microsoft.XMLHTTP")
With xmlhttp
' 查看搜索结果页面
.Open "get", "http://www.baidu.com/s?wd=" & Cells(1, g + 1) & "&pn=0&rn=", False
.send
' 左边的广告网站进行剥离 分有没有绿色背景2种情况 假设是模仿自然结果的 不带绿色背景 分割字符是<font size=-1 color="#008000"> -1不带引号
tmp = Split(.responsetext, "<font size=-1 color=""#008000"">")
' 如果结果为空
If UBound(tmp) = 0 Then
' 左边广告绿色背景 按另外一种方法分割 分组放到tmp里面
tmp = Split(.responsetext, "<font size=""-1"" color=""#008000"" style=""margin-left:6px;"">")
' 因为带绿色背景 所以得到的网站列表是重复了1次 要减半
ReDim Preserve tmp(Int(UBound(tmp) / 2))
End If
' 右边广告进行剥离如果发现出现百度e.baidu.com
z = Split(.responsetext, "<font color=""#008000"" size=""-1"">e.baidu.com</font></a>")
If UBound(z) <> 0 Then
'那么对前面的代码进行分组右侧广告 装入tmp1分列字符串<font size="-1" color="#008000"> tmp1 放右侧结果
tmp1 = Split(z(0), "<font size=""-1"" color=""#008000"">")
End If
End With
For i = 1 To UBound(tmp)
m = m + 1
' 把左测url结果放到arr里面
ReDim Preserve arr(1 To m)
ReDim Preserve brr(1 To m)
ReDim Preserve crr(1 To m)
' remove the <in the back
arr(m) = Split(tmp(i), "<")(0)
arr(m) = Split(arr(m), " ")(0) ' remove the space in the back
arr(m) = Split(arr(m), "/")(0) ' remove /
' 把标题结果放到brr里面 描述放crr
brr(m) = Split(Split(tmp(i - 1), "<table id=""30")(1), "<br>")(0)
brr(m) = RemoveHTML("<" & brr(m))
crr(m) = Split(Split(tmp(i - 1), "<table id=""30")(1), "<br>")(1)
crr(m) = RemoveHTML("<" & crr(m))
Next
' 把左侧广告网址开始装入
' 如果左测没广告 就开始装右侧广告
If UBound(tmp) = 0 Then
m = 1
Else:
'装入左测广告
r = 2
For i = 2 To UBound(arr)
'arr网站 brr放标题 crr放描述
Cells(r, g + 1) = brr(i)
Cells(r, g + 1).Font.ColorIndex = 5
Cells(r, g + 1).WrapText = False
Cells(r + 1, g + 1) = crr(i)
Cells(r + 1, g + 1).WrapText = True
Cells(r + 1, g + 1).Rows.RowHeight = 28
Cells(r + 2, g + 1) = arr(i)
Cells(r + 2, g + 1).Font.ColorIndex = 10
'' Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(arr)
' Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(brr)
' Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(crr)
'左侧广告变色蓝色
' Range(Cells(2, g + 1), Cells((1 + UBound(arr)), g + 1)).Interior.ColorIndex = 10
r = r + 4
Next
End If
Erase tmp
Erase arr
For i = 1 To UBound(tmp1)
On Error Resume Next '有问题的地方
n = n + 1
ReDim Preserve arr1(1 To n)
ReDim Preserve drr(1 To n)
ReDim Preserve err(1 To n)
' 把右侧网站结果放到arr1里面去
arr1(n) = Split(tmp1(i), "<")(0)
arr1(n) = Split(arr1(n), "/")(0)
drr(n) = Split(Split(tmp1(i), "<div id=""bdfs")(1), "<br>")(0)
drr(n) = RemoveHTML(drr(n))
'drr 发标题 err 放描述
err(n) = Split(Split(tmp1(i), "<div id=""bdfs")(1), "<br>")(1)
err(n) = RemoveHTML(err(n))
Next
' take website list in the below of righside sem into colomn
' if could get any results give up
If UBound(tmp1) <> 0 Then
'右侧网站放入进去 从左侧广告的数的行数开始向下放
o = 4 * m + 1
For n = 1 To UBound(arr1)
'drr 发标题 err 放描述
Cells(o, g + 1) = drr(i)
Cells(o, g + 1).Font.ColorIndex = 5
Cells(o, g + 1).WrapText = False
Cells(o + 1, g + 1) = err(i)
Cells(o + 1, g + 1).WrapText = True
Cells(o + 1, g + 1).Rows.RowHeight = 28
'URl
Cells(o + 2, g + 1) = arr1(i)
Cells(o + 2, g + 1).Font.ColorIndex = 10
o = o + 4
Next
End If
Erase tmp1
Erase arr1
Set xmlhttp = Nothing
m = 0
n = 0
Cells(1, 1) = g & " of " & j
Next
For i = 2 To 20
Cells(i, 1) = i - 1
Next
Cells(1, 1) = p
End Sub