标题:excel seo sem 结果抓取模块不定期更新 1223
-------------------------------------------------------------------------------------------------------------------------------
时间:2011/12/20 0:10:13
-------------------------------------------------------------------------------------------------------------------------------
内容:
12.20版本抓取模块一般就是1个思路
首先找到切割每个数组的特征字符串 找的好 就不用2次判定
分派到每个组后 在小组内进行循环 利用split来找到需要的数据的前面的特征字符串和后面
特征字符串 最后显示 该版本优化了显示效果 动态显示抓取词的进度
Sub zhuaquseo()
Dim tmp() As String, i As Integer, g As Integer, j As Integer, arr() As String, xmlhttp As Object, m&, p
Range("a2:a" & Range("a65536").End(3).Row).Select
Selection.Copy
Range("B1").PasteSpecial Paste:=xlPasteAll, Operation:=xlNone, SkipBlanks:=False, Transpose:=True
'j 有j-1个关键词需要查排名
j = WorksheetFunction.CountA([1:1])
'每个关键词需要查前p名
p = Cells(1, 1)
For g = 2 To j
Set xmlhttp = CreateObject("Microsoft.XMLHTTP")
With xmlhttp
.Open "get", "http://202.108.22.5/s?wd=" & Cells(1, g) & "&pn=0&rn=" & p, False
.send
'分列特征字符串为cellpadding="0" cellspacing="0" 空格 但是第一个切出来的不是结果要忽略
tmp = Split(.responsetext, "cellpadding=""0"" cellspacing=""0"" ")
End With
'对切出来的结果进行分组
For i = 1 To p
m = m + 1
ReDim Preserve arr(1 To m)
arr(m) = Split(Split(tmp(i + 1), "href=""http://")(1), "/")(0)
Next
'显示结果网址
Cells(2, g).Resize(UBound(arr), 1) = Application.Transpose(arr)
'左上角显示关键词完成进度
Cells(1, 1) = (g - 1) & " of " & j
Erase arr
Set xmlhttp = Nothing
m = 0
Next
'整理第一列
Columns("A:A").Select
Selection.ClearContents
Cells(1, 1) = p
For i = 2 To p + 1
Cells(i, 1) = i - 1
Next
End Sub
'这个是sem模块
Sub zhuaqusem()
Dim tmp() As String, p, tmp1() As String, i As Integer, g As Integer, j As Integer, arr() As String, arr1() As String, xmlhttp As Object, m&, n&, z() As String
' tmp 放左边结果 tmp1 放右边结果
' keywords change from colomn to row
Range("a2:a" & Range("a65536").End(3).Row).Select
Selection.Copy
Range("B1").PasteSpecial Paste:=xlPasteAll, Operation:=xlNone, SkipBlanks:=False, Transpose:=True
' 查询有多少关键词需要搜素 放到j里面
j = WorksheetFunction.CountA([a:a])
p = Cells(1, 1)
For g = 1 To j
Set xmlhttp = CreateObject("Microsoft.XMLHTTP")
With xmlhttp
' 查看搜索结果页面
.Open "get", "http://www.baidu.com/s?wd=" & Cells(1, g + 1) & "&pn=0&rn=", False
.send
' 左边的广告网站进行剥离 分有没有绿色背景2种情况 假设是模仿自然结果的 不带绿色背景 分割字符是<font size=-1 color="#008000"> -1不带引号
tmp = Split(.responsetext, "<font size=-1 color=""#008000"">")
' 如果结果为空
If UBound(tmp) = 0 Then
' 左边广告绿色背景 按另外一种方法分割 分组放到tmp里面
tmp = Split(.responsetext, "<font size=""-1"" color=""#008000"" style=""margin-left:6px;"">")
' 因为带绿色背景 所以得到的网站列表是重复了1次 要减半
ReDim Preserve tmp(Int(UBound(tmp) / 2))
End If
' 右边广告进行剥离如果发现出现百度e.baidu.com
z = Split(.responsetext, "<font color=""#008000"" size=""-1"">e.baidu.com</font></a>")
If UBound(z) <> 0 Then
'那么对前面的代码进行分组右侧广告 装入tmp1分列字符串<font size="-1" color="#008000"> tmp1 放右侧结果
tmp1 = Split(z(0), "<font size=""-1"" color=""#008000"">")
End If
End With
For i = 1 To UBound(tmp)
m = m + 1
' 把左测结果放到arr里面
ReDim Preserve arr(1 To m)
' remove the <in the back
arr(m) = Split(tmp(i), "<")(0)
arr(m) = Split(arr(m), " ")(0) ' remove the space in the back
arr(m) = Split(arr(m), "/")(0) ' remove /
Next
' 把左侧广告网址开始装入
' 如果左测没广告 就开始装右侧广告
If UBound(tmp) = 0 Then
m = 1
Else:
'装入左测广告
Cells(2, g + 1).Resize(UBound(arr), 1) = Application.Transpose(arr)
'左侧广告变色蓝色
Range(Cells(2, g + 1), Cells((2 + UBound(arr)), g + 1)).Interior.ColorIndex = 10
End If
Erase tmp
Erase arr
For i = 1 To UBound(tmp1)
On Error Resume Next '有问题的地方
n = n + 1
ReDim Preserve arr1(1 To n)
' 把右侧网站结果放到arr1里面去
arr1(n) = Split(tmp1(i), "<")(0)
arr1(n) = Split(arr1(n), "/")(0)
Next
' take website list in the below of righside sem into colomn
' if could get any results give up
If UBound(tmp1) <> 0 Then
'右侧网站放入进去 从左侧广告的数的行数开始向下放
Cells(m + 1, g + 1).Resize(UBound(arr1), 1) = Application.Transpose(arr1)
End If
Erase tmp1
Erase arr1
Set xmlhttp = Nothing
m = 0
n = 0
Cells(1, 1) = g & " of " & j
Next
For i = 2 To 20
Cells(i, 1) = i - 1
Next
Cells(1, 1) = p
End Sub
Sub clearsreen()
Dim tmp
tmp = Cells(1, 1)
Cells.Select
With Selection.Interior
.ThemeColor = xlThemeColorDark1
End With
Selection.ClearContents
Sheets("sheet2").Select
Cells.Select
Selection.ClearContents
Sheets("sheet1").Select
Columns("A:A").Select
Selection.Borders(xlDiagonalDown).LineStyle = xlNone
Selection.Borders(xlDiagonalUp).LineStyle = xlNone
With Selection.Borders(xlEdgeLeft)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlEdgeTop)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlEdgeBottom)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlEdgeRight)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlInsideVertical)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Borders(xlInsideHorizontal)
.LineStyle = xlContinuous
.ColorIndex = 0
.TintAndShade = 0
.Weight = xlThin
End With
With Selection.Interior
.Pattern = xlSolid
.PatternColorIndex = xlAutomatic
.Color = 255
.TintAndShade = 0
.PatternTintAndShade = 0
End With
With Selection.Font
.ThemeColor = xlThemeColorDark1
.TintAndShade = 0
End With
Rows("1:1").Select
With Selection.Interior
.Pattern = xlSolid
.PatternColorIndex = xlAutomatic
.Color = 255
.TintAndShade = 0
.PatternTintAndShade = 0
End With
With Selection.Font
.ThemeColor = xlThemeColorDark1
.TintAndShade = 0
End With
Rows("1:24").Select
Selection.RowHeight = 18.75
ActiveWindow.ScrollRow = 5
ActiveWindow.ScrollRow = 4
ActiveWindow.ScrollRow = 3
ActiveWindow.ScrollRow = 2
ActiveWindow.ScrollRow = 1
Columns("A:M").Select
Selection.ColumnWidth = 23.63
Dim a
On Error Resume Next
For Each a In Sheets
a.ChartObjects.Delete
Next
Range("B58").Select
ActiveCell.FormulaR1C1 = " by Stephen Hou,Nov 2011"
Cells(1, 1) = tmp
Range("A2").Select
End Sub