标题:excel 宏系列-百度抓取PR

-------------------------------------------------------------------------------------------------------------------------------

时间:2012/1/15 19:34:01

-------------------------------------------------------------------------------------------------------------------------------

内容:

Option Explicit

Sub PRmonitor()
    Dim tmp() As String, i As Integer, g As Integer, j As Integer, arr() As String, arr1() As String, arr2() As String, arr3() As String, xmlhttp As Object, m&, p, o
    'arr放url arr1 放title arr2 放meta
   

      'j 有j-1个关键词需要查排名
    j = WorksheetFunction.CountA([a:a])
     '每个关键词需要查前p名
    p = Cells(1, 1)
  For g = 2 To j
   
    Set xmlhttp = CreateObject("Microsoft.XMLHTTP")
    With xmlhttp

        .Open "get", "http://202.108.22.5/s?wd=" & Cells(g, 1) & "&pn=0&rn=" & p, False
        .send
        '分列特征字符串为cellpadding="0" cellspacing="0" 空格 但是第一个切出来的不是结果要忽略
        tmp = Split(.responsetext, "cellpadding=""0"" cellspacing=""0"" ")
    End With
    '对切出来的结果进行分组得到url
    For i = 1 To p
             m = m + 1
             ReDim Preserve arr(1 To m)
             ReDim Preserve arr1(1 To m)
              ReDim Preserve arr2(1 To m)
                   ReDim Preserve arr2(1 To m)
                    ReDim Preserve arr3(1 To m)
            
           '  arr(m) = Split(Split(tmp(i + 1), "href=""http://")(1), "/")(0)
               arr(m) = Split(Split(tmp(i + 1), "href=""http://")(1), """")(0)
          '对切出来的结果进行分组得到title
                 '讲切出来的标签闭合 再用removehtml过滤 arr1 放title
                 arr1(m) = Split(Split(tmp(i + 1), "<a ")(1), "</a>")(0)
          o = "<" & arr1(m)
 arr1(m) = RemoveHTML(o)
         '对切出来的结果进行分组得到meta arr2 放meta
        
 arr2(m) = Split(Split(tmp(i + 1), "</a>")(1), "<br>")(0)
  o = arr2(m)
  arr2(m) = RemoveHTML(o)
   '对切出来的结果进行分组得到meta arr3 放url
  arr3(m) = Split(Split(tmp(i + 1), "href=""http://")(1), """")(0)
    Next

   
'arr1放title arr2放描述 arr放网址
   '显示结果title
  ' Cells(2, g).Resize(UBound(arr), 1) = Application.Transpose(arr1)
      Cells(g, 2).Resize(1, UBound(arr)) = arr1
    '显示结果meta
   Cells(g + j + 1, 2).Resize(1, UBound(arr)) = arr2
  
    Cells(g + j + j + 1, 2).Resize(1, UBound(arr)) = arr
   
   '左上角显示关键词完成进度
   Cells(1, 1) = (g - 1) & "  of " & j
    Erase arr
    Set xmlhttp = Nothing
     m = 0
      
   
  Next
   '整理第一列

 

      Cells(1, 1) = p
        For i = 2 To p + 1
        Cells(1, i) = "Position # " & i - 1
 
        Next
       

End Sub

Sub all()
Call PRmonitor
End Sub
Sub clearsreen()
Dim tmp
tmp = Cells(1, 1)
    Cells.Select
    Selection.ClearContents
        Sheets("sheet2").Select
         Cells.Select

    Selection.ClearContents
         Sheets("sheet1").Select
        Columns("A:A").Select
    Selection.Borders(xlDiagonalDown).LineStyle = xlNone
    Selection.Borders(xlDiagonalUp).LineStyle = xlNone
    With Selection.Borders(xlEdgeLeft)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlEdgeTop)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlEdgeBottom)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlEdgeRight)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlInsideVertical)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlInsideHorizontal)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Interior
        .Pattern = xlSolid
        .PatternColorIndex = xlAutomatic
        .Color = 255
        .TintAndShade = 0
        .PatternTintAndShade = 0
    End With
    With Selection.Font
        .ThemeColor = xlThemeColorDark1
        .TintAndShade = 0
    End With
    Rows("1:1").Select
    With Selection.Interior
        .Pattern = xlSolid
        .PatternColorIndex = xlAutomatic
        .Color = 255
        .TintAndShade = 0
        .PatternTintAndShade = 0
    End With
    With Selection.Font
        .ThemeColor = xlThemeColorDark1
        .TintAndShade = 0
    End With
    Rows("1:24").Select
    Selection.RowHeight = 18.75
    ActiveWindow.ScrollRow = 5
    ActiveWindow.ScrollRow = 4
    ActiveWindow.ScrollRow = 3
    ActiveWindow.ScrollRow = 2
    ActiveWindow.ScrollRow = 1
    Columns("A:M").Select
    Selection.ColumnWidth = 23.63
Dim a
On Error Resume Next

For Each a In Sheets
a.ChartObjects.Delete
Next

    Range("B58").Select
    ActiveCell.FormulaR1C1 = " by Stephen Hou,Nov 2011"
    Cells(1, 1) = tmp


 
      Range("A2").Select
     
      End Sub
     
     
Function RemoveHTML(strText)
    Dim nPos1
    Dim nPos2
   
    nPos1 = InStr(strText, "<")
    Do While nPos1 > 0
        nPos2 = InStr(nPos1 + 1, strText, ">")
        If nPos2 > 0 Then
            strText = Left(strText, nPos1 - 1) & Mid(strText, nPos2 + 1)
        Else
            Exit Do
        End If
        nPos1 = InStr(strText, "<")
    Loop
   
    RemoveHTML = strText
End Function

Option Explicit

Sub PRmonitor()
    Dim tmp() As String, i As Integer, g As Integer, j As Integer, arr() As String, arr1() As String, arr2() As String, arr3() As String, xmlhttp As Object, m&, p, o
    'arr放url arr1 放title arr2 放meta
   

      'j 有j-1个关键词需要查排名
    j = WorksheetFunction.CountA([a:a])
     '每个关键词需要查前p名
    p = Cells(1, 1)
  For g = 2 To j
   
    Set xmlhttp = CreateObject("Microsoft.XMLHTTP")
    With xmlhttp

        .Open "get", "http://202.108.22.5/s?wd=" & Cells(g, 1) & "&pn=0&rn=" & p, False
        .send
        '分列特征字符串为cellpadding="0" cellspacing="0" 空格 但是第一个切出来的不是结果要忽略
        tmp = Split(.responsetext, "cellpadding=""0"" cellspacing=""0"" ")
    End With
    '对切出来的结果进行分组得到url
    For i = 1 To p
             m = m + 1
             ReDim Preserve arr(1 To m)
             ReDim Preserve arr1(1 To m)
              ReDim Preserve arr2(1 To m)
                   ReDim Preserve arr2(1 To m)
                    ReDim Preserve arr3(1 To m)
            
           '  arr(m) = Split(Split(tmp(i + 1), "href=""http://")(1), "/")(0)
               arr(m) = Split(Split(tmp(i + 1), "href=""http://")(1), """")(0)
          '对切出来的结果进行分组得到title
                 '讲切出来的标签闭合 再用removehtml过滤 arr1 放title
                 arr1(m) = Split(Split(tmp(i + 1), "<a ")(1), "</a>")(0)
          o = "<" & arr1(m)
 arr1(m) = RemoveHTML(o)
         '对切出来的结果进行分组得到meta arr2 放meta
        
 arr2(m) = Split(Split(tmp(i + 1), "</a>")(1), "<br>")(0)
  o = arr2(m)
  arr2(m) = RemoveHTML(o)
   '对切出来的结果进行分组得到meta arr3 放url
  arr3(m) = Split(Split(tmp(i + 1), "href=""http://")(1), """")(0)
    Next

   
'arr1放title arr2放描述 arr放网址
   '显示结果title
  ' Cells(2, g).Resize(UBound(arr), 1) = Application.Transpose(arr1)
      Cells(g, 2).Resize(1, UBound(arr)) = arr1
    '显示结果meta
   Cells(g + j + 1, 2).Resize(1, UBound(arr)) = arr2
  
    Cells(g + j + j + 1, 2).Resize(1, UBound(arr)) = arr
   
   '左上角显示关键词完成进度
   Cells(1, 1) = (g - 1) & "  of " & j
    Erase arr
    Set xmlhttp = Nothing
     m = 0
      
   
  Next
   '整理第一列

 

      Cells(1, 1) = p
        For i = 2 To p + 1
        Cells(1, i) = "Position # " & i - 1
 
        Next
       

End Sub

Sub all()
Call PRmonitor
End Sub
Sub clearsreen()
Dim tmp
tmp = Cells(1, 1)
    Cells.Select
    Selection.ClearContents
        Sheets("sheet2").Select
         Cells.Select

    Selection.ClearContents
         Sheets("sheet1").Select
        Columns("A:A").Select
    Selection.Borders(xlDiagonalDown).LineStyle = xlNone
    Selection.Borders(xlDiagonalUp).LineStyle = xlNone
    With Selection.Borders(xlEdgeLeft)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlEdgeTop)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlEdgeBottom)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlEdgeRight)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlInsideVertical)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Borders(xlInsideHorizontal)
        .LineStyle = xlContinuous
        .ColorIndex = 0
        .TintAndShade = 0
        .Weight = xlThin
    End With
    With Selection.Interior
        .Pattern = xlSolid
        .PatternColorIndex = xlAutomatic
        .Color = 255
        .TintAndShade = 0
        .PatternTintAndShade = 0
    End With
    With Selection.Font
        .ThemeColor = xlThemeColorDark1
        .TintAndShade = 0
    End With
    Rows("1:1").Select
    With Selection.Interior
        .Pattern = xlSolid
        .PatternColorIndex = xlAutomatic
        .Color = 255
        .TintAndShade = 0
        .PatternTintAndShade = 0
    End With
    With Selection.Font
        .ThemeColor = xlThemeColorDark1
        .TintAndShade = 0
    End With
    Rows("1:24").Select
    Selection.RowHeight = 18.75
    ActiveWindow.ScrollRow = 5
    ActiveWindow.ScrollRow = 4
    ActiveWindow.ScrollRow = 3
    ActiveWindow.ScrollRow = 2
    ActiveWindow.ScrollRow = 1
    Columns("A:M").Select
    Selection.ColumnWidth = 23.63
Dim a
On Error Resume Next

For Each a In Sheets
a.ChartObjects.Delete
Next

    Range("B58").Select
    ActiveCell.FormulaR1C1 = " by Stephen Hou,Nov 2011"
    Cells(1, 1) = tmp


 
      Range("A2").Select
     
      End Sub
     
     
Function RemoveHTML(strText)
    Dim nPos1
    Dim nPos2
   
    nPos1 = InStr(strText, "<")
    Do While nPos1 > 0
        nPos2 = InStr(nPos1 + 1, strText, ">")
        If nPos2 > 0 Then
            strText = Left(strText, nPos1 - 1) & Mid(strText, nPos2 + 1)
        Else
            Exit Do
        End If
        nPos1 = InStr(strText, "<")
    Loop
   
    RemoveHTML = strText
End Function