How to find a text and get its page number in Acrobat

2019-09-06 01:36发布

I want to find a text in a PDF opened in Acrobat using VBA and get the number of the page on which it was found. I am able to find the text, but I am not able to get the page number. This is the code I have so far:

Sub Main()
    ' Opens a PDF in a visible Acrobat window, searches it for a fixed text
    ' and shows the result returned by FindText in a message box.

    Dim acrobatApp, viewerDoc

    Set acrobatApp = CreateObject("AcroExch.app")
    Set viewerDoc = CreateObject("AcroExch.AVDoc")

    ' Make the Acrobat window visible before the document is opened.
    acrobatApp.Show

    If viewerDoc.Open("FileName", "") Then
        ' The search runs only when the document could be opened.
        Ok = viewerDoc.FindText("Text to search", 0, 1, 1)
        MsgBox Ok
    End If

    ' Release the COM references.
    Set viewerDoc = Nothing
    Set acrobatApp = Nothing

End Sub

I am not able to set the object with the following line:

' Attempt to create an object from the ProgID "Acrobat.AV_PAGE_VIEW"; this is
' the statement the question reports as not working.
' NOTE(review): page-view objects are presumably obtained from an open AVDoc
' (GetAVPageView) rather than created directly - confirm against the Acrobat IAC reference.
Set acrPDDoc = CreateObject("Acrobat.AV_PAGE_VIEW")

2条回答
冷血范
2楼-- · 2019-09-06 02:14

I know this is an old question, but it was one of the top search results when I was looking for the same info. I never found anything that truly met my needs so I made something up by combining several different resources.

The function below is acceptably fast, even on very large documents. It searches page by page, not word by word, so it will find multi-word matches and words with dashes (case insensitive). It returns the matches for all pages separated by commas.

Hope this is helpful to someone in the future.

Sub Demo()
    ' Example caller: searches the demo PDF for a phrase and displays the
    ' string returned by AdobePdfSearch.
    Dim pagesFound As String

    pagesFound = AdobePdfSearch("my search string", "C:\Demo\Demo.pdf")
    MsgBox pagesFound
End Sub

Function AdobePdfSearch(SearchString As String, strFileName As String, _
                        Optional CloseWhenDone As Boolean = False) As String
'Searches a PDF page by page for SearchString (case-insensitive) and returns the
'1-based numbers of all pages that contain it, separated by commas.
'Returns "" when nothing matches, when SearchString is empty, or when the file
'cannot be opened.
'
'  SearchString  - text to look for; may consist of several words.
'  strFileName   - full path of the PDF to search.
'  CloseWhenDone - optional; True closes the document (without saving) and exits
'                  Acrobat when the search is finished. The default (False)
'                  leaves the document open, as before.
'
'Note: A Reference to the Adobe Library must be set in Tools|References!
'Note! This only works with Acrobat Pro installed on your PC, will not work with Reader

'Upper bound of text elements requested per page; the largest value an Integer
'argument can carry, so that dense pages are not cut off.
Const MAX_HILITE_LENGTH As Integer = 32767

Dim AcroApp As CAcroApp, AcroAVDoc As CAcroAVDoc, AcroPDDoc As CAcroPDDoc
Dim AcroHiliteList As CAcroHiliteList, AcroTextSelect As CAcroPDTextSelect
Dim AcroPage As Object
Dim strNeedle As String, strContent As String, strResult As String
Dim i As Long, j As Long, iNumPages As Long, iNumText As Long

'An empty search string would "match" every page that has text; report no match instead.
If Len(SearchString) = 0 Then Exit Function
strNeedle = LCase(SearchString)

Set AcroApp = CreateObject("AcroExch.App")

'The hilite list does not depend on the page, so build it once - and before the
'document is opened, so that a failure here leaves nothing open.
Set AcroHiliteList = CreateObject("AcroExch.HiliteList")
If AcroHiliteList.Add(0, MAX_HILITE_LENGTH) <> True Then Exit Function

Set AcroAVDoc = CreateObject("AcroExch.AVDoc")
'The second argument is the temporary window title; "" means "no custom title".
If AcroAVDoc.Open(strFileName, "") <> True Then Exit Function

Set AcroPDDoc = AcroAVDoc.GetPDDoc
iNumPages = AcroPDDoc.GetNumPages

For i = 0 To iNumPages - 1

    strContent = ""
    iNumText = 0
    Set AcroTextSelect = Nothing
    Set AcroPage = AcroPDDoc.AcquirePage(i)

    'Text extraction can fail on protected PDFs that can't be read; such pages
    'are treated as empty. Error suppression is limited to the extraction itself.
    On Error Resume Next
    Set AcroTextSelect = AcroPage.CreatePageHilite(AcroHiliteList)
    If Not AcroTextSelect Is Nothing Then
        iNumText = AcroTextSelect.GetNumText
        For j = 0 To iNumText - 1
            strContent = strContent & AcroTextSelect.GetText(j)
        Next j
    End If
    On Error GoTo 0

    If InStr(1, LCase(strContent), strNeedle) > 0 Then
        'Pages are 0-based in the API; report them 1-based.
        If Len(strResult) = 0 Then
            strResult = CStr(i + 1)
        Else
            strResult = strResult & "," & CStr(i + 1)
        End If
    End If

Next i

AdobePdfSearch = strResult

If CloseWhenDone Then
    AcroAVDoc.Close True
    AcroApp.Exit
    Set AcroAVDoc = Nothing: Set AcroApp = Nothing
End If

End Function
查看更多
走好不送
3楼-- · 2019-09-06 02:22

This sub checks each page of the PDF, word by word.

Sub FindtextandPageNumber()
    'Scans every page of a PDF word by word and shows a message box with the
    '1-based page number each time a word equal to FindWord is found.
    'The comparison is an exact, case-sensitive match on single words.

    Dim FindWord As String      'Word you want to search
    Dim acroAppObj As Object
    Dim PDFDocObj As Object
    Dim PDFJScriptObj As Object

    'Long instead of Integer: a page can hold more than 32767 words.
    Dim iword As Long, iTotalWords As Long
    Dim numOfPage As Long, Nthpage As Long
    Dim PageNumber As Long
    Dim word As String, sPath As String

    FindWord = "Hello"
    sPath = "Test.pdf"          'Path of pdf where you want to search

    Set acroAppObj = CreateObject("AcroExch.App")
    Set PDFDocObj = CreateObject("AcroExch.PDDoc")
    acroAppObj.Show

    'Stop early when the file cannot be opened instead of failing later on.
    If Not CBool(PDFDocObj.Open(sPath)) Then
        MsgBox "Could not open " & sPath, vbExclamation
        Set PDFDocObj = Nothing
        Set acroAppObj = Nothing
        Exit Sub
    End If

    numOfPage = PDFDocObj.GetNumPages

    'The JavaScript bridge belongs to the document, so fetch it once.
    Set PDFJScriptObj = PDFDocObj.GetJSObject

    For Nthpage = 0 To numOfPage - 1
        iTotalWords = PDFJScriptObj.getPageNumWords(Nthpage)
        ''check the each word
        For iword = 0 To iTotalWords - 1
            word = Trim(CStr(PDFJScriptObj.getPageNthWord(Nthpage, iword)))
            If word <> "" Then
                If word = FindWord Then
                    'Nthpage is a 0-based index; show the page number 1-based.
                    PageNumber = Nthpage + 1
                    MsgBox PageNumber
                End If
            End If
        Next iword
    Next Nthpage

    'Release the document and the COM references.
    Set PDFJScriptObj = Nothing
    PDFDocObj.Close
    Set PDFDocObj = Nothing
    Set acroAppObj = Nothing

End Sub

查看更多
登录 后发表回答