In This Topic
Programming / OCR / Creating a searchable PDF (PDF/A) document from the content of the document feeder of a scanner

Creating a searchable PDF (PDF/A) document from the content of the document feeder of a scanner

In This Topic

This example shows how to convert a physical document into a searchable PDF/A document (PDF 1.4 based, PDF/A-1b conformance) by scanning it through the document feeder of a scanner.

First, declare the required objects and set up the scanner (the TWAIN protocol is used here). Next, create the PDF document that will receive the pages. Then read the image-based pages from the scanner one by one, add each of them to the PDF document, and OCR each newly added page. When all pages have been processed, save the document, close the TWAIN source, and release the resources.

Copy Code
'We assume that GdPicture has been correctly installed and unlocked.
'
'Acquires pages from a TWAIN source, adds each acquired image as a page of a
'new PDF/A-1b document, OCRs each added page and saves the result to
'"pdfocr.pdf". A per-page status line is collected and shown at the end.

Dim ImageID As Integer = 0
Dim bContinue As Boolean = False
Dim message As String = "Done !" & vbCrLf
Dim oGdPictureImaging As New GdPictureImaging()
Dim oGdPicturePDF As New GdPicturePDF()

Try
    If (oGdPictureImaging.TwainSelectSource(Me.Handle) AndAlso
        oGdPictureImaging.TwainOpenDefaultSource(Me.Handle)) Then

        Try
            oGdPictureImaging.TwainSetAutoFeed(True) 'Enabling AutoFeed option.
            oGdPictureImaging.TwainSetAutoScan(True) 'Achieving the maximum scanning rate.
            oGdPictureImaging.TwainSetResolution(200)
            oGdPictureImaging.TwainSetPixelType(TwainPixelType.TWPT_BW) 'Setting the image to be Black & White.
            oGdPictureImaging.TwainSetBitDepth(1) '1 bpp

            'Creating the destination PDF document.
            oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b)

            Do
                ImageID = oGdPictureImaging.TwainAcquireToGdPictureImage(Me.Handle)

                If oGdPictureImaging.GetStat() = GdPictureStatus.OK Then
                    'Creating an image-based page in the destination document.
                    If oGdPicturePDF.AddImageFromGdPictureImage(ImageID, False, False) = GdPictureStatus.OK Then
                        'OCR-ing the currently created page, if the creation has been successful.
                        oGdPicturePDF.OcrPage("eng", "C:\GdPicture.NET 14\Redist\OCR", "", 300)
                    End If

                    'GetStat() reports the status of the last PDF operation (adding the page or OCR-ing it).
                    message = message & "Page nr." & oGdPicturePDF.GetCurrentPage().ToString() & " - status: " & oGdPicturePDF.GetStat().ToString() & vbCrLf

                    'Releasing the image.
                    oGdPictureImaging.ReleaseGdPictureImage(ImageID)
                End If

                If oGdPictureImaging.TwainGetState() <= TwainStatus.TWAIN_SOURCE_ENABLED Then
                    'No further transfer is pending: let the user decide whether to scan more pages.
                    bContinue = (MessageBox.Show("Do you want to acquire other pages?", "", MessageBoxButtons.YesNo, MessageBoxIcon.Question) = DialogResult.Yes)
                Else
                    'The source state is above "enabled": keep acquiring.
                    bContinue = True
                End If
            Loop While bContinue

            oGdPicturePDF.SaveToFile("pdfocr.pdf", True)
            message = message & "Saving - status: " & oGdPicturePDF.GetStat().ToString()
        Finally
            'Close the source even if acquiring, OCR-ing or saving throws.
            oGdPictureImaging.TwainCloseSource()
        End Try

        MessageBox.Show(message, "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
    Else
        'VB has no "\n" escape sequence, so vbCrLf is used for line breaks; "&" and ToString()
        'avoid the numeric conversion that "+" attempts when mixing a String with a non-String value.
        MessageBox.Show("Can't open the default source." & vbCrLf & "result code: " & oGdPictureImaging.TwainGetLastResultCode().ToString() &
                        vbCrLf & "condition code: " & oGdPictureImaging.TwainGetLastConditionCode().ToString(), "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
    End If
Finally
    'Release both GdPicture objects on every path, including exceptions.
    oGdPictureImaging.Dispose()
    oGdPicturePDF.Dispose()
End Try
Copy Code
//We assume that GdPicture has been correctly installed and unlocked.
//
//Acquires pages from a TWAIN source, adds each acquired image as a page of a
//new PDF/A-1b document, OCRs each added page and saves the result to
//"pdfocr.pdf". A per-page status line is collected and shown at the end.

int ImageID = 0;
bool bContinue = false;
string message = "Done !\n";
GdPictureImaging oGdPictureImaging = new GdPictureImaging();
GdPicturePDF oGdPicturePDF = new GdPicturePDF();

try
{
    //The source is opened here, as part of the condition; it must not be opened a second time below.
    if (oGdPictureImaging.TwainSelectSource(this.Handle) &&
        oGdPictureImaging.TwainOpenDefaultSource(this.Handle))
    {
        try
        {
            oGdPictureImaging.TwainSetAutoFeed(true); //Enabling AutoFeed option.
            oGdPictureImaging.TwainSetAutoScan(true); //Achieving the maximum scanning rate.
            oGdPictureImaging.TwainSetResolution(200);
            oGdPictureImaging.TwainSetPixelType(TwainPixelType.TWPT_BW); //Setting the image to be Black & White.
            oGdPictureImaging.TwainSetBitDepth(1); //1 bpp

            oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b); //Creating the destination PDF document.

            do
            {
                ImageID = oGdPictureImaging.TwainAcquireToGdPictureImage(this.Handle);

                if (oGdPictureImaging.GetStat() == GdPictureStatus.OK)
                {
                    //Creating an image-based page in the destination document.
                    if (oGdPicturePDF.AddImageFromGdPictureImage(ImageID, false, false) == GdPictureStatus.OK)
                    {
                        //OCR-ing the currently created page, if the creation has been successful.
                        oGdPicturePDF.OcrPage("eng", "C:\\GdPicture.NET 14\\Redist\\OCR", "", 300);
                    }

                    //GetStat() reports the status of the last PDF operation (adding the page or OCR-ing it).
                    message += "Page nr." + oGdPicturePDF.GetCurrentPage().ToString() + " - status: " + oGdPicturePDF.GetStat().ToString() + "\n";

                    //Releasing the image.
                    oGdPictureImaging.ReleaseGdPictureImage(ImageID);
                }

                if (oGdPictureImaging.TwainGetState() <= TwainStatus.TWAIN_SOURCE_ENABLED)
                {
                    //No further transfer is pending: let the user decide whether to scan more pages.
                    bContinue = MessageBox.Show("Do you want to acquire other pages?", "", MessageBoxButtons.YesNo, MessageBoxIcon.Question) == DialogResult.Yes;
                }
                else
                {
                    //The source state is above "enabled": keep acquiring.
                    bContinue = true;
                }
            } while (bContinue);

            oGdPicturePDF.SaveToFile("pdfocr.pdf", true);
            message += "Saving - status: " + oGdPicturePDF.GetStat().ToString();
        }
        finally
        {
            //Close the source even if acquiring, OCR-ing or saving throws.
            oGdPictureImaging.TwainCloseSource();
        }

        MessageBox.Show(message, "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    else
    {
        MessageBox.Show("Can't open the default source.\nresult code: " + oGdPictureImaging.TwainGetLastResultCode() +
                        "\ncondition code: " + oGdPictureImaging.TwainGetLastConditionCode(), "TWAIN + OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
finally
{
    //Release both GdPicture objects on every path, including exceptions.
    oGdPictureImaging.Dispose();
    oGdPicturePDF.Dispose();
}