In This Topic
Programming / OCR / Creating a searchable PDF (PDF/A) document from an image file (both single and multi-page TIFF image)

Creating a searchable PDF (PDF/A) document from an image file (both single and multi-page TIFF image)

In This Topic

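This topic shows how to convert a TIFF image file to a searchable PDF (PDF/A) document by running OCR on the image and writing the recognized text into a new PDF. Two scenarios are covered: one for a multi-page TIFF file and one for a single-page TIFF file. Each scenario is shown first in VB.NET and then in C#.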

 If your input file is a multipage TIFF file
Copy Code
'We assume GdPicture has been correctly installed and unlocked.
'
'This sample runs OCR on each page of a multi-page TIFF file and draws the recognized text of
'every page onto its own A4 page of a new PDF/A-1b document, which is then saved as "OCR.pdf".
'The GdPicture objects are created in Using blocks and the image is released in a Finally
'block, so the resources are freed even when an exception interrupts the processing.
Using oGdPictureImaging As GdPictureImaging = New GdPictureImaging()
    'Selecting an image to process.
    'NOTE(review): the empty path comes from the original sample; presumably GdPicture then
    'prompts the user to select a file - confirm, or pass the path of your TIFF file.
    Dim imageID As Integer = oGdPictureImaging.TiffCreateMultiPageFromFile("")
    If oGdPictureImaging.GetStat() <> GdPictureStatus.OK Then
        MessageBox.Show("Error when loading the image: " + oGdPictureImaging.GetStat().ToString())
    Else
        Try
            'Retrieving the number of pages.
            'The count stays 0 when the image is not reported as a multi-page TIFF.
            Dim pageCount As Integer = 0
            If oGdPictureImaging.TiffIsMultiPage(imageID) Then
                pageCount = oGdPictureImaging.TiffGetPageCount(imageID)
            End If

            Using oGdPictureOCR As GdPictureOCR = New GdPictureOCR()
                Using oGdPicturePDF As GdPicturePDF = New GdPicturePDF()
                    'Setting up the OCR engine.
                    'NOTE(review): adjust the resource folder to your own GdPicture installation path.
                    oGdPictureOCR.ResourceFolder = "C:\GdPicture.NET 14\Redist\OCR"
                    oGdPictureOCR.CharacterSet = ""
                    'The same result identifier is reused for every page; it is released after each page.
                    Dim resID As String = "page"
                    Dim languageStatus As GdPictureStatus = oGdPictureOCR.AddLanguage(OCRLanguage.English)

                    If languageStatus <> GdPictureStatus.OK Then
                        MessageBox.Show("Error when setting up the OCR language: " + languageStatus.ToString())
                    ElseIf oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b) <> GdPictureStatus.OK Then
                        'Creating the resulting PDF document failed.
                        MessageBox.Show("Error when creating the PDF document: " + oGdPicturePDF.GetStat().ToString())
                    Else
                        oGdPicturePDF.SetOrigin(PdfOrigin.PdfOriginTopLeft)
                        Dim fontResName As String = oGdPicturePDF.AddStandardFont(PdfStandardFont.PdfStandardFontCourier)

                        'Counting the pages whose text has really been drawn,
                        'so that an empty document is never saved and reported as a success.
                        Dim writtenPages As Integer = 0

                        If oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                            'Looping through pages of the image file.
                            For i As Integer = 1 To pageCount
                                'Selecting the current page and setting up the image for OCR.
                                If (oGdPictureImaging.TiffSelectPage(imageID, i) <> GdPictureStatus.OK) OrElse
                                   (oGdPictureOCR.SetImage(imageID) <> GdPictureStatus.OK) Then
                                    MessageBox.Show("The page nr. " + i.ToString() + " could not be selected for OCR.")
                                    Continue For
                                End If

                                'Running the OCR process on the current page.
                                oGdPictureOCR.RunOCR(resID)
                                If oGdPictureOCR.GetStat() <> GdPictureStatus.OK Then
                                    MessageBox.Show("Error when processing the OCR: " + oGdPictureOCR.GetStat().ToString())
                                    Continue For
                                End If

                                'Getting the result and drawing it onto a new page of the PDF document.
                                Dim content As String = oGdPictureOCR.GetOCRResultText(resID)
                                If (oGdPictureOCR.GetStat() = GdPictureStatus.OK) AndAlso
                                   (oGdPicturePDF.NewPage(PdfPageSizes.PdfPageSizeA4) = GdPictureStatus.OK) AndAlso
                                   (oGdPicturePDF.DrawText(fontResName, 0, 0, content) = GdPictureStatus.OK) Then
                                    writtenPages += 1
                                    MessageBox.Show("The page nr. " + i.ToString() + " has been successfully processed.")
                                Else
                                    MessageBox.Show("The page nr. " + i.ToString() + " could not be written to the PDF document.")
                                End If

                                'Releasing the previous OCR result to improve the memory management and to allow reusing of the result identifier.
                                oGdPictureOCR.ReleaseOCRResult(resID)
                            Next
                        Else
                            MessageBox.Show("Error when adding the font: " + oGdPicturePDF.GetStat().ToString())
                        End If

                        'Saving the resulting PDF document, but only when it contains recognized text.
                        If writtenPages = 0 Then
                            MessageBox.Show("No page has been processed, so no PDF document has been saved.")
                        ElseIf oGdPicturePDF.SaveToFile("OCR.pdf", True, True) = GdPictureStatus.OK Then
                            MessageBox.Show("The created PDF document has been successfully saved.")
                        Else
                            MessageBox.Show("Error when saving the document: " + oGdPicturePDF.GetStat().ToString())
                        End If

                        oGdPicturePDF.CloseDocument()
                    End If
                End Using
            End Using
        Finally
            'Releasing the image only when it has actually been loaded.
            oGdPictureImaging.ReleaseGdPictureImage(imageID)
        End Try
    End If
End Using
Copy Code
//We assume GdPicture has been correctly installed and unlocked.
//
//This sample runs OCR on each page of a multi-page TIFF file and draws the recognized text of
//every page onto its own A4 page of a new PDF/A-1b document, which is then saved as "OCR.pdf".
//The GdPicture objects are created in using statements and the image is released in a finally
//block, so the resources are freed even when an exception interrupts the processing.
using (GdPictureImaging oGdPictureImaging = new GdPictureImaging())
{
    //Selecting an image to process.
    //NOTE(review): the empty path comes from the original sample; presumably GdPicture then
    //prompts the user to select a file - confirm, or pass the path of your TIFF file.
    int imageID = oGdPictureImaging.TiffCreateMultiPageFromFile("");
    if (oGdPictureImaging.GetStat() != GdPictureStatus.OK)
    {
        MessageBox.Show("Error when loading the image: " + oGdPictureImaging.GetStat().ToString());
    }
    else
    {
        try
        {
            //Retrieving the number of pages.
            //The count stays 0 when the image is not reported as a multi-page TIFF.
            int pageCount = 0;
            if (oGdPictureImaging.TiffIsMultiPage(imageID))
            {
                pageCount = oGdPictureImaging.TiffGetPageCount(imageID);
            }

            using (GdPictureOCR oGdPictureOCR = new GdPictureOCR())
            using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
            {
                //Setting up the OCR engine.
                //NOTE(review): adjust the resource folder to your own GdPicture installation path.
                oGdPictureOCR.ResourceFolder = "C:\\GdPicture.NET 14\\Redist\\OCR";
                oGdPictureOCR.CharacterSet = "";
                //The same result identifier is reused for every page; it is released after each page.
                string resID = "page";
                GdPictureStatus languageStatus = oGdPictureOCR.AddLanguage(OCRLanguage.English);

                if (languageStatus != GdPictureStatus.OK)
                {
                    MessageBox.Show("Error when setting up the OCR language: " + languageStatus.ToString());
                }
                else if (oGdPicturePDF.NewPDF(PdfConformance.PDF_A_1b) != GdPictureStatus.OK)
                {
                    //Creating the resulting PDF document failed.
                    MessageBox.Show("Error when creating the PDF document: " + oGdPicturePDF.GetStat().ToString());
                }
                else
                {
                    oGdPicturePDF.SetOrigin(PdfOrigin.PdfOriginTopLeft);
                    string fontResName = oGdPicturePDF.AddStandardFont(PdfStandardFont.PdfStandardFontCourier);

                    //Counting the pages whose text has really been drawn,
                    //so that an empty document is never saved and reported as a success.
                    int writtenPages = 0;

                    if (oGdPicturePDF.GetStat() == GdPictureStatus.OK)
                    {
                        //Looping through pages of the image file.
                        for (int i = 1; i <= pageCount; i++)
                        {
                            //Selecting the current page and setting up the image for OCR.
                            if ((oGdPictureImaging.TiffSelectPage(imageID, i) != GdPictureStatus.OK) ||
                                (oGdPictureOCR.SetImage(imageID) != GdPictureStatus.OK))
                            {
                                MessageBox.Show("The page nr. " + i.ToString() + " could not be selected for OCR.");
                                continue;
                            }

                            //Running the OCR process on the current page.
                            oGdPictureOCR.RunOCR(resID);
                            if (oGdPictureOCR.GetStat() != GdPictureStatus.OK)
                            {
                                MessageBox.Show("Error when processing the OCR: " + oGdPictureOCR.GetStat().ToString());
                                continue;
                            }

                            //Getting the result and drawing it onto a new page of the PDF document.
                            string content = oGdPictureOCR.GetOCRResultText(resID);
                            if ((oGdPictureOCR.GetStat() == GdPictureStatus.OK) &&
                                (oGdPicturePDF.NewPage(PdfPageSizes.PdfPageSizeA4) == GdPictureStatus.OK) &&
                                (oGdPicturePDF.DrawText(fontResName, 0, 0, content) == GdPictureStatus.OK))
                            {
                                writtenPages++;
                                MessageBox.Show("The page nr. " + i.ToString() + " has been successfully processed.");
                            }
                            else
                            {
                                MessageBox.Show("The page nr. " + i.ToString() + " could not be written to the PDF document.");
                            }

                            //Releasing the previous OCR result to improve the memory management and to allow reusing of the result identifier.
                            oGdPictureOCR.ReleaseOCRResult(resID);
                        }
                    }
                    else
                    {
                        MessageBox.Show("Error when adding the font: " + oGdPicturePDF.GetStat().ToString());
                    }

                    //Saving the resulting PDF document, but only when it contains recognized text.
                    if (writtenPages == 0)
                    {
                        MessageBox.Show("No page has been processed, so no PDF document has been saved.");
                    }
                    else if (oGdPicturePDF.SaveToFile("OCR.pdf", true, true) == GdPictureStatus.OK)
                    {
                        MessageBox.Show("The created PDF document has been successfully saved.");
                    }
                    else
                    {
                        MessageBox.Show("Error when saving the document: " + oGdPicturePDF.GetStat().ToString());
                    }

                    oGdPicturePDF.CloseDocument();
                }
            }
        }
        finally
        {
            //Releasing the image only when it has actually been loaded.
            oGdPictureImaging.ReleaseGdPictureImage(imageID);
        }
    }
}
 If your input file is a single page TIFF file
Copy Code
'We assume GdPicture has been correctly installed and unlocked.
'
'This sample runs OCR on a single-page image and creates a PDF/A-1b document from the
'recognized text, which is saved as "OCR.pdf".
'The GdPicture objects are created in Using blocks and the image is released in a Finally
'block, so the resources are freed even when an exception interrupts the processing.
Using oGdPictureImaging As GdPictureImaging = New GdPictureImaging()
    'Selecting an image to process.
    'NOTE(review): the empty path comes from the original sample; presumably GdPicture then
    'prompts the user to select a file - confirm, or pass the path of your image file.
    Dim imageID As Integer = oGdPictureImaging.CreateGdPictureImageFromFile("")
    If oGdPictureImaging.GetStat() <> GdPictureStatus.OK Then
        MessageBox.Show("Error when loading the image: " + oGdPictureImaging.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
    Else
        Try
            Using oGdPictureOCR As GdPictureOCR = New GdPictureOCR()
                'Setting the OCR parameters.
                'NOTE(review): adjust the resource folder to your own GdPicture installation path.
                oGdPictureOCR.ResourceFolder = "C:\Program Files\GdPicture.NET 14\Redist\OCR"
                oGdPictureOCR.CharacterSet = ""

                'Setting up the language and the image.
                If (oGdPictureOCR.AddLanguage(OCRLanguage.English) = GdPictureStatus.OK) AndAlso
                   (oGdPictureOCR.SetImage(imageID) = GdPictureStatus.OK) Then
                    'Running the OCR process.
                    Dim resID As String = oGdPictureOCR.RunOCR()
                    If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                        'Getting the result as a text.
                        Dim content As String = oGdPictureOCR.GetOCRResultText(resID)
                        If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                            'Creating a searchable PDF document.
                            Using oGdPicturePDF As GdPicturePDF = New GdPicturePDF()
                                'Setting up your preferred page size and font parameters.
                                'Creation and saving are checked separately so that the reported error names the step that failed.
                                If oGdPicturePDF.CreateFromText(PdfConformance.PDF_A_1b, 595, 842, 10, 10, 10, 10,
                                                                TextAlignment.TextAlignmentNear, content, 12, "Arial",
                                                                False, False, True, False) <> GdPictureStatus.OK Then
                                    MessageBox.Show("Error when creating the document: " + oGdPicturePDF.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                                ElseIf oGdPicturePDF.SaveToFile("OCR.pdf", True, True) <> GdPictureStatus.OK Then
                                    MessageBox.Show("Error when saving the document: " + oGdPicturePDF.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                                Else
                                    MessageBox.Show("Done!", "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
                                End If
                            End Using
                        Else
                            MessageBox.Show("Error when getting the OCR result: " + oGdPictureOCR.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                        End If

                        'Releasing the OCR result once it is no longer needed, as the multi-page sample does.
                        oGdPictureOCR.ReleaseOCRResult(resID)
                    Else
                        MessageBox.Show("Error when processing the OCR: " + oGdPictureOCR.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                    End If
                Else
                    MessageBox.Show("Error when setting up the OCR engine: " + oGdPictureOCR.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                End If
            End Using
        Finally
            'Releasing the image only when it has actually been loaded.
            oGdPictureImaging.ReleaseGdPictureImage(imageID)
        End Try
    End If
End Using
Copy Code
//We assume GdPicture has been correctly installed and unlocked.
//
//This sample runs OCR on a single-page image and creates a PDF/A-1b document from the
//recognized text, which is saved as "OCR.pdf".
//The GdPicture objects are created in using statements and the image is released in a finally
//block, so the resources are freed even when an exception interrupts the processing.
using (GdPictureImaging oGdPictureImaging = new GdPictureImaging())
{
    //Selecting an image to process.
    //NOTE(review): the empty path comes from the original sample; presumably GdPicture then
    //prompts the user to select a file - confirm, or pass the path of your image file.
    int imageID = oGdPictureImaging.CreateGdPictureImageFromFile("");
    if (oGdPictureImaging.GetStat() != GdPictureStatus.OK)
    {
        MessageBox.Show("Error when loading the image: " + oGdPictureImaging.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
    else
    {
        try
        {
            using (GdPictureOCR oGdPictureOCR = new GdPictureOCR())
            {
                //Setting the OCR parameters.
                //NOTE(review): adjust the resource folder to your own GdPicture installation path.
                oGdPictureOCR.ResourceFolder = "C:\\Program Files\\GdPicture.NET 14\\Redist\\OCR";
                oGdPictureOCR.CharacterSet = "";

                //Setting up the language and the image.
                if ((oGdPictureOCR.AddLanguage(OCRLanguage.English) == GdPictureStatus.OK) &&
                    (oGdPictureOCR.SetImage(imageID) == GdPictureStatus.OK))
                {
                    //Running the OCR process.
                    string resID = oGdPictureOCR.RunOCR();
                    if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
                    {
                        //Getting the result as a text.
                        string content = oGdPictureOCR.GetOCRResultText(resID);
                        if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
                        {
                            //Creating a searchable PDF document.
                            using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
                            {
                                //Setting up your preferred page size and font parameters.
                                //Creation and saving are checked separately so that the reported error names the step that failed.
                                if (oGdPicturePDF.CreateFromText(PdfConformance.PDF_A_1b, 595, 842, 10, 10, 10, 10,
                                                                 TextAlignment.TextAlignmentNear, content, 12, "Arial",
                                                                 false, false, true, false) != GdPictureStatus.OK)
                                {
                                    MessageBox.Show("Error when creating the document: " + oGdPicturePDF.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                                }
                                else if (oGdPicturePDF.SaveToFile("OCR.pdf", true, true) != GdPictureStatus.OK)
                                {
                                    MessageBox.Show("Error when saving the document: " + oGdPicturePDF.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                                }
                                else
                                {
                                    MessageBox.Show("Done!", "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
                                }
                            }
                        }
                        else
                        {
                            MessageBox.Show("Error when getting the OCR result: " + oGdPictureOCR.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                        }

                        //Releasing the OCR result once it is no longer needed, as the multi-page sample does.
                        oGdPictureOCR.ReleaseOCRResult(resID);
                    }
                    else
                    {
                        MessageBox.Show("Error when processing the OCR: " + oGdPictureOCR.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                    }
                }
                else
                {
                    MessageBox.Show("Error when setting up the OCR engine: " + oGdPictureOCR.GetStat().ToString(), "OCR Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                }
            }
        }
        finally
        {
            //Releasing the image only when it has actually been loaded.
            oGdPictureImaging.ReleaseGdPictureImage(imageID);
        }
    }
}