In This Topic
Programming / OCR / How to OCR a single page or a multipage TIFF file

How to OCR a single page or a multipage TIFF file

In This Topic

Running OCR on a multipage TIFF image is as easy as looping through its pages and running OCR on each one. In this example the recognized text of every page is written to a text file.
Here is how to do it.

Copy Code (VB.NET)
'We assume that GdPicture has been correctly installed and unlocked.
'
'Opens an image file, runs the OCR on each of its pages (a single page, or every
'page of a multi-page TIFF) and writes the recognized text - or the error status
'of a failed page - to "output.txt".

'The Using blocks make sure the GdPicture objects are disposed and the output file
'is closed even if an exception is thrown half-way through.
Using oGdPictureImaging As GdPictureImaging = New GdPictureImaging()

    Dim pageCount As Integer = 1
    Dim imageID As Integer = oGdPictureImaging.CreateGdPictureImageFromFile("")

    If oGdPictureImaging.GetStat() = GdPictureStatus.OK Then

        Try
            'Setting up a correct number of pages for multi-page input file.
            Dim isMultiPage As Boolean = oGdPictureImaging.TiffIsMultiPage(imageID)
            If isMultiPage Then
                pageCount = oGdPictureImaging.TiffGetPageCount(imageID)
            End If

            'Setting up the OCR engine.
            Using oGdPictureOCR As GdPictureOCR = New GdPictureOCR()

                oGdPictureOCR.ResourceFolder = "C:\GdPicture.NET 14\Redist\OCR"
                oGdPictureOCR.CharacterSet = ""
                oGdPictureOCR.AddLanguage(OCRLanguage.English)

                'The same result identifier is reused for every page.
                Dim resID As String = "page"
                Dim content As String = Nothing

                'Creating a text file to store the resulting text.
                Using stream As System.IO.StreamWriter = New System.IO.StreamWriter("output.txt")

                    'Looping through multi-page image.
                    For i As Integer = 1 To pageCount

                        'Selecting a page. Only needed for a multi-page image; a single-page
                        'image has nothing to select.
                        If isMultiPage Then
                            oGdPictureImaging.TiffSelectPage(imageID, i)
                            If oGdPictureImaging.GetStat() <> GdPictureStatus.OK Then
                                'Without this check a failed selection would OCR the previously
                                'selected page again and report its text under the wrong page number.
                                stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureImaging.GetStat().ToString())
                                Continue For
                            End If
                        End If

                        'Setting up the image.
                        If oGdPictureOCR.SetImage(imageID) = GdPictureStatus.OK Then

                            'Running the OCR on the current page.
                            oGdPictureOCR.RunOCR(resID)

                            If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then

                                'Saving the page content as a text.
                                content = oGdPictureOCR.GetOCRResultText(resID)

                                If oGdPictureOCR.GetStat() = GdPictureStatus.OK Then
                                    stream.WriteLine("Text on the page nr." + i.ToString() + ":" + vbCrLf + "---------------------------------" + vbCrLf + content)
                                Else
                                    stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureOCR.GetStat().ToString())
                                End If

                            Else
                                stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureOCR.GetStat().ToString())
                            End If

                        Else
                            MessageBox.Show("The image can't be set. Error: " + oGdPictureOCR.GetStat().ToString(), "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                        End If

                        'Releasing the previous result in order to reuse the result identifier.
                        oGdPictureOCR.ReleaseOCRResult(resID)

                    Next

                End Using

            End Using

        Finally
            'The image is released on every path, including when an exception is thrown.
            oGdPictureImaging.ReleaseGdPictureImage(imageID)
        End Try

        MessageBox.Show("Done!", "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)

    Else

        MessageBox.Show("The file can't be opened. Error: " + oGdPictureImaging.GetStat().ToString(), "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)

    End If

End Using
Copy Code (C#)
//We assume that GdPicture has been correctly installed and unlocked.
//
//Opens an image file, runs the OCR on each of its pages (a single page, or every
//page of a multi-page TIFF) and writes the recognized text - or the error status
//of a failed page - to "output.txt".

//The using blocks make sure the GdPicture objects are disposed and the output file
//is closed even if an exception is thrown half-way through.
using (GdPictureImaging oGdPictureImaging = new GdPictureImaging())
{
    int pageCount = 1;
    int imageID = oGdPictureImaging.CreateGdPictureImageFromFile("");

    if (oGdPictureImaging.GetStat() == GdPictureStatus.OK)
    {
        try
        {
            //Setting up a correct number of pages for multi-page input file.
            bool isMultiPage = oGdPictureImaging.TiffIsMultiPage(imageID);
            if (isMultiPage)
            {
                pageCount = oGdPictureImaging.TiffGetPageCount(imageID);
            }

            //Setting up the OCR engine.
            using (GdPictureOCR oGdPictureOCR = new GdPictureOCR())
            {
                oGdPictureOCR.ResourceFolder = "C:\\GdPicture.NET 14\\Redist\\OCR";
                oGdPictureOCR.CharacterSet = "";
                oGdPictureOCR.AddLanguage(OCRLanguage.English);

                //The same result identifier is reused for every page.
                string resID = "page";
                string content = null;

                //Creating a text file to store the resulting text.
                using (System.IO.StreamWriter stream = new System.IO.StreamWriter("output.txt"))
                {
                    //Looping through multi-page image.
                    for (int i = 1; i <= pageCount; i++)
                    {
                        //Selecting a page. Only needed for a multi-page image; a single-page
                        //image has nothing to select.
                        if (isMultiPage)
                        {
                            oGdPictureImaging.TiffSelectPage(imageID, i);
                            if (oGdPictureImaging.GetStat() != GdPictureStatus.OK)
                            {
                                //Without this check a failed selection would OCR the previously
                                //selected page again and report its text under the wrong page number.
                                stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureImaging.GetStat().ToString());
                                continue;
                            }
                        }

                        //Setting up the image.
                        if (oGdPictureOCR.SetImage(imageID) == GdPictureStatus.OK)
                        {
                            //Running the OCR on the current page.
                            oGdPictureOCR.RunOCR(resID);

                            if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
                            {
                                //Saving the page content as a text.
                                content = oGdPictureOCR.GetOCRResultText(resID);

                                if (oGdPictureOCR.GetStat() == GdPictureStatus.OK)
                                {
                                    stream.WriteLine("Text on the page nr." + i.ToString() + ":\n---------------------------------\n" + content);
                                }
                                else
                                {
                                    stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureOCR.GetStat().ToString());
                                }
                            }
                            else
                            {
                                stream.WriteLine("Error occurred on the page nr." + i.ToString() + ": " + oGdPictureOCR.GetStat().ToString());
                            }
                        }
                        else
                        {
                            MessageBox.Show("The image can't be set. Error: " + oGdPictureOCR.GetStat().ToString(), "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                        }

                        //Releasing the previous result in order to reuse the result identifier.
                        oGdPictureOCR.ReleaseOCRResult(resID);
                    }
                }
            }
        }
        finally
        {
            //The image is released on every path, including when an exception is thrown.
            oGdPictureImaging.ReleaseGdPictureImage(imageID);
        }

        MessageBox.Show("Done!", "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    else
    {
        MessageBox.Show("The file can't be opened. Error: " + oGdPictureImaging.GetStat().ToString(), "OCR TIFF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}