In This Topic
Programming / OCR / Using an external OCR engine during PDF/OCR generation

Using an external OCR engine during PDF/OCR generation

In This Topic
This topic assumes you are already familiar with using the GdPicturePDF class to build PDF/OCR documents.

Overview

It is possible to easily use any external OCR engine during PDF/OCR generation using the GdPicturePDF class.

The concept is quite straightforward: you provide the OCR result to a GdPicturePDF instance through a specific event, passing a string that is the serialization of a specific model. Several serialization models are supported; see the "Supported models for serialization" section of this topic for the list.

After installing the GdPicture.NET Toolkit, please have a look at our C# "PDF to PDF-OCR" demo included in the demo folder. It contains complete implementations for the following external OCR engines:

- GdPicture.NET built-in OCR using the GdPictureOCR class.

- OmniPage.

- Other engines will be added soon...

 

Step by step instructions

  • 1: tell the GdPicturePDF instance to use an external OCR engine.

// Step 1: tell this GdPicturePDF instance to use an external OCR engine.
gdpicturePDF.SetOverrideOcrEngine(true);

  • 2: subscribe to the ExternalOcrPageRequest event (the handler in this example is named ExternalOcrRequest).

// Step 2: register the ExternalOcrRequest method as handler of the ExternalOcrPageRequest event.
gdpicturePDF.ExternalOcrPageRequest += this.ExternalOcrRequest;

  • 3: implement the logic to provide the OCR result through the ExternalOcrRequest event handler.
Copy Code
       // This version uses the "gdpictureocr-json" model (the recommended one).

        /// <summary>
        /// Event handler that runs OCR on the requested image with a GdPictureOCR instance
        /// and hands the result back serialized with the "gdpictureocr-json" model.
        /// </summary>
        /// <param name="ImageID">Identifier of the image to recognize; passed to SetImage.</param>
        /// <param name="PdfOcrOptions">Options copied onto the OCR instance (resource path, dictionary, OCR mode, orientation and skew detection).</param>
        /// <param name="Status">Status reported by the OCR instance after the last operation performed.</param>
        /// <param name="ResultEncoding">"gdpictureocr-json" when the OCR run succeeded, otherwise null.</param>
        /// <param name="OcrResult">The serialized OCR result when the OCR run succeeded, otherwise null.</param>
        private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
        {
            using (GdPictureOCR ocrEngine = new GdPictureOCR())
            {
                // Mirror the options received from the PDF/OCR pipeline onto the OCR instance.
                ocrEngine.ResourceFolder = PdfOcrOptions.ResourcePath;
                ocrEngine.AddCustomDictionary(PdfOcrOptions.Dictionary);
                ocrEngine.OCRMode = PdfOcrOptions.OCRMode;
                ocrEngine.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
                ocrEngine.EnableSkewDetection = PdfOcrOptions.DetectSkew;
                ocrEngine.SetImage(ImageID);

                string ocrResultId = ocrEngine.RunOCR();
                Status = ocrEngine.GetStat();

                // OCR failed: report the status and provide no result.
                if (Status != GdPictureStatus.OK)
                {
                    OcrResult = null;
                    ResultEncoding = null;
                    return;
                }

                ResultEncoding = "gdpictureocr-json";
                OcrResult = ocrEngine.GetSerializedResult(ocrResultId);

                // The serialization itself may fail, so report its status as well.
                Status = ocrEngine.GetStat();
            }
        }



       // This version uses the "json" model.

       /// <summary>
       /// Event handler that runs OCR on the requested image with a GdPictureOCR instance,
       /// copies the recognized paragraphs, lines, words and characters into a
       /// GdPictureOcrResult object and hands it back serialized as JSON ("json" model).
       /// </summary>
       /// <param name="ImageID">Identifier of the image to recognize; passed to SetImage.</param>
       /// <param name="PdfOcrOptions">Options copied onto the OCR instance (resource path, dictionary, OCR mode, orientation and skew detection).</param>
       /// <param name="Status">Status reported by the OCR instance right after RunOCR.</param>
       /// <param name="ResultEncoding">"json" when the OCR run succeeded, otherwise null.</param>
       /// <param name="OcrResult">JSON serialization of the GdPictureOcrResult when the OCR run succeeded, otherwise null.</param>
       private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
        {
            using (GdPictureOCR gdpictureOCR = new GdPictureOCR())
            {
                // Mirror the options received from the PDF/OCR pipeline onto the OCR instance.
                gdpictureOCR.ResourceFolder = PdfOcrOptions.ResourcePath;
                gdpictureOCR.AddCustomDictionary(PdfOcrOptions.Dictionary);
                gdpictureOCR.OCRMode = PdfOcrOptions.OCRMode;
                gdpictureOCR.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
                gdpictureOCR.EnableSkewDetection = PdfOcrOptions.DetectSkew;
                gdpictureOCR.SetImage(ImageID);

                string resultID = gdpictureOCR.RunOCR();
                Status = gdpictureOCR.GetStat();

                // OCR failed: report the status and provide no result.
                if (Status != GdPictureStatus.OK)
                {
                    ResultEncoding = OcrResult = null;
                    return;
                }

                // Typed list locals are kept alongside the model objects so that items can be
                // added without casting the model's IEnumerable<T> fields back to List<T>.
                List<GdPictureOcrParagraph> paragraphs = new List<GdPictureOcrParagraph>();
                GdPictureOcrResult ocrResult = new GdPictureOcrResult()
                {
                    Paragraphs = paragraphs,
                    PageRotation = gdpictureOCR.GetOrientation()
                };

                // The paragraph count is queried once instead of on every loop iteration.
                int paragraphCount = gdpictureOCR.GetParagraphCount(resultID);
                for (int paragraphIdx = 0; paragraphIdx < paragraphCount; paragraphIdx++)
                {
                    OCRBlockType blockType = gdpictureOCR.GetBlockType(resultID, gdpictureOCR.GetParagraphBlockIndex(resultID, paragraphIdx));

                    // Only paragraphs belonging to one of these block types are exported;
                    // every other block type is skipped.
                    if (blockType != OCRBlockType.CaptionText &&
                        blockType != OCRBlockType.FlowingText &&
                        blockType != OCRBlockType.HeadingText &&
                        blockType != OCRBlockType.PulloutText &&
                        blockType != OCRBlockType.VerticalText &&
                        blockType != OCRBlockType.Table)
                    {
                        continue;
                    }

                    List<GdPictureOcrLine> lines = new List<GdPictureOcrLine>();
                    paragraphs.Add(new GdPictureOcrParagraph() { Lines = lines });

                    // Lines are addressed by a result-wide index: the paragraph gives the index
                    // of its first line and the number of lines it owns.
                    int firstLineIdx = gdpictureOCR.GetParagraphFirstTextLineIndex(resultID, paragraphIdx);
                    int lineCount = gdpictureOCR.GetParagraphTextLineCount(resultID, paragraphIdx);
                    for (int lineIdx = firstLineIdx; lineIdx < firstLineIdx + lineCount; lineIdx++)
                    {
                        List<GdPictureOcrWord> words = new List<GdPictureOcrWord>();
                        lines.Add(new GdPictureOcrLine() { Words = words });

                        // Same first-index/count addressing for the words of the line.
                        int firstWordIdx = gdpictureOCR.GetTextLineFirstWordIndex(resultID, lineIdx);
                        int wordCount = gdpictureOCR.GetTextLineWordCount(resultID, lineIdx);
                        for (int wordIdx = firstWordIdx; wordIdx < firstWordIdx + wordCount; wordIdx++)
                        {
                            // The word bounding box is left unset; only character boxes are provided.
                            List<GdPictureOcrCharacter> characters = new List<GdPictureOcrCharacter>();
                            words.Add(new GdPictureOcrWord() { Characters = characters });

                            // Same first-index/count addressing for the characters of the word.
                            int firstCharacterIdx = gdpictureOCR.GetWordFirstCharacterIndex(resultID, wordIdx);
                            int characterCount = gdpictureOCR.GetWordCharacterCount(resultID, wordIdx);
                            for (int characterIdx = firstCharacterIdx; characterIdx < firstCharacterIdx + characterCount; characterIdx++)
                            {
                                int characterLeft = gdpictureOCR.GetCharacterLeft(resultID, characterIdx);
                                int characterTop = gdpictureOCR.GetCharacterTop(resultID, characterIdx);
                                int characterRight = gdpictureOCR.GetCharacterRight(resultID, characterIdx);
                                int characterBottom = gdpictureOCR.GetCharacterBottom(resultID, characterIdx);

                                characters.Add(new GdPictureOcrCharacter()
                                {
                                    BBox = new GdPictureOcrRect(characterLeft, characterTop, characterRight, characterBottom),
                                    Value = gdpictureOCR.GetCharacterValue(resultID, characterIdx)
                                });
                            }
                        }
                    }
                }

                ResultEncoding = "json";
                OcrResult = JsonConvert.SerializeObject(ocrResult);
            }
        }



 

 Supported models for serialization

Model name: "gdpictureocr-json".

Model information: the model is not public. To obtain data serialized with this model, use the GetSerializedResult method of the GdPictureOCR class.

 


 

Model name: "json".

Model information: the provided data must be an enumeration of paragraphs, where each paragraph contains lines, each line contains words, and each word contains characters.

Model definition (CSharp):

Copy Code
    /// <summary>
    /// Root object of the "json" OCR model: holds the OCR result of one page
    /// (page rotation, page skew angle and the recognized paragraphs).
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrResult
    {
        /// <summary>
        /// The standard rotation applied to the page before starting the OCR process.
        /// Accepted values are 0, 90, 180 and 270.
        /// </summary>
        public int PageRotation;

        /// <summary>
        /// The detected page skew angle, in degrees, clockwise.
        /// </summary>
        public float PageSkewAngle;

        /// <summary>
        /// The paragraphs of the page.
        /// </summary>
        public IEnumerable<GdPictureOcrParagraph> Paragraphs;
    }



    /// <summary>
    /// A paragraph of the "json" OCR model: its rotation, its text writing direction
    /// and the lines it contains.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrParagraph
    {
        /// <summary>
        /// The standard rotation of the paragraph.
        /// Accepted values are 0, 90, 180 and 270.
        /// </summary>
        public int ParagraphRotation;

        /// <summary>
        /// The text writing direction.
        /// Supported values are: 0 for left to right, 1 for right to left, 2 for top to bottom.
        /// </summary>
        public int TextWritingDirection;

        /// <summary>
        /// The lines of the paragraph.
        /// </summary>
        public IEnumerable<GdPictureOcrLine> Lines;
    }



    /// <summary>
    /// A text line of the "json" OCR model: a sequence of words.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrLine
    {
        /// <summary>
        /// The words of the line.
        /// </summary>
        public IEnumerable<GdPictureOcrWord> Words;
    }



    /// <summary>
    /// A word of the "json" OCR model: an optional bounding box and the characters of the word.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrWord
    {
        /// <summary>
        /// The bounding box.
        /// It is not mandatory to provide it since it can be computed from character boxes.
        /// </summary>
        public GdPictureOcrRect BBox;

        /// <summary>
        /// The characters of the word.
        /// </summary>
        public IEnumerable<GdPictureOcrCharacter> Characters;
    }



    /// <summary>
    /// A single character of the "json" OCR model: its bounding box and its value.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrCharacter
    {
        /// <summary>
        /// The bounding box.
        /// </summary>
        public GdPictureOcrRect BBox;

        /// <summary>
        /// The character value.
        /// </summary>
        public char Value;
    }



    /// <summary>
    /// A rectangle described by its four edges; used as the bounding box type of
    /// GdPictureOcrWord and GdPictureOcrCharacter.
    /// NOTE(review): the coordinate unit and origin are not stated here; presumably
    /// image coordinates as returned by the OCR engine. Confirm before relying on it.
    /// </summary>
    [Serializable]
    public sealed class GdPictureOcrRect
    {
        /// <summary>The left edge of the rectangle.</summary>
        public int Left;

        /// <summary>The top edge of the rectangle.</summary>
        public int Top;

        /// <summary>The right edge of the rectangle.</summary>
        public int Right;

        /// <summary>The bottom edge of the rectangle.</summary>
        public int Bottom;

        /// <summary>
        /// Creates a rectangle from its four edges; each argument is stored in the
        /// field of the same name.
        /// </summary>
        /// <param name="Left">The left edge.</param>
        /// <param name="Top">The top edge.</param>
        /// <param name="Right">The right edge.</param>
        /// <param name="Bottom">The bottom edge.</param>
        public GdPictureOcrRect(int Left, int Top, int Right, int Bottom)
        {
            this.Left = Left;
            this.Top = Top;
            this.Right = Right;
            this.Bottom = Bottom;
        }
    }