In This Topic
Programming / Document Conversion / Converting a TIFF image to a searchable PDF document using multithreading

Converting a TIFF image to a searchable PDF document using multithreading

In This Topic

Converting scanned images to searchable PDF documents is now a very easy and quick process if you take advantage of the GdPicturePDF class and its OCR methods in a multithreaded environment. You can simply run OCR on any converted document, and the recognized data will be added as invisible text on the page.

  • The first example shows a quick and easy two-step process using the GdPictureDocumentConverter class.
    Copy Code
    'We assume that GdPicture has been correctly installed and unlocked.
    
    
    
    Dim oGdPicturePDF As New GdPicturePDF()

    'Adding the OcrPagesDone event.
    'AddHandler is the only subscription used here: a Handles clause would additionally
    'require oGdPicturePDF to be declared WithEvents and would attach the handler twice.
    AddHandler oGdPicturePDF.OcrPagesDone, AddressOf OcrPagesDone

    'Event handler for OcrPagesDone: saves the document to "output.pdf" and reports the result.
    'status - the value passed by the event; it is not used by this handler.
    Sub OcrPagesDone(status As GdPictureStatus)
        'Saving the resulting document when the OCR process is finished.
        If oGdPicturePDF.SaveToFile("output.pdf") = GdPictureStatus.OK Then
            MessageBox.Show("PDF: The OCR-ed file has been saved successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
        Else
            'GetStat() is read right after the failed SaveToFile call to report its status.
            MessageBox.Show("PDF: The OCR-ed file has failed to save. Status: " & oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End Sub
    
    
    
    'Holds the status of the most recent call; the second step of the example reads it.
    Dim status As GdPictureStatus = GdPictureStatus.OK

    'First step - Converting your source TIF file to PDF document.
    Using oConverter As New GdPictureDocumentConverter()
        status = oConverter.LoadFromFile("input.tif", GdPicture14.DocumentFormat.DocumentFormatTIFF)
        If status <> GdPictureStatus.OK Then
            'The source image could not be loaded, so no conversion is attempted.
            MessageBox.Show("Converter: The TIF file has failed to load. Status: " & status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        Else
            status = oConverter.SaveAsPDF("output.pdf", PdfConformance.PDF)
            If status <> GdPictureStatus.OK Then
                MessageBox.Show("Converter: The PDF file has failed to save. Status: " & status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
            Else
                MessageBox.Show("Converter: The PDF file has been saved successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
            End If
        End If
    End Using
    
    'Second step - Processing OCR on the created PDF document.
    'Runs only when the preceding conversion step left status at GdPictureStatus.OK.
    If status = GdPictureStatus.OK Then
        'Loading the document into memory (True) instead of keeping output.pdf opened.
        'NOTE(review): the OcrPagesDone handler saves to this same output.pdf path; per the GdPicture
        'reference a loaded file can only be overwritten when it was loaded into memory - confirm.
        status = oGdPicturePDF.LoadFromFile("output.pdf", True)
        If status = GdPictureStatus.OK Then
            status = oGdPicturePDF.OcrPages("*", 0, "eng", "C:\GdPicture.NET 14\Redist\OCR", "", 300, 2, True)
            If status = GdPictureStatus.OK Then
                MessageBox.Show("PDF: The OCR process has been finished successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
            Else
                MessageBox.Show("PDF: The OCR process has failed. Status: " & status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
            End If
        Else
            MessageBox.Show("PDF: The PDF file has failed to load. Status: " & status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End If

    'Releasing resources only if all processes are finished.
    oGdPicturePDF.Dispose()
    Copy Code
    //We assume that GdPicture has been correctly installed and unlocked.
    
    
    
    GdPicturePDF oGdPicturePDF = new GdPicturePDF();

    //Registering the local function below as the OcrPagesDone handler.
    oGdPicturePDF.OcrPagesDone += OcrPagesDone;

    //Handler for OcrPagesDone: writes the document to "output.pdf" and shows the outcome.
    //The status argument is not used by this handler.
    void OcrPagesDone(GdPictureStatus status)
    {
        //Saving the resulting document when the OCR process is finished.
        bool saved = oGdPicturePDF.SaveToFile("output.pdf") == GdPictureStatus.OK;
        if (!saved)
        {
            //GetStat() is read right after the failed SaveToFile call to report its status.
            MessageBox.Show("PDF: The OCR-ed file has failed to save. Status: " + oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }

        MessageBox.Show("PDF: The OCR-ed file has been saved successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    
    
    
    //Holds the status of the most recent call; the second step of the example reads it.
    GdPictureStatus status = GdPictureStatus.OK;

    //First step - Converting your source TIF file to PDF document.
    using (GdPictureDocumentConverter oConverter = new GdPictureDocumentConverter())
    {
        status = oConverter.LoadFromFile("input.tif", GdPicture14.DocumentFormat.DocumentFormatTIFF);
        if (status != GdPictureStatus.OK)
        {
            //The source image could not be loaded, so no conversion is attempted.
            MessageBox.Show("Converter: The TIF file has failed to load. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
        else
        {
            status = oConverter.SaveAsPDF("output.pdf", PdfConformance.PDF);
            if (status != GdPictureStatus.OK)
            {
                MessageBox.Show("Converter: The PDF file has failed to save. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
            else
            {
                MessageBox.Show("Converter: The PDF file has been saved successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
            }
        }
    }
    
    //Second step - Processing OCR on the created PDF document.
    //Runs only when the preceding conversion step left status at GdPictureStatus.OK.
    if (status == GdPictureStatus.OK)
    {
        //The document is loaded through oGdPicturePDF, the same object that runs OcrPages
        //and is saved by the OcrPagesDone handler.
        //NOTE(review): loaded into memory (true) because the handler saves to this same output.pdf path;
        //per the GdPicture reference a loaded file can only be overwritten when it was loaded into memory - confirm.
        status = oGdPicturePDF.LoadFromFile("output.pdf", true);
        if (status == GdPictureStatus.OK)
        {
            status = oGdPicturePDF.OcrPages("*", 0, "eng", "C:\\GdPicture.NET 14\\Redist\\OCR", "", 300, 2, true);
            if (status == GdPictureStatus.OK)
            {
                MessageBox.Show("PDF: The OCR process has been finished successfully.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
            }
            else
            {
                MessageBox.Show("PDF: The OCR process has failed. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }
        else
        {
            MessageBox.Show("PDF: The PDF file has failed to load. Status: " + status.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }

    //Releasing resources only if all processes are finished.
    oGdPicturePDF.Dispose();
  • The second example demonstrates another approach, converting the TIFF image page by page through the GdPictureImaging class.
    Copy Code
    'We assume that GdPicture has been correctly installed and unlocked.
    
    
    
    Dim oGdPicturePDF As New GdPicturePDF()

    'Adding the OcrPagesDone event.
    'AddHandler is the only subscription used here: a Handles clause would additionally
    'require oGdPicturePDF to be declared WithEvents and would attach the handler twice.
    AddHandler oGdPicturePDF.OcrPagesDone, AddressOf OcrPagesDone

    'Event handler for OcrPagesDone: saves the document to "output.pdf" and reports the result.
    'status - the value passed by the event; it is not used by this handler.
    Sub OcrPagesDone(status As GdPictureStatus)
        'Saving the resulting document when the OCR process is finished.
        If oGdPicturePDF.SaveToFile("output.pdf") = GdPictureStatus.OK Then
            MessageBox.Show("The resulting document is saved.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
        Else
            'GetStat() is read right after the failed SaveToFile call to report its status.
            MessageBox.Show("The resulting document can't be saved. Status: " & oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End Sub
    
    
    
    'Builds a PDF from "image.tif" (one PDF page per tiff page) and then runs OCR on it.
    Using oGdPictureImaging As New GdPictureImaging()
        'Loading an image from a file.
        Dim imageId As Integer = oGdPictureImaging.CreateGdPictureImageFromFile("image.tif")
        If oGdPictureImaging.GetStat() = GdPictureStatus.OK Then
            If oGdPicturePDF.NewPDF() = GdPictureStatus.OK Then
                'Stays True only while every tiff page has been selected successfully.
                'TiffSelectPage is called on oGdPictureImaging, so its failure is tracked here
                'rather than relying on oGdPicturePDF.GetStat().
                Dim allPagesSelected As Boolean = True
                If Not oGdPictureImaging.TiffIsMultiPage(imageId) Then
                    'One-page tiff image.
                    'Adding an image as a resource and drawing it onto a new page.
                    oGdPicturePDF.AddImageFromGdPictureImage(imageId, False, False)
                Else
                    'Multi-page tiff image.
                    Dim NumberOfPages As Integer = oGdPictureImaging.TiffGetPageCount(imageId)
                    'Loop through pages.
                    For i As Integer = 1 To NumberOfPages
                        'Selecting each page in the tiff file.
                        Dim selectStatus As GdPictureStatus = oGdPictureImaging.TiffSelectPage(imageId, i)
                        If selectStatus <> GdPictureStatus.OK Then
                            'Reporting the failure so that OCR is not run on a partially built document.
                            allPagesSelected = False
                            MessageBox.Show("The tiff page " & i.ToString() & " can't be selected. Status: " & selectStatus.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
                            Exit For
                        End If
                        'Adding the selected tiff page as a resource to a PDF document and drawing it on a new page.
                        oGdPicturePDF.AddImageFromGdPictureImage(imageId, False, False)
                        If oGdPicturePDF.GetStat() <> GdPictureStatus.OK Then
                            Exit For
                        End If
                    Next
                End If
                'Checking whether any error occurred in selecting a page or adding any image to the PDF document.
                If allPagesSelected AndAlso oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                    If oGdPicturePDF.OcrPages("*", 0, "eng", "C:\GdPicture.NET 14\Redist\OCR", "", 300, 2, True) = GdPictureStatus.OK Then
                        MessageBox.Show("OcrPages done! Status: " & oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
                    End If
                End If
            End If
            MessageBox.Show("Finished! Status: " & oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information)
            'Clearing resource.
            oGdPictureImaging.ReleaseGdPictureImage(imageId)
        Else
            MessageBox.Show("The image file can't be loaded. Status: " & oGdPictureImaging.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End Using

    'Releasing resources only if all processes are finished.
    oGdPicturePDF.Dispose()
    Copy Code
    //We assume that GdPicture has been correctly installed and unlocked.
    
    
    
    GdPicturePDF oGdPicturePDF = new GdPicturePDF();

    //Registering the local function below as the OcrPagesDone handler.
    oGdPicturePDF.OcrPagesDone += OcrPagesDone;

    //Handler for OcrPagesDone: writes the document to "output.pdf" and shows the outcome.
    //The status argument is not used by this handler.
    void OcrPagesDone(GdPictureStatus status)
    {
        //Saving the resulting document when the OCR process is finished.
        bool saved = oGdPicturePDF.SaveToFile("output.pdf") == GdPictureStatus.OK;
        if (!saved)
        {
            //GetStat() is read right after the failed SaveToFile call to report its status.
            MessageBox.Show("The resulting document can't be saved. Status: " + oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
            return;
        }

        MessageBox.Show("The resulting document is saved.", "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    
    
    
    //Builds a PDF from "image.tif" (one PDF page per tiff page) and then runs OCR on it.
    using (GdPictureImaging oGdPictureImaging = new GdPictureImaging())
    {
        //Loading an image from a file.
        int imageId = oGdPictureImaging.CreateGdPictureImageFromFile("image.tif");
        if (oGdPictureImaging.GetStat() == GdPictureStatus.OK)
        {
            if (oGdPicturePDF.NewPDF() == GdPictureStatus.OK)
            {
                //Stays true only while every tiff page has been selected successfully.
                //TiffSelectPage is called on oGdPictureImaging, so its failure is tracked here
                //rather than relying on oGdPicturePDF.GetStat().
                bool allPagesSelected = true;
                if (!oGdPictureImaging.TiffIsMultiPage(imageId)) //One-page tiff image.
                {
                    //Adding an image as a resource and drawing it onto a new page.
                    oGdPicturePDF.AddImageFromGdPictureImage(imageId, false, false);
                }
                else //Multi-page tiff image.
                {
                    int NumberOfPages = oGdPictureImaging.TiffGetPageCount(imageId);
                    //Loop through pages.
                    for (int i = 1; i <= NumberOfPages; i++)
                    {
                        //Selecting each page in a tiff file.
                        GdPictureStatus selectStatus = oGdPictureImaging.TiffSelectPage(imageId, i);
                        if (selectStatus != GdPictureStatus.OK)
                        {
                            //Reporting the failure so that OCR is not run on a partially built document.
                            allPagesSelected = false;
                            MessageBox.Show("The tiff page " + i.ToString() + " can't be selected. Status: " + selectStatus.ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
                            break;
                        }
                        //Adding the selected tiff page as a resource to a PDF document and drawing it on a new page.
                        oGdPicturePDF.AddImageFromGdPictureImage(imageId, false, false);
                        if (oGdPicturePDF.GetStat() != GdPictureStatus.OK)
                        {
                            break;
                        }
                    }
                }
                //Checking whether any error occurred in selecting a page or adding any image to the PDF document.
                if (allPagesSelected && oGdPicturePDF.GetStat() == GdPictureStatus.OK)
                {
                    if (oGdPicturePDF.OcrPages("*", 0, "eng", @"C:\GdPicture.NET 14\Redist\OCR", "", 300, 2, true) == GdPictureStatus.OK)
                    {
                        MessageBox.Show("OcrPages done! Status: " + oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
                    }
                }
            }
            MessageBox.Show("Finished! Status: " + oGdPicturePDF.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Information);
            //Clearing resource.
            oGdPictureImaging.ReleaseGdPictureImage(imageId);
        }
        else
        {
            MessageBox.Show("The image file can't be loaded. Status: " + oGdPictureImaging.GetStat().ToString(), "TIFF to PDF Example", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }

    //Releasing resources only if all processes are finished.
    oGdPicturePDF.Dispose();