I have installed the latest version of GdPicture, 8.4.
I am reading the text from PDF files, but it is too slow.
I have a set of files (85 MB in total), and reading the text of all the PDFs takes 2 hours.
For each file I use this code:
Code: Select all
// Extracts text from every page of the PDF at `path`: each page is rendered to a
// GdPicture image, run through Tesseract OCR ("ita" dictionary), and the recognized
// text is appended to `s`.
GdPicturePDF oPDF = new GdPicturePDF();
if (oPDF.LoadFromFile(path, false) == GdPictureStatus.OK)
{
    try
    {
        int pageCount = oPDF.GetPageCount();
        for (int i = 1; i <= pageCount; i++)
        {
            // SelectPage is only called from page 2 onwards; page 1 is processed
            // without an explicit selection, exactly as before.
            if (i > 1)
            {
                Debug("SELEZIONE PAGINA " + i);
                oPDF.SelectPage(i);
            }

            Debug("RenderPageToGdPictureImage");
            // NOTE(review): first argument is presumably the render resolution in DPI;
            // lowering it would reduce render/OCR cost — confirm against the SDK docs.
            m_ImageID = oPDF.RenderPageToGdPictureImage(200, true);
            if (m_ImageID == 0)
            {
                // Rendering failed (0 is assumed to be the "no image" handle — confirm);
                // there is nothing to OCR and nothing to release for this page.
                Debug("[" + path + "] Error on page " + i + ": " + oPDF.GetStat().ToString());
                continue;
            }

            try
            {
                Debug("OCRTesseractReinit");
                oGdPictureImaging.OCRTesseractReinit();
                Debug("OCRTesseractDoOCR");
                s += oGdPictureImaging.OCRTesseractDoOCR(m_ImageID, "ita", _dirOCR, "");
                if (oGdPictureImaging.GetStat() != GdPictureStatus.OK)
                {
                    Debug("[" + path + "] Error on page " + i + ": " + oGdPictureImaging.GetStat().ToString());
                }
            }
            finally
            {
                Debug("OCRTesseractClear");
                oGdPictureImaging.OCRTesseractClear();
                // Release the rendered page image. Without this, one full-page bitmap
                // is leaked per page, so memory grows for the whole run.
                oGdPictureImaging.ReleaseGdPictureImage(m_ImageID);
                m_ImageID = 0;
            }
        }
    }
    finally
    {
        // Close the document even if a page fails with an exception.
        oPDF.CloseDocument();
    }
}
Thank you
Mirko