In This Topic
Programming / PDF / Extracting embedded files (attachments) from a PDF document

Extracting embedded files (attachments) from a PDF document

In This Topic

Other files can be attached to a PDF document in two ways, and each kind of attachment is extracted in its own way. Here is how to handle both using GdPicture.NET.

  • Files can be embedded as a whole directly in a PDF document; these are called File Attachments. Viewer applications list them in the Attachments panel. This simple piece of code shows you how to find out the number of attached files and how to extract the first one into a single file.
    Copy Code
    'We assume that GdPicture has been correctly installed and unlocked.
    'Loads "test.pdf", reports how many embedded files (File Attachments) it contains
    'and saves the content of the first one to "<embedded file name>_content.dat".
    Const caption As String = "Example: Embedded files (Attachments)"

    Using oGdPicturePDF As GdPicturePDF = New GdPicturePDF()
        If oGdPicturePDF.LoadFromFile("test.pdf", False) = GdPictureStatus.OK Then
            Dim embeddedFileCount As Integer = oGdPicturePDF.GetEmbeddedFileCount()
            If oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                If embeddedFileCount > 0 Then
                    Dim fileName As String = oGdPicturePDF.GetEmbeddedFileName(0)
                    If oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                        Dim fileSize As Integer = oGdPicturePDF.GetEmbeddedFileSize(0)
                        If oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                            'VB array bounds are upper bounds: New Byte(n) {} holds n + 1 elements,
                            'so fileSize - 1 is needed to get exactly fileSize bytes (and -1 gives an empty array).
                            'NOTE(review): assumes ExtractEmbeddedFile fills or replaces the buffer with exactly
                            'the file content - confirm against the GdPicture reference.
                            Dim fileData As Byte() = New Byte(fileSize - 1) {}
                            Dim status As GdPictureStatus = oGdPicturePDF.ExtractEmbeddedFile(0, fileData)
                            If status = GdPictureStatus.OK Then
                                MessageBox.Show("The content of the first embedded file has been extracted successfully.", caption, MessageBoxButtons.OK, MessageBoxIcon.Information)

                                'The name is read from the PDF, so keep only its file-name part;
                                'otherwise a crafted name could make us write outside the working directory.
                                Dim outputPath As String = System.IO.Path.GetFileName(fileName) & "_content.dat"

                                'Using guarantees the stream is closed even if Write throws.
                                Using oFileStream As New System.IO.FileStream(outputPath, System.IO.FileMode.Create)
                                    oFileStream.Write(fileData, 0, fileData.Length)
                                End Using

                                MessageBox.Show("The content of the embedded file has been saved successfully.", caption, MessageBoxButtons.OK, MessageBoxIcon.Information)
                            Else
                                MessageBox.Show("The embedded file has failed to extract. Status: " & status.ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error)
                            End If
                        Else
                            MessageBox.Show("An error occurred getting the file size. Status: " & oGdPicturePDF.GetStat().ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error)
                        End If
                    Else
                        MessageBox.Show("An error occurred getting the file name. Status: " & oGdPicturePDF.GetStat().ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error)
                    End If
                Else
                    MessageBox.Show("This PDF file does not contain embedded files.", caption, MessageBoxButtons.OK, MessageBoxIcon.Information)
                End If
            Else
                MessageBox.Show("An error occurred getting the number of embedded files. Status: " & oGdPicturePDF.GetStat().ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error)
            End If
        Else
            MessageBox.Show("The file can't be loaded.", caption, MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End Using
    Copy Code
    //We assume that GdPicture has been correctly installed and unlocked.
    //Loads "test.pdf", reports how many embedded files (File Attachments) it contains
    //and saves the content of the first one to "<embedded file name>_content.dat".
    const string caption = "Example: Embedded files (Attachments)";

    using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
    {
        if (oGdPicturePDF.LoadFromFile("test.pdf", false) == GdPictureStatus.OK)
        {
            int embeddedFileCount = oGdPicturePDF.GetEmbeddedFileCount();
            if (oGdPicturePDF.GetStat() == GdPictureStatus.OK)
            {
                if (embeddedFileCount > 0)
                {
                    string fileName = oGdPicturePDF.GetEmbeddedFileName(0);
                    if (oGdPicturePDF.GetStat() == GdPictureStatus.OK)
                    {
                        int fileSize = oGdPicturePDF.GetEmbeddedFileSize(0);
                        if (oGdPicturePDF.GetStat() == GdPictureStatus.OK)
                        {
                            //Allocate exactly fileSize bytes so that no padding byte ends up in the saved file.
                            //NOTE(review): assumes ExtractEmbeddedFile fills or replaces the buffer with exactly
                            //the file content - confirm against the GdPicture reference.
                            byte[] fileData = new byte[fileSize];
                            GdPictureStatus status = oGdPicturePDF.ExtractEmbeddedFile(0, ref fileData);
                            if (status == GdPictureStatus.OK)
                            {
                                MessageBox.Show("The content of the first embedded file has been extracted successfully.", caption, MessageBoxButtons.OK, MessageBoxIcon.Information);

                                //The name is read from the PDF, so keep only its file-name part;
                                //otherwise a crafted name could make us write outside the working directory.
                                string outputPath = System.IO.Path.GetFileName(fileName) + "_content.dat";

                                //The using statement guarantees the stream is closed even if Write throws.
                                using (System.IO.FileStream oFileStream = new System.IO.FileStream(outputPath, System.IO.FileMode.Create))
                                {
                                    oFileStream.Write(fileData, 0, fileData.Length);
                                }

                                MessageBox.Show("The content of the embedded file has been saved successfully.", caption, MessageBoxButtons.OK, MessageBoxIcon.Information);
                            }
                            else
                            {
                                MessageBox.Show("The embedded file has failed to extract. Status: " + status.ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error);
                            }
                        }
                        else
                        {
                            MessageBox.Show("An error occurred getting the file size. Status: " + oGdPicturePDF.GetStat().ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error);
                        }
                    }
                    else
                    {
                        MessageBox.Show("An error occurred getting the file name. Status: " + oGdPicturePDF.GetStat().ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error);
                    }
                }
                else
                {
                    MessageBox.Show("This PDF file does not contain embedded files.", caption, MessageBoxButtons.OK, MessageBoxIcon.Information);
                }
            }
            else
            {
                MessageBox.Show("An error occurred getting the number of embedded files. Status: " + oGdPicturePDF.GetStat().ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }
        else
        {
            MessageBox.Show("The file can't be loaded.", caption, MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
  • Embedded files can also be attached as File Attachment Annotations, each belonging to a particular comment located somewhere on a page. Such a comment appears on that page with a file attachment icon, and the corresponding file is listed in the Comments panel of viewer applications. This example demonstrates how to find the files attached to comments and how to extract them.
    Copy Code
    'We assume that GdPicture has been correctly installed and unlocked.
    'Walks through every page of "test.pdf", looks for annotations of the "FileAttachment" subtype,
    'saves each attached file to disk and finally shows a per-page summary.
    Const caption As String = "Example: Annotation's file attachments"

    Using oGdPicturePDF As GdPicturePDF = New GdPicturePDF()
        If oGdPicturePDF.LoadFromFile("test.pdf", False) = GdPictureStatus.OK Then
            Dim pageCount As Integer = oGdPicturePDF.GetPageCount()
            If oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                Dim message As String = ""
                'Set as soon as any step fails; it selects the icon of the summary box.
                'A flag is used instead of searching the text because not every failure text contains "error".
                Dim hasError As Boolean = False

                For page As Integer = 1 To pageCount
                    message = message & "Page nr." & page.ToString()
                    If oGdPicturePDF.SelectPage(page) = GdPictureStatus.OK Then
                        Dim annotCount As Integer = oGdPicturePDF.GetAnnotationCount()
                        If oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                            For annotID As Integer = 0 To annotCount - 1
                                Dim subtype As String = oGdPicturePDF.GetAnnotationSubType(annotID)
                                If oGdPicturePDF.GetStat() = GdPictureStatus.OK Then
                                    If subtype.Equals("FileAttachment") Then
                                        message = message & vbCrLf & "AnnotID: " & annotID.ToString()

                                        Dim filedata As Byte() = Nothing
                                        Dim filesize As Integer = 0
                                        Dim filename As String = oGdPicturePDF.GetFileAttachmentAnnotFileName(annotID)

                                        'Each step runs only if the previous one succeeded; status keeps the first failure.
                                        Dim status As GdPictureStatus = oGdPicturePDF.GetStat()
                                        If status = GdPictureStatus.OK Then
                                            filesize = oGdPicturePDF.GetFileAttachmentAnnotFileSize(annotID)
                                            status = oGdPicturePDF.GetStat()
                                        End If
                                        If status = GdPictureStatus.OK Then
                                            status = oGdPicturePDF.GetFileAttachmentAnnotEmbeddedFile(annotID, filedata)
                                        End If

                                        If status = GdPictureStatus.OK Then
                                            'The name is read from the PDF, so keep only its file-name part;
                                            'otherwise a crafted name could make us write outside the working directory.
                                            Dim outputPath As String = System.IO.Path.GetFileName(filename)

                                            'File.Create truncates an existing file, so no stale bytes survive behind the new content.
                                            Using file As System.IO.Stream = System.IO.File.Create(outputPath)
                                                'Never write more bytes than the returned buffer actually holds.
                                                file.Write(filedata, 0, System.Math.Min(filesize, filedata.Length))
                                            End Using
                                            message = message & " - The embedded file has been extracted successfully."
                                        Else
                                            hasError = True
                                            message = message & " - Extracting embedded file has failed. Status: " & status.ToString()
                                        End If
                                    End If
                                Else
                                    hasError = True
                                    message = message & vbCrLf & "AnnotID: " & annotID.ToString() & " - An error occurred getting the annotation subtype. Status: " & oGdPicturePDF.GetStat().ToString()
                                End If
                            Next
                        Else
                            hasError = True
                            message = message & " - An error occurred getting the annotation count. Status: " & oGdPicturePDF.GetStat().ToString()
                        End If
                    Else
                        hasError = True
                        message = message & " - An error occurred selecting the page. Status: " & oGdPicturePDF.GetStat().ToString()
                    End If
                    message &= vbCrLf
                Next

                Dim icon As MessageBoxIcon = If(hasError, MessageBoxIcon.Error, MessageBoxIcon.Information)
                MessageBox.Show(message, caption, MessageBoxButtons.OK, icon)
            Else
                MessageBox.Show("An error occurred getting the page count. Status: " & oGdPicturePDF.GetStat().ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error)
            End If
        Else
            MessageBox.Show("The file can't be loaded.", caption, MessageBoxButtons.OK, MessageBoxIcon.Error)
        End If
    End Using
    Copy Code
    //We assume that GdPicture has been correctly installed and unlocked.
    //Walks through every page of "test.pdf", looks for annotations of the "FileAttachment" subtype,
    //saves each attached file to disk and finally shows a per-page summary.
    const string caption = "Example: Annotation's file attachments";

    using (GdPicturePDF oGdPicturePDF = new GdPicturePDF())
    {
        if (oGdPicturePDF.LoadFromFile("test.pdf", false) == GdPictureStatus.OK)
        {
            int pageCount = oGdPicturePDF.GetPageCount();
            if (oGdPicturePDF.GetStat() == GdPictureStatus.OK)
            {
                string message = "";
                //Set as soon as any step fails; it selects the icon of the summary box.
                //A flag is used instead of searching the text because not every failure text contains "error".
                bool hasError = false;

                for (int page = 1; page <= pageCount; page++)
                {
                    message += "Page nr." + page.ToString();
                    if (oGdPicturePDF.SelectPage(page) == GdPictureStatus.OK)
                    {
                        int annotCount = oGdPicturePDF.GetAnnotationCount();
                        if (oGdPicturePDF.GetStat() == GdPictureStatus.OK)
                        {
                            for (int annotID = 0; annotID < annotCount; annotID++)
                            {
                                string subtype = oGdPicturePDF.GetAnnotationSubType(annotID);
                                if (oGdPicturePDF.GetStat() == GdPictureStatus.OK)
                                {
                                    if (subtype.Equals("FileAttachment"))
                                    {
                                        message += "\nAnnotID: " + annotID.ToString();

                                        byte[] filedata = null;
                                        int filesize = 0;
                                        string filename = oGdPicturePDF.GetFileAttachmentAnnotFileName(annotID);

                                        //Each step runs only if the previous one succeeded; status keeps the first failure.
                                        GdPictureStatus status = oGdPicturePDF.GetStat();
                                        if (status == GdPictureStatus.OK)
                                        {
                                            filesize = oGdPicturePDF.GetFileAttachmentAnnotFileSize(annotID);
                                            status = oGdPicturePDF.GetStat();
                                        }
                                        if (status == GdPictureStatus.OK)
                                        {
                                            status = oGdPicturePDF.GetFileAttachmentAnnotEmbeddedFile(annotID, ref filedata);
                                        }

                                        if (status == GdPictureStatus.OK)
                                        {
                                            //The name is read from the PDF, so keep only its file-name part;
                                            //otherwise a crafted name could make us write outside the working directory.
                                            string outputPath = System.IO.Path.GetFileName(filename);

                                            //File.Create truncates an existing file, so no stale bytes survive behind the new content.
                                            using (System.IO.Stream file = System.IO.File.Create(outputPath))
                                            {
                                                //Never write more bytes than the returned buffer actually holds.
                                                file.Write(filedata, 0, System.Math.Min(filesize, filedata.Length));
                                            }
                                            message += " - The embedded file has been extracted successfully.";
                                        }
                                        else
                                        {
                                            hasError = true;
                                            message += " - Extracting embedded file has failed. Status: " + status.ToString();
                                        }
                                    }
                                }
                                else
                                {
                                    hasError = true;
                                    message += "\nAnnotID: " + annotID.ToString() + " - An error occurred getting the annotation subtype. Status: " + oGdPicturePDF.GetStat().ToString();
                                }
                            }
                        }
                        else
                        {
                            hasError = true;
                            message += " - An error occurred getting the annotation count. Status: " + oGdPicturePDF.GetStat().ToString();
                        }
                    }
                    else
                    {
                        hasError = true;
                        message += " - An error occurred selecting the page. Status: " + oGdPicturePDF.GetStat().ToString();
                    }
                    message += "\n";
                }

                MessageBoxIcon icon = hasError ? MessageBoxIcon.Error : MessageBoxIcon.Information;
                MessageBox.Show(message, caption, MessageBoxButtons.OK, icon);
            }
            else
            {
                MessageBox.Show("An error occurred getting the page count. Status: " + oGdPicturePDF.GetStat().ToString(), caption, MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }
        else
        {
            MessageBox.Show("The file can't be loaded.", caption, MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }