This sample demonstrates how to extract images and text from a PDF document.
(NO screenshot)
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Text;
using Spire.Pdf;
namespace Extraction
{
    /// <summary>
    /// Console sample that extracts the text and the images of a PDF file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "Sample2.pdf", writes the text of all pages to "TextInPdf.txt",
        /// saves every extracted image as "Image-{n}.png" (n starting at 0) and
        /// finally opens the text file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            String fileName = "TextInPdf.txt";
            StringBuilder buffer = new StringBuilder();
            IList<Image> images = new List<Image>();

            try
            {
                //Load the pdf document.
                PdfDocument doc = new PdfDocument();
                try
                {
                    doc.LoadFromFile(@"Sample2.pdf");

                    //Collect the text and the images of every page.
                    foreach (PdfPageBase page in doc.Pages)
                    {
                        buffer.Append(page.ExtractText());
                        foreach (Image image in page.ExtractImages())
                        {
                            images.Add(image);
                        }
                    }
                }
                finally
                {
                    //Close the document even when loading or extraction fails.
                    doc.Close();
                }

                //save text
                File.WriteAllText(fileName, buffer.ToString());

                //save image
                int index = 0;
                foreach (Image image in images)
                {
                    String imageFileName
                        = String.Format("Image-{0}.png", index++);
                    image.Save(imageFileName, ImageFormat.Png);
                }
            }
            finally
            {
                //Image is IDisposable; release every extracted image,
                //including when saving one of them failed.
                foreach (Image image in images)
                {
                    if (image != null)
                    {
                        image.Dispose();
                    }
                }
            }

            //Launching the Text file.
            //UseShellExecute must be true to open a document with its associated
            //application; it defaults to false on .NET Core / .NET 5+.
            System.Diagnostics.ProcessStartInfo startInfo
                = new System.Diagnostics.ProcessStartInfo(fileName);
            startInfo.UseShellExecute = true;
            System.Diagnostics.Process.Start(startInfo);
        }
    }
}
Imports System.Collections.Generic
Imports System.Drawing
Imports System.Drawing.Imaging
Imports System.IO
Imports System.Text
Imports Spire.Pdf
Namespace Extraction
    ''' <summary>
    ''' Console sample that extracts the text and the images of a PDF file.
    ''' </summary>
    Friend Class Program
        ''' <summary>
        ''' Loads "Sample2.pdf", writes the text of all pages to "TextInPdf.txt",
        ''' saves every extracted image as "Image-{n}.png" (n starting at 0) and
        ''' finally opens the text file.
        ''' </summary>
        ''' <param name="args">Command-line arguments; not used.</param>
        Shared Sub Main(ByVal args() As String)
            Dim fileName As String = "TextInPdf.txt"
            Dim buffer As New StringBuilder()
            Dim images As IList(Of Image) = New List(Of Image)()

            Try
                'Load the pdf document.
                Dim doc As New PdfDocument()
                Try
                    doc.LoadFromFile("Sample2.pdf")

                    'Collect the text and the images of every page.
                    For Each page As PdfPageBase In doc.Pages
                        buffer.Append(page.ExtractText())
                        For Each extracted As Image In page.ExtractImages()
                            images.Add(extracted)
                        Next extracted
                    Next page
                Finally
                    'Close the document even when loading or extraction fails.
                    doc.Close()
                End Try

                'save text
                File.WriteAllText(fileName, buffer.ToString())

                'save image
                Dim index As Integer = 0
                For Each extracted As Image In images
                    Dim imageFileName As String = String.Format("Image-{0}.png", index)
                    index += 1
                    extracted.Save(imageFileName, ImageFormat.Png)
                Next extracted
            Finally
                'Image is IDisposable; release every extracted image,
                'including when saving one of them failed.
                For Each extracted As Image In images
                    If extracted IsNot Nothing Then
                        extracted.Dispose()
                    End If
                Next extracted
            End Try

            'Launching the Text file.
            'UseShellExecute must be True to open a document with its associated
            'application; it defaults to False on .NET Core / .NET 5+.
            'Fully qualified so no project-level import of System.Diagnostics is needed.
            Dim startInfo As New System.Diagnostics.ProcessStartInfo(fileName)
            startInfo.UseShellExecute = True
            System.Diagnostics.Process.Start(startInfo)
        End Sub
    End Class
End Namespace