- Demo
- C# source
- VB.Net source
This sample demonstrates how to extract images and text from a PDF document.
(NO screenshot)
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Text;
using Spire.Pdf;

namespace Extraction
{
    /// <summary>
    /// Console sample that extracts the text and the images of every page of
    /// "Sample2.pdf", writes the text to "TextInPdf.txt", saves each image as
    /// "Image-N.png" (N starting at 0), and then opens the text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: loads the PDF, collects text and images page by page,
        /// writes the results to the current directory and launches the text file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            StringBuilder buffer = new StringBuilder();
            IList<Image> images = new List<Image>();

            try
            {
                // Create a pdf document.
                PdfDocument doc = new PdfDocument();
                try
                {
                    doc.LoadFromFile(@"Sample2.pdf");

                    foreach (PdfPageBase page in doc.Pages)
                    {
                        buffer.Append(page.ExtractText());
                        foreach (Image image in page.ExtractImages())
                        {
                            images.Add(image);
                        }
                    }
                }
                finally
                {
                    // Close the document even when loading or extraction throws.
                    doc.Close();
                }

                // Save text.
                String fileName = "TextInPdf.txt";
                File.WriteAllText(fileName, buffer.ToString());

                // Save images, numbered in extraction order.
                int index = 0;
                foreach (Image image in images)
                {
                    String imageFileName = String.Format("Image-{0}.png", index++);
                    image.Save(imageFileName, ImageFormat.Png);
                }

                // Launch the text file with its associated application.
                // UseShellExecute is set explicitly because opening a document
                // (rather than an executable) requires it, and its default is
                // false on .NET Core / .NET 5+.
                System.Diagnostics.ProcessStartInfo startInfo =
                    new System.Diagnostics.ProcessStartInfo(fileName);
                startInfo.UseShellExecute = true;
                System.Diagnostics.Process.Start(startInfo);
            }
            finally
            {
                // Image is IDisposable; release every extracted image whether
                // or not the steps above succeeded.
                foreach (Image image in images)
                {
                    image.Dispose();
                }
            }
        }
    }
}
Imports System.Collections.Generic
Imports System.Drawing
Imports System.Drawing.Imaging
Imports System.IO
Imports System.Text
Imports Spire.Pdf

Namespace Extraction
    ''' <summary>
    ''' Console sample that extracts the text and the images of every page of
    ''' "Sample2.pdf", writes the text to "TextInPdf.txt", saves each image as
    ''' "Image-N.png" (N starting at 0), and then opens the text file.
    ''' </summary>
    Friend Class Program
        ''' <summary>
        ''' Entry point: loads the PDF, collects text and images page by page,
        ''' writes the results to the current directory and launches the text file.
        ''' </summary>
        ''' <param name="args">Command-line arguments; not used.</param>
        Shared Sub Main(ByVal args() As String)
            Dim buffer As New StringBuilder()
            Dim images As IList(Of Image) = New List(Of Image)()

            Try
                'Create a pdf document.
                Dim doc As New PdfDocument()
                Try
                    doc.LoadFromFile("Sample2.pdf")

                    For Each page As PdfPageBase In doc.Pages
                        buffer.Append(page.ExtractText())
                        For Each pageImage As Image In page.ExtractImages()
                            images.Add(pageImage)
                        Next pageImage
                    Next page
                Finally
                    'Close the document even when loading or extraction throws.
                    doc.Close()
                End Try

                'Save text.
                Dim fileName As String = "TextInPdf.txt"
                File.WriteAllText(fileName, buffer.ToString())

                'Save images, numbered in extraction order.
                Dim index As Integer = 0
                For Each savedImage As Image In images
                    Dim imageFileName As String = String.Format("Image-{0}.png", index)
                    index += 1
                    savedImage.Save(imageFileName, ImageFormat.Png)
                Next savedImage

                'Launch the text file with its associated application.
                'UseShellExecute is set explicitly because opening a document
                '(rather than an executable) requires it, and its default is
                'False on .NET Core / .NET 5+.
                Dim startInfo As New System.Diagnostics.ProcessStartInfo(fileName)
                startInfo.UseShellExecute = True
                System.Diagnostics.Process.Start(startInfo)
            Finally
                'Image is IDisposable; release every extracted image whether
                'or not the steps above succeeded.
                For Each ownedImage As Image In images
                    ownedImage.Dispose()
                Next ownedImage
            End Try
        End Sub
    End Class
End Namespace