Spire.PDF is a professional PDF library applied to creating, writing, editing, handling and reading PDF files without any external dependencies. Get free and professional technical support for Spire.PDF for .NET, Java, Android, C++, Python.

Fri Oct 09, 2015 1:50 am

I have a group of PDFs that were produced by scanning at 300dpi. An example is attached here, which is 88k.

This is a 10 page black and white document. I need to convert the document to an image format (tiff, jpeg, png, etc.) without losing resolution, make some simple modifications to the images, then convert them back to PDF. The problem that I am having is the 88k Original PDF is becoming a 2,733k multipage tiff and then, when converting back, a 9,456k PDF. All of the images look the same at the same zoom levels, and the page dimensions are equal (8.5x11).

What setting am I missing when doing these conversions back and forth that is creating these monster files? Below is the code that I am using.

Code: Select all
        #region PDf to Tiff

        private void btnPDFtoTiff_Click(object sender, EventArgs e)
        {
            PdfDocument document = new PdfDocument();
            document.LoadFromFile(txtSource.Text);
           
            JoinTiffImages(SaveAsImage(document), @"C:\Workspace\Result.tiff", EncoderValue.CompressionLZW);
        }

        private static Image[] SaveAsImage(PdfDocument document)
        {
            Image[] images = new Image[document.Pages.Count];
            for (int i = 0; i < document.Pages.Count; i++)
            {
                images[i] = document.SaveAsImage(i, 300, 300);
            }
            return images;
        }

        private static ImageCodecInfo GetEncoderInfo(string mimeType)
        {
            ImageCodecInfo[] encoders = ImageCodecInfo.GetImageEncoders();
            for (int j = 0; j < encoders.Length; j++)
            {
                if (encoders[j].MimeType == mimeType)
                    return encoders[j];
            }
            throw new Exception(mimeType + " mime type not found in ImageCodecInfo");
        }

        public static void JoinTiffImages(Image[] images, string outFile, EncoderValue compressEncoder)
        {
            //use the save encoder
            System.Drawing.Imaging.Encoder enc = System.Drawing.Imaging.Encoder.SaveFlag;
            EncoderParameters ep = new EncoderParameters(3);
            ep.Param[0] = new EncoderParameter(enc, (long)EncoderValue.MultiFrame);
            ep.Param[1] = new EncoderParameter(System.Drawing.Imaging.Encoder.Compression, (long)compressEncoder);
            ep.Param[2] = new EncoderParameter(System.Drawing.Imaging.Encoder.Quality, 100L);
            Image pages = images[0];
            int frame = 0;
            ImageCodecInfo info = GetEncoderInfo("image/tiff");

            foreach (Image img in images)
            {
                if (frame == 0)
                {
                    pages = img;
                    //save the first frame
                    pages.Save(outFile, info, ep);
                }

                else
                {
                    //save the intermediate frames
                    ep.Param[0] = new EncoderParameter(enc, (long)EncoderValue.FrameDimensionPage);

                    pages.SaveAdd(img, ep);
                }
                if (frame == images.Length - 1)
                {
                    //flush and close.
                    ep.Param[0] = new EncoderParameter(enc, (long)EncoderValue.Flush);
                    pages.SaveAdd(ep);
                }
                frame++;
            }
        }

        #endregion

        #region Tiff to PDF

        private void btnTiffToPDF_Click(object sender, EventArgs e)
        {
            ConvertImagetoPDF(txtSource.Text);
        }

        public static void ConvertImagetoPDF(String ImageFilename)
        {
            using (PdfDocument pdfDoc = new PdfDocument())
            {
                Image image = Image.FromFile(ImageFilename);

                Image[] img = SplitImages(image, ImageFormat.Png);

                for (int i = 0; i < img.Length; i++)
                {
                    PdfImage pdfImg = PdfImage.FromImage(img[i]);

                    float width = pdfImg.Width *0.3f;
                    float height = pdfImg.Height *0.3f;

                    SizeF fPage = PdfPageSize.Letter;

                    PdfMargins margin = new PdfMargins();
                    margin.All = 0;

                    PdfPageBase page = pdfDoc.Pages.Add(fPage, margin);
                    pdfDoc.PageSettings.Margins.All = 0;

                    float widthFitRate = pdfImg.PhysicalDimension.Width / page.Canvas.ClientSize.Width;
                    float heightFitRate = pdfImg.PhysicalDimension.Height / page.Canvas.ClientSize.Height;
                    float fitRate = Math.Max(widthFitRate, heightFitRate);
                    float fitWidth = pdfImg.PhysicalDimension.Width / fitRate;
                    float fitHeight = pdfImg.PhysicalDimension.Height / fitRate;
                    page.Canvas.DrawImage(pdfImg, 0, 0, fitWidth, fitHeight);
                }

                string PdfFilename = @"C:\Workspace\Converted.pdf";
                pdfDoc.SaveToFile(PdfFilename);
                System.Diagnostics.Process.Start(PdfFilename);
            }
        }

        public static Image[] SplitImages(Image image, ImageFormat format)
        {
            Guid guid = image.FrameDimensionsList[0];
            FrameDimension dimension = new FrameDimension(guid);
            int pageCount = image.GetFrameCount(dimension);

            Image[] frames = new Image[pageCount];

            for (int i = 0; i < pageCount; i++)
            {
                using (MemoryStream buffer = new MemoryStream())
                {
                    image.SelectActiveFrame(dimension, i);
                    image.Save(buffer, format);
                    frames[i] = Image.FromStream(buffer);
                }
            }
            return frames;
        }

        #endregion

MatthewPierce
 
Posts: 22
Joined: Thu Jul 16, 2015 4:45 pm

Fri Oct 09, 2015 7:31 am

Hi,

Thanks for your inquiry.
I have recreated your issue and posted it to our dev team. We will tell you when there is any update.

Best Regards,
Amy
E-iceblue support team
User avatar

amy.zhao
 
Posts: 2772
Joined: Wed Jun 27, 2012 8:50 am

Sat Oct 10, 2015 3:16 am

Hi,

Please change the ImageFormat in this line of code: Image[] img = SplitImages(image, ImageFormat.Png); to Tiff. With that change, the size of the output PDF is about 2MB.
In your original pdf file, the images are stored by Device Gray, but when converting to tiff images, the images are stored as RGB. Converting those RGB images into a new PDF file is why the new pdf file is larger than the original pdf file.

Best Regards,
Amy
E-iceblue support team
User avatar

amy.zhao
 
Posts: 2772
Joined: Wed Jun 27, 2012 8:50 am

Sat Oct 10, 2015 3:58 am

Hi Amy,

Going from 88k to 2MB is still unacceptable, this is over 20X the original file size. Are there other options or perhaps another method to generate PDFs with Spire that does not create such a huge inflation in the file size?

Is there a way to have Spire convert the Tiff images to Device Gray rather than RGB when generating the PDF so that it reduces the resulting PDF to something closer to the original 88k?

MatthewPierce
 
Posts: 22
Joined: Thu Jul 16, 2015 4:45 pm

Sat Oct 10, 2015 4:30 am

amy.zhao wrote:In your original pdf file, the images are stored by Device Gray


Out of curiosity, how would you go about detecting that with Spire? This could come in handy for future operations where I need to determine if the source PDF is B&W or Color.

Thank you.

MatthewPierce
 
Posts: 22
Joined: Thu Jul 16, 2015 4:45 pm

Sat Oct 10, 2015 9:08 am

Hi,

I have posted your requirement to our dev team. Our dev team is investigating in order to find a solution that decreases the size of the image.
For the image in PDF, you can use text editor to detect the data(Image /ColorSpace /DeviceGray /BitsPerComponent 1 /Filter /JBIG2Decode /Width 2560 /Height 3300) , likes notepad, the general image is by Image.Flags property.

Best Regards,
Amy
E-iceblue support team
User avatar

amy.zhao
 
Posts: 2772
Joined: Wed Jun 27, 2012 8:50 am

Sat Oct 10, 2015 6:02 pm

amy.zhao wrote:For the image in PDF, you can use text editor to detect the data(Image /ColorSpace /DeviceGray /BitsPerComponent 1 /Filter /JBIG2Decode /Width 2560 /Height 3300) , likes notepad, the general image is by Image.Flags property.


So, there isn't a way to expose these values programmatically with Spire? Something like:

Code: Select all
            // Hypothetical sketch only: "[somethingHere]" is a placeholder for the API being
            // asked about, so this snippet is not compilable as written.
            PdfDocument document = new PdfDocument();
            document.LoadFromFile(txtSource.Text);
            foreach (PdfPageBase page in document.Pages)
            {
                string strColorSpace = page.[somethingHere].ToString();
            }

MatthewPierce
 
Posts: 22
Joined: Thu Jul 16, 2015 4:45 pm

Mon Oct 12, 2015 1:16 am

Hi,

Thanks for your inquiry.
Sorry that there is no way to expose these values programmatically with Spire.

Best Regards,
Amy
E-iceblue support team
User avatar

amy.zhao
 
Posts: 2772
Joined: Wed Jun 27, 2012 8:50 am

Thu Oct 29, 2015 8:04 am

Hi,

Thanks for waiting.
Our dev team did a fix for your massive file size issue, please download and test Spire.PDF Pack(Hot Fix) Version:3.5.143 with new code for SaveAsImage below.
The downloading link is http://www.e-iceblue.com/Download/downl ... t-now.html.
Code: Select all
 private static Image[] SaveAsImage(PdfDocument document)
        {
            Image[] images = new Image[document.Pages.Count];
            for (int i = 0; i < document.Pages.Count; i++)
            {
                images[i] = document.Pages[i].ExtractImages(false)[0];
            }
            return images;
        }


Best Regards,
Amy
E-iceblue support team
User avatar

amy.zhao
 
Posts: 2772
Joined: Wed Jun 27, 2012 8:50 am

Thu Oct 29, 2015 3:32 pm

This only gets us halfway there, it does not resolve the problem.

"The problem that I am having is the 88k Original PDF is becoming a 2,733k multipage tiff and then, when converting back, a 9,456k PDF"

The fix does reduce the file size of the conversion from PDF to TIFF (the 88k PDF becomes a 1,205kb TIFF now instead of 2,733kb), but does not address the real problem: The TIFF to PDF conversion still produces a massive PDF.

When converting this TIFF back to PDF we get a 9,345kb PDF. Was there any progress made with the TIFF to PDF conversion?

MatthewPierce
 
Posts: 22
Joined: Thu Jul 16, 2015 4:45 pm

Fri Oct 30, 2015 3:23 am

Hi,

Thanks for your reply.
I attached my test code; the size of the generated tiff is 1.28MB and the size of the generated pdf is 708KB. The .NET compression algorithms can only compress to this extent.

Best Regards,
Amy
E-iceblue support team
User avatar

amy.zhao
 
Posts: 2772
Joined: Wed Jun 27, 2012 8:50 am

Tue Nov 03, 2015 3:44 am

Hello,

Have you tested my code? Has your issue been resolved?
Thanks for your feedback.

Best Regards,
Amy
E-iceblue support team
User avatar

amy.zhao
 
Posts: 2772
Joined: Wed Jun 27, 2012 8:50 am

Return to Spire.PDF