Error message:
- Code: Select all
Traceback (most recent call last):
File "/src/pdf2text.py", line 8, in <module>
processor.process_document()
File "/src/utils/document_processor.py", line 68, in process_document
self._convert_pdf_to_docx()
File "/src/utils/document_processor.py", line 84, in _convert_pdf_to_docx
pdf_converter.convert_to_docx(self.temp_docx_filepath)
File "/src/utils/pdf_to_docx_converter.py", line 23, in convert_to_docx
self.doc.SaveToFile(output_file, FileFormat.DOCX)
File "/usr/local/lib/python3.10/dist-packages/plum/function.py", line 642, in __call__
return self.f(self.instance, *args, **kw_args)
File "/usr/local/lib/python3.10/dist-packages/plum/function.py", line 592, in __call__
return _convert(method(*args, **kw_args), return_type)
File "/usr/local/lib/python3.10/dist-packages/spire/pdf/PdfDocument.py", line 287, in SaveToFile
CallCFunction(GetDllLibPdf().PdfDocument_SaveToFileFF,self.Ptr, filename,enumfileFormat)
File "/usr/local/lib/python3.10/dist-packages/spire/pdf/common/__init__.py", line 109, in CallCFunction
raise SpireException(info)
spire.pdf.common.SpireException: Arg_NullReferenceException: at sprf1k.spra(sprf1d, String, sprf3n[], spraq6, Double, Double, Double, Boolean, sprauj, Boolean, Boolean) + 0xf7
at sprf07.spra(sprf3v, String, Boolean) + 0x429
at sprf07.spra(spreb0, String) + 0x4a9
at sprf07.sprd(spreb0) + 0xc35
at sprf07.spra(Boolean) + 0xf9
at sprf4a.spra(spreb8, sprdfc, sprf07) + 0xd8f
at sprf07.spre(spreb0) + 0x146
at sprf07.spra(Boolean) + 0xf9
at sprecn.spra(Int32) + 0x1a2
at Spire.Pdf.Conversion.PdfToDocConverter.spra(PdfDocumentBase, Stream, Int32, Int32) + 0xff
at Spire.Pdf.PdfDocumentBase.spra(String, Boolean) + 0x13e
at Spire.Pdf.AOT.NLPdfDocument.PdfDocument_SaveToFileFF(IntPtr, IntPtr, Int32, IntPtr) + 0x7b
Code:
- Code: Select all
from utils.pdf_to_docx_converter import PDFToDOCXConverter
def _convert_pdf_to_docx():
    """Convert the source PDF to a temporary DOCX file.

    Builds a ``PDFToDOCXConverter`` for ``pdf_path`` and asks it to write
    the result to ``temp_docx_filepath``. Both names are taken from the
    surrounding scope; they are not defined in this snippet.
    """
    PDFToDOCXConverter(pdf_path).convert_to_docx(temp_docx_filepath)
In my pdf_to_docx_converter.py file:
- Code: Select all
from spire.pdf.common import *
from spire.pdf import PdfDocument, FileFormat
class PDFToDOCXConverter:
    """Convert a single PDF file to DOCX using Spire.PDF's ``PdfDocument``."""

    def __init__(self, pdf_file: str) -> None:
        """Remember the input path and create an empty ``PdfDocument``.

        Args:
            pdf_file: Path of the PDF file to convert. It is not opened
                until ``convert_to_docx`` is called.
        """
        self.pdf_file = pdf_file
        self.doc = PdfDocument()

    def convert_to_docx(self, output_file: str) -> None:
        """Load the PDF and save it as a DOCX file at ``output_file``.

        The document is closed even when loading or saving fails, so a
        failed conversion does not leave it open. Any exception raised by
        ``LoadFromFile`` or ``SaveToFile`` still propagates to the caller.

        Args:
            output_file: Destination path for the generated DOCX file.
        """
        try:
            self.doc.LoadFromFile(self.pdf_file)
            self.doc.SaveToFile(output_file, FileFormat.DOCX)
        finally:
            # Close unconditionally: previously an exception from the two
            # calls above skipped Close() entirely.
            # NOTE(review): self.doc is closed after the first call; whether
            # the same instance can be reused for a second conversion is not
            # visible here -- confirm against the Spire.PDF documentation.
            self.doc.Close()
I'm using Spire.Pdf 10.2.0 for Python (installed from PyPI) and running it under WSL. The PDF file I used is too big to upload, but it is a PDF of a paper I downloaded online (paper name: BBDM: Image-to-Image Translation with Brownian Bridge Diffusion Models).
Appreciate the help, thank you.