Convert Word to HTML with JavaScript in React

In web development, converting Word documents to HTML lets content creators draft web-ready content in the familiar Word editor. This approach not only structures the content appropriately for web delivery but also streamlines content management. Furthermore, with React, developers can perform the conversion directly in the browser on the client side, simplifying the development workflow and potentially reducing load times and server costs.

This article demonstrates how to use Spire.Doc for JavaScript to convert Word documents to HTML files within React applications.

Install Spire.Doc for JavaScript

To get started with converting Word documents to HTML in a React application, you can either download Spire.Doc for JavaScript from our website or install it via npm with the following command:

npm i spire.doc

After that, copy the "Spire.Doc.Base.js" and "Spire.Doc.Base.wasm" files into the public folder of your project. Additionally, include the required font files to ensure accurate and consistent text rendering.

For more details, refer to the documentation: How to Integrate Spire.Doc for JavaScript in a React Project

Convert Word Documents to HTML Using JavaScript

With Spire.Doc for JavaScript, you can load Word documents into the WASM environment using the Document.LoadFromFile() method and convert them to HTML files with the Document.SaveToFile() method. This approach converts Word documents into HTML format with CSS files and images separated from the main HTML file, allowing developers to easily customize the HTML page.

Follow these steps to convert a Word document to HTML format using Spire.Doc for JavaScript in React:

  • Load the Spire.Doc.Base.js file to initialize the WebAssembly module.
  • Load the Word file into the virtual file system using the wasmModule.FetchFileToVFS() method.
  • Create a Document instance in the WASM module using the wasmModule.Document.Create() method.
  • Load the Word document into the Document instance using the Document.LoadFromFile() method.
  • Convert the Word document to HTML format using the Document.SaveToFile({ fileName: string, fileFormat: wasmModule.FileFormat.Html }) method.
  • Pack and download the result files or take further actions as needed.
Code example (JavaScript):
import React, { useState, useEffect } from 'react';
import JSZip from 'jszip';

function App() {

  // State to hold the loaded WASM module
  const [wasmModule, setWasmModule] = useState(null);

  // useEffect hook to load the WASM module when the component mounts
  useEffect(() => {
    const loadWasm = async () => {
      try {

        // Access the Module and spiredoc from the global window object
        const { Module, spiredoc } = window;

        // Set the wasmModule state when the runtime is initialized
        Module.onRuntimeInitialized = () => {
          setWasmModule(spiredoc);
        };
      } catch (err) {

        // Log any errors that occur during loading
        console.error('Failed to load WASM module:', err);
      }
    };

    // Create a script element to load the WASM JavaScript file
    const script = document.createElement('script');
    script.src = `${process.env.PUBLIC_URL}/Spire.Doc.Base.js`;
    script.onload = loadWasm;

    // Append the script to the document body
    document.body.appendChild(script);

    // Cleanup function to remove the script when the component unmounts
    return () => {
      document.body.removeChild(script);
    };
  }, []);

  // Function to convert the Word document to HTML format
  const WordToHTMLAndZip = async () => {
    if (wasmModule) {
      // Specify the input file name and the output folder name
      const inputFileName = 'Sample.docx';
      const outputFolderName = 'WordToHTMLOutput';

      // Fetch the input file and add it to the VFS
      await wasmModule.FetchFileToVFS(inputFileName, '', `${process.env.PUBLIC_URL}/`);

      // Create an instance of the Document class
      const doc = wasmModule.Document.Create();
      // Load the Word document
      doc.LoadFromFile({ fileName: inputFileName });

      // Save the Word document to HTML format in the output folder
      doc.SaveToFile({ fileName: `${outputFolderName}/document.html`, fileFormat: wasmModule.FileFormat.Html });

      // Release resources
      doc.Dispose();

      // Create a new JSZip object
      const zip = new JSZip();

      // Recursive function to add a directory and its contents to the ZIP
      const addFilesToZip = (folderPath, zipFolder) => {
        const items = wasmModule.FS.readdir(folderPath);
        items.filter(item => item !== "." && item !== "..").forEach((item) => {
          const itemPath = `${folderPath}/${item}`;

          try {
            // Attempt to read file data
            const fileData = wasmModule.FS.readFile(itemPath);
            zipFolder.file(item, fileData);
          } catch (error) {
            if (error.code === 'EISDIR') {
              // If it's a directory, create a new folder in the ZIP and recurse into it
              const zipSubFolder = zipFolder.folder(item);
              addFilesToZip(itemPath, zipSubFolder);
            } else {
              // Handle other errors
              console.error(`Error processing ${itemPath}:`, error);
            }
          }
        });
      };

      // Add all files in the output folder to the ZIP
      addFilesToZip(outputFolderName, zip);

      // Generate and download the ZIP file
      zip.generateAsync({ type: 'blob' }).then((content) => {
        const url = URL.createObjectURL(content);
        const a = document.createElement('a');
        a.href = url;
        a.download = `${outputFolderName}.zip`;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
        URL.revokeObjectURL(url);
      });
    }
  };

  return (
      <div style={{ textAlign: 'center', height: '300px' }}>
        <h1>Convert Word File to HTML and Download as ZIP Using JavaScript in React</h1>
        <button onClick={WordToHTMLAndZip} disabled={!wasmModule}>
          Convert and Download
        </button>
      </div>
  );
}

export default App;

Word to HTML Conversion Result with JavaScript

Convert Word to HTML with Embedded CSS and Images

In addition to converting Word documents to HTML with separated files, CSS and images can be embedded into a single HTML file by configuring the Document.HtmlExportOptions.CssStyleSheetType property and the Document.HtmlExportOptions.ImageEmbedded property. The steps to achieve this are as follows:

  • Load the Spire.Doc.Base.js file to initialize the WebAssembly module.
  • Load the Word file into the virtual file system using the wasmModule.FetchFileToVFS() method.
  • Create a Document instance in the WASM module using the wasmModule.Document.Create() method.
  • Load the Word document into the Document instance using the Document.LoadFromFile() method.
  • Set the Document.HtmlExportOptions.CssStyleSheetType property to wasmModule.CssStyleSheetType.Internal to embed CSS styles in the resulting HTML file.
  • Set the Document.HtmlExportOptions.ImageEmbedded property to true to embed images in the resulting HTML file.
  • Convert the Word document to an HTML file with CSS styles and images embedded using the Document.SaveToFile({ fileName: string, fileFormat: wasmModule.FileFormat.Html }) method.
  • Download the resulting HTML file or take further actions as needed.
Code example (JavaScript):
import React, { useState, useEffect } from 'react';

function App() {

  // State to hold the loaded WASM module
  const [wasmModule, setWasmModule] = useState(null);

  // useEffect hook to load the WASM module when the component mounts
  useEffect(() => {
    const loadWasm = async () => {
      try {
        const { Module, spiredoc } = window;
        Module.onRuntimeInitialized = () => {
          setWasmModule(spiredoc);
        };
      } catch (err) {
        console.error('Failed to load WASM module:', err);
      }
    };

    const script = document.createElement('script');
    script.src = `${process.env.PUBLIC_URL}/Spire.Doc.Base.js`;
    script.onload = loadWasm;

    document.body.appendChild(script);

    return () => {
      document.body.removeChild(script);
    };
  }, []);

  // Function to convert the Word document to HTML format
  const WordToHTMLAndZip = async () => {
    if (wasmModule) {

      // Specify the input file name and the base output name
      const inputFileName = 'Sample.docx';
      const outputFileName = 'ConvertedDocument.html';

      // Fetch the input file and add it to the VFS
      await wasmModule.FetchFileToVFS(inputFileName, '', `${process.env.PUBLIC_URL}/`);

      // Create an instance of the Document class
      const doc = wasmModule.Document.Create();

      // Load the Word document
      doc.LoadFromFile({ fileName: inputFileName });

      // Embed CSS file in the HTML file
      doc.HtmlExportOptions.CssStyleSheetType = wasmModule.CssStyleSheetType.Internal;

      // Embed images in the HTML file
      doc.HtmlExportOptions.ImageEmbedded = true;

      // Save the Word document to HTML format
      doc.SaveToFile({ fileName: outputFileName, fileFormat: wasmModule.FileFormat.Html });

      // Release resources
      doc.Dispose();

      // Read the HTML file from the VFS
      const htmlFileArray = wasmModule.FS.readFile(outputFileName);

      // Generate a Blob from the HTML file array and trigger download
      const blob = new Blob([new Uint8Array(htmlFileArray)], { type: 'text/html' });
      const url = URL.createObjectURL(blob);
      const a = document.createElement("a");
      a.href = url;
      a.download = outputFileName;
      document.body.appendChild(a);
      a.click();
      document.body.removeChild(a);
      URL.revokeObjectURL(url);
    }
  };

  return (
      <div style={{ textAlign: 'center', height: '300px' }}>
        <h1>Convert Word to HTML Using JavaScript in React</h1>
        <button onClick={WordToHTMLAndZip} disabled={!wasmModule}>
          Convert and Download
        </button>
      </div>
  );
}

export default App;

Word to HTML Conversion Result with CSS and Images Embedded

Convert Word to HTML with Customized Options

Spire.Doc for JavaScript also supports customizing many other HTML export options, such as the CSS file name, headers and footers, and form fields, through the Document.HtmlExportOptions property. The table below lists the properties available under Document.HtmlExportOptions, which can be used to tailor the Word-to-HTML conversion:

| Property | Description |
| --- | --- |
| CssStyleSheetType | Specifies the type of the HTML CSS style sheet (External or Internal). |
| CssStyleSheetFileName | Specifies the name of the HTML CSS style sheet file. |
| ImageEmbedded | Specifies whether to embed images in the HTML code using the Data URI scheme. |
| ImagesPath | Specifies the folder for images in the exported HTML. |
| UseSaveFileRelativePath | Specifies whether the image file path is relative to the HTML file path. |
| HasHeadersFooters | Specifies whether headers and footers should be included in the exported HTML. |
| IsTextInputFormFieldAsText | Specifies whether text-input form fields should be exported as text in HTML. |
| IsExportDocumentStyles | Specifies whether to export document styles to the HTML <head>. |

Follow these steps to customize options when converting Word documents to HTML format:

  • Load the Spire.Doc.Base.js file to initialize the WebAssembly module.
  • Load the Word file into the virtual file system using the wasmModule.FetchFileToVFS() method.
  • Create a Document instance in the WASM module using the wasmModule.Document.Create() method.
  • Load the Word document into the Document instance using the Document.LoadFromFile() method.
  • Customize the conversion options through properties under Document.HtmlExportOptions.
  • Convert the Word document to HTML format using the Document.SaveToFile({ fileName: string, fileFormat: wasmModule.FileFormat.Html }) method.
  • Pack and download the result files or take further actions as needed.
Code example (JavaScript):
import React, { useState, useEffect } from 'react';
import JSZip from 'jszip';

function App() {

  // State to hold the loaded WASM module
  const [wasmModule, setWasmModule] = useState(null);

  // useEffect hook to load the WASM module when the component mounts
  useEffect(() => {
    const loadWasm = async () => {
      try {

        // Access the Module and spiredoc from the global window object
        const { Module, spiredoc } = window;

        // Set the wasmModule state when the runtime is initialized
        Module.onRuntimeInitialized = () => {
          setWasmModule(spiredoc);
        };
      } catch (err) {

        // Log any errors that occur during loading
        console.error('Failed to load WASM module:', err);
      }
    };

    // Create a script element to load the WASM JavaScript file
    const script = document.createElement('script');
    script.src = `${process.env.PUBLIC_URL}/Spire.Doc.Base.js`;
    script.onload = loadWasm;

    // Append the script to the document body
    document.body.appendChild(script);

    // Cleanup function to remove the script when the component unmounts
    return () => {
      document.body.removeChild(script);
    };
  }, []);

  // Function to convert the Word document to HTML format
  const WordToHTMLAndZip = async () => {
    if (wasmModule) {
      // Specify the input file name and the base output file name
      const inputFileName = 'Sample.docx';
      const baseOutputFileName = 'WordToHTML';
      const outputFolderName = 'WordToHTMLOutput';

      // Fetch the input file and add it to the VFS
      await wasmModule.FetchFileToVFS(inputFileName, '', `${process.env.PUBLIC_URL}/`);

      // Create an instance of the Document class
      const doc = wasmModule.Document.Create();

      // Load the Word document
      doc.LoadFromFile({ fileName: inputFileName });

      // Un-embed the CSS file and set its name
      doc.HtmlExportOptions.CssStyleSheetType = wasmModule.CssStyleSheetType.External;
      doc.HtmlExportOptions.CssStyleSheetFileName = `${baseOutputFileName}CSS.css`;

      // Un-embed the image files and set their path
      doc.HtmlExportOptions.ImageEmbedded = false;
      doc.HtmlExportOptions.ImagesPath = `/Images`;
      doc.HtmlExportOptions.UseSaveFileRelativePath = true;

      // Set to ignore headers and footers
      doc.HtmlExportOptions.HasHeadersFooters = false;

      // Set form fields flattened as text
      doc.HtmlExportOptions.IsTextInputFormFieldAsText = true;

      // Set exporting document styles in the head section
      doc.HtmlExportOptions.IsExportDocumentStyles = true;

      // Save the Word document to HTML format
      doc.SaveToFile({
        fileName: `${outputFolderName}/${baseOutputFileName}.html`,
        fileFormat: wasmModule.FileFormat.Html
      });

      // Release resources
      doc.Dispose();

      // Create a new JSZip object
      const zip = new JSZip();

      // Recursive function to add a directory and its contents to the ZIP
      const addFilesToZip = (folderPath, zipFolder) => {
        const items = wasmModule.FS.readdir(folderPath);
        items.filter(item => item !== "." && item !== "..").forEach((item) => {
          const itemPath = `${folderPath}/${item}`;

          try {
            // Attempt to read file data. If it's a directory, this will throw an error.
            const fileData = wasmModule.FS.readFile(itemPath);
            zipFolder.file(item, fileData);
          } catch (error) {
            if (error.code === 'EISDIR') {
              // If it's a directory, create a new folder in the ZIP and recurse into it
              const zipSubFolder = zipFolder.folder(item);
              addFilesToZip(itemPath, zipSubFolder);
            } else {
              // Handle other errors
              console.error(`Error processing ${itemPath}:`, error);
            }
          }
        });
      };

      // Add the contents of the output folder to the ZIP
      addFilesToZip(`${outputFolderName}`, zip);

      // Generate and download the ZIP file
      zip.generateAsync({ type: 'blob' }).then((content) => {
        const url = URL.createObjectURL(content);
        const a = document.createElement("a");
        a.href = url;
        a.download = `${baseOutputFileName}.zip`;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
        URL.revokeObjectURL(url);
      });
    }
  };

  return (
      <div style={{ textAlign: 'center', height: '300px' }}>
        <h1>Convert Word File to HTML and Download as ZIP Using JavaScript in React</h1>
        <button onClick={WordToHTMLAndZip} disabled={!wasmModule}>
          Convert and Download
        </button>
      </div>
  );
}

export default App;

Convert Word to HTML and Customize Conversion Options

Get a Free License

To fully experience the capabilities of Spire.Doc for JavaScript without any evaluation limitations, you can request a free 30-day trial license.