PDFium 是 Chromium 的 PDF 渲染引擎,许可协议为 BSD 3-Clause。不同于 Mozilla 基于 HTML5 的 PDF.js,PDFium 基于 Foxit Software(福昕软件)的渲染代码,由 Google 与其合作开源。

此外,Qt PDF 模块也选用了 PDFium,详见 QtWebEngine / QtPdf。

本文将介绍如何用 PDFium 实现一个简单的 PDF 阅读器,代码见:https://github.com/ikuokuo/pdfium-reader 。

编译 PDFium

使用预编译库:https://github.com/bblanchon/pdfium-binaries

否则,参考 PDFium / README 自行编译,具体步骤如下:

# get depot_tools, contains: gclient, ninja, gn, ...
git clone --depth 1 https://chromium.googlesource.com/chromium/tools/depot_tools.git
export PATH="$PATH:$HOME/Codes/Star/depot_tools"

# get pdfium
cd pdfium-reader/
mkdir -p third_party/chromium
cd third_party/chromium
gclient config --unmanaged https://pdfium.googlesource.com/pdfium.git
gclient sync
cd pdfium

# get deps
#  on linux, install additional build dependencies
./build/install-build-deps.sh

# gn config
#  args see the following `out/Release/args.gn`
gn args out/Release

# ninja build
#  pdfium
ninja -C out/Release pdfium
#  pdfium_test
ninja -C out/Release pdfium_test

# run sample: pdf > ppm
./out/Release/pdfium_test --ppm path/to/myfile.pdf

其中 out/Release/args.gn 内容如下:

use_goma = false  # Googlers only. Make sure goma is installed and running first.
is_debug = false  # Enable debugging features.

# Set true to enable experimental Skia backend.
pdf_use_skia = false
# Set true to enable experimental Skia backend (paths only).
pdf_use_skia_paths = false

pdf_enable_xfa = false  # Set false to remove XFA support (implies JS support).
pdf_enable_v8 = false  # Set false to remove Javascript support.
pdf_is_standalone = true  # Set for a non-embedded build.
pdf_is_complete_lib = true  # Set for a static library build.
is_component_build = false  # Disable component build (Though it should work)

使用 PDFium

阅读 PDFium / Getting Started,了解如何初始化 PDFium 及载入文档。步骤如下,或见 pdfium_start.c:

#include <fpdfview.h>
#include <stdio.h>

/*
 * Minimal PDFium start-up example.
 *
 * Initializes the library, tries to open the document named by argv[1]
 * (falling back to "test_doc.pdf" when no argument is given), closes it
 * again and tears the library down.
 *
 * Returns 0 when the document could be loaded, 1 otherwise.
 */
int main(int argc, char const *argv[]) {
  FPDF_STRING test_doc = "test_doc.pdf";
  int exit_code = 0;

  if (argc >= 2) {
    test_doc = argv[1];
  }
  printf("test_doc: %s\n", test_doc);

  FPDF_InitLibrary();

  FPDF_DOCUMENT doc = FPDF_LoadDocument(test_doc, NULL);
  if (!doc) {
    /* Report the failure instead of silently discarding the error code. */
    unsigned long err = FPDF_GetLastError();
    fprintf(stderr, "Load pdf docs unsuccessful: error code %lu\n", err);
    exit_code = 1;
    goto EXIT;
  }

  FPDF_CloseDocument(doc);

EXIT:
  /* Single exit path so the library is always destroyed after init. */
  FPDF_DestroyLibrary();
  return exit_code;
}

获取信息

样例见 pdf_info.cc,可打印 PDF 元数据、页面信息等。

FPDF_GetMetaText 获取元数据(UTF-16LE 编码):

// Prints the document-information entries of `doc` to std::cout, one
// " <tag>: <value>" line per entry listed in kMetaTags.
//
// Each value is fetched with FPDF_GetMetaText (UTF-16LE) and converted with
// strings::FromUtf16; the two date entries are additionally passed through
// fpdf::DateToRFC3399 before printing. Entries for which the size query
// returns 0 are skipped.
void PrintPdfMetaData(FPDF_DOCUMENT doc) {
  static constexpr const char *kMetaTags[] = {
      "Title",   "Author",   "Subject",      "Keywords",
      "Creator", "Producer", "CreationDate", "ModDate"};

  for (const char *meta_tag : kMetaTags) {
    // A call with a null buffer only queries the required size.
    const unsigned long len = FPDF_GetMetaText(doc, meta_tag, nullptr, 0);
    if (!len)
      continue;

    // PDFium reports the size in bytes, not in UTF-16 code units, so convert
    // before sizing the vector. The extra zero-initialized element guarantees
    // the data is NUL-terminated for the std::u16string construction below.
    std::vector<char16_t> buf(len / sizeof(char16_t) + 1);
    FPDF_GetMetaText(doc, meta_tag, buf.data(),
                     static_cast<unsigned long>(buf.size() * sizeof(char16_t)));

    auto text = strings::FromUtf16(std::u16string(buf.data()));
    if (strcmp(meta_tag, "CreationDate") == 0 ||
        strcmp(meta_tag, "ModDate") == 0) {
      text = fpdf::DateToRFC3399(text);
    }
    std::cout << " " << meta_tag << ": " << text << std::endl;
  }
}

渲染页面

样例见 pdf_render.cc,可渲染 PDF 页面并保存为 PNG。

FPDF_RenderPageBitmap 渲染某一页:

void PdfRenderPage(const std::string &pdf_name, FPDF_DOCUMENT doc, int index) {  Timer t;  FPDF_PAGE page = FPDF_LoadPage(doc, index);  double scale = 1.0;  // double scale = 2.0;  int width = static_cast<int>(FPDF_GetPageWidth(page) * scale);  int height = static_cast<int>(FPDF_GetPageHeight(page) * scale);  int alpha = FPDFPage_HasTransparency(page) ? 1 : 0;  ScopedFPDFBitmap bitmap(FPDFBitmap_Create(width, height, alpha));  // BGRx  if (bitmap) {    FPDF_DWORD fill_color = alpha ? 0x00000000 : 0xFFFFFFFF;    FPDFBitmap_FillRect(bitmap.get(), 0, 0, width, height, fill_color);    int rotation = 0;    int flags = FPDF_ANNOT;    FPDF_RenderPageBitmap(bitmap.get(), page, 0, 0, width, height,        rotation, flags);    auto t_render = t.Elapsed();    int stride = FPDFBitmap_GetStride(bitmap.get());    void *buffer = FPDFBitmap_GetBuffer(bitmap.get());    char img_name[256];    int chars_formatted = snprintf(        img_name, sizeof(img_name), "%s.%d.png", pdf_name.c_str(), index);    if (chars_formatted < 0 ||        static_cast<size_t>(chars_formatted) >= sizeof(img_name)) {      fprintf(stderr, "Filename is too long: %s\n", img_name);      exit(EXIT_FAILURE);    }    auto ok = PdfWritePng(img_name, buffer, width, height, stride);    if (!ok) {      fprintf(stderr, "Write png failed: %s\n", img_name);      exit(EXIT_FAILURE);    }    auto t_write = t.Elapsed();    fprintf(stdout, "%s\n", img_name);    fprintf(stdout, " %02d: %dx%d, render=%lldms, write=%lldms\n",        index, width, height, t_render, t_write);  } else {    fprintf(stderr, "Page was too large to be rendered.\n");    exit(EXIT_FAILURE);  }  FPDF_ClosePage(page);}

用 stb_image_write.h 存为 PNG:

bool PdfWritePng(const std::string &img_name, void *buffer,                 int width, int height, int stride) {  // BGRA > RGBA  auto buf = reinterpret_cast<uint8_t *>(buffer);  for (int r = 0; r < height; ++r) {    for (int c = 0; c < width; ++c) {      auto pixel = buf + (r*stride) + (c*4);      auto b = pixel[0];      pixel[0] = pixel[2];  // b = r      pixel[2] = b;         // r = b    }  }  return stbi_write_png(img_name.c_str(), width, height, 4, buf, stride) != 0;}

实现 UI

本文给出的 PDFium Reader 代码,用 ImGui + GLFW + OpenGL3 实现 UI,可跨三大桌面系统。

想进一步了解的,可以直接看代码,编译运行请参照 README。

GoCoding 个人实践的经验分享,可关注公众号!