请移步到我的Blog,获得更好的阅读体验!本文的链接请点这里

起因

我的博客系统的文章是直接使用gitbook保存的markdown文档,后端使用marked.js来解析markdown文档。

因为gitbook比较优秀,markdown进行了语法扩展,拿gitbook写文章也就更爽了。但是,这样问题就出现了,marked.js无法解析gitbook扩展的语法,所以我们就需要手动去解析这些扩展的markdown语法。

marked.js如何扩展语法?

想要扩展语法,也就需要深入了解marked.js的工作机制。

marked.js的工作机制:

创建Lexer(词法解析器)实例:

const lexer = new marked.Lexer()

lexer将markdown字符串解析成tokens(tokens是官方文档的叫法,按照我的理解应该是markdown节点(node)):

const tokens = lexer.lex(md)

接下来创建Parser(解析器)实例:

const parser = new marked.Parser()

调用parser.parse()来解析tokens,生成html字符串:

const html = parser.parse(tokens)

到这里,marked.js的工作机制就完了。
那么,我们需要修改的部分也就很清晰了,我们需要修改lexer的生成tokens函数、parser的解析函数以及渲染函数。

如何修改Lexer生成token的函数?

我看了marked.js的源码发现,lexer生成token的函数是一个名为token的函数,这个函数的代码是这样的:

marked.Lexer.prototype.token = function (src, top) {  src = src.replace(/^ +$/gm, '');  var next,  // ...;  while (src) {    // newline    if (cap = this.rules.newline.exec(src)) {      src = src.substring(cap[0].length);      if (cap[0].length > 1) {        this.tokens.push({          type: 'space'        });      }    }    // ...  }  //...}

也就是,lexer用while循环来使用正则表达式进行判断是否符合语法,如果符合就push到tokens,然后切割字符串进行下次循环。我们就如法炮制一条正则表达式和循环中的代码块:

const customRules = {  hint: /^{% hint style="([a-z]+)" %}\n(.*)?(\n{% endhint %})/}// 将自定义的rule添加到词法解析器marked.Lexer.rules = Object.assign(marked.Lexer.rules,customRules)const lexer = new marked.Lexer();// 源代码是merge了新的block rules,所以需要在实例上再添加一次/*block.normal = merge({}, block);function Lexer(options) {  this.tokens = [];  this.tokens.links = Object.create(null);  this.options = options || marked.defaults;  this.rules = block.normal;} */Object.assign(lexer.rules,customRules)// 重写词法解析器marked.Lexer.prototype.token = function (src, top) {  src = src.replace(/^ +$/gm, '');  var next,    //...;  while (src) {    // newline    if (cap = this.rules.newline.exec(src)) {      src = src.substring(cap[0].length);      if (cap[0].length > 1) {        this.tokens.push({          type: 'space'        });      }    }    // 我们扩展的hint语法    // hint    if (cap = this.rules.hint.exec(src)) {      var lastToken = this.tokens[this.tokens.length - 1];      src = src.substring(cap[0].length);      this.tokens.push({        type: 'hint',        state:cap[1],        text: cap[2]      });      continue;    }  }  //...

这样,词法解析器就修改完成了,接下来我们需要修改Parser的解析函数用来生成对应hint语法的html

如何修改Parser解析?

依然是翻源码,找到parser的解析函数:tok

marked.Parser.prototype.tok = function() {  switch (this.token.type) {    case 'space': {      return '';    }    case 'hr': {      return this.renderer.hr();    }    //...  }  //...}

我们需要在switch中增加一个case:

marked.Parser.prototype.tok = function() {  switch (this.token.type) {    case 'space': {      return '';    }    case 'hint': {      return this.renderer.hint(this.token.state, this.token.text);    }    case 'hr': {      return this.renderer.hr();    }    //...  }  //...}

这里的`this.renderer.hint`调用的是渲染器的函数,也就是让marked.js知道怎么渲染hint语法:

// 新建渲染器const renderer = new marked.Renderer()const hintState={  info:`<svg preserveAspectRatio="xMidYMid meet" height="1em" width="1em" fill="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" stroke="none" class="icon-7f6730be--text-3f89f380"><g><path d="M12.2 8.98c.06-.01.12-.03.18-.06.06-.02.12-.05.18-.09l.15-.12c.18-.19.29-.45.29-.71 0-.06-.01-.13-.02-.19a.603.603 0 0 0-.06-.19.757.757 0 0 0-.09-.18c-.03-.05-.08-.1-.12-.15-.28-.27-.72-.37-1.09-.21-.13.05-.23.12-.33.21-.04.05-.09.1-.12.15-.04.06-.07.12-.09.18-.03.06-.05.12-.06.19-.01.06-.02.13-.02.19 0 .26.11.52.29.71.1.09.2.16.33.21.12.05.25.08.38.08.06 0 .13-.01.2-.02M13 16v-4a1 1 0 1 0-2 0v4a1 1 0 1 0 2 0M12 3c-4.962 0-9 4.038-9 9 0 4.963 4.038 9 9 9 4.963 0 9-4.037 9-9 0-4.962-4.037-9-9-9m0 20C5.935 23 1 18.065 1 12S5.935 1 12 1c6.066 0 11 4.935 11 11s-4.934 11-11 11" fill-rule="evenodd"></path></g></svg>`,  warning: `<svg preserveAspectRatio="xMidYMid meet" height="1em" width="1em" fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" stroke="currentColor" class="icon-7f6730be--text-3f89f380" style="color: rgb(247, 125, 5);"><g><circle cx="12" cy="12" r="10"></circle><line x1="12" y1="8" x2="12" y2="12"></line><line x1="12" y1="16" x2="12" y2="16"></line></g></svg>`,  danger: `<svg preserveAspectRatio="xMidYMid meet" height="1em" width="1em" fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" stroke="currentColor" class="icon-7f6730be--text-3f89f380" style="color: rgb(255, 70, 66);"><g><path d="M10.29 3.86L1.82 18a2 2 0 0 0 1.71 3h16.94a2 2 0 0 0 1.71-3L13.71 3.86a2 2 0 0 0-3.42 0z"></path><line x1="12" y1="9" x2="12" y2="13"></line><line x1="12" y1="17" x2="12" y2="17"></line></g></svg>`,  success: `<svg preserveAspectRatio="xMidYMid meet" height="1em" width="1em" fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" stroke="currentColor" class="icon-7f6730be--text-3f89f380" style="color: rgb(38, 203, 124);"><g><path d="M22 11.07V12a10 10 0 1 1-5.93-9.14"></path><polyline points="23 3 12 14 9 11"></polyline></g></svg>`}// 自定义语法的渲染函数renderer.hint = (state, text) => {  return `<div class="hint ${state}">    <span class="hint-icon">${hintState[state]}</span>    <span class="hint-content">${text.replace(/\n/g,'<br/>')}</span>  </div>`}

最后,将renderer传递给marked.js:

marked.setOptions({ renderer })

这样,我们就新扩展了markdown的语法啦

例:

const md = `{% hint style="info" %}1{% endhint %}{% hint style="warning" %}2{% endhint %}{% hint style="danger" %}3{% endhint %}{% hint style="success" %}4{% endhint %}`marked(md)

展示出来就是这个样子:

上面的代码是不完整的,下面贴上完整的代码
import marked from 'marked'import hljs from 'highlight.js'const customRules = {  hint: /^{% hint style="([a-z]+)" %}\n(.*)?(\n{% endhint %})/}// 将自定义的rule添加到词法解析器marked.Lexer.rules = Object.assign(marked.Lexer.rules,customRules)const lexer = new marked.Lexer();// 源代码是merge了新的block rules,所以需要在实例上再添加一次/*block.normal = merge({}, block);function Lexer(options) {  this.tokens = [];  this.tokens.links = Object.create(null);  this.options = options || marked.defaults;  this.rules = block.normal;} */Object.assign(lexer.rules,customRules)// 重写词法解析器marked.Lexer.prototype.token = function (src, top) {  src = src.replace(/^ +$/gm, '');  var next,    loose,    cap,    bull,    b,    item,    listStart,    listItems,    t,    space,    i,    tag,    l,    isordered,    istask,    ischecked;  while (src) {    // newline    if (cap = this.rules.newline.exec(src)) {      src = src.substring(cap[0].length);      if (cap[0].length > 1) {        this.tokens.push({          type: 'space'        });      }    }    // hint    if (cap = this.rules.hint.exec(src)) {      var lastToken = this.tokens[this.tokens.length - 1];      src = src.substring(cap[0].length);      this.tokens.push({        type: 'hint',        state:cap[1],        text: cap[2]      });      continue;    }    // code    if (cap = this.rules.code.exec(src)) {      var lastToken = this.tokens[this.tokens.length - 1];      src = src.substring(cap[0].length);      // An indented code block cannot interrupt a paragraph.      if (lastToken && lastToken.type === 'paragraph') {        lastToken.text += '\n' + cap[0].trimRight();      } else {        cap = cap[0].replace(/^ {4}/gm, '');        this.tokens.push({          type: 'code',          codeBlockStyle: 'indented',          text: !this.options.pedantic            ? rtrim(cap, '\n')            : cap        });      }      continue;    }    // fences    if (cap = this.rules.fences.exec(src)) {      src = src.substring(cap[0].length);      this.tokens.push({        type: 'code',        lang: cap[2] ? cap[2].trim() : cap[2],        text: cap[3] || ''      });      continue;    }    // heading    if (cap = this.rules.heading.exec(src)) {      src = src.substring(cap[0].length);      this.tokens.push({        type: 'heading',        depth: cap[1].length,        text: cap[2]      });      continue;    }    // table no leading pipe (gfm)    if (cap = this.rules.nptable.exec(src)) {      item = {        type: 'table',        header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),        align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),        cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []      };      if (item.header.length === item.align.length) {        src = src.substring(cap[0].length);        for (i = 0; i < item.align.length; i++) {          if (/^ *-+: *$/.test(item.align[i])) {            item.align[i] = 'right';          } else if (/^ *:-+: *$/.test(item.align[i])) {            item.align[i] = 'center';          } else if (/^ *:-+ *$/.test(item.align[i])) {            item.align[i] = 'left';          } else {            item.align[i] = null;          }        }        for (i = 0; i < item.cells.length; i++) {          item.cells[i] = splitCells(item.cells[i], item.header.length);        }        this.tokens.push(item);        continue;      }    }    // hr    if (cap = this.rules.hr.exec(src)) {      src = src.substring(cap[0].length);      this.tokens.push({        type: 'hr'      });      continue;    }    // blockquote    if (cap = this.rules.blockquote.exec(src)) {      src = src.substring(cap[0].length);      this.tokens.push({        type: 'blockquote_start'      });      cap = cap[0].replace(/^ *> ?/gm, '');      // Pass `top` to keep the current      // "toplevel" state. This is exactly      // how markdown.pl works.      this.token(cap, top);      this.tokens.push({        type: 'blockquote_end'      });      continue;    }    // list    if (cap = this.rules.list.exec(src)) {      src = src.substring(cap[0].length);      bull = cap[2];      isordered = bull.length > 1;      listStart = {        type: 'list_start',        ordered: isordered,        start: isordered ? +bull : '',        loose: false      };      this.tokens.push(listStart);      // Get each top-level item.      cap = cap[0].match(this.rules.item);      listItems = [];      next = false;      l = cap.length;      i = 0;      for (; i < l; i++) {        item = cap[i];        // Remove the list item's bullet        // so it is seen as the next token.        space = item.length;        item = item.replace(/^ *([*+-]|\d+\.) */, '');        // Outdent whatever the        // list item contains. Hacky.        if (~item.indexOf('\n ')) {          space -= item.length;          item = !this.options.pedantic            ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')            : item.replace(/^ {1,4}/gm, '');        }        // Determine whether the next list item belongs here.        // Backpedal if it does not belong in this list.        if (i !== l - 1) {          const bullet = /(?:[*+-]|\d{1,9}\.)/          b = bullet.exec(cap[i + 1])[0];          if (bull.length > 1 ? b.length === 1            : (b.length > 1 || (this.options.smartLists && b !== bull))) {            src = cap.slice(i + 1).join('\n') + src;            i = l - 1;          }        }        // Determine whether item is loose or not.        // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/        // for discount behavior.        loose = next || /\n\n(?!\s*$)/.test(item);        if (i !== l - 1) {          next = item.charAt(item.length - 1) === '\n';          if (!loose) loose = next;        }        if (loose) {          listStart.loose = true;        }        // Check for task list items        istask = /^\[[ xX]\] /.test(item);        ischecked = undefined;        if (istask) {          ischecked = item[1] !== ' ';          item = item.replace(/^\[[ xX]\] +/, '');        }        t = {          type: 'list_item_start',          task: istask,          checked: ischecked,          loose: loose        };        listItems.push(t);        this.tokens.push(t);        // Recurse.        this.token(item, false);        this.tokens.push({          type: 'list_item_end'        });      }      if (listStart.loose) {        l = listItems.length;        i = 0;        for (; i < l; i++) {          listItems[i].loose = true;        }      }      this.tokens.push({        type: 'list_end'      });      continue;    }    // html    if (cap = this.rules.html.exec(src)) {      src = src.substring(cap[0].length);      this.tokens.push({        type: this.options.sanitize          ? 'paragraph'          : 'html',        pre: !this.options.sanitizer          && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),        text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]      });      continue;    }    // def    if (top && (cap = this.rules.def.exec(src))) {      src = src.substring(cap[0].length);      if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);      tag = cap[1].toLowerCase().replace(/\s+/g, ' ');      if (!this.tokens.links[tag]) {        this.tokens.links[tag] = {          href: cap[2],          title: cap[3]        };      }      continue;    }    // table (gfm)    if (cap = this.rules.table.exec(src)) {      item = {        type: 'table',        header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),        align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),        cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []      };      if (item.header.length === item.align.length) {        src = src.substring(cap[0].length);        for (i = 0; i < item.align.length; i++) {          if (/^ *-+: *$/.test(item.align[i])) {            item.align[i] = 'right';          } else if (/^ *:-+: *$/.test(item.align[i])) {            item.align[i] = 'center';          } else if (/^ *:-+ *$/.test(item.align[i])) {            item.align[i] = 'left';          } else {            item.align[i] = null;          }        }        for (i = 0; i < item.cells.length; i++) {          item.cells[i] = splitCells(            item.cells[i].replace(/^ *\| *| *\| *$/g, ''),            item.header.length);        }        this.tokens.push(item);        continue;      }    }    // lheading    if (cap = this.rules.lheading.exec(src)) {      src = src.substring(cap[0].length);      this.tokens.push({        type: 'heading',        depth: cap[2].charAt(0) === '=' ? 1 : 2,        text: cap[1]      });      continue;    }    // top-level paragraph    if (top && (cap = this.rules.paragraph.exec(src))) {      src = src.substring(cap[0].length);      this.tokens.push({        type: 'paragraph',        text: cap[1].charAt(cap[1].length - 1) === '\n'          ? cap[1].slice(0, -1)          : cap[1]      });      continue;    }    // text    if (cap = this.rules.text.exec(src)) {      // Top-level should never reach here.      src = src.substring(cap[0].length);      this.tokens.push({        type: 'text',        text: cap[0]      });      continue;    }    if (src) {      throw new Error('Infinite loop on byte: ' + src.charCodeAt(0));    }  }  return this.tokens;}// 重写解析器marked.Parser.prototype.tok = function() {  switch (this.token.type) {    case 'space': {      return '';    }    case 'hint': {      return this.renderer.hint(this.token.state, this.token.text);    }    case 'hr': {      return this.renderer.hr();    }    case 'heading': {      return this.renderer.heading(        this.inline.output(this.token.text),        this.token.depth,        unescape(this.inlineText.output(this.token.text)),        this.slugger);    }    case 'code': {      return this.renderer.code(this.token.text,        this.token.lang,        this.token.escaped);    }    case 'table': {      var header = '',          body = '',          i,          row,          cell,          j;      // header      cell = '';      for (i = 0; i < this.token.header.length; i++) {        cell += this.renderer.tablecell(          this.inline.output(this.token.header[i]),          { header: true, align: this.token.align[i] }        );      }      header += this.renderer.tablerow(cell);      for (i = 0; i < this.token.cells.length; i++) {        row = this.token.cells[i];        cell = '';        for (j = 0; j < row.length; j++) {          cell += this.renderer.tablecell(            this.inline.output(row[j]),            { header: false, align: this.token.align[j] }          );        }        body += this.renderer.tablerow(cell);      }      return this.renderer.table(header, body);    }    case 'blockquote_start': {      body = '';      while (this.next().type !== 'blockquote_end') {        body += this.tok();      }      return this.renderer.blockquote(body);    }    case 'list_start': {      body = '';      var ordered = this.token.ordered,          start = this.token.start;      while (this.next().type !== 'list_end') {        body += this.tok();      }      return this.renderer.list(body, ordered, start);    }    case 'list_item_start': {      body = '';      var loose = this.token.loose;      var checked = this.token.checked;      var task = this.token.task;      if (this.token.task) {        body += this.renderer.checkbox(checked);      }      while (this.next().type !== 'list_item_end') {        body += !loose && this.token.type === 'text'          ? this.parseText()          : this.tok();      }      return this.renderer.listitem(body, task, checked);    }    case 'html': {      // TODO parse inline content if parameter markdown=1      return this.renderer.html(this.token.text);    }    case 'paragraph': {      return this.renderer.paragraph(this.inline.output(this.token.text));    }    case 'text': {      return this.renderer.paragraph(this.parseText());    }    default: {      var errMsg = 'Token with "' + this.token.type + '" type was not found.';      if (this.options.silent) {        console.log(errMsg);      } else {        throw new Error(errMsg);      }    }  }};// 新建渲染器const renderer = new marked.Renderer()const hintState={  info:`<svg preserveAspectRatio="xMidYMid meet" height="1em" width="1em" fill="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" stroke="none" class="icon-7f6730be--text-3f89f380"><g><path d="M12.2 8.98c.06-.01.12-.03.18-.06.06-.02.12-.05.18-.09l.15-.12c.18-.19.29-.45.29-.71 0-.06-.01-.13-.02-.19a.603.603 0 0 0-.06-.19.757.757 0 0 0-.09-.18c-.03-.05-.08-.1-.12-.15-.28-.27-.72-.37-1.09-.21-.13.05-.23.12-.33.21-.04.05-.09.1-.12.15-.04.06-.07.12-.09.18-.03.06-.05.12-.06.19-.01.06-.02.13-.02.19 0 .26.11.52.29.71.1.09.2.16.33.21.12.05.25.08.38.08.06 0 .13-.01.2-.02M13 16v-4a1 1 0 1 0-2 0v4a1 1 0 1 0 2 0M12 3c-4.962 0-9 4.038-9 9 0 4.963 4.038 9 9 9 4.963 0 9-4.037 9-9 0-4.962-4.037-9-9-9m0 20C5.935 23 1 18.065 1 12S5.935 1 12 1c6.066 0 11 4.935 11 11s-4.934 11-11 11" fill-rule="evenodd"></path></g></svg>`,  warning: `<svg preserveAspectRatio="xMidYMid meet" height="1em" width="1em" fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" stroke="currentColor" class="icon-7f6730be--text-3f89f380" style="color: rgb(247, 125, 5);"><g><circle cx="12" cy="12" r="10"></circle><line x1="12" y1="8" x2="12" y2="12"></line><line x1="12" y1="16" x2="12" y2="16"></line></g></svg>`,  danger: `<svg preserveAspectRatio="xMidYMid meet" height="1em" width="1em" fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" stroke="currentColor" class="icon-7f6730be--text-3f89f380" style="color: rgb(255, 70, 66);"><g><path d="M10.29 3.86L1.82 18a2 2 0 0 0 1.71 3h16.94a2 2 0 0 0 1.71-3L13.71 3.86a2 2 0 0 0-3.42 0z"></path><line x1="12" y1="9" x2="12" y2="13"></line><line x1="12" y1="17" x2="12" y2="17"></line></g></svg>`,  success: `<svg preserveAspectRatio="xMidYMid meet" height="1em" width="1em" fill="none" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" stroke="currentColor" class="icon-7f6730be--text-3f89f380" style="color: rgb(38, 203, 124);"><g><path d="M22 11.07V12a10 10 0 1 1-5.93-9.14"></path><polyline points="23 3 12 14 9 11"></polyline></g></svg>`}// 自定义语法的渲染函数renderer.hint = (state, text) => {  return `<div class="hint ${state}">    <span class="hint-icon">${hintState[state]}</span>    <span class="hint-content">${text.replace(/\n/g,'<br/>')}</span>  </div>`}// code高亮renderer.code = (code, language, isEscaped) => {  const isMarkup = language === 'markup'  let hled  if (isMarkup) {    hled = hljs.highlightAuto(code).value;  } else {    hled = hljs.highlight(language, code).value  }  return `<pre class="hljs"><code class="${language}">${hled}</code></pre>`}renderer.listitem = (body, task, checked) => {  let className = ''  if(task){    className = 'task-item'  }  return `<li class="${className}">${body}</li>\n`;}marked.setOptions({ renderer })export default marked