之前的文章中,笔者介绍了Linux/UNIX C语言库Melon的基本功能及框架使用。

本文将介绍Melon中的词法分析器组件。

Melon的Github仓库为:https://github.com/Water-Melon/Melon

词法分析器在Melon中并不依赖于自身框架,因此可以在不初始化框架的情况下直接使用。

基础使用

我们先来看一个基本例子:

//lexer.c#include <stdio.h>#include "mln_lex.h"MLN_DEFINE_TOKEN_TYPE_AND_STRUCT(static, mln_test, TEST);MLN_DEFINE_TOKEN(mln_test, TEST);int main(int argc, char *argv[]){    if (argc != 2) {        fprintf(stderr, "Usage: %s file_path\n", argv[0]);        return -1;    }    mln_string_t path;    mln_lex_t *lex = NULL;    struct mln_lex_attr lattr;    mln_test_struct_t *ts;    mln_string_nSet(&path, argv[1], strlen(argv[1]));    lattr.pool = mln_alloc_init();    if (lattr.pool == NULL) {        fprintf(stderr, "init memory pool failed\n");        return -1;    }    lattr.keywords = NULL;    lattr.hooks = NULL;    lattr.preprocess = 0;    lattr.padding = 0;    lattr.type = M_INPUT_T_FILE;    lattr.data = &path;    mln_lex_initWithHooks(mln_test, lex, &lattr);    if (lex == NULL) {        fprintf(stderr, "lexer init failed\n");        return -1;    }    while (1) {        ts = mln_test_token(lex);        if (ts == NULL || ts->type == TEST_TK_EOF)            break;        write(STDOUT_FILENO, ts->text->data, ts->text->len);        printf(" line:%u type:%d\n", ts->line, ts->type);    }    mln_lex_destroy(lex);    mln_alloc_destroy(lattr.pool);    return 0;}

如此,即可实现一个词法解析器程序,它读取程序的参数所指定的文件的内容,然后解析成词素,并将其打印出来。

我们执行:

$ ./lexer lexer.c
/ line:1 type:21
/ line:1 type:21
lexer line:1 type:5
. line:1 type:20
c line:1 type:5
# line:3 type:9
include line:3 type:5
< line:3 type:24
stdio line:3 type:5
. line:3 type:20
h line:3 type:5
> line:3 type:26
...

可以看到,这个程序将我们的示例C程序拆解成各种词素,如:/、#、< 等等。

进阶使用

上面的例子可以看到,基础的词法解析器解析出的词素过于细碎,有时我们还希望解析器支持我们自定义的关键字、自定义格式的数据,甚至是一些预处理功能,例如引入其他文件的内容解析词素。

那么,我们就将上面的例子进行一番修改:

//lexer.c#include <stdio.h>#include "mln_lex.h"mln_string_t keywords[] = {    mln_string("on"),    mln_string("off"),    mln_string(NULL)};MLN_DEFINE_TOKEN_TYPE_AND_STRUCT(static, mln_test, TEST, TEST_TK_ON, TEST_TK_OFF, TEST_TK_STRING);MLN_DEFINE_TOKEN(mln_test, TEST, {TEST_TK_ON, "TEST_TK_ON"}, {TEST_TK_OFF, "TEST_TK_OFF"}, {TEST_TK_STRING, "TEST_TK_STRING"});static inline intmln_get_char(mln_lex_t *lex, char c){    if (c == '\\') {        char n;        if ((n = mln_lex_getAChar(lex)) == MLN_ERR) return -1;        switch ( n ) {            case '\"':                if (mln_lex_putAChar(lex, n) == MLN_ERR) return -1;                break;            case '\'':                if (mln_lex_putAChar(lex, n) == MLN_ERR) return -1;                break;            case 'n':                if (mln_lex_putAChar(lex, '\n') == MLN_ERR) return -1;                break;            case 't':                if (mln_lex_putAChar(lex, '\t') == MLN_ERR) return -1;                break;            case 'b':                if (mln_lex_putAChar(lex, '\b') == MLN_ERR) return -1;                break;            case 'a':                if (mln_lex_putAChar(lex, '\a') == MLN_ERR) return -1;                break;            case 'f':                if (mln_lex_putAChar(lex, '\f') == MLN_ERR) return -1;                break;            case 'r':                if (mln_lex_putAChar(lex, '\r') == MLN_ERR) return -1;                break;            case 'v':                if (mln_lex_putAChar(lex, '\v') == MLN_ERR) return -1;                break;            case '\\':                if (mln_lex_putAChar(lex, '\\') == MLN_ERR) return -1;                break;            default:                mln_lex_setError(lex, MLN_LEX_EINVCHAR);                return -1;        }    } else {        if (mln_lex_putAChar(lex, c) == MLN_ERR) return -1;    }    return 0;}static mln_test_struct_t *mln_test_dblq_handler(mln_lex_t *lex, void *data){    mln_lex_cleanResult(lex);    char c;    while ( 1 ) {  
      c = mln_lex_getAChar(lex);        if (c == MLN_ERR) return NULL;        if (c == MLN_EOF) {            mln_lex_setError(lex, MLN_LEX_EINVEOF);            return NULL;        }        if (c == '\"') break;        if (mln_get_char(lex, c) < 0) return NULL;    }    return mln_test_new(lex, TEST_TK_STRING);}int main(int argc, char *argv[]){    if (argc != 2) {        fprintf(stderr, "Usage: %s file_path\n", argv[0]);        return -1;    }    mln_string_t path;    mln_lex_t *lex = NULL;    struct mln_lex_attr lattr;    mln_test_struct_t *ts;    mln_lex_hooks_t hooks;    memset(&hooks, 0, sizeof(hooks));    hooks.dblq_handler = (lex_hook)mln_test_dblq_handler;    mln_string_nSet(&path, argv[1], strlen(argv[1]));    lattr.pool = mln_alloc_init();    if (lattr.pool == NULL) {        fprintf(stderr, "init pool failed\n");        return -1;    }    lattr.keywords = keywords;    lattr.hooks = &hooks;    lattr.preprocess = 1;//反对预处理    lattr.padding = 0;    lattr.type = M_INPUT_T_FILE;    lattr.data = &path;    mln_lex_initWithHooks(mln_test, lex, &lattr);    if (lex == NULL) {        fprintf(stderr, "lexer init failed\n");        return -1;    }    while (1) {        ts = mln_test_token(lex);        if (ts == NULL || ts->type == TEST_TK_EOF)            break;        write(STDOUT_FILENO, ts->text->data, ts->text->len);        printf(" line:%u type:%d\n", ts->line, ts->type);    }    mln_lex_destroy(lex);    mln_alloc_destroy(lattr.pool);    return 0;}

这一次,我们增加如下功能:

  • 支持关键字 on 和 off
  • 支持识别双引号括住的内容为字符串类型
  • 增加了预处理功能,例如引入其他文件内容

生成可执行程序:

$ cc -o lexer lexer.c -I /usr/local/melon/include/ -L /usr/local/melon/lib/ -lmelon -lpthread

创建两个测试文件:

a.ini

#include "b.ini"
test_mode = on
log_level = 'debug'
proc_num = 10

b.ini

conf_name = "b.ini"

运行我们的程序来看看效果:

$ ./lexer a.ini
conf_name line:1 type:5
= line:1 type:25
b.ini line:1 type:42
test_mode line:2 type:5
= line:2 type:25
on line:2 type:40
log_level line:3 type:5
= line:3 type:25
' line:3 type:13
debug line:3 type:5
' line:3 type:13
proc_num line:4 type:5
= line:4 type:25
10 line:4 type:2

可以看到,在a.ini中写入include的部分,是b.ini文件内容解析后的词素。并且 on 和 off 都被正常解析出来了,字符串也被正常处理出来了。


Melon的Github仓库为:https://github.com/Water-Melon/Melon

感谢阅读