一、背景

这两天在努力记单词,想着应该把最常使用的单词先记下来,从网上找了几篇文章之后分析了一批词汇,效果还算不错;

接着又想到了代码,也好奇开发者最常使用的单词或函数有哪些,于是我统计了三种类型:系统函数、变量名、自定义函数名。感兴趣的朋友可以将正则设置为自己需要统计的规则。

二、参考代码

<?php

/**
 * Recursively collect the paths of all PHP files below a directory.
 *
 * Every regular file whose extension is exactly "php" is appended to
 * $_SERVER['fileList']. The entries '.', '..' and '.git' are skipped.
 * Paths are stored exactly as readdir() returns them, because start()
 * later hands them to file_get_contents(); transcoding the names would
 * make non-ASCII paths point at files that do not exist.
 *
 * @param string $path Directory to scan; a trailing '/' is ignored.
 * @return void Results are accumulated in $_SERVER['fileList'].
 */
function scanMyDir($path)
{
    $path = rtrim($path, '/');

    // Make sure the accumulator exists even when nothing is found.
    $_SERVER['fileList'] = $_SERVER['fileList'] ?? [];

    if (!is_dir($path)) {
        return;
    }

    // Open the directory; give up quietly if it cannot be read.
    $dh = opendir($path);
    if ($dh === false) {
        return;
    }

    // Read the directory entry by entry.
    while (($file = readdir($dh)) !== false) {
        // Skip the current directory, the parent directory and git metadata.
        if ($file == '.' || $file == '..' || $file == '.git') {
            continue;
        }

        $fullPath = $path . '/' . $file;

        if (is_dir($fullPath)) {
            // The entry is itself a directory: recurse into it.
            scanMyDir($fullPath);
        } elseif (pathinfo($file, PATHINFO_EXTENSION) === 'php') {
            // Compare the real extension so that "a.php" is included and
            // names such as "bb.php.bak" are not.
            $_SERVER['fileList'][] = $fullPath;
        }
    }

    closedir($dh);
}

/**
 * Count the keywords in one file that match a regular expression.
 *
 * Each string captured by group 1 of $pregRule increments the matching
 * counter in $_SERVER['count'].
 *
 * @param string $fileName Path of the file to analyse.
 * @param string $pregRule PCRE pattern whose first capture group is the keyword.
 * @return void Results are accumulated in $_SERVER['count'].
 */
function explodeCount($fileName, $pregRule)
{
    $_SERVER['count'] = $_SERVER['count'] ?? [];

    // Ignore empty names and anything that is not a regular file.
    if ($fileName === '' || !is_file($fileName)) {
        return;
    }

    $text = file_get_contents($fileName);
    if ($text === false) {
        return;
    }

    // preg_match_all() returns 0 when nothing matched and false on error.
    if (!preg_match_all($pregRule, $text, $keywordArr)) {
        return;
    }

    foreach ($keywordArr[1] ?? [] as $keyword) {
        $_SERVER['count'][$keyword] = ($_SERVER['count'][$keyword] ?? 0) + 1;
    }
}

/**
 * Scan a directory tree and write the 100 most frequent keywords to disk.
 *
 * Writes "fileList.txt" (the files that were analysed, one per line) and
 * "countResult.txt" (a var_export() dump of keyword => count, sorted by
 * count in descending order) into the current working directory.
 *
 * @param string $path     Root directory of the code to analyse.
 * @param string $pregRule PCRE pattern whose first capture group is the keyword.
 * @return void
 */
function start($path, $pregRule)
{
    $_SERVER['fileList'] = $_SERVER['fileList'] ?? [];
    $_SERVER['count'] = $_SERVER['count'] ?? [];

    scanMyDir($path);

    // Save the list of analysed files so it can be inspected afterwards.
    file_put_contents("fileList.txt", implode("\n", $_SERVER['fileList']));

    // Count the keywords of every file. The in-memory list is used directly:
    // re-reading an empty fileList.txt would yield a single empty file name.
    foreach ($_SERVER['fileList'] as $fileName) {
        explodeCount($fileName, $pregRule);
    }

    // Sort by count, highest first, before writing the result.
    arsort($_SERVER['count']);

    // Keep only the top 100 entries.
    $_SERVER['count'] = array_slice($_SERVER['count'], 0, 100);

    // Write the result to disk.
    $sumResult = var_export($_SERVER['count'], true);
    file_put_contents("countResult.txt", $sumResult);
}

// Directory of the code to analyse.
$path = '/root/mycode/work/offcn-live/vendor';

// Pick one rule. Note: the two function rules only match lowercase names with
// at most one underscore, so e.g. "file_get_contents(" or "->getName(" are
// not counted; adjust the pattern if such names should be included.
//$pregRule = '/ ([a-z]+_?[a-z]+)\(/';  // built-in function rule
//$pregRule = '/(\$[a-zA-Z]+_?[a-zA-Z]+)/';   // variable name rule
$pregRule = '/[->:]+([a-z]+_?[a-z]+)\(/'; // user-defined method rule

start($path, $pregRule);

三、常用函数

array (  'array' => 6126,  'isset' => 1345,  'substr' => 845,  'sprintf' => 737,  'strlen' => 650,  'count' => 598,  'unset' => 556,  'array_merge' => 449,  'list' => 413,  'strpos' => 408,  'str_replace' => 393,  'implode' => 348,  'explode' => 333,  'is_array' => 332,  'static' => 297,  'trim' => 263,  'declare' => 238,  'mock' => 237,  'pack' => 232,  'preg_match' => 222,  'is_null' => 210,  'get_class' => 203,  'array_map' => 195,  'self' => 191,  'strtolower' => 190,  'empty' => 183,  'preg_replace' => 180,  'chr' => 169,  'function_exists' => 163,  'user_error' => 161,  'handle' => 158,  'is_string' => 155,  'is_object' => 140,  'str_repeat' => 139,  'array_keys' => 138,  'rewind' => 137,  'in_array' => 133,  'write' => 132,  'mt_rand' => 132,  'array_values' => 129,  'time' => 125,  'not' => 124,  'array_shift' => 124,  'extract' => 120,  'getenv' => 115,  'reset' => 113,  'execute' => 112,  'printf' => 110,  'fopen' => 108,  'get' => 105,  'collect' => 100,  'current' => 100,  'fclose' => 99,  'unpack' => 96,  'strval' => 96,  'matches' => 92,  'rtrim' => 90,  'str_pad' => 88,  'json_encode' => 88,  'array_filter' => 88,  'array_pop' => 85,  'app' => 84,  'range' => 84,  'dirname' => 83,  'define' => 81,  'microtime' => 80,  'foo' => 80,  'create' => 80,  'ord' => 80,  'compact' => 79,  'read' => 77,  'method_exists' => 76,  'register' => 75,  'realpath' => 74,  'intval' => 73,  'bar' => 73,  'strtotime' => 73,  'fread' => 72,  'class_exists' => 72,  'print' => 72,  'max' => 72,  'curl_setopt' => 70,  'fwrite' => 69,  'tap' => 66,  'strtoupper' => 65,  'array_unshift' => 65,  'serialize' => 64,  'ob_start' => 64,  'unserialize' => 63,  'strrpos' => 61,  'key' => 61,  'preg_split' => 61,  'ini_get' => 61,  'add' => 59,  'close' => 59,  'array_slice' => 58,  'putenv' => 57,  'eval' => 57,  'gettype' => 56,  'var_export' => 56,)

四、常用变量名

array (  '$this' => 75572,  '$value' => 6303,  '$options' => 4731,  '$key' => 4597,  '$name' => 4367,  '$vendorDir' => 4310,  '$message' => 4115,  '$request' => 3453,  '$stackPos' => 3237,  '$response' => 2796,  '$result' => 2577,  '$data' => 2308,  '$path' => 2117,  '$node' => 1733,  '$type' => 1650,  '$method' => 1620,  '$file' => 1449,  '$arguments' => 1415,  '$class' => 1408,  '$callback' => 1378,  '$output' => 1364,  '$command' => 1314,  '$parameters' => 1273,  '$config' => 1252,  '$expected' => 1197,  '$column' => 1153,  '$input' => 1140,  '$id' => 1119,  '$headers' => 1083,  '$event' => 1083,  '$args' => 986,  '$attributes' => 979,  '$length' => 961,  '$code' => 950,  '$query' => 947,  '$prefix' => 947,  '$mock' => 930,  '$token' => 925,  '$context' => 909,  '$test' => 892,  '$temp' => 884,  '$header' => 871,  '$matches' => 847,  '$object' => 825,  '$string' => 813,  '$container' => 810,  '$server' => 810,  '$stream' => 768,  '$collection' => 768,  '$route' => 761,  '$values' => 761,  '$record' => 748,  '$exception' => 748,  '$actual' => 719,  '$connection' => 712,  '$item' => 697,  '$constraint' => 670,  '$operation' => 666,  '$date' => 655,  '$bucket' => 648,  '$array' => 644,  '$line' => 643,  '$count' => 641,  '$uri' => 622,  '$buf' => 618,  '$handler' => 608,  '$default' => 598,  '$table' => 594,  '$content' => 578,  '$reader' => 558,  '$resource' => 549,  '$application' => 549,  '$tokens' => 541,  '$locale' => 539,  '$attribute' => 531,  '$format' => 518,  '$filename' => 510,  '$className' => 509,  '$str' => 505,  '$parts' => 505,  '$matcher' => 499,  '$text' => 498,  '$queue' => 483,  '$generator' => 480,  '$filter' => 476,  '$client' => 475,  '$level' => 468,  '$domain' => 467,  '$writer' => 464,  '$argument' => 460,  '$number' => 459,  '$option' => 452,  '$payload' => 448,  '$keys' => 445,  '$process' => 444,  '$translator' => 437,  '$app' => 435,  '$listener' => 430,  '$files' => 429,  '$index' => 422,)

五、常用自定义函数

array (  'once' => 1292,  'with' => 1105,  'get' => 997,  'expects' => 700,  'method' => 651,  'set' => 612,  'create' => 600,  'add' => 588,  'foo' => 464,  'format' => 434,  'write' => 429,  'execute' => 421,  'all' => 378,  'evaluate' => 344,  'has' => 320,  'register' => 318,  'fail' => 294,  'find' => 286,  'run' => 284,  'any' => 280,  'start' => 254,  'parse' => 233,  'load' => 203,  'make' => 200,  'read' => 191,  'generate' => 185,  'factory' => 182,  'close' => 164,  'current' => 155,  'render' => 152,  'ask' => 149,  'numerify' => 146,  'will' => 145,  'where' => 137,  'singleton' => 133,  'writeln' => 128,  'valid' => 125,  'next' => 124,  'main' => 122,  'send' => 121,  'trans' => 117,  'request' => 116,  'option' => 113,  'handle' => 112,  'matches' => 111,  'match' => 110,  'contains' => 103,  'write_shortstr' => 103,  'process' => 101,  'never' => 100,  'at' => 99,  'initialize' => 97,  'rewind' => 96,  'bind' => 92,  'validate' => 90,  'dispatch' => 88,  'filter' => 86,  'in' => 86,  'copy' => 85,  'verify' => 84,  'delete' => 83,  'wrap' => 81,  'put' => 79,  'stop' => 78,  'mock' => 78,  'dump' => 78,  'supports' => 78,  'observe' => 77,  'encrypt' => 77,  'attach' => 75,  'first' => 74,  'apply' => 73,  'remove' => 72,  'invoke' => 72,  'connection' => 71,  'advance' => 69,  'decrypt' => 69,  'ordered' => 69,  'save' => 68,  'resolve' => 68,  'prepare' => 67,  'println' => 67,  'auth' => 65,  'reset' => 65,  'bar' => 64,  'write_short' => 64,  'call' => 63,  'map' => 63,  'compare' => 63,  'string' => 62,  'log' => 62,  'wait' => 61,  'info' => 61,  'update' => 60,  'escape' => 60,  'lookup' => 58,  'write_bits' => 57,  'count' => 57,  'push' => 56,  'times' => 55,)

后面的数字,代表在代码中出现的次数。我用的是项目的vendor目录,里面都是一些比较常用的开源代码库,所以应该算是比较有参考价值。

作者:汤青松
日期: 2020-06-30