PHP怎么实现词法分析与自定义语言
发表于:2025-11-07 作者:千家信息网编辑
千家信息网最后更新 2025年11月07日,本文小编为大家详细介绍"PHP怎么实现词法分析与自定义语言",内容详细,步骤清晰,细节处理妥当,希望这篇"PHP怎么实现词法分析与自定义语言"文章能帮助大家解决疑惑,下面跟着小编的思路慢慢深入,一起来学习新知识吧。
千家信息网最后更新 2025年11月07日PHP怎么实现词法分析与自定义语言
本文小编为大家详细介绍"PHP怎么实现词法分析与自定义语言",内容详细,步骤清晰,细节处理妥当,希望这篇"PHP怎么实现词法分析与自定义语言"文章能帮助大家解决疑惑,下面跟着小编的思路慢慢深入,一起来学习新知识吧。
之前项目有一个需求,业务人员使用中文编写一些自定义公式,然后需要我们后台执行将结果返回到界面上,于是就基于有限状态机写了这个词法分析器,比较简单,希望能够抛砖引玉。
一、分析需求
输入中文公式,返回结果,比如:
现有薪资=10000;个税起点=3000;当前年份=2021;如果(当前年份=2022){ 个税起点=5000;}返回 (现有薪资-个税起点) * 0.2;
二、实现需求
最初的想法是使用字符串替换的方式,将中文关键字替换成php的关键字,然后调用eval执行,这样确实也是可以的,但是总觉得不是很美丽,并且不能实现动态解析。就想着自己实现一个简单的词法分析,然后结合ast将词法转换成php代码执行,岂不快哉。当前版本没有用到抽象语法树来生成代码,全部使用字符串拼接。
/**
 * A small finite-state lexer that tokenizes formulas written with Chinese
 * (or English) identifiers and keywords, and converts the token stream into
 * PHP source by plain string concatenation.
 *
 * Token shape: ["type" => string, "char" => string, "pos" => int].
 */
class Lexer
{
    // Map of source-language keyword => PHP keyword, supplied via setKeywordList().
    private $keywordList = [];

    // Single-character operators recognised by the lexer.
    // NOTE(review): the beginning of this list (everything up to and including
    // the ">" entry) was lost when the article was extracted. "+", "-", "*",
    // "=" and ">" are taken from the article's own examples; "/" is an
    // assumption. Confirm against the original source.
    private $operatorList = [
        "+", "-", "*", "/", "=", ">", "<", "!", "(", ")", "{", "}", ",", ";",
    ];

    // Source code being tokenized.
    private $input;

    // Character under the cursor (or the end-of-input sentinel).
    private $currChar;

    // Zero-based character (not byte) offset of the cursor.
    private $currCharPos = 0;

    // End-of-input sentinel. It is three characters long, so it can never be
    // confused with a real character, which is always exactly one character.
    private $eof = "eof";

    // Encoding used for all multibyte string operations.
    private $currEncode = "UTF-8";

    // Token types.
    public const VAR = "variable";
    public const STR = "string";
    public const KW = "keyword";
    public const OPR = "operator";
    public const INT = "integer";
    public const NIL = "null";

    /**
     * Lexer constructor.
     *
     * @param string $input Source text to tokenize.
     */
    public function __construct(string $input)
    {
        $this->input = $input;
        $this->currChar = mb_substr($this->input, $this->currCharPos, 1, $this->currEncode);
    }

    /**
     * Sets the keyword map.
     *
     * @param array $keywordList Map of source keyword => PHP keyword.
     */
    public function setKeywordList($keywordList)
    {
        $this->keywordList = $keywordList;
    }

    /**
     * Tokenizes the whole input.
     *
     * A "null" token holding a single space is appended after every keyword
     * so that the generated PHP keeps keywords separated from what follows.
     *
     * @return array List of tokens (the end-of-input token is not included).
     * @throws Exception When the input is empty or a string literal is unterminated.
     */
    public function parseInput()
    {
        if ($this->input === "") {
            throw new Exception("code can not be empty");
        }

        $tokens = [];
        do {
            $token = $this->nextToken();
            if ($token["type"] !== "eof") {
                $tokens[] = $token;
            }
            if ($token["type"] === self::KW) {
                $tokens[] = $this->makeToken(self::NIL, " ");
            }
        } while ($token["type"] !== "eof");

        return $tokens;
    }

    /**
     * Reads the next token and advances the cursor past it.
     *
     * @return array The token; its type is "eof" at end of input.
     * @throws Exception When a string literal is not terminated.
     */
    public function nextToken()
    {
        $this->skipBlankChar();
        if ($this->currChar === "") {
            $this->currChar = $this->eof;
        }

        // A run of Chinese characters is either a keyword or a variable.
        if ($this->isCnLetter()) {
            return $this->wordToken($this->matchUntilNextCharIsNotCn());
        }

        if ($this->isOperator()) {
            return $this->currToken(static::OPR, $this->currChar);
        }

        // Numbers are read as one token, including an optional fraction, so
        // that "0.2" is not split into "0", ".", "2".
        if ($this->isNumber()) {
            $start = $this->currCharPos;

            return $this->makeToken(static::INT, $this->matchNumber(), $start);
        }

        // null means "not at a string"; an empty literal "" is a valid string.
        $str = $this->matchCompleteStr();
        if ($str !== null) {
            // The cursor is on the closing quote; currToken() steps over it.
            return $this->currToken(static::STR, $str);
        }

        // Identifier starting with an English letter.
        if ($this->isVar()) {
            return $this->wordToken($this->matchVar());
        }

        if ($this->currChar === $this->eof) {
            return $this->currToken('eof', $this->currChar);
        }

        // Anything unrecognised is passed through as a one-character variable.
        return $this->currToken(static::VAR, $this->currChar);
    }

    /**
     * Builds the token for a fully consumed word: a keyword when the word is
     * in the keyword map, a variable otherwise. The cursor is not moved.
     *
     * @param string $word
     * @return array
     */
    private function wordToken(string $word): array
    {
        if ($this->isKeyword($word)) {
            // Keywords report the offset of their last character.
            return $this->makeToken(static::KW, $word, $this->currCharPos - 1);
        }

        // Every non-keyword word is treated as a variable.
        return $this->makeToken(static::VAR, $word);
    }

    /**
     * Consumes a run of letters (Chinese or English).
     *
     * @param string $input Optional prefix for the returned word.
     * @return string
     */
    private function matchVar(string $input = ""): string
    {
        $word = $input;
        while ($this->isVar()) {
            $word .= $this->currChar;
            $this->nextChar();
        }

        return $word;
    }

    /**
     * Consumes digits with an optional ".digits" fraction.
     *
     * @return string
     */
    private function matchNumber(): string
    {
        $number = $this->matchDigits();

        // Only treat "." as a decimal point when a digit follows it.
        $next = mb_substr($this->input, $this->currCharPos + 1, 1, $this->currEncode);
        if ($this->currChar === "." && $this->isDigit($next)) {
            $this->nextChar();
            $number .= "." . $this->matchDigits();
        }

        return $number;
    }

    /**
     * Consumes a run of ASCII digits.
     *
     * @return string
     */
    private function matchDigits(): string
    {
        $digits = "";
        while ($this->isNumber()) {
            $digits .= $this->currChar;
            $this->nextChar();
        }

        return $digits;
    }

    /**
     * Whether the current character can be part of an identifier.
     *
     * @return bool
     */
    private function isVar(): bool
    {
        return $this->isCnLetter() || $this->isEnLetter();
    }

    /**
     * Skips line feeds, carriage returns, spaces and tabs.
     */
    private function skipBlankChar(): void
    {
        while (in_array($this->currChar, ["\n", "\r", " ", "\t"], true)) {
            $this->nextChar();
        }
    }

    /**
     * Builds a token at the current position, then advances one character.
     *
     * @param string $type
     * @param string $word
     * @return array
     */
    private function currToken(string $type, $word): array
    {
        $token = $this->makeToken($type, $word);
        $this->nextChar();

        return $token;
    }

    /**
     * Builds a token.
     *
     * @param string   $type
     * @param string   $char Token text.
     * @param int|null $pos  Offset to record; defaults to the cursor position.
     * @return array
     */
    private function makeToken(string $type, string $char, ?int $pos = null): array
    {
        return ["type" => $type, "char" => $char, "pos" => $pos ?? $this->currCharPos];
    }

    /**
     * Whether the current character is an ASCII letter (a-z or A-Z, inclusive).
     * The end-of-input sentinel and the empty string never match.
     *
     * @return bool
     */
    private function isEnLetter(): bool
    {
        return preg_match('/^[A-Za-z]$/D', $this->currChar) === 1;
    }

    /**
     * Whether the current character is in the range U+4E00..U+9FA5.
     *
     * @return bool
     */
    private function isCnLetter(): bool
    {
        return preg_match('/^[\x{4e00}-\x{9fa5}]+$/u', $this->currChar) === 1;
    }

    /**
     * Whether the current character is an ASCII digit.
     *
     * @return bool
     */
    private function isNumber(): bool
    {
        return $this->isDigit($this->currChar);
    }

    /**
     * Whether $char is exactly one ASCII digit.
     *
     * @param string $char
     * @return bool
     */
    private function isDigit(string $char): bool
    {
        return preg_match('/^[0-9]$/D', $char) === 1;
    }

    /**
     * Reads a double-quoted string literal starting at the cursor.
     *
     * On success the cursor is left on the closing quote.
     *
     * @return string|null The literal's contents, or null when the cursor is
     *                     not on an opening quote.
     * @throws Exception When the input ends before the closing quote.
     */
    private function matchCompleteStr(): ?string
    {
        if ($this->currChar !== "\"") {
            return null;
        }

        $start = $this->currCharPos;
        $this->nextChar();

        $str = "";
        while ($this->currChar !== "\"") {
            // Without this check the loop would run forever at end of input.
            if ($this->currChar === $this->eof) {
                throw new Exception("unterminated string starting at position " . $start);
            }
            $str .= $this->currChar;
            $this->nextChar();
        }

        return $str;
    }

    /**
     * Whether the current character is an operator.
     *
     * @return bool
     */
    private function isOperator(): bool
    {
        return in_array($this->currChar, $this->operatorList, true);
    }

    /**
     * Consumes a run of Chinese characters.
     *
     * @return string
     */
    private function matchUntilNextCharIsNotCn(): string
    {
        $word = "";
        while ($this->isCnLetter()) {
            $word .= $this->currChar;
            $this->nextChar();
        }

        return $word;
    }

    /**
     * Advances the cursor by one character; sets the end-of-input sentinel
     * when the input is exhausted.
     */
    private function nextChar(): void
    {
        $this->currCharPos += 1;
        $this->currChar = mb_substr($this->input, $this->currCharPos, 1, $this->currEncode);
        if ($this->currChar === "") {
            $this->currChar = $this->eof;
        }
    }

    /**
     * Whether $input has a non-empty entry in the keyword map.
     *
     * @param string $input
     * @return bool
     */
    private function isKeyword(string $input): bool
    {
        return ($this->keywordList[$input] ?? "") != "";
    }

    /**
     * Converts tokens into PHP source by concatenation.
     *
     * NOTE(review): every "+" is rewritten to PHP's "." so that strings can
     * be joined, which also turns numeric addition such as (1+2) into (1.2).
     * This is left unchanged because the article's documented output depends
     * on it; a type-aware rewrite is needed to fix it properly.
     *
     * @param array $tokens Tokens as returned by parseInput().
     * @return string
     */
    public function convert(array $tokens)
    {
        $code = "";
        foreach ($tokens as $token) {
            switch ($token["type"]) {
                case static::KW:
                    $code .= $this->keywordList[$token["char"]];
                    break;
                case static::VAR:
                    $code .= '$' . $token["char"];
                    break;
                case static::OPR:
                    $code .= $this->replace($token["char"]);
                    break;
                case static::STR:
                    // The literal is emitted into a double-quoted PHP string
                    // that will later be executed, so "\" and "$" must be
                    // escaped to prevent interpolation or code injection.
                    $escaped = strtr($token["char"], ['\\' => '\\\\', '$' => '\\$']);
                    $code .= '"' . $escaped . '"';
                    break;
                case static::INT:
                default:
                    $code .= $token["char"];
            }
        }

        return $code;
    }

    /**
     * Maps the source "+" operator to PHP's concatenation operator.
     *
     * @param string $char
     * @return string
     */
    private function replace(string $char): string
    {
        return str_replace("+", ".", $char);
    }
}
// 三、如何使用
require __DIR__ . "/vendor/autoload.php";

// Define a snippet written in the custom language.
// NOTE(review): the heredoc opener and its first statements were lost when the
// article was extracted; they are reconstructed here from the token dump and
// the generated output shown below in the article. The ";" after "艾欧尼亚" is
// present in that token dump and is required for the generated PHP to parse.
$code = <<<EOF
姓名="腕豪";
问候="你好啊";
地址=(1+2)*3;
如果(地址>3){
    地址=1;
}否则{
    地址="艾欧尼亚";
}
说话 = ("我"+"爱")+"你";
返回 姓名+年龄;
EOF;

$lexer = new Lexer($code);

// Define your own keywords: source keyword => PHP keyword.
$kwMap = [
    "如果" => "if",
    "否则" => "else",
    "返回" => "return",
    "否则如果" => "elseif",
];
$lexer->setKeywordList($kwMap);

// The generated tokens.
$tokens = $lexer->parseInput();

// Convert the tokens to PHP. You could also build an AST with php-parser and
// print PHP from that; this example simply concatenates strings.
var_dump($lexer->convert($tokens));
生成词
[{ "type": "variable", "char": "姓名", "pos": 2}, { "type": "operator", "char": "=", "pos": 2}, { "type": "string", "char": "腕豪", "pos": 7}, { "type": "operator", "char": ";", "pos": 8}, { "type": "variable", "char": "问候", "pos": 13}, { "type": "operator", "char": "=", "pos": 13}, { "type": "string", "char": "你好啊", "pos": 17}, { "type": "operator", "char": ";", "pos": 18}, { "type": "variable", "char": "地址", "pos": 23}, { "type": "operator", "char": "=", "pos": 23}, { "type": "operator", "char": "(", "pos": 24}, { "type": "integer", "char": "1", "pos": 25}, { "type": "operator", "char": "+", "pos": 26}, { "type": "integer", "char": "2", "pos": 27}, { "type": "operator", "char": ")", "pos": 28}, { "type": "operator", "char": "*", "pos": 30}, { "type": "integer", "char": "3", "pos": 32}, { "type": "operator", "char": ";", "pos": 33}, { "type": "keyword", "char": "如果", "pos": 37}, { "type": "null", "char": " ", "pos": 38}, { "type": "operator", "char": "(", "pos": 38}, { "type": "variable", "char": "地址", "pos": 41}, { "type": "operator", "char": ">", "pos": 42}, { "type": "integer", "char": "3", "pos": 44}, { "type": "operator", "char": ")", "pos": 45}, { "type": "operator", "char": "{", "pos": 46}, { "type": "variable", "char": "地址", "pos": 55}, { "type": "operator", "char": "=", "pos": 55}, { "type": "integer", "char": "1", "pos": 56}, { "type": "operator", "char": ";", "pos": 57}, { "type": "operator", "char": "}", "pos": 60}, { "type": "keyword", "char": "否则", "pos": 62}, { "type": "null", "char": " ", "pos": 63}, { "type": "operator", "char": "{", "pos": 63}, { "type": "variable", "char": "地址", "pos": 72}, { "type": "operator", "char": "=", "pos": 72}, { "type": "string", "char": "艾欧尼亚", "pos": 78}, { "type": "operator", "char": ";", "pos": 79}, { "type": "operator", "char": "}", "pos": 82}, { "type": "variable", "char": "说话", "pos": 87}, { "type": "operator", "char": "=", "pos": 88}, { "type": "operator", "char": "(", "pos": 90}, { "type": "string", "char": 
"我", "pos": 93}, { "type": "operator", "char": "+", "pos": 94}, { "type": "string", "char": "爱", "pos": 97}, { "type": "operator", "char": ")", "pos": 98}, { "type": "operator", "char": "+", "pos": 99}, { "type": "string", "char": "你", "pos": 102}, { "type": "operator", "char": ";", "pos": 103}, { "type": "keyword", "char": "返回", "pos": 107}, { "type": "null", "char": " ", "pos": 108}, { "type": "variable", "char": "姓名", "pos": 111}, { "type": "operator", "char": "+", "pos": 111}, { "type": "variable", "char": "年龄", "pos": 114}, { "type": "operator", "char": ";", "pos": 114}]
输出:
$姓名="腕豪";$问候="你好啊";$地址=(1.2)*3;if ($地址>3){$地址=1;}else {$地址="艾欧尼亚";}$说话=("我"."爱")."你";return $姓名.$年龄;
能执行吗?当然能。还存在一些小bug,不想改了。
四、使用场景
什么,居然有人说没什么用?oa系统总有用到的时候。
读到这里,这篇"PHP怎么实现词法分析与自定义语言"文章已经介绍完毕,想要掌握这篇文章的知识点还需要大家自己动手实践使用过才能领会,如果想了解更多相关内容的文章,欢迎关注行业资讯频道。
字符
地址
词法
分析
关键
关键字
姓名
字符串
生成
语言
代码
变量
年龄
文章
起点
需求
尼亚
中文
你好
公式
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
智能化软件开发注意事项
南通银联软件开发推荐咨询
开发网络安全系统
网剧打印服务器驱动
深圳定制软件开发教程
网络安全手抄报字少
哪里找软件开发人才
高级数据库技术第5章
吉林软件开发研发公司
服务器省级科学技术进步一等奖
网络安全6月10日
饥荒服务器必须要网络吗
软件开发的方案书怎样写
临沂智慧社区软件开发哪儿好
速达3000数据库结构
软件开发三本就业好吗
非油气数据库检查任务创建失败
c 向数据库增加新表
吉林有名的网络技术排名靠前
揭阳分布式服务器咨询
网易服务器维护需要多久时间
我的世界服务器剧情副本内容
浅谈数据库标准的制定
软件开发课程 教学目标
西安游戏软件开发制作公司
广东盈创网络技术有限公司
手机老是找不到服务器
客户端远程数据库服务器失败
揭阳分布式服务器咨询
池州软件开发培训