文章出處
文章列表
本文轉載自http://chriszz.sinaapp.com/?p=257
輸入一個正則表達式,輸出一個NFA。
我的做法:輸入一個字符串表示正則,輸出則是把輸出到一個.dot文件中并將dot文件編譯成pdf,fedora需要sudo yum install dot,然后evince XXX.pdf就可以查看生成的NFA了。
具體算法是按照龍書上的Tompson算法來的。
廢話不多說,放碼過來:
/* Author:ChrisZZ(zchrissirhcz@gmail.com) Time:2013-12-25 14:13:09 輸入:正則表達式 輸出:自動機 算法步驟: 1.把正則表達式轉化為后綴表達式 2.把后綴表達式轉化為NFA 3.用dot語言把NFA輸出到PDF 參考: 1.Regular Expression Matching Can Be Simple And Fast http://swtch.com/~rsc/regexp/regexp1.html 2.龍書 chap3.7.4 從正則表達式構造NFA 3.YCC學長的project中dot語言的使用 其他說明: 1.需要安裝dot,并添加到系統path中 2.在windows下運行時,控制臺因為編碼不支持可能導致中文提示無法顯示 */ #include <iostream> #include <string> #include <stdio.h> #include <stack> #include <string.h> #include <stdexcept> #include <stdlib.h> using namespace std; const int Match = 256; const int Split = 257;//表示epsilon分支 struct Paren{//括號結構體 int natom; int nalt; }; string re2post(string re){ Paren paren;//括號 stack<struct Paren>parenStk; string postExpr=""; int i, len=re.length(); int nalt=0, natom=0; const string invalidRegExp = "非法的正則表達式"; for(i=0; i<len; i++){ if(isspace(re[i])) continue; if(isalpha(re[i])){ if(natom>1){ natom--; postExpr = postExpr + '.'; } natom++; postExpr = postExpr + re[i]; } else if(re[i]=='('){ if(natom>1){ postExpr = postExpr + '.'; } paren.natom = natom; paren.nalt = nalt; parenStk.push(paren); nalt = 0; natom = 0; } else if(re[i]==')'){ if(natom==0 || parenStk.empty()) throw runtime_error(invalidRegExp+":括號不匹配"); while(--natom>0){//比如((a|b)(c|d))模式,當上一次匹配完倒數第二個右括號后,natom為2,需要添加'.' postExpr = postExpr + '.'; } while(nalt-->0){ postExpr = postExpr + '|'; } paren=parenStk.top(); parenStk.pop(); natom = paren.natom; nalt = paren.nalt; natom++; } else if(re[i]=='*'){ if(natom==0) throw runtime_error(invalidRegExp+":提前出現'*'"); postExpr = postExpr + re[i]; } else if(re[i]=='|'){ if(natom==0) throw runtime_error(invalidRegExp+":提前出現'|'"); while(--natom>0){ postExpr = postExpr + '.'; } nalt++; } else throw runtime_error(invalidRegExp); } if(!parenStk.empty()) throw runtime_error(invalidRegExp+":括號不匹配"); while(--natom>0){ postExpr = postExpr + '.'; } while(nalt-->0){ postExpr = postExpr + '|'; } return postExpr; } class NFA; /* * c<256表示edge權重為c; * c=256表示終結狀態,匹配成功 * c=257表示分支(split) */ class State{ friend class NFA; friend void nfa2graph(State* head, const string& re); public: State(int c=256, State* out=NULL, State* out1=NULL){ this->c = c; this->out = out; this->out1 = out1; this->id = 0; } void setId(int id){ this->id = id; } private: int c; int id;//狀態的編號 State* out;//從本狀態出去的狀態集合的頭指針 State* out1;//兩個分支的情況 }; class NFA{ public: NFA(){ head = NULL; tail = NULL; } NFA(const int& c){ tail = new State(Match, NULL, NULL); head = new State(c, tail, NULL); } void doCat(NFA& nfa){ tail->out = nfa.head; tail->c = Split; tail = nfa.tail; nfa.head = NULL; nfa.tail = NULL; } void doUnion(NFA& nfa){ State* newHead = new State(Split, head, nfa.head); State* newTail = new State(Match, NULL, NULL); tail->c = Split; tail->out = newTail; nfa.tail->c = Split; nfa.tail->out = newTail; tail = newTail; head = newHead; nfa.head = NULL; nfa.tail = NULL; } void doStar(){ State* newTail = new State(Match, NULL, NULL); State* newHead = new State(Split, head, newTail); tail->c = Split; tail->out = newTail; tail->out1 = head; tail = newTail; head = newHead; } void nfa2graph(const string& re){ char myfile[100]; printf("請輸入一個文件名,用來保存生成的NFA-graph(不必提供后綴):\n"); scanf("%s", myfile); printf("已將DOT文件存儲在\"%s.dot\",\n", myfile); printf("PDF文件則存儲在\"%s.dot.pdf\".\n", myfile); int i; while(myfile[i]!='\0') i++; myfile[i] = '.'; myfile[i+1] = 'd'; myfile[i+2] = 'o'; myfile[i+3] = 't'; myfile[i+4] = '\0'; FILE *file = fopen(myfile, "w"); fputs("digraph {\n", file); fputs("\t\"", file); int len=re.length(); for(i=0; i<len; i++){ fprintf(file, "%c", re[i]); } fputs("\" [shape = plaintext]\n", file); fputs("\trankdir = LR\n", file); fputs("\t\"\" [shape = point]\n", file); fputs("\t\"\" -> 1 [label = Start]\n\n", file); int id = 1; char circle[2000]; memset(circle, 0, sizeof(circle)); State* p; stack<State*> staStk; head->setId(id++); staStk.push(head); while(!staStk.empty()){ p = staStk.top(); staStk.pop(); char flag = 1; cout << "p->c=" << p->c << endl; if(p->c < Match){ cout << "p->out->id=" << p->out->id << endl; if(p->out->id==0){ p->out->id = id++; cout << "id=" << id << endl; } else flag = 0; fprintf(file, "\t%d -> %d [label = \"%c\"]\n", p->id, (p->out)->id, p->c); State *what = p->out; if(flag) //push(*what); staStk.push(what); } else if(p->c == Match){ circle[p->id] = 1; } else{ //對應Split的情形 if(p->out->id==0) p->out->id = id++; else flag = 0; fprintf(file, "\t%d -> %d [label = <ε>]\n", p->id, p->out->id); State *what = p->out; if(flag) staStk.push(what); if(p->out1!=NULL){ flag = 1; if(p->out1->id==0) p->out1->id = id++; else flag = 0; fprintf(file, "\t%d -> %d [label = <ε>]\n", p->id, p->out1->id); what = p->out1; if(flag) staStk.push(what); } } } for(i=1; i<id; i++){ fprintf(file, "\t%d [shape = circle", i); if(circle[i]) fputs(", peripheries = 2", file); fprintf(file, "]\n"); } fputs("}", file); fclose(file); char cmd[108]; sprintf(cmd, "dot %s -O -Tpdf", myfile); if(system(cmd)==0){ printf("成功生成pdf圖像!\n"); //printf("Linux用戶可以使用evince file.pdf &命令打開~\n"); } else printf("悲劇!生成pdf圖像時出現錯誤..\n"); } private: State* head; State* tail; }; NFA post2nfa(const string& postExpr){ stack<NFA> nfaStk; NFA e1, e2, e; int i, len=postExpr.length(); for(i=0; i<len; i++){ switch(postExpr[i]){ case '.': e2 = nfaStk.top(); nfaStk.pop(); e1 = nfaStk.top(); nfaStk.pop(); e1.doCat(e2); nfaStk.push(e1); break; case '|': e2 = nfaStk.top(); nfaStk.pop(); e1 = nfaStk.top(); nfaStk.pop(); e1.doUnion(e2); nfaStk.push(e1); break; case '*': e = nfaStk.top(); nfaStk.pop(); e.doStar(); nfaStk.push(e); break; default:// NFA alpha(postExpr[i]); nfaStk.push(alpha); } } e = nfaStk.top(); nfaStk.pop(); if(!nfaStk.empty()) throw runtime_error("未知錯誤"); return e; } int main(){ string re; while(true){ cout << "請輸入一個正則表達式:\n"; cin >> re; string postExpr = re2post(re); cout << "postExpr is : " << postExpr << endl; NFA nfa = post2nfa(postExpr); nfa.nfa2graph(re); cout << "繼續嗎?(y/n)\n" << endl; char c; cin >> c; while(c!='y' && c!='n'){ cout << "請輸入'y'或'n'.\n"; c=getchar(); } if(c=='n') break; } cout << "Bye~\n"; return 0; }
文章列表
全站熱搜