给定一个由 ab 组成的字符串,希望你判断这些字符串是否符合一定的模式。具体而言,除去字符之外,我们有这些符号:
+. 如 a+ 表示可以匹配一次或多次 a.
*. 如 b* 表示可以匹配零次或多次 b.
?. 如 a? 表示可以匹配零次或一次 a.
字符串之间可以连接。如 ab+a* 代表先匹配 a,再匹配 b+, 再匹配 a*.
|. 如 a+b|ba* 代表既可以匹配 a+b,也可以匹配 ba*. 该运算的优先级是最低的。
只存在上面的运算优先级,表达式不包含括号。
不考虑空字符串的特殊情况。
Explanation
很显然,这是一个NFA问题,因为更新后状态是不确定的。
Several Classes
1 2 3 4 5 6 7 8 9
enum classTransitionType { Epsilon, a, b };
structTransition { TransitionType type; int to; Transition(TransitionType type, int to) : type(type), to(to) {} };
Transition结构体代表每一条边,即边的类型和指向的状态终点。
这里只考虑静态自动机的设计,因此起点信息将会储存在NFA的类中。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
classNFA { private: /*! \brief This is used to store the start state of the NFA. */ int start; /*! \brief This is used to store the end state of the NFA. */ std::unordered_set<int> ends; /*! \brief This is used to store the transitions of the NFA. \details For example, transitions[3] stores all the transitions beginning with \details state 3. */ std::vector<std::vector<Transition>> transitions;
NFA parse_expression(const std::string ®ex, size_t &pos){ NFA left = parse_term(regex, pos); while (pos < regex.size() && regex[pos] == '|') { pos++; NFA right = parse_term(regex, pos); left = Union(left, right); } return left; }
NFA parse_term(const std::string ®ex, size_t &pos){ NFA left = parse_factor(regex, pos); while (pos < regex.size() && regex[pos] != '|' && regex[pos] != ')') { NFA right = parse_factor(regex, pos); left = Concatenate(left, right); } return left; }
NFA parse_factor(const std::string ®ex, size_t &pos){ NFA atom = parse_atom(regex, pos); while (pos < regex.size() && (regex[pos] == '*' || regex[pos] == '+' || regex[pos] == '?')) { char op = regex[pos++]; char c = extract_char_from_atom(atom); switch (op) { case'*': atom = MakeStar(c); break; case'+': atom = MakePlus(c); break; case'?': atom = MakeQuestion(c); break; default: throw std::runtime_error("Unexpected operator"); } } return atom; }
NFA parse_atom(const std::string ®ex, size_t &pos){ if (pos >= regex.size()) throw std::runtime_error("Unexpected end of regex"); char c = regex[pos++]; if (c != 'a' && c != 'b') throw std::runtime_error("Invalid character in regex"); returnMakeSimple(c); }
charextract_char_from_atom(const NFA &atom){ if (atom.transitions.size() < 1 || atom.transitions[0].size() != 1) throw std::runtime_error("Invalid atom for closure"); TransitionType type = atom.transitions[0][0].type; if (type != TransitionType::a && type != TransitionType::b) throw std::runtime_error("Invalid transition type in atom"); return (type == TransitionType::a) ? 'a' : 'b'; }
boolCheck(const std::string &str)const{ if (str.empty()) returnfalse; std::unordered_set<int> current = {nfa.GetStart()}; current = nfa.GetEpsilonClosure(current); for (char c : str) { current = nfa.Advance(current, c); if (current.empty()) returnfalse; } for (int state : current) { if (nfa.IsAccepted(state)) returntrue; } returnfalse; } };
NFA Concatenate(const NFA &nfa1, const NFA &nfa2){ NFA result; int m = nfa1.transitions.size(); result.start = nfa1.start; result.transitions = nfa1.transitions; for (constauto &trans : nfa2.transitions) { std::vector<Transition> adjusted; for (const Transition &t : trans) { adjusted.emplace_back(t.type, t.to + m); } result.transitions.push_back(adjusted); } for (int end : nfa1.ends) { result.transitions[end].emplace_back(TransitionType::Epsilon, nfa2.start + m); } for (int end : nfa2.ends) { result.ends.insert(end + m); } return result; }
NFA Union(const NFA &nfa1, const NFA &nfa2){ NFA result; int m = nfa1.transitions.size(); int n = nfa2.transitions.size(); result.transitions.resize(1 + m + n + 1); result.start = 0; result.ends.insert(1 + m + n);
for (int i = 0; i < m; ++i) { for (const Transition &t : nfa1.transitions[i]) { result.transitions[i + 1].emplace_back(t.type, t.to + 1); } } for (int i = 0; i < n; ++i) { for (const Transition &t : nfa2.transitions[i]) { result.transitions[i + m + 1].emplace_back(t.type, t.to + m + 1); } } for (int end : nfa1.ends) { result.transitions[end + 1].emplace_back(TransitionType::Epsilon, 1 + m + n); } for (int end : nfa2.ends) { result.transitions[end + m + 1].emplace_back(TransitionType::Epsilon, 1 + m + n); } return result; }