#pragma once #include #include "Array.h" #include "Critical.h" #include "Text.h" namespace Framework { namespace Regex { class Flags { public: static const int START = 1; static const int END = 2; }; template class State; template class Transition { private: std::function conditionFunction; State* zTarget; int requiredFlags; public: Transition() : conditionFunction(0), zTarget(0) {} Transition(std::function conditionFunction, State* zTarget, int requiredFlags) : conditionFunction(conditionFunction), zTarget(zTarget), requiredFlags(requiredFlags) {} Transition(const Transition& other) : conditionFunction(other.conditionFunction), zTarget(other.zTarget) {} bool test(Data value, int flags) const { return conditionFunction && conditionFunction(value, flags) && (requiredFlags | flags) == flags; } bool test(int flags) const { return !conditionFunction && (requiredFlags | flags) == flags; } State* zTargetState() const { return zTarget; } std::function getConditionFunction() const { return conditionFunction; } int getRequiredFlags() const { return requiredFlags; } }; template class State : public ReferenceCounter { private: Array> transitions; bool final; int groupStart; int groupEnd; int id; public: State(int id) : ReferenceCounter(), final(false), groupStart(0), groupEnd(0), id(id) {} void addTransition(Transition transition) { transitions.add(transition); } void setFinal(bool isFinal) { final = isFinal; } void setGroupStart(int groupStart) { this->groupStart = groupStart; } void setGroupEnd(int groupEnd) { this->groupEnd = groupEnd; } bool isFinal() const { return final; } int getGroupStart() const { return groupStart; } int getGroupEnd() const { return groupEnd; } int getId() const { return id; } const Array>& getTransitions() const { return transitions; } }; class Group { private: int start; int end; int id; public: DLLEXPORT Group(int start, int end, int id); DLLEXPORT Group(); DLLEXPORT int getStart(); DLLEXPORT int getEnd(); DLLEXPORT int getId(); }; class Result : public ReferenceCounter { private: Array groups; public: DLLEXPORT Result(Array groups); DLLEXPORT int getGroupCount(); DLLEXPORT Group getGroup(int index); DLLEXPORT int getStart(); DLLEXPORT int getEnd(); }; template class ExecutionStackFrame { private: ExecutionStackFrame* before; State* state; int transitionIndex; int strIndex; public: ExecutionStackFrame(ExecutionStackFrame* before, State* zState, int strIndex) : before(before), state(zState), transitionIndex(0), strIndex(strIndex) {} ~ExecutionStackFrame() { if (before) { delete before; } } ExecutionStackFrame* getBefore() const { return before; } State* zState() const { return state; } void setStrIndex(int index) { strIndex = index; } int getStrIndex() const { return strIndex; } int getTransitionIndex() const { return transitionIndex; } void setTransitionIndex(int transitionIndex) { this->transitionIndex = transitionIndex; } void discard() { before = 0; } Array getGroups() const { Array groups; const ExecutionStackFrame* current = this; while (true) { if (current->getBefore()) { current = current->getBefore(); } else { break; } } groups.add(Group(current->getStrIndex(), strIndex, 0)); current = this; Array groupEnds; Array knownGroups; while (current) { if (current->zState()->getGroupEnd()) { groupEnds.add(current->getStrIndex()); } if (current->zState()->getGroupStart() && groupEnds.getEintragAnzahl() > 0) { if (knownGroups.getWertIndex( current->zState()->getGroupStart()) < 0) { while (groups.getEintragAnzahl() <= current->zState()->getGroupStart()) { groups.add( Group(-1, -1, groups.getEintragAnzahl())); } groups.set(Group(current->getStrIndex(), groupEnds.get(0), current->zState()->getGroupStart()), current->zState()->getGroupStart()); knownGroups.add(current->zState()->getGroupStart()); } groupEnds.remove(0); } current = current->getBefore(); } return groups; } bool isEndlessLoop() const { const ExecutionStackFrame* current = this; while (current) { if (current->getBefore() && current->getBefore()->zState() == state && current->getBefore()->getStrIndex() == strIndex) { return true; } current = current->getBefore(); } return false; } }; template class Automata : public ReferenceCounter { private: RCArray> states; public: Automata() : ReferenceCounter() {} State* addState() { State* state = new State(states.getEintragAnzahl()); states.add(state); return state; } const RCArray>& getStates() const { return states; } RCArray* match(const Data* str, int length) { RCArray* results = new RCArray(); ExecutionStackFrame* currentFrame = new ExecutionStackFrame(0, states.z(0), 0); int flags = Flags::START; int startIndex = 0; while (currentFrame) { if (currentFrame->getStrIndex() >= length) { flags |= Flags::END; } else { flags &= ~Flags::END; } if (currentFrame->getStrIndex() == 0) { flags |= Flags::START; } else { flags &= ~Flags::START; } if (currentFrame->zState()->isFinal()) { results->add(new Result(currentFrame->getGroups())); int index = currentFrame->getStrIndex(); if (startIndex < index) { delete currentFrame; currentFrame = new ExecutionStackFrame( 0, states.z(0), index); startIndex = currentFrame->getStrIndex(); if (currentFrame->getStrIndex() > length) { delete currentFrame; currentFrame = 0; continue; } } } if (currentFrame->isEndlessLoop()) { delete currentFrame; results->release(); return 0; // endless loop } bool found = 0; for (int i = currentFrame->getTransitionIndex(); i < currentFrame->zState() ->getTransitions() .getEintragAnzahl(); i++) { Transition t = currentFrame->zState()->getTransitions().get(i); if (t.test(flags)) { currentFrame->setTransitionIndex(i + 1); currentFrame = new ExecutionStackFrame(currentFrame, t.zTargetState(), currentFrame->getStrIndex()); found = 1; break; } else if (currentFrame->getStrIndex() < length && t.test( str[currentFrame->getStrIndex()], flags)) { currentFrame->setTransitionIndex(i + 1); currentFrame = new ExecutionStackFrame(currentFrame, t.zTargetState(), currentFrame->getStrIndex() + 1); found = 1; break; } } if (!found) { ExecutionStackFrame* before = currentFrame->getBefore(); if (before) { currentFrame->discard(); delete currentFrame; currentFrame = before; } else { currentFrame->setStrIndex( currentFrame->getStrIndex() + 1); startIndex = currentFrame->getStrIndex(); currentFrame->setTransitionIndex(0); if (currentFrame->getStrIndex() > length) { delete currentFrame; currentFrame = 0; } } } } return results; } }; template Automata* oneOf(Automata* left, Automata* right) { Automata* result = new Automata(); State* start = result->addState(); int leftStateCount = left->getStates().getEintragAnzahl(); for (State* leftState : left->getStates()) { if (leftState) { result->addState(); } } for (State* leftState : left->getStates()) { State* newState = result->getStates().z(leftState->getId() + 1); newState->setFinal(leftState->isFinal()); newState->setGroupStart(leftState->getGroupStart()); newState->setGroupEnd(leftState->getGroupEnd()); for (Transition transition : leftState->getTransitions()) { newState->addTransition( Transition(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId() + 1), transition.getRequiredFlags())); } } start->addTransition( Transition(0, result->getStates().z(1), 0)); for (State* rightState : right->getStates()) { if (rightState) { result->addState(); } } for (State* rightState : right->getStates()) { State* newState = result->getStates().z( rightState->getId() + 1 + leftStateCount); newState->setFinal(rightState->isFinal()); newState->setGroupStart(rightState->getGroupStart()); newState->setGroupEnd(rightState->getGroupEnd()); for (Transition transition : rightState->getTransitions()) { newState->addTransition(Transition( transition.getConditionFunction(), result->getStates().z(transition.zTargetState()->getId() + 1 + leftStateCount), transition.getRequiredFlags())); } } start->addTransition(Transition( 0, result->getStates().z(1 + leftStateCount), 0)); left->release(); right->release(); return result; } template Automata* concat(Automata* left, Automata* right) { Automata* result = new Automata(); int leftStateCount = left->getStates().getEintragAnzahl(); for (State* leftState : left->getStates()) { if (leftState) { result->addState(); } } bool canSkipFirst = !right->getStates().z(0)->getGroupStart() && !right->getStates().z(0)->getGroupEnd(); int index = 0; for (State* rightState : right->getStates()) { if (rightState && (index != 0 || !canSkipFirst)) { result->addState(); } index++; } for (State* leftState : left->getStates()) { State* newState = result->getStates().z(leftState->getId()); newState->setGroupStart(leftState->getGroupStart()); newState->setGroupEnd(leftState->getGroupEnd()); for (Transition transition : leftState->getTransitions()) { newState->addTransition( Transition(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId()), transition.getRequiredFlags())); } if (leftState->isFinal()) { if (canSkipFirst) { State* oldRightStart = right->getStates().z(0); if (oldRightStart->isFinal()) { newState->setFinal(true); } for (Transition transition : oldRightStart->getTransitions()) { newState->addTransition(Transition( transition.getConditionFunction(), result->getStates().z( leftStateCount - 1 + transition.zTargetState()->getId()), transition.getRequiredFlags())); } } else { newState->addTransition(Transition( 0, result->getStates().z(leftStateCount), 0)); } } } index = 0; for (State* rightState : right->getStates()) { if (index == 0 && canSkipFirst) { index++; continue; } State* newState = result->getStates().z(rightState->getId() + leftStateCount - (canSkipFirst ? 1 : 0)); newState->setFinal(rightState->isFinal()); newState->setGroupStart(rightState->getGroupStart()); newState->setGroupEnd(rightState->getGroupEnd()); for (Transition transition : rightState->getTransitions()) { newState->addTransition( Transition(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId() + leftStateCount - (canSkipFirst ? 1 : 0)), transition.getRequiredFlags())); } index++; } left->release(); right->release(); return result; } template Automata* many(Automata* before, bool lazy) { Automata* result = new Automata(); for (State* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } State* newFinal = result->addState(); if (lazy) { result->getStates().z(0)->addTransition( Transition(0, newFinal, 0)); } newFinal->setFinal(true); for (State* beforeState : before->getStates()) { State* newState = result->getStates().z(beforeState->getId()); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); for (Transition transition : beforeState->getTransitions()) { newState->addTransition( Transition(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId()), transition.getRequiredFlags())); } if (beforeState->isFinal()) { newState->addTransition( Transition(0, result->getStates().z(0), 0)); } } if (!lazy) { result->getStates().z(0)->addTransition( Transition(0, newFinal, 0)); } before->release(); return result; } template Automata* atLeastOnce(Automata* before, bool lazy) { Automata* result = new Automata(); for (State* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } State* newFinal = result->addState(); newFinal->setFinal(true); for (State* beforeState : before->getStates()) { State* newState = result->getStates().z(beforeState->getId()); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); if (lazy && beforeState->isFinal()) { newState->addTransition(Transition(0, newFinal, 0)); } for (Transition transition : beforeState->getTransitions()) { newState->addTransition( Transition(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId()), transition.getRequiredFlags())); } if (beforeState->isFinal()) { newState->addTransition( Transition(0, result->getStates().z(0), 0)); } if (!lazy && beforeState->isFinal()) { newState->addTransition(Transition(0, newFinal, 0)); } } before->release(); return result; } template Automata* maybe(Automata* before) { Automata* result = new Automata(); for (State* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } State* newFinal = result->addState(); newFinal->setFinal(true); for (State* beforeState : before->getStates()) { State* newState = result->getStates().z(beforeState->getId()); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); for (Transition transition : beforeState->getTransitions()) { newState->addTransition( Transition(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId()), transition.getRequiredFlags())); } if (beforeState->isFinal()) { newState->addTransition(Transition(0, newFinal, 0)); } } result->getStates().z(0)->addTransition( Transition(0, newFinal, 0)); before->release(); return result; } template Automata* repeat( Automata* before, int minAmount, int maxAmount) { Automata* result = new Automata(); for (int i = 0; i < maxAmount; i++) { for (State* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } for (State* beforeState : before->getStates()) { State* newState = result->getStates().z( beforeState->getId() + before->getStates().getEintragAnzahl() * i); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); for (Transition transition : beforeState->getTransitions()) { newState->addTransition(Transition( transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId() + before->getStates().getEintragAnzahl() * i), transition.getRequiredFlags())); } } } State* newFinal = result->addState(); newFinal->setFinal(true); if (minAmount == 0) { result->getStates().z(0)->addTransition( Transition(0, newFinal, 0)); } for (int i = 0; i < maxAmount; i++) { for (State* beforeState : before->getStates()) { State* newState = result->getStates().z( beforeState->getId() + before->getStates().getEintragAnzahl() * i); if (beforeState->isFinal()) { if (i < maxAmount - 1) { newState->addTransition(Transition(0, result->getStates().z( before->getStates().getEintragAnzahl() * (i + 1)), 0)); } if (i >= minAmount - 1) { newState->addTransition( Transition(0, newFinal, 0)); } } } } before->release(); return result; } template Automata* group(Automata* before, int groupId) { Automata* result = new Automata(); State* newStart = result->addState(); newStart->setGroupStart(groupId); State* groupStart = result->addState(); newStart->addTransition(Transition(0, groupStart, 0)); for (State* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } groupStart->addTransition( Transition(0, result->getStates().z(2), 0)); State* groupEnd = result->addState(); groupEnd->setGroupEnd(groupId); State* newFinal = result->addState(); groupEnd->addTransition(Transition(0, newFinal, 0)); newFinal->setFinal(true); for (State* beforeState : before->getStates()) { State* newState = result->getStates().z(beforeState->getId() + 2); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); for (Transition transition : beforeState->getTransitions()) { newState->addTransition( Transition(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId() + 2), transition.getRequiredFlags())); } if (beforeState->isFinal()) { newState->addTransition(Transition(0, groupEnd, 0)); } } before->release(); return result; } class RegexConfig { private: Text whitespaceChars; Text wordChars; public: DLLEXPORT RegexConfig(); DLLEXPORT void setWhitespaceChars(Text whitespaceChars); DLLEXPORT void setWordChars(Text wordChars); DLLEXPORT Text getWhitespaceChars() const; DLLEXPORT Text getWordChars() const; }; DLLEXPORT Automata* parse(Text regex); DLLEXPORT Automata* parse(Text regex, RegexConfig& config); } // namespace Regex } // namespace Framework