#pragma once #include <functional> #include "Array.h" #include "Critical.h" #include "Text.h" namespace Framework { namespace Regex { class Flags { public: static const int START = 1; static const int END = 2; }; template<typename Data> class State; template<typename Data> class Transition { private: std::function<bool(Data, int)> conditionFunction; State<Data>* zTarget; int requiredFlags; public: Transition() : conditionFunction(0), zTarget(0), requiredFlags(0) {} Transition(std::function<bool(Data, int)> conditionFunction, State<Data>* zTarget, int requiredFlags) : conditionFunction(conditionFunction), zTarget(zTarget), requiredFlags(requiredFlags) {} Transition(const Transition& other) : conditionFunction(other.conditionFunction), zTarget(other.zTarget), requiredFlags(other.requiredFlags) {} bool test(Data value, int flags) const { return conditionFunction && conditionFunction(value, flags) && (requiredFlags | flags) == flags; } bool test(int flags) const { return !conditionFunction && (requiredFlags | flags) == flags; } State<Data>* zTargetState() const { return zTarget; } std::function<bool(Data, int)> getConditionFunction() const { return conditionFunction; } int getRequiredFlags() const { return requiredFlags; } }; template<typename Data> class State : public ReferenceCounter { private: Array<Transition<Data>> transitions; bool final; int groupStart; int groupEnd; int id; public: State(int id) : ReferenceCounter(), final(false), groupStart(0), groupEnd(0), id(id) {} void addTransition(Transition<Data> transition) { transitions.add(transition); } void setFinal(bool isFinal) { final = isFinal; } void setGroupStart(int groupStart) { this->groupStart = groupStart; } void setGroupEnd(int groupEnd) { this->groupEnd = groupEnd; } bool isFinal() const { return final; } int getGroupStart() const { return groupStart; } int getGroupEnd() const { return groupEnd; } int getId() const { return id; } const Array<Transition<Data>>& getTransitions() const { return transitions; } }; class Group { private: int start; int end; int id; public: DLLEXPORT Group(int start, int end, int id); DLLEXPORT Group(); DLLEXPORT int getStart(); DLLEXPORT int getEnd(); DLLEXPORT int getId(); }; class Result : public ReferenceCounter { private: Array<Group> groups; public: DLLEXPORT Result(Array<Group> groups); DLLEXPORT int getGroupCount(); DLLEXPORT Group getGroup(int index); DLLEXPORT int getStart(); DLLEXPORT int getEnd(); }; template<typename Data> class ExecutionStackFrame { private: ExecutionStackFrame* before; State<Data>* state; int transitionIndex; int strIndex; public: ExecutionStackFrame(ExecutionStackFrame<Data>* before, State<Data>* zState, int strIndex) : before(before), state(zState), transitionIndex(0), strIndex(strIndex) {} ~ExecutionStackFrame() { if (before) { delete before; } } ExecutionStackFrame<Data>* getBefore() const { return before; } State<Data>* zState() const { return state; } void setStrIndex(int index) { strIndex = index; } int getStrIndex() const { return strIndex; } int getTransitionIndex() const { return transitionIndex; } void setTransitionIndex(int transitionIndex) { this->transitionIndex = transitionIndex; } void discard() { before = 0; } Array<Group> getGroups() const { Array<Group> groups; const ExecutionStackFrame* current = this; while (true) { if (current->getBefore()) { current = current->getBefore(); } else { break; } } groups.add(Group(current->getStrIndex(), strIndex, 0)); current = this; Array<int> groupEnds; Array<int> knownGroups; while (current) { if (current->zState()->getGroupEnd()) { groupEnds.add(current->getStrIndex()); } if (current->zState()->getGroupStart() && groupEnds.getEintragAnzahl() > 0) { if (knownGroups.getWertIndex( current->zState()->getGroupStart()) < 0) { while (groups.getEintragAnzahl() <= current->zState()->getGroupStart()) { groups.add( Group(-1, -1, groups.getEintragAnzahl())); } groups.set(Group(current->getStrIndex(), groupEnds.get(0), current->zState()->getGroupStart()), current->zState()->getGroupStart()); knownGroups.add(current->zState()->getGroupStart()); } groupEnds.remove(0); } current = current->getBefore(); } return groups; } bool isEndlessLoop() const { const ExecutionStackFrame* current = this; while (current) { if (current->getBefore() && current->getBefore()->zState() == state && current->getBefore()->getStrIndex() == strIndex) { return true; } current = current->getBefore(); } return false; } }; template<typename Data> class Automata : public ReferenceCounter { private: RCArray<State<Data>> states; public: Automata() : ReferenceCounter() {} State<Data>* addState() { State<Data>* state = new State<Data>(states.getEintragAnzahl()); states.add(state); return state; } const RCArray<State<Data>>& getStates() const { return states; } RCArray<Result>* match(const Data* str, int length) { RCArray<Result>* results = new RCArray<Result>(); ExecutionStackFrame<Data>* currentFrame = new ExecutionStackFrame<Data>(0, states.z(0), 0); int flags = Flags::START; int startIndex = 0; while (currentFrame) { if (currentFrame->getStrIndex() >= length) { flags |= Flags::END; } else { flags &= ~Flags::END; } if (currentFrame->getStrIndex() == 0) { flags |= Flags::START; } else { flags &= ~Flags::START; } if (currentFrame->zState()->isFinal()) { results->add(new Result(currentFrame->getGroups())); int index = currentFrame->getStrIndex(); if (startIndex < index) { delete currentFrame; currentFrame = new ExecutionStackFrame<Data>( 0, states.z(0), index); startIndex = currentFrame->getStrIndex(); if (currentFrame->getStrIndex() > length) { delete currentFrame; currentFrame = 0; continue; } } } if (currentFrame->isEndlessLoop()) { delete currentFrame; results->release(); return 0; // endless loop } bool found = 0; for (int i = currentFrame->getTransitionIndex(); i < currentFrame->zState() ->getTransitions() .getEintragAnzahl(); i++) { Transition<Data> t = currentFrame->zState()->getTransitions().get(i); if (t.test(flags)) { currentFrame->setTransitionIndex(i + 1); currentFrame = new ExecutionStackFrame<Data>(currentFrame, t.zTargetState(), currentFrame->getStrIndex()); found = 1; break; } else if (currentFrame->getStrIndex() < length && t.test( str[currentFrame->getStrIndex()], flags)) { currentFrame->setTransitionIndex(i + 1); currentFrame = new ExecutionStackFrame<Data>(currentFrame, t.zTargetState(), currentFrame->getStrIndex() + 1); found = 1; break; } } if (!found) { ExecutionStackFrame<Data>* before = currentFrame->getBefore(); if (before) { currentFrame->discard(); delete currentFrame; currentFrame = before; } else { currentFrame->setStrIndex( currentFrame->getStrIndex() + 1); startIndex = currentFrame->getStrIndex(); currentFrame->setTransitionIndex(0); if (currentFrame->getStrIndex() > length) { delete currentFrame; currentFrame = 0; } } } } return results; } }; template<typename Data> Automata<Data>* oneOf(Automata<Data>* left, Automata<Data>* right) { Automata<Data>* result = new Automata<Data>(); State<Data>* start = result->addState(); int leftStateCount = left->getStates().getEintragAnzahl(); for (State<Data>* leftState : left->getStates()) { if (leftState) { result->addState(); } } for (State<Data>* leftState : left->getStates()) { State<Data>* newState = result->getStates().z(leftState->getId() + 1); newState->setFinal(leftState->isFinal()); newState->setGroupStart(leftState->getGroupStart()); newState->setGroupEnd(leftState->getGroupEnd()); for (Transition<Data> transition : leftState->getTransitions()) { newState->addTransition( Transition<Data>(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId() + 1), transition.getRequiredFlags())); } } start->addTransition( Transition<Data>(0, result->getStates().z(1), 0)); for (State<Data>* rightState : right->getStates()) { if (rightState) { result->addState(); } } for (State<Data>* rightState : right->getStates()) { State<Data>* newState = result->getStates().z( rightState->getId() + 1 + leftStateCount); newState->setFinal(rightState->isFinal()); newState->setGroupStart(rightState->getGroupStart()); newState->setGroupEnd(rightState->getGroupEnd()); for (Transition<Data> transition : rightState->getTransitions()) { newState->addTransition(Transition<Data>( transition.getConditionFunction(), result->getStates().z(transition.zTargetState()->getId() + 1 + leftStateCount), transition.getRequiredFlags())); } } start->addTransition(Transition<Data>( 0, result->getStates().z(1 + leftStateCount), 0)); left->release(); right->release(); return result; } template<typename Data> Automata<Data>* concat(Automata<Data>* left, Automata<Data>* right) { Automata<Data>* result = new Automata<Data>(); int leftStateCount = left->getStates().getEintragAnzahl(); for (State<Data>* leftState : left->getStates()) { if (leftState) { result->addState(); } } bool canSkipFirst = !right->getStates().z(0)->getGroupStart() && !right->getStates().z(0)->getGroupEnd(); int index = 0; for (State<Data>* rightState : right->getStates()) { if (rightState && (index != 0 || !canSkipFirst)) { result->addState(); } index++; } for (State<Data>* leftState : left->getStates()) { State<Data>* newState = result->getStates().z(leftState->getId()); newState->setGroupStart(leftState->getGroupStart()); newState->setGroupEnd(leftState->getGroupEnd()); for (Transition<Data> transition : leftState->getTransitions()) { newState->addTransition( Transition<Data>(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId()), transition.getRequiredFlags())); } if (leftState->isFinal()) { if (canSkipFirst) { State<Data>* oldRightStart = right->getStates().z(0); if (oldRightStart->isFinal()) { newState->setFinal(true); } for (Transition<Data> transition : oldRightStart->getTransitions()) { newState->addTransition(Transition<Data>( transition.getConditionFunction(), result->getStates().z( leftStateCount - 1 + transition.zTargetState()->getId()), transition.getRequiredFlags())); } } else { newState->addTransition(Transition<Data>( 0, result->getStates().z(leftStateCount), 0)); } } } index = 0; for (State<Data>* rightState : right->getStates()) { if (index == 0 && canSkipFirst) { index++; continue; } State<Data>* newState = result->getStates().z(rightState->getId() + leftStateCount - (canSkipFirst ? 1 : 0)); newState->setFinal(rightState->isFinal()); newState->setGroupStart(rightState->getGroupStart()); newState->setGroupEnd(rightState->getGroupEnd()); for (Transition<Data> transition : rightState->getTransitions()) { newState->addTransition( Transition<Data>(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId() + leftStateCount - (canSkipFirst ? 1 : 0)), transition.getRequiredFlags())); } index++; } left->release(); right->release(); return result; } template<typename Data> Automata<Data>* many(Automata<Data>* before, bool lazy) { Automata<Data>* result = new Automata<Data>(); for (State<Data>* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } State<Data>* newFinal = result->addState(); if (lazy) { result->getStates().z(0)->addTransition( Transition<Data>(0, newFinal, 0)); } newFinal->setFinal(true); for (State<Data>* beforeState : before->getStates()) { State<Data>* newState = result->getStates().z(beforeState->getId()); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); for (Transition<Data> transition : beforeState->getTransitions()) { newState->addTransition( Transition<Data>(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId()), transition.getRequiredFlags())); } if (beforeState->isFinal()) { newState->addTransition( Transition<Data>(0, result->getStates().z(0), 0)); } } if (!lazy) { result->getStates().z(0)->addTransition( Transition<Data>(0, newFinal, 0)); } before->release(); return result; } template<typename Data> Automata<Data>* atLeastOnce(Automata<Data>* before, bool lazy) { Automata<Data>* result = new Automata<Data>(); for (State<Data>* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } State<Data>* newFinal = result->addState(); newFinal->setFinal(true); for (State<Data>* beforeState : before->getStates()) { State<Data>* newState = result->getStates().z(beforeState->getId()); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); if (lazy && beforeState->isFinal()) { newState->addTransition(Transition<Data>(0, newFinal, 0)); } for (Transition<Data> transition : beforeState->getTransitions()) { newState->addTransition( Transition<Data>(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId()), transition.getRequiredFlags())); } if (beforeState->isFinal()) { newState->addTransition( Transition<Data>(0, result->getStates().z(0), 0)); } if (!lazy && beforeState->isFinal()) { newState->addTransition(Transition<Data>(0, newFinal, 0)); } } before->release(); return result; } template<typename Data> Automata<Data>* maybe(Automata<Data>* before) { Automata<Data>* result = new Automata<Data>(); for (State<Data>* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } State<Data>* newFinal = result->addState(); newFinal->setFinal(true); for (State<Data>* beforeState : before->getStates()) { State<Data>* newState = result->getStates().z(beforeState->getId()); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); for (Transition<Data> transition : beforeState->getTransitions()) { newState->addTransition( Transition<Data>(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId()), transition.getRequiredFlags())); } if (beforeState->isFinal()) { newState->addTransition(Transition<Data>(0, newFinal, 0)); } } result->getStates().z(0)->addTransition( Transition<Data>(0, newFinal, 0)); before->release(); return result; } template<typename Data> Automata<Data>* repeat( Automata<Data>* before, int minAmount, int maxAmount) { Automata<Data>* result = new Automata<Data>(); for (int i = 0; i < maxAmount; i++) { for (State<Data>* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } for (State<Data>* beforeState : before->getStates()) { State<Data>* newState = result->getStates().z( beforeState->getId() + before->getStates().getEintragAnzahl() * i); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); for (Transition<Data> transition : beforeState->getTransitions()) { newState->addTransition(Transition<Data>( transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId() + before->getStates().getEintragAnzahl() * i), transition.getRequiredFlags())); } } } State<Data>* newFinal = result->addState(); newFinal->setFinal(true); if (minAmount == 0) { result->getStates().z(0)->addTransition( Transition<Data>(0, newFinal, 0)); } for (int i = 0; i < maxAmount; i++) { for (State<Data>* beforeState : before->getStates()) { State<Data>* newState = result->getStates().z( beforeState->getId() + before->getStates().getEintragAnzahl() * i); if (beforeState->isFinal()) { if (i < maxAmount - 1) { newState->addTransition(Transition<Data>(0, result->getStates().z( before->getStates().getEintragAnzahl() * (i + 1)), 0)); } if (i >= minAmount - 1) { newState->addTransition( Transition<Data>(0, newFinal, 0)); } } } } before->release(); return result; } template<typename Data> Automata<Data>* group(Automata<Data>* before, int groupId) { Automata<Data>* result = new Automata<Data>(); State<Data>* newStart = result->addState(); newStart->setGroupStart(groupId); State<Data>* groupStart = result->addState(); newStart->addTransition(Transition<Data>(0, groupStart, 0)); for (State<Data>* beforeState : before->getStates()) { if (beforeState) { result->addState(); } } groupStart->addTransition( Transition<Data>(0, result->getStates().z(2), 0)); State<Data>* groupEnd = result->addState(); groupEnd->setGroupEnd(groupId); State<Data>* newFinal = result->addState(); groupEnd->addTransition(Transition<Data>(0, newFinal, 0)); newFinal->setFinal(true); for (State<Data>* beforeState : before->getStates()) { State<Data>* newState = result->getStates().z(beforeState->getId() + 2); newState->setGroupStart(beforeState->getGroupStart()); newState->setGroupEnd(beforeState->getGroupEnd()); for (Transition<Data> transition : beforeState->getTransitions()) { newState->addTransition( Transition<Data>(transition.getConditionFunction(), result->getStates().z( transition.zTargetState()->getId() + 2), transition.getRequiredFlags())); } if (beforeState->isFinal()) { newState->addTransition(Transition<Data>(0, groupEnd, 0)); } } before->release(); return result; } class RegexConfig { private: Text whitespaceChars; Text wordChars; public: DLLEXPORT RegexConfig(); DLLEXPORT void setWhitespaceChars(Text whitespaceChars); DLLEXPORT void setWordChars(Text wordChars); DLLEXPORT Text getWhitespaceChars() const; DLLEXPORT Text getWordChars() const; }; DLLEXPORT Text quote(const Text& text); DLLEXPORT Automata<char>* parse(Text regex); DLLEXPORT Automata<char>* parse(Text regex, RegexConfig& config); } // namespace Regex } // namespace Framework