diff --git a/App/RegM/Lectures/Lecture.1.Notes.md b/App/RegM/Lectures/Lecture.1.Notes.md new file mode 100644 index 0000000..c44ecc4 --- /dev/null +++ b/App/RegM/Lectures/Lecture.1.Notes.md @@ -0,0 +1,35 @@ +# Automata Theory: Building a RegExp machine + +## Content: +State Machines +Formal Grammars +Implement a regular expression processor + +## History: + +*Pioneers:* + +1951 - Stephen Kleene invented reg exp (sets). + +Regular Language : Language recognized by a finite automaton (state machine). +Kleene's Theorem : Equivalence of regular expressions and finite automata. + +Has a notation named after him: +Kleene-Closure (AKA: Kleene star) : A* (Stands for repetition) + +1956 - Chomsky defines his hierarchy of grammars + +Regular grammars are considered type 3. +See: https://en.wikipedia.org/wiki/Chomsky_hierarchy + +![img](https://i.imgur.com/Pj2aFeg.png) + +Thus they are the weakest form of grammars. + +1968 - Ken Thompson used them for pattern matching in strings, and +lexical analysis (scanners) + +NFA - Thompson construction + + + diff --git a/App/RegM/Lectures/Lecture.2.Notes.md b/App/RegM/Lectures/Lecture.2.Notes.md new file mode 100644 index 0000000..e8f4d87 --- /dev/null +++ b/App/RegM/Lectures/Lecture.2.Notes.md @@ -0,0 +1,74 @@ +# Symbols, alphabets, and languages and Regular Grammars + +Alphabet : A set of characters. + +Sigma = { a, b } + +Language : A set of strings over a particular alphabet. + +L1(Sigma) = { a, aa, b, ab, ba, bba, .. } (Infinite) +L2(Sigma) = { aa, bb, ab, ba }; (Length = 2, Finite) + +Any time you constrain a language you are +defining a formal grammar. 
+ +## Formal Grammars: + +FormalGrammar = (Non-Terminals, Terminals, Productions, Starting Symbol) + +Non-Terminals : Variables (can be substituted with a value) +Terminals : Cannot be replaced by anything (constant) +Productions : Rule in the grammar + +**G = (N, T, P, S)** + +Ex: +``` +S -> aX +X -> b +``` +**(This notation is known as BNF : Backus-Naur Form)** + +Ex.Non-Terminals = S, X +Ex.Terminals = a, b +Ex.Productions = S -> aX, X -> b (2) +Ex.Starting Symbol = S + +Only valid string : "ab" + +## Chomsky Hierarchy : + +0. Unrestricted : Natural Languages, Turing Machines +1. Context-Sensitive : Programming Languages (Almost all in production) +2. Context-Free : Programming Languages (Parsing Syntax only) +3. Regular : Regular Expressions + +The lower in the hierarchy the less expressive it is. + +RegExp is a vomit inducing terse notation that is equivalent to a regular grammar written in BNF. + +BNF : RegExp +S -> aS : +S -> bA : `a*bc*` +A -> epsilon : +A -> cA : + +epsilon : "The empty string". + +Regular grammars may only have one non-terminal per production: +* At the very right side (right-linear, RHS) +* At the very left side (left-linear, LHS) + +Regular expressions have no support for *NESTING* +They can be *RECURSIVE* + +Context-free grammars support nesting. +Ex: +(( () )) +`Parenthesis balancing` + +Non-regular RegExp can support nesting but are not pure +finite automata and are slower implementations. + + + diff --git a/App/RegM/Lectures/Lecture.3.Notes.md b/App/RegM/Lectures/Lecture.3.Notes.md new file mode 100644 index 0000000..90248fd --- /dev/null +++ b/App/RegM/Lectures/Lecture.3.Notes.md @@ -0,0 +1,85 @@ +# Finite Automata +***(AKA: Finite State Machine)*** + +Mechanism and abstraction used behind regular grammars. + +Usually has its state represented using nodes and edges. 
+ +Regular grammar: +``` +S -> bA +A -> epsilon +``` +Equivalent to: `/b/` + +State transition: + +--label--> : Transition symbol +O : State Symbol +(o) : Accepting State +->O.Start : Starting State (State transition to Start) + +Ex: + +->O.*Start* --*transition*--> (o).*Accepting* + +*ε* - Epsilon (Empty String) +`I will be spelling it out as I do not enjoy single glyph representation` + +Two main types of Finite Automata : + +FA w/ output +* Moore machine +* Mealy machine + +FA w/o output +* DFA - Deterministic +* NFA - Non-deterministic +* epsilon-NFA - (Epsilon Transition) special case + +NFA : Non-deterministic FA - Allows transitions on the same symbol to +different states + +``` + a->o + / +->o.1---b-->o + \ + a->o +``` + +epsilon-NFA : Extension of NFA that allows *epsilon* transitions + +``` + a--->o---epsi--->(o) + / / +->o----b-->epsi--->o + \ + a-->o--epsi-->(o) +``` + +DFA : A state machine which forbids multiple transitions on the same symbol, and *epsilon* transitions + +``` + a--->o + / +->o----b-->o +``` + +Use case: + +Implementation Transformations: +```RegExp -> epsilon-NFA -> ... 
-> DFA``` + +## Formal Definition: + +A non-deterministic finite automaton is a tuple of five elements: +* All possible states +* Alphabet +* Transition Function +* Starting State +* Set of accepting states + +NFA = ( States, Alphabet, TransitionFunction, StartingState, AcceptingStates ) + +NFA = ( Q, Σ, Δ, q0, F ) diff --git a/App/RegM/Lectures/Lecture.4.Notes.md b/App/RegM/Lectures/Lecture.4.Notes.md new file mode 100644 index 0000000..b40dce7 --- /dev/null +++ b/App/RegM/Lectures/Lecture.4.Notes.md @@ -0,0 +1,28 @@ +# Basic NFA Fragments + +### Single Character +RegExp: `/^A$/` +Pseudo: +`str.start glyph(A) str.end` + +^ : Beginning of string : Str.Start +$ : End of a string : Str.End + +Machine: +->o.*Start* ---**Glyph**---> (o).*Accepting* + +### Epsilon-Transition +RegExp: `/^$/` +Pseudo: `str.start str.end` + +Machine: +``` +->o --epsilon--> (o) +``` + +Everything else can be built on top of these machines. + +``` +Start = Input, Accepting = Output +``` + diff --git a/App/RegM/Lectures/Lecture.5.6.7.Notes.md b/App/RegM/Lectures/Lecture.5.6.7.Notes.md new file mode 100644 index 0000000..8f33b12 --- /dev/null +++ b/App/RegM/Lectures/Lecture.5.6.7.Notes.md @@ -0,0 +1,39 @@ +## Concatenation + +Regex : `/^AB$/` +Pseudo: `str.start str(AB) str.end` + +Machine: +``` +->o --A--> o --epsilon--> o --B--> (o) + +Submachine_A --epsilon--> Submachine_B +``` + +## Union + +Regex : `/^(A|B)$/` +Pseudo: `str.start glyph(A) | glyph(B) str.end` + +Machine: +``` + epsilon--> o --A--> o --epsilon + / \ +->o ->(o) + \ / + epsilon--> o --B--> o --epsilon +``` + +## Kleene Closure + +Regex : `/^A*$/` +Pseudo: `str.start glyph(A).repeating str.end` + +Machine: +``` + <------epsilon-------- + / \ +->o --epsilon--> o --A--> o --epsilon--> (o) + \ / + -------------epsilon----------------> +``` diff --git a/App/RegM/Scripts/FiniteAutomata.gd b/App/RegM/Scripts/FiniteAutomata.gd new file mode 100644 index 0000000..e53e0a5 --- /dev/null +++ b/App/RegM/Scripts/FiniteAutomata.gd @@ -0,0 +1,107 
extends Object


# Label used for epsilon (empty-string) transitions.
const epsilon = 'ε'


# One state of the automaton: an accepting flag plus its outgoing transitions.
class State :
	var accepting : bool = false
	# Ordinary symbols map to a single target State; `epsilon` maps to an
	# Array of target States (created in _init).
	var transitionMap : Dictionary = {}

	# accepting : whether reaching this state means the input is accepted.
	func _init(accepting : bool):
		self.accepting = accepting

		transitionMap[epsilon] = []

	# Registers a transition on `symbol` to `state`.
	# Epsilon transitions accumulate; any other symbol keeps only the most
	# recently added target.
	func add_Transition(symbol : String, state : State):
		if symbol == epsilon :
			transitionMap[symbol].append(state)
			return

		transitionMap[symbol] = state

	# Returns the target State for `symbol`, the Array of targets when
	# `symbol` is epsilon, or null when no transition on `symbol` exists.
	func get_Transition(symbol : String) :
		return transitionMap.get(symbol)

	# Returns true when `string` can be fully consumed starting from this
	# state and ending in an accepting state.
	# `visited` is internal bookkeeping: the states already entered through
	# epsilon moves without consuming input, so epsilon cycles terminate.
	func test(string : String, visited = null) -> bool:
		if visited == null:
			visited = {}

		if visited.has(self):
			return false
		visited[self] = true

		if string.length() == 0:
			if accepting:
				return true
		else:
			var symbol = string[0]
			# A literal epsilon glyph in the input must not be mistaken for
			# the epsilon entry, whose value is an Array rather than a State.
			if symbol != epsilon and transitionMap.has(symbol):
				var rest = string.substr(1, string.length() - 1)
				# Input was consumed, so the next state starts with a fresh
				# visited set.
				if transitionMap[symbol].test(rest):
					return true

		for next_state in transitionMap[epsilon]:
			if next_state.test(string, visited):
				return true

		return false


# A machine fragment identified by its entry (input) and exit (output) states.
class NFA :
	var input : State
	var output : State

	func _init(input : State, output : State):
		self.input = input
		self.output = output

	# Runs `string` through the machine, starting at the input state.
	func test(string : String) :
		return input.test(string)



# Chains `first` with every machine in `rest`, in order, and returns the result.
func concat(first : NFA, rest : Array):
	for entry in rest :
		first = concat_pair(first, entry)

	return first

# Concatenates two machines: the output of `first` stops accepting and gains an
# epsilon transition into the input of `second`.
func concat_pair(first : NFA, second : NFA):
	first.output.accepting = false
	second.output.accepting = true

	first.output.add_Transition(epsilon, second.input)

	return NFA.new(first.input, second.output)

# Epsilon-Transition machine
func empty():
	return glyph(epsilon)

# Single character machine.
# Builds a two-state machine: start --symbol--> accepting.
func glyph(symbol : String):
	var start = State.new(false)
	var accepting = State.new(true)

	start.add_Transition(symbol, accepting)

	return NFA.new(start, accepting)

# Kleene closure: wraps `entry` so that it may match zero or more times.
func repeat(entry : NFA):
	var start = State.new(false)
	var accepting = State.new(true)

	start.add_Transition(epsilon, entry.input)
	start.add_Transition(epsilon, accepting) # Zero repetitions: skip entry entirely

	entry.output.accepting = false
	entry.output.add_Transition(epsilon, entry.input) # Repeater transition
	entry.output.add_Transition(epsilon, accepting)

	return NFA.new(start, accepting)

# Folds `first` and every machine in `rest` into a single alternation.
func union(first : NFA, rest : Array):
	for entry in rest :
		first = union_pair(first, entry)

	return first

# Alternation of two machines: a new start state branches by epsilon into the
# input of each, and both outputs lead by epsilon to a new accepting state.
func union_pair(a : NFA, b : NFA):
	var start = State.new(false)
	var accepting = State.new(true)

	start.add_Transition(epsilon, a.input)
	start.add_Transition(epsilon, b.input)

	# Only the new shared accepting state may accept.
	a.output.accepting = false
	b.output.accepting = false

	a.output.add_Transition(epsilon, accepting)
	b.output.add_Transition(epsilon, accepting)

	return NFA.new(start, accepting)



# Manual smoke test: wires two states together and prints the stored transition.
func test():
	var state_1 = State.new(false)
	var state_2 = State.new(true)

	state_1.add_Transition('A', state_2)

	# str() is required: a String cannot be concatenated with an Object directly.
	print("State 1 Transition for " + "A: " + str(state_1.get_Transition('A')))