From e848b79a6275288d58892c31d02c8fad00d8ad74 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Sat, 9 Feb 2019 19:01:13 -0500 Subject: [PATCH 01/13] re to nfa --- src/watcompiler/re.clj | 59 ++++++++++++++++++++++++++++++++++++ test/watcompiler/re_test.clj | 15 +++++++++ 2 files changed, 74 insertions(+) create mode 100644 src/watcompiler/re.clj create mode 100644 test/watcompiler/re_test.clj diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj new file mode 100644 index 0000000..294e235 --- /dev/null +++ b/src/watcompiler/re.clj @@ -0,0 +1,59 @@ +(ns watcompiler.re + (:require [clojure.set :refer :all] + [watcompiler.nfa :refer :all] + [watcompiler.lang :refer :all]) + (:import [watcompiler.nfa NFA])) + +(def int-nfa + (let [stateS (gensym :S) + state1 (gensym :1) + state2 (gensym :2) + state3 (gensym :3) + state4 (gensym :4)] + (make-NFA (into #{} ) + #{stateS state1 state2 state3 state4} + stateS + {state4 (list :INT 1)} + (make-transition-NFA [[stateS state1 e] + [state1 state2 \i] + [state2 state3 \n] + [state3 state4 \t]])))) + +(def integer-literal-nfa + (let [stateS (gensym :S) + state1 (gensym :1) + state2 (gensym :2)] + (make-NFA (into #{} ) + #{stateS state1 state2} + stateS + {state2 (list :INTEGER 1)} + (make-transition-NFA [[stateS state1 e] + [state1 state2 DIGITS-NONZERO] + [state2 state2 DIGITS]])))) + +;; java keywords +;; https://www.student.cs.uwaterloo.ca/~cs444/joos.html + +;; operators +;; = assign, == equal, <= lessequal + +;; booleans +;; true, false + + + +;; complete nfa from all of the individual RE nfas +(def complete-nfa + (let [stateS (gensym :S)] + ;; use default constructor because we no longer have the merged accept-map + (->NFA (into #{} ) + (union (:states int-nfa) (:states integer-literal-nfa)) + stateS + (merge (:accept-states int-nfa) (:accept-states integer-literal-nfa)) + (merge (:transitions int-nfa) (:transitions integer-literal-nfa) + (make-transition-NFA [[stateS (:start int-nfa) e] + [stateS (:start integer-literal-nfa) e]])) + (merge (:accept-priorities int-nfa) (:accept-priorities integer-literal-nfa))))) + + +;; merge arbitrary number of nfas diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj new file mode 100644 index 0000000..0d02cd7 --- /dev/null +++ b/test/watcompiler/re_test.clj @@ -0,0 +1,15 @@ +(ns watcompiler.re-test + (:require [clojure.test :refer :all] + [watcompiler.nfa :refer :all] + [watcompiler.re :refer :all]) + (:import [watcompiler.nfa NFA])) + +;; Individual NFA tests +(deftest int-test + (is (= :INT (run-NFA int-nfa "int"))) + (is (= :INTEGER (run-NFA integer-literal-nfa "109")))) + +;; Merged NFA test +(deftest merged-test + (is (= :INT (run-NFA complete-nfa "int"))) + (is (= :INTEGER (run-NFA complete-nfa "109")))) From a5fa0816c1f0a7d169af621f77d4b7c549ab7eb5 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Sat, 9 Feb 2019 20:28:06 -0500 Subject: [PATCH 02/13] re --- src/watcompiler/re.clj | 1243 +++++++++++++++++++++++++++++++++- test/watcompiler/re_test.clj | 44 +- 2 files changed, 1256 insertions(+), 31 deletions(-) diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj index 294e235..c3a6a6e 100644 --- a/src/watcompiler/re.clj +++ b/src/watcompiler/re.clj @@ -4,20 +4,26 @@ [watcompiler.lang :refer :all]) (:import [watcompiler.nfa NFA])) -(def int-nfa - (let [stateS (gensym :S) - state1 (gensym :1) - state2 (gensym :2) - state3 (gensym :3) - state4 (gensym :4)] - (make-NFA (into #{} ) - #{stateS state1 state2 state3 state4} - stateS - {state4 (list :INT 1)} - (make-transition-NFA [[stateS state1 e] - [state1 state2 \i] - [state2 state3 \n] - [state3 state4 \t]])))) +;; Helpers for merging nfas +(defn merge-nfas + [& args] + (apply merge args)) + +(defn merge-nfas-states + [& args] + (apply union (:states args))) + +(defn merge-nfas-accept-states + [& args] + (apply merge (:accept-states args))) + +(defn merge-nfas-transitions + [& args] + (apply merge (:transitions args))) + +(defn merge-nfas-accept-priorities + [& args] + (apply merge (:accept-priorities args))) (def integer-literal-nfa (let [stateS (gensym :S) @@ -26,34 +32,1215 @@ (make-NFA (into #{} ) #{stateS state1 state2} stateS - {state2 (list :INTEGER 1)} + {state2 (list :INTEGER 0)} (make-transition-NFA [[stateS state1 e] [state1 state2 DIGITS-NONZERO] [state2 state2 DIGITS]])))) +;; Operators +(def operators-nfa + (let [stateS (gensym :S) + state1 (gensym :1) ;; valid, but can add = + state2 (gensym :2) ;; valid, but can add <, = + state3 (gensym :3) ;; valid but can add >, = + state4 (gensym :4) ;; valid, but can add + + state5 (gensym :5) ;; valid, but can add - + state6 (gensym :6) ;; valid, but can add >, = + state7 (gensym :7) ;; valid, but can add >, = + state11 (gensym :11)] ;; Nothing else can be added to it + (make-NFA (into #{} ) + #{stateS state1 state2, state3, state4, state5, state6, state11} + stateS + {state11 (list :OPERATOR 0) + state1 (list :OPERATOR 1) + state2 (list :OPERATOR 2) + state3 (list :OPERATOR 3) + state4 (list :OPERATOR 4) + state5 (list :OPERATOR 5) + state6 (list :OPERATOR 6) + state7 (list :OPERATOR 7)} + (make-transition-NFA [[stateS state1 \=] + [stateS state2 \<] + [stateS state3 \>] + [stateS state1 \!] + [stateS state1 \:] + [stateS state1 \~] + [stateS state1 \?] + [stateS state1 \&] + [stateS state1 \|] + [stateS state1 \^] + [stateS state1 \%] + [stateS state4 \+] + [stateS state5 \-] + [stateS state1 \*] + [stateS state1 \/] + + [state1 state11 \=] + [state2 state1 \<] + [state3 state6 \>] + [state3 state11 \=] + [state4 state11 \+] + [state5 state11 \-] + [state6 state7 \>] ;; >> + [state6 state11 \=] ;; >>= + [state7 state11 \=] ;; >>> + ])))) + +;; Keywords +;; From page 46 of jls2.pdf +(def abstract-nfa + (let [stateS (gensym :S) + a (gensym :a) + ab (gensym :ab) + abs (gensym :abs) + abst (gensym :abst) + abstr (gensym :abstr) + abstra (gensym :abstra) + abstrac (gensym :abstrac) + abstract (gensym :abstract)] + (make-NFA (into #{} ) + #{stateS a ab abs abst abstr abstra abstrac abstract} + stateS + {abstract (list :KEYWORD 0)} + (make-transition-NFA [[stateS a \a] + [a ab \b] + [ab abs \s] + [abs abst \t] + [abst abstr \r] + [abstr abstra \a] + [abstra abstrac \c] + [abstrac abstract \t]])))) + +(def default-nfa + (let [stateS (gensym :S) + d (gensym :d) + de (gensym :de) + def (gensym :def) + defa (gensym :defa) + defau (gensym :defau) + defaul (gensym :defaul) + default (gensym :default)] + (make-NFA (into #{} ) + #{stateS d de def defa defau defaul default} + stateS + {default (list :KEYWORD 0)} + (make-transition-NFA [[stateS d \d] + [d de \e] + [de def \f] + [def defa \a] + [defa defau \u] + [defau defaul \l] + [defaul default \t]])))) + +(def if-nfa + (let [stateS (gensym :S) + i (gensym :i) + if (gensym :if)] + (make-NFA (into #{} ) + #{stateS i if} + stateS + {if (list :KEYWORD 0)} + (make-transition-NFA [[stateS i \i] + [i if \f]])))) + +(def private-nfa + (let [stateS (gensym :S) + p (gensym :p) + pr (gensym :pr) + pri (gensym :pri) + priv (gensym :priv) + priva (gensym :priva) + privat (gensym :privat) + private (gensym :private)] + (make-NFA (into #{} ) + #{stateS p pr pri priv priva privat private} + stateS + {private (list :KEYWORD 0)} + (make-transition-NFA [[stateS p \p] + [p pr \r] + [pr pri \i] + [pri priv \v] + [priv priva \a] + [priva privat \t] + [privat private \e]])))) + +(def this-nfa + (let [stateS (gensym :S) + t (gensym :t) + th (gensym :th) + thi (gensym :thi) + this (gensym :this)] + (make-NFA (into #{} ) + #{stateS t th thi this} + stateS + {this (list :KEYWORD 0)} + (make-transition-NFA [[stateS t \t] + [t th \h] + [th thi \i] + [thi this \s]])))) + +(def boolean-nfa + (let [stateS (gensym :S) + b (gensym :b) + bo (gensym :bo) + boo (gensym :boo) + bool (gensym :bool) + boole (gensym :boole) + boolea (gensym :boolea) + boolean (gensym :boolean)] + (make-NFA (into #{} ) + #{stateS b bo boo bool boole boolea boolean} + stateS + {boolean (list :KEYWORD 0)} + (make-transition-NFA [[stateS b \b] + [b bo \o] + [bo boo \o] + [boo bool \l] + [bool boole \e] + [boole boolea \a] + [boolea boolean \n]])))) + +(def do-nfa + (let [stateS (gensym :S) + d (gensym :d) + do (gensym :do)] + (make-NFA (into #{} ) + #{stateS d do} + stateS + {do (list :KEYWORD 0)} + (make-transition-NFA [[stateS d \d] + [d do \o]])))) + +(def implements-nfa + (let [stateS (gensym :S) + i (gensym :i) + im (gensym :im) + imp (gensym :imp) + impl (gensym :impl) + imple (gensym :imple) + implem (gensym :implem) + impleme (gensym :impleme) + implemen (gensym :implemen) + implement (gensym :implement) + implements (gensym :implements)] + (make-NFA (into #{} ) + #{stateS i im imp impl imple implem impleme implemen implement implements} + stateS + {implements (list :KEYWORD 0)} + (make-transition-NFA [[stateS i \i] + [i im \m] + [im imp \p] + [imp impl \l] + [impl imple \e] + [imple implem \m] + [implem impleme \e] + [impleme implemen \n] + [implemen implement \t] + [implement implements \s]])))) + +(def protected-nfa + (let [stateS (gensym :S) + p (gensym :p) + pr (gensym :pr) + pro (gensym :pro) + prot (gensym :prot) + prote (gensym :prote) + protec (gensym :protec) + protect (gensym :protect) + protecte (gensym :protecte) + protected (gensym :protected)] + (make-NFA (into #{} ) + #{stateS p pr pro prot prote protec protect protecte protected} + stateS + {protected (list :KEYWORD 0)} + (make-transition-NFA [[stateS p \p] + [p pr \r] + [pr pro \o] + [pro prot \t] + [prot prote \e] + [prote protec \c] + [protec protect \t] + [protect protecte \e] + [protecte protected \d]])))) + +;; throw and throws +(def throws-nfa + (let [stateS (gensym :S) + t (gensym :t) + th (gensym :th) + thr (gensym :thr) + thro (gensym :thro) + throw (gensym :throw) + throws (gensym :throws)] + (make-NFA (into #{} ) + #{stateS t th thr thro throw throws} + stateS + {throws (list :KEYWORD 0) + throw (list :KEYWORD 1)} + (make-transition-NFA [[stateS t \t] + [t th \h] + [th thr \r] + [thr thro \o] + [thro throw \w] + [throw throws \s]])))) + +(def break-nfa + (let [stateS (gensym :S) + b (gensym :b) + br (gensym :br) + bre (gensym :bre) + brea (gensym :brea) + break (gensym :break)] + (make-NFA (into #{} ) + #{stateS b br bre brea break} + stateS + {break (list :KEYWORD 0)} + (make-transition-NFA [[stateS b \b] + [b br \r] + [br bre \e] + [bre brea \a] + [brea break \k]])))) + +(def double-nfa + (let [stateS (gensym :S) + d (gensym :d) + do (gensym :do) + dou (gensym :dou) + doub (gensym :doub) + doubl (gensym :doubl) + double (gensym :double)] + (make-NFA (into #{} ) + #{stateS d do dou doub doubl double} + stateS + {double (list :KEYWORD 0)} + (make-transition-NFA [[stateS d \d] + [d do \o] + [do dou \u] + [dou doub \b] + [doub doubl \l] + [double double \e]])))) + +(def import-nfa + (let [stateS (gensym :S) + i (gensym :i) + im (gensym :im) + imp (gensym :imp) + impo (gensym :impo) + impor (gensym :impor) + import (gensym :import)] + (make-NFA (into #{} ) + #{stateS i im imp impo impor import} + stateS + {import (list :KEYWORD 0)} + (make-transition-NFA [[stateS i \i] + [i im \m] + [im imp \p] + [imp impo \o] + [impo impor \r] + [impor import \t]])))) + +(def public-nfa + (let [stateS (gensym :S) + p (gensym :p) + pu (gensym :pu) + pub (gensym :pub) + publ (gensym :publ) + publi (gensym :publi) + public (gensym :public)] + (make-NFA (into #{} ) + #{stateS p pu pub publ publi public} + stateS + {public (list :KEYWORD 0)} + (make-transition-NFA [[stateS p \p] + [p pu \u] + [pu pub \b] + [pub publ \l] + [publ publi \i] + [publi public \c]])))) + +(def byte-nfa + (let [stateS (gensym :S) + b (gensym :b) + by (gensym :by) + byt (gensym :byt) + byte (gensym :byte)] + (make-NFA (into #{} ) + #{stateS b by byt byte} + stateS + {byte (list :KEYWORD 0)} + (make-transition-NFA [[stateS b \b] + [b by \y] + [by byt \t] + [byt byte \e]])))) + +(def else-nfa + (let [stateS (gensym :S) + e (gensym :e) + el (gensym :el) + els (gensym :els) + else (gensym :else)] + (make-NFA (into #{} ) + #{stateS e el els else} + stateS + {else (list :KEYWORD 0)} + (make-transition-NFA [[stateS e \e] + [e el \l] + [el els \s] + [els else \e]])))) + +(def instanceof-nfa + (let [stateS (gensym :S) + i (gensym :i) + in (gensym :in) + ins (gensym :ins) + inst (gensym :inst) + insta (gensym :insta) + instan (gensym :instan) + instanc (gensym :instanc) + instance (gensym :instance) + instanceo (gensym :instanceo) + instanceof (gensym :instanceof)] + (make-NFA (into #{} ) + #{stateS i in ins inst insta instan instanc instance instanceo instanceof} + stateS + {instanceof (list :KEYWORD 0)} + (make-transition-NFA [[stateS i \i] + [i in \n] + [in ins \s] + [ins inst \t] + [inst insta \a] + [insta instan \n] + [instan instanc \c] + [instanc instance \e] + [instance instanceo \o] + [instanceo instanceof \f]])))) + +(def return-nfa + (let [stateS (gensym :S) + r (gensym :r) + re (gensym :re) + ret (gensym :ret) + retu (gensym :retu) + retur (gensym :retur) + return (gensym :return)] + (make-NFA (into #{} ) + #{stateS r re ret retu retur return} + stateS + {return (list :KEYWORD 0)} + (make-transition-NFA [[stateS r \r] + [r re \e] + [re ret \t] + [ret retu \u] + [retu retur \r] + [retur return \n]])))) + +(def transient-nfa + (let [stateS (gensym :S) + t (gensym :t) + tr (gensym :tr) + tra (gensym :tra) + tran (gensym :tran) + trans (gensym :trans) + transi (gensym :transi) + transie (gensym :transie) + transien (gensym :transien) + transient (gensym :transient)] + (make-NFA (into #{} ) + #{stateS t tr tra tran trans transi transie transien transient} + stateS + {transient (list :KEYWORD 0)} + (make-transition-NFA [[stateS t \t] + [t tr \r] + [tr tra \a] + [tra tran \n] + [tran trans \s] + [trans transi \i] + [transi transie \e] + [transie transien \n] + [transien transient \t]])))) + +(def case-nfa + (let [stateS (gensym :S) + c (gensym :c) + ca (gensym :ca) + cas (gensym :cas) + case (gensym :case)] + (make-NFA (into #{} ) + #{stateS c ca cas case} + stateS + {case (list :KEYWORD 0)} + (make-transition-NFA [[stateS c \c] + [c ca \a] + [ca cas \s] + [cas case \e]])))) + +(def extends-nfa + (let [stateS (gensym :S) + e (gensym :e) + ex (gensym :ex) + ext (gensym :ext) + exte (gensym :exte) + exten (gensym :exten) + extend (gensym :extend) + extends (gensym :extends)] + (make-NFA (into #{} ) + #{stateS e ex ext exte exten extend extends} + stateS + {extends (list :KEYWORD 0)} + (make-transition-NFA [[stateS e \e] + [e ex \x] + [ex ext \t] + [ext exte \e] + [exte exten \n] + [exten extend \d] + [extend extends \s]])))) + +(def int-nfa + (let [stateS (gensym :S) + i (gensym :i) + in (gensym :in) + int (gensym :int)] + (make-NFA (into #{} ) + #{stateS i in int} + stateS + {int (list :KEYWORD 0)} + (make-transition-NFA [[stateS i \i] + [i in \n] + [in int \t]])))) + +(def short-nfa + (let [stateS (gensym :S) + s (gensym :s) + sh (gensym :sh) + sho (gensym :sho) + shor (gensym :shor) + short (gensym :short)] + (make-NFA (into #{} ) + #{stateS s sh sho shor short} + stateS + {short (list :KEYWORD 0)} + (make-transition-NFA [[stateS s \s] + [s sh \h] + [sh sho \o] + [sho shor \r] + [shor short \t]])))) + +(def try-nfa + (let [stateS (gensym :S) + t (gensym :t) + tr (gensym :tr) + try (gensym :try)] + (make-NFA (into #{} ) + #{stateS t tr try} + stateS + {try (list :KEYWORD 0)} + (make-transition-NFA [[stateS t \t] + [t tr \r] + [tr try \y]])))) + +(def catch-nfa + (let [stateS (gensym :S) + c (gensym :c) + ca (gensym :ca) + cat (gensym :cat) + catc (gensym :catc) + catch (gensym :catch)] + (make-NFA (into #{} ) + #{stateS c ca cat catc catch} + stateS + {catch (list :KEYWORD 0)} + (make-transition-NFA [[stateS c \c] + [c ca \a] + [ca cat \t] + [cat catc \c] + [catc catch \h]])))) + +;; final and finally +(def finally-nfa + (let [stateS (gensym :S) + f (gensym :f) + fi (gensym :fi) + fin (gensym :fin) + fina (gensym :fina) + final (gensym :final) + finall (gensym :finall) + finally (gensym :finally)] + (make-NFA (into #{} ) + #{stateS f fi fin fina final finall finally} + stateS + {finally (list :KEYWORD 0) + final (list :KEYWORD 1)} + (make-transition-NFA [[stateS f \f] + [f fi \i] + [fi fin \n] + [fin fina \a] + [fina final \l] + [final finall \l] + [finall finally \y]])))) + +(def interface-nfa + (let [stateS (gensym :S) + i (gensym :i) + in (gensym :in) + int (gensym :int) + inte (gensym :inte) + inter (gensym :inter) + interf (gensym :interf) + interfa (gensym :interfa) + interfac (gensym :interfac) + interface (gensym :interface)] + (make-NFA (into #{} ) + #{stateS i in int inte inter interf interfa interfac interface} + stateS + {interface (list :KEYWORD 0)} + (make-transition-NFA [[stateS i \i] + [i in \n] + [in int \t] + [int inte \e] + [inte inter \r] + [inter interf \f] + [interf interfa \a] + [interfa interfac \c] + [interfac interface \e]])))) + +(def static-nfa + (let [stateS (gensym :S) + s (gensym :s) + st (gensym :st) + sta (gensym :sta) + stat (gensym :stat) + stati (gensym :stati) + static (gensym :static)] + (make-NFA (into #{} ) + #{stateS static} + stateS + {static (list :KEYWORD 0)} + (make-transition-NFA [[stateS s \s] + [s st \t] + [st sta \a] + [sta stat \t] + [stat stati \i] + [stati static \c]])))) + +(def void-nfa + (let [stateS (gensym :S) + v (gensym :v) + vo (gensym :vo) + voi (gensym :voi) + void (gensym :void)] + (make-NFA (into #{} ) + #{stateS v vo voi void} + stateS + {void (list :KEYWORD 0)} + (make-transition-NFA [[stateS v \v] + [v vo \o] + [vo voi \i] + [voi void \d]])))) + +(def char-nfa + (let [stateS (gensym :S) + c (gensym :c) + ch (gensym :ch) + cha (gensym :cha) + char (gensym :char)] + (make-NFA (into #{} ) + #{stateS c ch cha char} + stateS + {char (list :KEYWORD 0)} + (make-transition-NFA [[stateS c \c] + [c ch \h] + [ch cha \a] + [cha char \r]])))) + +(def long-nfa + (let [stateS (gensym :S) + l (gensym :l) + lo (gensym :lo) + lon (gensym :lon) + long (gensym :long)] + (make-NFA (into #{} ) + #{stateS l lo lon long} + stateS + {long (list :KEYWORD 0)} + (make-transition-NFA [[stateS l \l] + [l lo \o] + [lo lon \n] + [lon long \g]])))) + +(def strictfp-nfa + (let [stateS (gensym :S) + s (gensym :s) + st (gensym :st) + str (gensym :str) + stri (gensym :stri) + stric (gensym :stric) + strict (gensym :strict) + strictf (gensym :strictf) + strictfp (gensym :strictfp)] + (make-NFA (into #{} ) + #{stateS s st str stri stric strict strictf strictfp} + stateS + {strictfp (list :KEYWORD 0)} + (make-transition-NFA [[stateS s \s] + [s st \t] + [st str \r] + [str stri \i] + [stri stric \c] + [stric strict \t] + [strict strictf \f] + [strictf strictfp \p]])))) + +(def volatile-nfa + (let [stateS (gensym :S) + v (gensym :v) + vo (gensym :vo) + vol (gensym :vol) + vola (gensym :vola) + volat (gensym :volat) + volati (gensym :volati) + volatil (gensym :volatil) + volatile (gensym :volatile)] + (make-NFA (into #{} ) + #{stateS v vo vol vola volat volati volatil volatile} + stateS + {volatile (list :KEYWORD 0)} + (make-transition-NFA [[stateS v \v] + [v vo \o] + [vo vol \l] + [vol vola \a] + [vola volat \t] + [volat volati \i] + [volati volatil \l] + [volatil volatile \e]])))) + +(def class-nfa + (let [stateS (gensym :S) + c (gensym :c) + cl (gensym :cl) + cla (gensym :cla) + clas (gensym :clas) + class (gensym :class)] + (make-NFA (into #{} ) + #{stateS c cl cla clas class} + stateS + {class (list :KEYWORD 0)} + (make-transition-NFA [[stateS c \c] + [c cl \l] + [cl cla \a] + [cla clas \s] + [clas class \s]])))) + +(def float-nfa + (let [stateS (gensym :S) + f (gensym :f) + fl (gensym :fl) + flo (gensym :flo) + floa (gensym :floa) + float (gensym :float)] + (make-NFA (into #{} ) + #{stateS f fl flo floa float} + stateS + {float (list :KEYWORD 0)} + (make-transition-NFA [[stateS f \f] + [f fl \l] + [fl flo \o] + [flo floa \a] + [floa float \t]])))) + +(def native-nfa + (let [stateS (gensym :S) + n (gensym :n) + na (gensym :na) + nat (gensym :nat) + nati (gensym :nati) + nativ (gensym :nativ) + native (gensym :native)] + (make-NFA (into #{} ) + #{stateS n na nat nati nativ native} + stateS + {native (list :KEYWORD 0)} + (make-transition-NFA [[stateS n \n] + [n na \a] + [na nat \t] + [nat nati \i] + [nati nativ \v] + [nativ native \e]])))) + +(def super-nfa + (let [stateS (gensym :S) + s (gensym :s) + su (gensym :su) + sup (gensym :sup) + supe (gensym :supe) + super (gensym :super)] + (make-NFA (into #{} ) + #{stateS s su sup supe super} + stateS + {super (list :KEYWORD 0)} + (make-transition-NFA [[stateS s \s] + [s su \u] + [su sup \p] + [sup supe \e] + [supe super \r]])))) + +(def while-nfa + (let [stateS (gensym :S) + w (gensym :w) + wh (gensym :wh) + whi (gensym :whi) + whil (gensym :whil) + while (gensym :while)] + (make-NFA (into #{} ) + #{stateS w wh whi whil while} + stateS + {while (list :KEYWORD 0)} + (make-transition-NFA [[stateS w \w] + [w wh \h] + [wh whi \i] + [whi whil \l] + [whil while \e]])))) + +(def const-nfa + (let [stateS (gensym :S) + c (gensym :c) + co (gensym :co) + con (gensym :con) + cons (gensym :cons) + const (gensym :const)] + (make-NFA (into #{} ) + #{stateS c co con cons const} + stateS + {const (list :KEYWORD 0)} + (make-transition-NFA [[stateS c \c] + [c co \o] + [co con \n] + [con cons \s] + [cons const \t]])))) + +(def for-nfa + (let [stateS (gensym :S) + f (gensym :f) + fo (gensym :fo) + for (gensym :for)] + (make-NFA (into #{} ) + #{stateS f fo for} + stateS + {for (list :KEYWORD 0)} + (make-transition-NFA [[stateS f \f] + [f fo \o] + [fo for \r]])))) + +(def new-nfa + (let [stateS (gensym :S) + n (gensym :n) + ne (gensym :ne) + new (gensym :new)] + (make-NFA (into #{} ) + #{stateS n ne new} + stateS + {new (list :KEYWORD 0)} + (make-transition-NFA [[stateS n \n] + [n ne \e] + [ne new \w]])))) + +(def switch-nfa + (let [stateS (gensym :S) + s (gensym :s) + sw (gensym :sw) + swi (gensym :swi) + swit (gensym :swit) + switc (gensym :switc) + switch (gensym :switch)] + (make-NFA (into #{} ) + #{stateS s sw swi swit switc switch} + stateS + {switch (list :KEYWORD 0)} + (make-transition-NFA [[stateS s \s] + [s sw \w] + [sw swi \i] + [swi swit \t] + [swit switc \c] + [switc switch \h]])))) + +(def continue-nfa + (let [stateS (gensym :S) + c (gensym :c) + co (gensym :co) + con (gensym :con) + cont (gensym :cont) + conti (gensym :conti) + contin (gensym :contin) + continu (gensym :continu) + continue (gensym :continue)] + (make-NFA (into #{} ) + #{stateS c co con cont conti contin continu continue} + stateS + {continue (list :KEYWORD 0)} + (make-transition-NFA [[stateS c \c] + [c co \o] + [co con \n] + [con cont \t] + [cont conti \i] + [conti contin \n] + [contin continu \u] + [continu continue \e]])))) + +(def goto-nfa + (let [stateS (gensym :S) + g (gensym :g) + go (gensym :go) + got (gensym :got) + goto (gensym :goto)] + (make-NFA (into #{} ) + #{stateS g go got goto} + stateS + {goto (list :KEYWORD 0)} + (make-transition-NFA [[stateS g \g] + [g go \o] + [go got \t] + [got goto \o]])))) + +(def package-nfa + (let [stateS (gensym :S) + p (gensym :p) + pa (gensym :pa) + pac (gensym :pac) + pack (gensym :pack) + packa (gensym :packa) + packag (gensym :packag) + package (gensym :package)] + (make-NFA (into #{} ) + #{stateS p pa pac pack packa packag package} + stateS + {package (list :KEYWORD 0)} + (make-transition-NFA [[stateS p \p] + [p pa \a] + [pa pac \c] + [pac pack \k] + [pack packa \a] + [packa packag \g] + [packag package \e]])))) + +(def synchronized-nfa + (let [stateS (gensym :S) + s (gensym :s) + sy (gensym :sy) + syn (gensym :syn) + sync (gensym :sync) + synch (gensym :synch) + synchr (gensym :synchr) + synchro (gensym :synchro) + synchron (gensym :synchron) + synchroni (gensym :synchroni) + synchroniz (gensym :synchroniz) + synchronize (gensym :synchronize) + synchronized (gensym :synchronized)] + (make-NFA (into #{} ) + #{stateS s sy syn sync synch synchr synchro synchron synchroni synchroniz synchronize synchronized} + stateS + {synchronized (list :KEYWORD 0)} + (make-transition-NFA [[stateS s \s] + [s sy \y] + [sy syn \n] + [syn sync \c] + [sync synch \h] + [synch synchr \r] + [synchro synchro \o] + [synchron synchron \n] + [synchroni synchroni \i] + [synchroniz synchroniz \z] + [synchronize synchronize \e] + [synchronized synchronized \d]])))) + +(def keywords-nfa + (let [stateS (gensym :S)] + ;; use default constructor because we no longer have the merged accept-map + (->NFA (into #{} ) + (merge-nfas-states + abstract-nfa + default-nfa + if-nfa + private-nfa + this-nfa + boolean-nfa + do-nfa + implements-nfa + protected-nfa + break-nfa + double-nfa + import-nfa + public-nfa + throws-nfa + byte-nfa + else-nfa + instanceof-nfa + return-nfa + transient-nfa + case-nfa + extends-nfa + int-nfa + short-nfa + try-nfa + catch-nfa + interface-nfa + static-nfa + void-nfa + char-nfa + finally-nfa + long-nfa + strictfp-nfa + volatile-nfa + class-nfa + float-nfa + native-nfa + super-nfa + while-nfa + const-nfa + for-nfa + new-nfa + switch-nfa + continue-nfa + goto-nfa + package-nfa + synchronized-nfa) + stateS + (merge-nfas + (:accept-states abstract-nfa) + (:accept-states default-nfa) + (:accept-states if-nfa) + (:accept-states private-nfa) + (:accept-states this-nfa) + (:accept-states boolean-nfa) + (:accept-states do-nfa) + (:accept-states implements-nfa) + (:accept-states protected-nfa) + (:accept-states break-nfa) + (:accept-states double-nfa) + (:accept-states import-nfa) + (:accept-states public-nfa) + (:accept-states throws-nfa) + (:accept-states byte-nfa) + (:accept-states else-nfa) + (:accept-states instanceof-nfa) + (:accept-states return-nfa) + (:accept-states transient-nfa) + (:accept-states case-nfa) + (:accept-states extends-nfa) + (:accept-states int-nfa) + (:accept-states short-nfa) + (:accept-states try-nfa) + (:accept-states catch-nfa) + (:accept-states interface-nfa) + (:accept-states static-nfa) + (:accept-states void-nfa) + (:accept-states char-nfa) + (:accept-states finally-nfa) + (:accept-states long-nfa) + (:accept-states strictfp-nfa) + (:accept-states volatile-nfa) + (:accept-states class-nfa) + (:accept-states float-nfa) + (:accept-states native-nfa) + (:accept-states super-nfa) + (:accept-states while-nfa) + (:accept-states const-nfa) + (:accept-states for-nfa) + (:accept-states new-nfa) + (:accept-states switch-nfa) + (:accept-states continue-nfa) + (:accept-states goto-nfa) + (:accept-states package-nfa) + (:accept-states synchronized-nfa)) + (merge-nfas + (:transitions abstract-nfa) + (:transitions default-nfa) + (:transitions if-nfa) + (:transitions private-nfa) + (:transitions this-nfa) + (:transitions boolean-nfa) + (:transitions do-nfa) + (:transitions implements-nfa) + (:transitions protected-nfa) + (:transitions break-nfa) + (:transitions double-nfa) + (:transitions import-nfa) + (:transitions public-nfa) + (:transitions throws-nfa) + (:transitions byte-nfa) + (:transitions else-nfa) + (:transitions instanceof-nfa) + (:transitions return-nfa) + (:transitions transient-nfa) + (:transitions case-nfa) + (:transitions extends-nfa) + (:transitions int-nfa) + (:transitions short-nfa) + (:transitions try-nfa) + (:transitions catch-nfa) + (:transitions interface-nfa) + (:transitions static-nfa) + (:transitions void-nfa) + (:transitions char-nfa) + (:transitions finally-nfa) + (:transitions long-nfa) + (:transitions strictfp-nfa) + (:transitions volatile-nfa) + (:transitions class-nfa) + (:transitions float-nfa) + (:transitions native-nfa) + (:transitions super-nfa) + (:transitions while-nfa) + (:transitions const-nfa) + (:transitions for-nfa) + (:transitions new-nfa) + (:transitions switch-nfa) + (:transitions continue-nfa) + (:transitions goto-nfa) + (:transitions package-nfa) + (:transitions synchronized-nfa) + (make-transition-NFA [[stateS (:start abstract-nfa) e] + [stateS (:start default-nfa) e] + [stateS (:start if-nfa) e] + [stateS (:start private-nfa) e] + [stateS (:start this-nfa) e] + [stateS (:start boolean-nfa) e] + [stateS (:start do-nfa) e] + [stateS (:start implements-nfa) e] + [stateS (:start protected-nfa) e] + [stateS (:start break-nfa) e] + [stateS (:start double-nfa) e] + [stateS (:start import-nfa) e] + [stateS (:start public-nfa) e] + [stateS (:start throws-nfa) e] + [stateS (:start byte-nfa) e] + [stateS (:start else-nfa) e] + [stateS (:start instanceof-nfa) e] + [stateS (:start return-nfa) e] + [stateS (:start transient-nfa) e] + [stateS (:start case-nfa) e] + [stateS (:start extends-nfa) e] + [stateS (:start int-nfa) e] + [stateS (:start short-nfa) e] + [stateS (:start try-nfa) e] + [stateS (:start catch-nfa) e] + [stateS (:start interface-nfa) e] + [stateS (:start static-nfa) e] + [stateS (:start void-nfa) e] + [stateS (:start char-nfa) e] + [stateS (:start finally-nfa) e] + [stateS (:start long-nfa) e] + [stateS (:start strictfp-nfa) e] + [stateS (:start volatile-nfa) e] + [stateS (:start class-nfa) e] + [stateS (:start float-nfa) e] + [stateS (:start native-nfa) e] + [stateS (:start super-nfa) e] + [stateS (:start while-nfa) e] + [stateS (:start const-nfa) e] + [stateS (:start for-nfa) e] + [stateS (:start new-nfa) e] + [stateS (:start switch-nfa) e] + [stateS (:start continue-nfa) e] + [stateS (:start goto-nfa) e] + [stateS (:start package-nfa) e] + [stateS (:start synchronized-nfa) e]])) + (merge-nfas + (:accept-priorities abstract-nfa) + (:accept-priorities default-nfa) + (:accept-priorities if-nfa) + (:accept-priorities private-nfa) + (:accept-priorities this-nfa) + (:accept-priorities boolean-nfa) + (:accept-priorities do-nfa) + (:accept-priorities implements-nfa) + (:accept-priorities protected-nfa) + (:accept-priorities break-nfa) + (:accept-priorities double-nfa) + (:accept-priorities import-nfa) + (:accept-priorities public-nfa) + (:accept-priorities throws-nfa) + (:accept-priorities byte-nfa) + (:accept-priorities else-nfa) + (:accept-priorities instanceof-nfa) + (:accept-priorities return-nfa) + (:accept-priorities transient-nfa) + (:accept-priorities case-nfa) + (:accept-priorities extends-nfa) + (:accept-priorities int-nfa) + (:accept-priorities short-nfa) + (:accept-priorities try-nfa) + (:accept-priorities catch-nfa) + (:accept-priorities interface-nfa) + (:accept-priorities static-nfa) + (:accept-priorities void-nfa) + (:accept-priorities char-nfa) + (:accept-priorities finally-nfa) + (:accept-priorities long-nfa) + (:accept-priorities strictfp-nfa) + (:accept-priorities volatile-nfa) + (:accept-priorities class-nfa) + (:accept-priorities float-nfa) + (:accept-priorities native-nfa) + (:accept-priorities super-nfa) + (:accept-priorities while-nfa) + (:accept-priorities const-nfa) + (:accept-priorities for-nfa) + (:accept-priorities new-nfa) + (:accept-priorities switch-nfa) + (:accept-priorities continue-nfa) + (:accept-priorities goto-nfa) + (:accept-priorities package-nfa) + (:accept-priorities synchronized-nfa))))) ;; java keywords ;; https://www.student.cs.uwaterloo.ca/~cs444/joos.html -;; operators -;; = assign, == equal, <= lessequal +;; operators, can get the string later on +;; = assign, == EQ, <= LE, != NE -;; booleans -;; true, false +;; Booleans +(def boolean-nfa + (let [stateS (gensym :S) + statet (gensym :t) + statetr (gensym :tr) + statetru (gensym :tru) + statetrue (gensym :true) + statef (gensym :f) + statefa (gensym :fa) + statefal (gensym :fal) + statefals (gensym :fals) + statefalse (gensym :false)] + (make-NFA (into #{} ) + #{stateS statet statetr statetru statetrue statef statefa statefal statefals statefalse} + stateS + {statetrue (list :BOOLEAN 0) + statefalse (list :BOOLEAN 0)} + (make-transition-NFA [[stateS statet \t] + [statet statetr \r] + [statetr statetru \u] + [statetru statetrue \e] + [stateS statef \f] + [statef statefa \a] + [statefa statefal \l] + [statefal statefals \s] + [statefals statefalse \e]])))) ;; complete nfa from all of the individual RE nfas +;; boolean +;; int-literal +;; keywords +;; operators (def complete-nfa (let [stateS (gensym :S)] ;; use default constructor because we no longer have the merged accept-map (->NFA (into #{} ) - (union (:states int-nfa) (:states integer-literal-nfa)) + (merge-nfas-states integer-literal-nfa operators-nfa) stateS - (merge (:accept-states int-nfa) (:accept-states integer-literal-nfa)) - (merge (:transitions int-nfa) (:transitions integer-literal-nfa) - (make-transition-NFA [[stateS (:start int-nfa) e] - [stateS (:start integer-literal-nfa) e]])) - (merge (:accept-priorities int-nfa) (:accept-priorities integer-literal-nfa))))) - - -;; merge arbitrary number of nfas + (merge-nfas (:accept-states integer-literal-nfa) (:accept-states operators-nfa)) + (merge-nfas (:transitions integer-literal-nfa) + (:transitions operators-nfa) + (make-transition-NFA [[stateS (:start integer-literal-nfa) e] + [stateS (:start operators-nfa) e]])) + (merge-nfas (:accept-priorities integer-literal-nfa) + (:accept-priorities operators-nfa))))) diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj index 0d02cd7..78111a6 100644 --- a/test/watcompiler/re_test.clj +++ b/test/watcompiler/re_test.clj @@ -6,10 +6,48 @@ ;; Individual NFA tests (deftest int-test - (is (= :INT (run-NFA int-nfa "int"))) + (is (= :KEYWORD (run-NFA int-nfa "int"))) (is (= :INTEGER (run-NFA integer-literal-nfa "109")))) ;; Merged NFA test (deftest merged-test - (is (= :INT (run-NFA complete-nfa "int"))) - (is (= :INTEGER (run-NFA complete-nfa "109")))) + ;; Integer + (is (= :INTEGER (run-NFA complete-nfa "109"))) + ;; Operators + (is (= :OPERATOR (run-NFA complete-nfa "+"))) + (is (= :OPERATOR (run-NFA complete-nfa "++"))) + (is (= :OPERATOR (run-NFA complete-nfa ">"))) + (is (= :OPERATOR (run-NFA complete-nfa ">="))) + (is (= :OPERATOR (run-NFA complete-nfa ">>"))) + (is (= :OPERATOR (run-NFA complete-nfa ">>="))) + (is (= :OPERATOR (run-NFA complete-nfa ">>>"))) + (is (= :OPERATOR (run-NFA complete-nfa ">>>="))) + (is (= :OPERATOR (run-NFA complete-nfa "&"))) + (is (= :OPERATOR (run-NFA complete-nfa "^="))) + (is (= :OPERATOR (run-NFA complete-nfa "^"))) + (is (= :OPERATOR (run-NFA complete-nfa "<<"))) + (is (= :OPERATOR (run-NFA complete-nfa "="))) + (is (= :OPERATOR (run-NFA complete-nfa "=="))) + (is (= :OPERATOR (run-NFA complete-nfa "!"))) + (is (= :OPERATOR (run-NFA complete-nfa "!=")))) + ;; Keywords + ;; (is (= :KEYWORD (run-NFA complete-nfa "int")))) + +(deftest boolean-test + (is (= :BOOLEAN (run-NFA boolean-nfa "true"))) + (is (= :BOOLEAN (run-NFA boolean-nfa "false"))) + (is (= false (run-NFA boolean-nfa "tru"))) + (is (= false (run-NFA boolean-nfa "fals")))) + +;; Keyword test +(deftest keyword-test + ;; Individual Keywords on their nfas + (is (= :KEYWORD (run-NFA int-nfa "int"))) + (is (= :KEYWORD (run-NFA abstract-nfa "abstract"))) + (is (= :KEYWORD (run-NFA default-nfa "default"))) + ;; All Test + (is (= :KEYWORD (run-NFA keywords-nfa "abstract"))) + (is (= :KEYWORD (run-NFA keywords-nfa "default"))) + (is (= :KEYWORD (run-NFA keywords-nfa "if"))) + (is (= :KEYWORD (run-NFA keywords-nfa "private"))) + (is (= :KEYWORD (run-NFA keywords-nfa "this")))) From d344e47f275f46e272391d6f6cc4b7395df52246 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Sat, 2 Mar 2019 23:51:09 -0500 Subject: [PATCH 03/13] finished merge and tests --- src/watcompiler/re.clj | 35 +++++++----- test/watcompiler/re_test.clj | 107 ++++++++++++++++++++++++++++------- 2 files changed, 107 insertions(+), 35 deletions(-) diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj index c3a6a6e..0b369a3 100644 --- a/src/watcompiler/re.clj +++ b/src/watcompiler/re.clj @@ -317,7 +317,7 @@ [do dou \u] [dou doub \b] [doub doubl \l] - [double double \e]])))) + [doubl double \e]])))) (def import-nfa (let [stateS (gensym :S) @@ -944,12 +944,12 @@ [syn sync \c] [sync synch \h] [synch synchr \r] - [synchro synchro \o] - [synchron synchron \n] - [synchroni synchroni \i] - [synchroniz synchroniz \z] - [synchronize synchronize \e] - [synchronized synchronized \d]])))) + [synchr synchro \o] + [synchro synchron \n] + [synchron synchroni \i] + [synchroni synchroniz \z] + [synchroniz synchronize \e] + [synchronize synchronized \d]])))) (def keywords-nfa (let [stateS (gensym :S)] @@ -1227,20 +1227,29 @@ ;; complete nfa from all of the individual RE nfas -;; boolean ;; int-literal -;; keywords ;; operators +;; boolean +;; keywords (def complete-nfa (let [stateS (gensym :S)] ;; use default constructor because we no longer have the merged accept-map (->NFA (into #{} ) - (merge-nfas-states integer-literal-nfa operators-nfa) + (merge-nfas-states integer-literal-nfa operators-nfa boolean-nfa keywords-nfa) stateS - (merge-nfas (:accept-states integer-literal-nfa) (:accept-states operators-nfa)) + (merge-nfas (:accept-states integer-literal-nfa) + (:accept-states operators-nfa) + (:accept-states boolean-nfa) + (:accept-states keywords-nfa)) (merge-nfas (:transitions integer-literal-nfa) (:transitions operators-nfa) + (:transitions boolean-nfa) + (:transitions keywords-nfa) (make-transition-NFA [[stateS (:start integer-literal-nfa) e] - [stateS (:start operators-nfa) e]])) + [stateS (:start operators-nfa) e] + [stateS (:start boolean-nfa) e] + [stateS (:start keywords-nfa) e]])) (merge-nfas (:accept-priorities integer-literal-nfa) - (:accept-priorities operators-nfa))))) + (:accept-priorities operators-nfa) + (:accept-priorities boolean-nfa) + (:accept-priorities keywords-nfa))))) diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj index 78111a6..513054d 100644 --- a/test/watcompiler/re_test.clj +++ b/test/watcompiler/re_test.clj @@ -10,7 +10,91 @@ (is (= :INTEGER (run-NFA integer-literal-nfa "109")))) ;; Merged NFA test -(deftest merged-test +(deftest operator-test + ;; Operators + (is (= :OPERATOR (run-NFA operators-nfa "+"))) + (is (= :OPERATOR (run-NFA operators-nfa "++"))) + (is (= :OPERATOR (run-NFA operators-nfa ">"))) + (is (= :OPERATOR (run-NFA operators-nfa ">="))) + (is (= :OPERATOR (run-NFA operators-nfa ">>"))) + (is (= :OPERATOR (run-NFA operators-nfa ">>="))) + (is (= :OPERATOR (run-NFA operators-nfa ">>>"))) + (is (= :OPERATOR (run-NFA operators-nfa ">>>="))) + (is (= :OPERATOR (run-NFA operators-nfa "&"))) + (is (= :OPERATOR (run-NFA operators-nfa "^="))) + (is (= :OPERATOR (run-NFA operators-nfa "^"))) + (is (= :OPERATOR (run-NFA operators-nfa "<<"))) + (is (= :OPERATOR (run-NFA operators-nfa "="))) + (is (= :OPERATOR (run-NFA operators-nfa "=="))) + (is (= :OPERATOR (run-NFA operators-nfa "!"))) + (is (= :OPERATOR (run-NFA operators-nfa "!=")))) + +(deftest boolean-test + (is (= :BOOLEAN (run-NFA boolean-nfa "true"))) + (is (= :BOOLEAN (run-NFA boolean-nfa "false"))) + (is (= false (run-NFA boolean-nfa "tru"))) + (is (= false (run-NFA boolean-nfa "fals")))) + +;; Keyword test +(deftest keyword-test + ;; Individual Keywords on their nfas + (is (= :KEYWORD (run-NFA int-nfa "int"))) + (is (= :KEYWORD (run-NFA abstract-nfa "abstract"))) + (is (= :KEYWORD (run-NFA default-nfa "default")))) + +(deftest all-test + ;; All Test + (is (= :KEYWORD (run-NFA complete-nfa "abstract"))) + (is (= :KEYWORD (run-NFA complete-nfa "default"))) + (is (= :KEYWORD (run-NFA complete-nfa "if"))) + (is (= :KEYWORD (run-NFA complete-nfa "private"))) + (is (= :KEYWORD (run-NFA complete-nfa "this"))) + (is (= :KEYWORD (run-NFA complete-nfa "boolean"))) + (is (= :KEYWORD (run-NFA complete-nfa "do"))) + (is (= :KEYWORD (run-NFA complete-nfa "implements"))) + (is (= :KEYWORD (run-NFA complete-nfa "protected"))) + (is (= :KEYWORD (run-NFA complete-nfa "break"))) + (is (= :KEYWORD (run-NFA complete-nfa "double"))) + (is (= :KEYWORD (run-NFA complete-nfa "import"))) + (is (= :KEYWORD (run-NFA complete-nfa "public"))) + (is (= :KEYWORD (run-NFA complete-nfa "throws"))) + (is (= :KEYWORD (run-NFA complete-nfa "byte"))) + (is (= :KEYWORD (run-NFA complete-nfa "else"))) + (is (= :KEYWORD (run-NFA complete-nfa "instanceof"))) + (is (= :KEYWORD (run-NFA complete-nfa "return"))) + (is (= :KEYWORD (run-NFA complete-nfa "transient"))) + (is (= :KEYWORD (run-NFA complete-nfa "case"))) + (is (= :KEYWORD (run-NFA complete-nfa "extends"))) + (is (= :KEYWORD (run-NFA complete-nfa "int"))) + (is (= :KEYWORD (run-NFA complete-nfa "short"))) + (is (= :KEYWORD (run-NFA complete-nfa "try"))) + (is (= :KEYWORD (run-NFA complete-nfa "catch"))) + (is (= :KEYWORD (run-NFA complete-nfa "interface"))) + (is (= :KEYWORD (run-NFA complete-nfa "static"))) + (is (= :KEYWORD (run-NFA complete-nfa "void"))) + (is (= :KEYWORD (run-NFA complete-nfa "char"))) + (is (= :KEYWORD (run-NFA complete-nfa "finally"))) + (is (= :KEYWORD (run-NFA complete-nfa "long"))) + (is (= :KEYWORD (run-NFA complete-nfa "strictfp"))) + (is (= :KEYWORD (run-NFA complete-nfa "volatile"))) + (is (= :KEYWORD (run-NFA complete-nfa "class"))) + (is (= :KEYWORD (run-NFA complete-nfa "float"))) + (is (= :KEYWORD (run-NFA complete-nfa "native"))) + (is (= :KEYWORD (run-NFA complete-nfa "super"))) + (is (= :KEYWORD (run-NFA complete-nfa "while"))) + (is (= :KEYWORD (run-NFA complete-nfa "const"))) + (is (= :KEYWORD (run-NFA complete-nfa "for"))) + (is (= :KEYWORD (run-NFA complete-nfa "new"))) + (is (= :KEYWORD (run-NFA complete-nfa "switch"))) + (is (= :KEYWORD (run-NFA complete-nfa "continue"))) + (is (= :KEYWORD (run-NFA complete-nfa "goto"))) + (is (= :KEYWORD (run-NFA complete-nfa "package"))) + (is (= :KEYWORD (run-NFA complete-nfa "synchronized"))) + ;; Booleans + (is (= :BOOLEAN (run-NFA complete-nfa "true"))) + (is (= :BOOLEAN (run-NFA complete-nfa "false"))) + (is (= false (run-NFA complete-nfa "tru"))) + (is (= false (run-NFA complete-nfa "fals"))) ;; Integer (is (= :INTEGER (run-NFA complete-nfa "109"))) ;; Operators @@ -30,24 +114,3 @@ (is (= :OPERATOR (run-NFA complete-nfa "=="))) (is (= :OPERATOR (run-NFA complete-nfa "!"))) (is (= :OPERATOR (run-NFA complete-nfa "!=")))) - ;; Keywords - ;; (is (= :KEYWORD (run-NFA complete-nfa "int")))) - -(deftest boolean-test - (is (= :BOOLEAN (run-NFA boolean-nfa "true"))) - (is (= :BOOLEAN (run-NFA boolean-nfa "false"))) - (is (= false (run-NFA boolean-nfa "tru"))) - (is (= false (run-NFA boolean-nfa "fals")))) - -;; Keyword test -(deftest keyword-test - ;; Individual Keywords on their nfas - (is (= :KEYWORD (run-NFA int-nfa "int"))) - (is (= :KEYWORD (run-NFA abstract-nfa "abstract"))) - (is (= :KEYWORD (run-NFA default-nfa "default"))) - ;; All Test - (is (= :KEYWORD (run-NFA keywords-nfa "abstract"))) - (is (= :KEYWORD (run-NFA keywords-nfa "default"))) - (is (= :KEYWORD (run-NFA keywords-nfa "if"))) - (is (= :KEYWORD (run-NFA keywords-nfa "private"))) - (is (= :KEYWORD (run-NFA keywords-nfa "this")))) From 525119e7856c7d47c3a9e0a03fcd504396896255 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Wed, 6 Mar 2019 01:24:59 -0500 Subject: [PATCH 04/13] function to make re -> nfa --- src/watcompiler/re.clj | 1024 ++++------------------------------ test/watcompiler/re_test.clj | 30 +- 2 files changed, 138 insertions(+), 916 deletions(-) diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj index 0b369a3..4d52e9b 100644 --- a/src/watcompiler/re.clj +++ b/src/watcompiler/re.clj @@ -9,21 +9,36 @@ [& args] (apply merge args)) -(defn merge-nfas-states - [& args] - (apply union (:states args))) - -(defn merge-nfas-accept-states - [& args] - (apply merge (:accept-states args))) - -(defn merge-nfas-transitions - [& args] - (apply merge (:transitions args))) - -(defn merge-nfas-accept-priorities - [& args] - (apply merge (:accept-priorities args))) +;; parse each string to form the nfa and then form complete nfa +;; form the states in the nfa +(defn string-to-nfa + [word wordtype] + (let + [stateS (gensym :s) + + ;; List of substrings of word, stored as strings + states-map (rest (reductions str (str) word)) + + ;; Key: substring of word, Value: gensym associated with this state + gensym-map (into (sorted-map) (for [c states-map] + [c (gensym c)])) + ;; Key: gensym value, Value: char to get to this state + states-char-map (into (sorted-map) + (for [pair (map list (vals gensym-map) (seq word))] + [(first pair) (second pair)])) + + ;; Accept states + accept-states-map {(get gensym-map word) (list wordtype 0)} + + ;; transitions from previous substring gensym to next substring gensym + transitions-map (into #{ [stateS (get gensym-map (str (first (seq word))) \a) (first (seq word))] } + (for [v (partition 2 1 (vals gensym-map))] + [(first v) (second v) (get states-char-map (second v))]))] + (make-NFA (into #{} ) + states-map + stateS + accept-states-map + (make-transition-NFA transitions-map)))) (def integer-literal-nfa (let [stateS (gensym :S) @@ -80,882 +95,65 @@ [state3 state11 \=] [state4 state11 \+] [state5 state11 \-] - [state6 state7 \>] ;; >> - [state6 state11 \=] ;; >>= - [state7 state11 \=] ;; >>> - ])))) - -;; Keywords -;; From page 46 of jls2.pdf -(def abstract-nfa - (let [stateS (gensym :S) - a (gensym :a) - ab (gensym :ab) - abs (gensym :abs) - abst (gensym :abst) - abstr (gensym :abstr) - abstra (gensym :abstra) - abstrac (gensym :abstrac) - abstract (gensym :abstract)] - (make-NFA (into #{} ) - #{stateS a ab abs abst abstr abstra abstrac abstract} - stateS - {abstract (list :KEYWORD 0)} - (make-transition-NFA [[stateS a \a] - [a ab \b] - [ab abs \s] - [abs abst \t] - [abst abstr \r] - [abstr abstra \a] - [abstra abstrac \c] - [abstrac abstract \t]])))) - -(def default-nfa - (let [stateS (gensym :S) - d (gensym :d) - de (gensym :de) - def (gensym :def) - defa (gensym :defa) - defau (gensym :defau) - defaul (gensym :defaul) - default (gensym :default)] - (make-NFA (into #{} ) - #{stateS d de def defa defau defaul default} - stateS - {default (list :KEYWORD 0)} - (make-transition-NFA [[stateS d \d] - [d de \e] - [de def \f] - [def defa \a] - [defa defau \u] - [defau defaul \l] - [defaul default \t]])))) - -(def if-nfa - (let [stateS (gensym :S) - i (gensym :i) - if (gensym :if)] - (make-NFA (into #{} ) - #{stateS i if} - stateS - {if (list :KEYWORD 0)} - (make-transition-NFA [[stateS i \i] - [i if \f]])))) - -(def private-nfa - (let [stateS (gensym :S) - p (gensym :p) - pr (gensym :pr) - pri (gensym :pri) - priv (gensym :priv) - priva (gensym :priva) - privat (gensym :privat) - private (gensym :private)] - (make-NFA (into #{} ) - #{stateS p pr pri priv priva privat private} - stateS - {private (list :KEYWORD 0)} - (make-transition-NFA [[stateS p \p] - [p pr \r] - [pr pri \i] - [pri priv \v] - [priv priva \a] - [priva privat \t] - [privat private \e]])))) - -(def this-nfa - (let [stateS (gensym :S) - t (gensym :t) - th (gensym :th) - thi (gensym :thi) - this (gensym :this)] - (make-NFA (into #{} ) - #{stateS t th thi this} - stateS - {this (list :KEYWORD 0)} - (make-transition-NFA [[stateS t \t] - [t th \h] - [th thi \i] - [thi this \s]])))) - -(def boolean-nfa - (let [stateS (gensym :S) - b (gensym :b) - bo (gensym :bo) - boo (gensym :boo) - bool (gensym :bool) - boole (gensym :boole) - boolea (gensym :boolea) - boolean (gensym :boolean)] - (make-NFA (into #{} ) - #{stateS b bo boo bool boole boolea boolean} - stateS - {boolean (list :KEYWORD 0)} - (make-transition-NFA [[stateS b \b] - [b bo \o] - [bo boo \o] - [boo bool \l] - [bool boole \e] - [boole boolea \a] - [boolea boolean \n]])))) - -(def do-nfa - (let [stateS (gensym :S) - d (gensym :d) - do (gensym :do)] - (make-NFA (into #{} ) - #{stateS d do} - stateS - {do (list :KEYWORD 0)} - (make-transition-NFA [[stateS d \d] - [d do \o]])))) - -(def implements-nfa - (let [stateS (gensym :S) - i (gensym :i) - im (gensym :im) - imp (gensym :imp) - impl (gensym :impl) - imple (gensym :imple) - implem (gensym :implem) - impleme (gensym :impleme) - implemen (gensym :implemen) - implement (gensym :implement) - implements (gensym :implements)] - (make-NFA (into #{} ) - #{stateS i im imp impl imple implem impleme implemen implement implements} - stateS - {implements (list :KEYWORD 0)} - (make-transition-NFA [[stateS i \i] - [i im \m] - [im imp \p] - [imp impl \l] - [impl imple \e] - [imple implem \m] - [implem impleme \e] - [impleme implemen \n] - [implemen implement \t] - [implement implements \s]])))) - -(def protected-nfa - (let [stateS (gensym :S) - p (gensym :p) - pr (gensym :pr) - pro (gensym :pro) - prot (gensym :prot) - prote (gensym :prote) - protec (gensym :protec) - protect (gensym :protect) - protecte (gensym :protecte) - protected (gensym :protected)] - (make-NFA (into #{} ) - #{stateS p pr pro prot prote protec protect protecte protected} - stateS - {protected (list :KEYWORD 0)} - (make-transition-NFA [[stateS p \p] - [p pr \r] - [pr pro \o] - [pro prot \t] - [prot prote \e] - [prote protec \c] - [protec protect \t] - [protect protecte \e] - [protecte protected \d]])))) - -;; throw and throws -(def throws-nfa - (let [stateS (gensym :S) - t (gensym :t) - th (gensym :th) - thr (gensym :thr) - thro (gensym :thro) - throw (gensym :throw) - throws (gensym :throws)] - (make-NFA (into #{} ) - #{stateS t th thr thro throw throws} - stateS - {throws (list :KEYWORD 0) - throw (list :KEYWORD 1)} - (make-transition-NFA [[stateS t \t] - [t th \h] - [th thr \r] - [thr thro \o] - [thro throw \w] - [throw throws \s]])))) - -(def break-nfa - (let [stateS (gensym :S) - b (gensym :b) - br (gensym :br) - bre (gensym :bre) - brea (gensym :brea) - break (gensym :break)] - (make-NFA (into #{} ) - #{stateS b br bre brea break} - stateS - {break (list :KEYWORD 0)} - (make-transition-NFA [[stateS b \b] - [b br \r] - [br bre \e] - [bre brea \a] - [brea break \k]])))) - -(def double-nfa - (let [stateS (gensym :S) - d (gensym :d) - do (gensym :do) - dou (gensym :dou) - doub (gensym :doub) - doubl (gensym :doubl) - double (gensym :double)] - (make-NFA (into #{} ) - #{stateS d do dou doub doubl double} - stateS - {double (list :KEYWORD 0)} - (make-transition-NFA [[stateS d \d] - [d do \o] - [do dou \u] - [dou doub \b] - [doub doubl \l] - [doubl double \e]])))) - -(def import-nfa - (let [stateS (gensym :S) - i (gensym :i) - im (gensym :im) - imp (gensym :imp) - impo (gensym :impo) - impor (gensym :impor) - import (gensym :import)] - (make-NFA (into #{} ) - #{stateS i im imp impo impor import} - stateS - {import (list :KEYWORD 0)} - (make-transition-NFA [[stateS i \i] - [i im \m] - [im imp \p] - [imp impo \o] - [impo impor \r] - [impor import \t]])))) - -(def public-nfa - (let [stateS (gensym :S) - p (gensym :p) - pu (gensym :pu) - pub (gensym :pub) - publ (gensym :publ) - publi (gensym :publi) - public (gensym :public)] - (make-NFA (into #{} ) - #{stateS p pu pub publ publi public} - stateS - {public (list :KEYWORD 0)} - (make-transition-NFA [[stateS p \p] - [p pu \u] - [pu pub \b] - [pub publ \l] - [publ publi \i] - [publi public \c]])))) - -(def byte-nfa - (let [stateS (gensym :S) - b (gensym :b) - by (gensym :by) - byt (gensym :byt) - byte (gensym :byte)] - (make-NFA (into #{} ) - #{stateS b by byt byte} - stateS - {byte (list :KEYWORD 0)} - (make-transition-NFA [[stateS b \b] - [b by \y] - [by byt \t] - [byt byte \e]])))) - -(def else-nfa - (let [stateS (gensym :S) - e (gensym :e) - el (gensym :el) - els (gensym :els) - else (gensym :else)] - (make-NFA (into #{} ) - #{stateS e el els else} - stateS - {else (list :KEYWORD 0)} - (make-transition-NFA [[stateS e \e] - [e el \l] - [el els \s] - [els else \e]])))) - -(def instanceof-nfa - (let [stateS (gensym :S) - i (gensym :i) - in (gensym :in) - ins (gensym :ins) - inst (gensym :inst) - insta (gensym :insta) - instan (gensym :instan) - instanc (gensym :instanc) - instance (gensym :instance) - instanceo (gensym :instanceo) - instanceof (gensym :instanceof)] - (make-NFA (into #{} ) - #{stateS i in ins inst insta instan instanc instance instanceo instanceof} - stateS - {instanceof (list :KEYWORD 0)} - (make-transition-NFA [[stateS i \i] - [i in \n] - [in ins \s] - [ins inst \t] - [inst insta \a] - [insta instan \n] - [instan instanc \c] - [instanc instance \e] - [instance instanceo \o] - [instanceo instanceof \f]])))) - -(def return-nfa - (let [stateS (gensym :S) - r (gensym :r) - re (gensym :re) - ret (gensym :ret) - retu (gensym :retu) - retur (gensym :retur) - return (gensym :return)] - (make-NFA (into #{} ) - #{stateS r re ret retu retur return} - stateS - {return (list :KEYWORD 0)} - (make-transition-NFA [[stateS r \r] - [r re \e] - [re ret \t] - [ret retu \u] - [retu retur \r] - [retur return \n]])))) - -(def transient-nfa - (let [stateS (gensym :S) - t (gensym :t) - tr (gensym :tr) - tra (gensym :tra) - tran (gensym :tran) - trans (gensym :trans) - transi (gensym :transi) - transie (gensym :transie) - transien (gensym :transien) - transient (gensym :transient)] - (make-NFA (into #{} ) - #{stateS t tr tra tran trans transi transie transien transient} - stateS - {transient (list :KEYWORD 0)} - (make-transition-NFA [[stateS t \t] - [t tr \r] - [tr tra \a] - [tra tran \n] - [tran trans \s] - [trans transi \i] - [transi transie \e] - [transie transien \n] - [transien transient \t]])))) - -(def case-nfa - (let [stateS (gensym :S) - c (gensym :c) - ca (gensym :ca) - cas (gensym :cas) - case (gensym :case)] - (make-NFA (into #{} ) - #{stateS c ca cas case} - stateS - {case (list :KEYWORD 0)} - (make-transition-NFA [[stateS c \c] - [c ca \a] - [ca cas \s] - [cas case \e]])))) - -(def extends-nfa - (let [stateS (gensym :S) - e (gensym :e) - ex (gensym :ex) - ext (gensym :ext) - exte (gensym :exte) - exten (gensym :exten) - extend (gensym :extend) - extends (gensym :extends)] - (make-NFA (into #{} ) - #{stateS e ex ext exte exten extend extends} - stateS - {extends (list :KEYWORD 0)} - (make-transition-NFA [[stateS e \e] - [e ex \x] - [ex ext \t] - [ext exte \e] - [exte exten \n] - [exten extend \d] - [extend extends \s]])))) - -(def int-nfa - (let [stateS (gensym :S) - i (gensym :i) - in (gensym :in) - int (gensym :int)] - (make-NFA (into #{} ) - #{stateS i in int} - stateS - {int (list :KEYWORD 0)} - (make-transition-NFA [[stateS i \i] - [i in \n] - [in int \t]])))) - -(def short-nfa - (let [stateS (gensym :S) - s (gensym :s) - sh (gensym :sh) - sho (gensym :sho) - shor (gensym :shor) - short (gensym :short)] - (make-NFA (into #{} ) - #{stateS s sh sho shor short} - stateS - {short (list :KEYWORD 0)} - (make-transition-NFA [[stateS s \s] - [s sh \h] - [sh sho \o] - [sho shor \r] - [shor short \t]])))) - -(def try-nfa - (let [stateS (gensym :S) - t (gensym :t) - tr (gensym :tr) - try (gensym :try)] - (make-NFA (into #{} ) - #{stateS t tr try} - stateS - {try (list :KEYWORD 0)} - (make-transition-NFA [[stateS t \t] - [t tr \r] - [tr try \y]])))) - -(def catch-nfa - (let [stateS (gensym :S) - c (gensym :c) - ca (gensym :ca) - cat (gensym :cat) - catc (gensym :catc) - catch (gensym :catch)] - (make-NFA (into #{} ) - #{stateS c ca cat catc catch} - stateS - {catch (list :KEYWORD 0)} - (make-transition-NFA [[stateS c \c] - [c ca \a] - [ca cat \t] - [cat catc \c] - [catc catch \h]])))) - -;; final and finally -(def finally-nfa - (let [stateS (gensym :S) - f (gensym :f) - fi (gensym :fi) - fin (gensym :fin) - fina (gensym :fina) - final (gensym :final) - finall (gensym :finall) - finally (gensym :finally)] - (make-NFA (into #{} ) - #{stateS f fi fin fina final finall finally} - stateS - {finally (list :KEYWORD 0) - final (list :KEYWORD 1)} - (make-transition-NFA [[stateS f \f] - [f fi \i] - [fi fin \n] - [fin fina \a] - [fina final \l] - [final finall \l] - [finall finally \y]])))) - -(def interface-nfa - (let [stateS (gensym :S) - i (gensym :i) - in (gensym :in) - int (gensym :int) - inte (gensym :inte) - inter (gensym :inter) - interf (gensym :interf) - interfa (gensym :interfa) - interfac (gensym :interfac) - interface (gensym :interface)] - (make-NFA (into #{} ) - #{stateS i in int inte inter interf interfa interfac interface} - stateS - {interface (list :KEYWORD 0)} - (make-transition-NFA [[stateS i \i] - [i in \n] - [in int \t] - [int inte \e] - [inte inter \r] - [inter interf \f] - [interf interfa \a] - [interfa interfac \c] - [interfac interface \e]])))) - -(def static-nfa - (let [stateS (gensym :S) - s (gensym :s) - st (gensym :st) - sta (gensym :sta) - stat (gensym :stat) - stati (gensym :stati) - static (gensym :static)] - (make-NFA (into #{} ) - #{stateS static} - stateS - {static (list :KEYWORD 0)} - (make-transition-NFA [[stateS s \s] - [s st \t] - [st sta \a] - [sta stat \t] - [stat stati \i] - [stati static \c]])))) - -(def void-nfa - (let [stateS (gensym :S) - v (gensym :v) - vo (gensym :vo) - voi (gensym :voi) - void (gensym :void)] - (make-NFA (into #{} ) - #{stateS v vo voi void} - stateS - {void (list :KEYWORD 0)} - (make-transition-NFA [[stateS v \v] - [v vo \o] - [vo voi \i] - [voi void \d]])))) - -(def char-nfa - (let [stateS (gensym :S) - c (gensym :c) - ch (gensym :ch) - cha (gensym :cha) - char (gensym :char)] - (make-NFA (into #{} ) - #{stateS c ch cha char} - stateS - {char (list :KEYWORD 0)} - (make-transition-NFA [[stateS c \c] - [c ch \h] - [ch cha \a] - [cha char \r]])))) - -(def long-nfa - (let [stateS (gensym :S) - l (gensym :l) - lo (gensym :lo) - lon (gensym :lon) - long (gensym :long)] - (make-NFA (into #{} ) - #{stateS l lo lon long} - stateS - {long (list :KEYWORD 0)} - (make-transition-NFA [[stateS l \l] - [l lo \o] - [lo lon \n] - [lon long \g]])))) - -(def strictfp-nfa - (let [stateS (gensym :S) - s (gensym :s) - st (gensym :st) - str (gensym :str) - stri (gensym :stri) - stric (gensym :stric) - strict (gensym :strict) - strictf (gensym :strictf) - strictfp (gensym :strictfp)] - (make-NFA (into #{} ) - #{stateS s st str stri stric strict strictf strictfp} - stateS - {strictfp (list :KEYWORD 0)} - (make-transition-NFA [[stateS s \s] - [s st \t] - [st str \r] - [str stri \i] - [stri stric \c] - [stric strict \t] - [strict strictf \f] - [strictf strictfp \p]])))) - -(def volatile-nfa - (let [stateS (gensym :S) - v (gensym :v) - vo (gensym :vo) - vol (gensym :vol) - vola (gensym :vola) - volat (gensym :volat) - volati (gensym :volati) - volatil (gensym :volatil) - volatile (gensym :volatile)] - (make-NFA (into #{} ) - #{stateS v vo vol vola volat volati volatil volatile} - stateS - {volatile (list :KEYWORD 0)} - (make-transition-NFA [[stateS v \v] - [v vo \o] - [vo vol \l] - [vol vola \a] - [vola volat \t] - [volat volati \i] - [volati volatil \l] - [volatil volatile \e]])))) - -(def class-nfa - (let [stateS (gensym :S) - c (gensym :c) - cl (gensym :cl) - cla (gensym :cla) - clas (gensym :clas) - class (gensym :class)] - (make-NFA (into #{} ) - #{stateS c cl cla clas class} - stateS - {class (list :KEYWORD 0)} - (make-transition-NFA [[stateS c \c] - [c cl \l] - [cl cla \a] - [cla clas \s] - [clas class \s]])))) - -(def float-nfa - (let [stateS (gensym :S) - f (gensym :f) - fl (gensym :fl) - flo (gensym :flo) - floa (gensym :floa) - float (gensym :float)] - (make-NFA (into #{} ) - #{stateS f fl flo floa float} - stateS - {float (list :KEYWORD 0)} - (make-transition-NFA [[stateS f \f] - [f fl \l] - [fl flo \o] - [flo floa \a] - [floa float \t]])))) - -(def native-nfa - (let [stateS (gensym :S) - n (gensym :n) - na (gensym :na) - nat (gensym :nat) - nati (gensym :nati) - nativ (gensym :nativ) - native (gensym :native)] - (make-NFA (into #{} ) - #{stateS n na nat nati nativ native} - stateS - {native (list :KEYWORD 0)} - (make-transition-NFA [[stateS n \n] - [n na \a] - [na nat \t] - [nat nati \i] - [nati nativ \v] - [nativ native \e]])))) - -(def super-nfa - (let [stateS (gensym :S) - s (gensym :s) - su (gensym :su) - sup (gensym :sup) - supe (gensym :supe) - super (gensym :super)] - (make-NFA (into #{} ) - #{stateS s su sup supe super} - stateS - {super (list :KEYWORD 0)} - (make-transition-NFA [[stateS s \s] - [s su \u] - [su sup \p] - [sup supe \e] - [supe super \r]])))) - -(def while-nfa - (let [stateS (gensym :S) - w (gensym :w) - wh (gensym :wh) - whi (gensym :whi) - whil (gensym :whil) - while (gensym :while)] - (make-NFA (into #{} ) - #{stateS w wh whi whil while} - stateS - {while (list :KEYWORD 0)} - (make-transition-NFA [[stateS w \w] - [w wh \h] - [wh whi \i] - [whi whil \l] - [whil while \e]])))) - -(def const-nfa - (let [stateS (gensym :S) - c (gensym :c) - co (gensym :co) - con (gensym :con) - cons (gensym :cons) - const (gensym :const)] - (make-NFA (into #{} ) - #{stateS c co con cons const} - stateS - {const (list :KEYWORD 0)} - (make-transition-NFA [[stateS c \c] - [c co \o] - [co con \n] - [con cons \s] - [cons const \t]])))) - -(def for-nfa - (let [stateS (gensym :S) - f (gensym :f) - fo (gensym :fo) - for (gensym :for)] - (make-NFA (into #{} ) - #{stateS f fo for} - stateS - {for (list :KEYWORD 0)} - (make-transition-NFA [[stateS f \f] - [f fo \o] - [fo for \r]])))) - -(def new-nfa - (let [stateS (gensym :S) - n (gensym :n) - ne (gensym :ne) - new (gensym :new)] - (make-NFA (into #{} ) - #{stateS n ne new} - stateS - {new (list :KEYWORD 0)} - (make-transition-NFA [[stateS n \n] - [n ne \e] - [ne new \w]])))) - -(def switch-nfa - (let [stateS (gensym :S) - s (gensym :s) - sw (gensym :sw) - swi (gensym :swi) - swit (gensym :swit) - switc (gensym :switc) - switch (gensym :switch)] - (make-NFA (into #{} ) - #{stateS s sw swi swit switc switch} - stateS - {switch (list :KEYWORD 0)} - (make-transition-NFA [[stateS s \s] - [s sw \w] - [sw swi \i] - [swi swit \t] - [swit switc \c] - [switc switch \h]])))) - -(def continue-nfa - (let [stateS (gensym :S) - c (gensym :c) - co (gensym :co) - con (gensym :con) - cont (gensym :cont) - conti (gensym :conti) - contin (gensym :contin) - continu (gensym :continu) - continue (gensym :continue)] - (make-NFA (into #{} ) - #{stateS c co con cont conti contin continu continue} - stateS - {continue (list :KEYWORD 0)} - (make-transition-NFA [[stateS c \c] - [c co \o] - [co con \n] - [con cont \t] - [cont conti \i] - [conti contin \n] - [contin continu \u] - [continu continue \e]])))) - -(def goto-nfa - (let [stateS (gensym :S) - g (gensym :g) - go (gensym :go) - got (gensym :got) - goto (gensym :goto)] - (make-NFA (into #{} ) - #{stateS g go got goto} - stateS - {goto (list :KEYWORD 0)} - (make-transition-NFA [[stateS g \g] - [g go \o] - [go got \t] - [got goto \o]])))) - -(def package-nfa - (let [stateS (gensym :S) - p (gensym :p) - pa (gensym :pa) - pac (gensym :pac) - pack (gensym :pack) - packa (gensym :packa) - packag (gensym :packag) - package (gensym :package)] - (make-NFA (into #{} ) - #{stateS p pa pac pack packa packag package} - stateS - {package (list :KEYWORD 0)} - (make-transition-NFA [[stateS p \p] - [p pa \a] - [pa pac \c] - [pac pack \k] - [pack packa \a] - [packa packag \g] - [packag package \e]])))) - -(def synchronized-nfa - (let [stateS (gensym :S) - s (gensym :s) - sy (gensym :sy) - syn (gensym :syn) - sync (gensym :sync) - synch (gensym :synch) - synchr (gensym :synchr) - synchro (gensym :synchro) - synchron (gensym :synchron) - synchroni (gensym :synchroni) - synchroniz (gensym :synchroniz) - synchronize (gensym :synchronize) - synchronized (gensym :synchronized)] - (make-NFA (into #{} ) - #{stateS s sy syn sync synch synchr synchro synchron synchroni synchroniz synchronize synchronized} - stateS - {synchronized (list :KEYWORD 0)} - (make-transition-NFA [[stateS s \s] - [s sy \y] - [sy syn \n] - [syn sync \c] - [sync synch \h] - [synch synchr \r] - [synchr synchro \o] - [synchro synchron \n] - [synchron synchroni \i] - [synchroni synchroniz \z] - [synchroniz synchronize \e] - [synchronize synchronized \d]])))) - + [state6 state7 \>] + [state6 state11 \=] + [state7 state11 \=]])))) + +(def abstract-nfa (string-to-nfa "abstract" :KEYWORD)) +(def default-nfa (string-to-nfa "default" :KEYWORD)) +(def if-nfa (string-to-nfa "if" :KEYWORD)) +(def private-nfa (string-to-nfa "private" :KEYWORD)) +(def this-nfa (string-to-nfa "this" :KEYWORD)) +(def boolean-nfa (string-to-nfa "boolean" :KEYWORD)) +(def do-nfa (string-to-nfa "do" :KEYWORD)) +(def implements-nfa (string-to-nfa "implements" :KEYWORD)) +(def protected-nfa (string-to-nfa "protected" :KEYWORD)) +(def break-nfa (string-to-nfa "break" :KEYWORD)) +(def double-nfa (string-to-nfa "double" :KEYWORD)) +(def import-nfa (string-to-nfa "import" :KEYWORD)) +(def public-nfa (string-to-nfa "public" :KEYWORD)) +(def throws-nfa (string-to-nfa "throws" :KEYWORD)) +(def throw-nfa (string-to-nfa "throw" :KEYWORD)) +(def byte-nfa (string-to-nfa "byte" :KEYWORD)) +(def else-nfa (string-to-nfa "else" :KEYWORD)) +(def instanceof-nfa (string-to-nfa "instanceof" :KEYWORD)) +(def return-nfa (string-to-nfa "return" :KEYWORD)) +(def transient-nfa (string-to-nfa "transient" :KEYWORD)) +(def case-nfa (string-to-nfa "case" :KEYWORD)) +(def extends-nfa (string-to-nfa "extends" :KEYWORD)) +(def int-nfa (string-to-nfa "int" :KEYWORD)) +(def short-nfa (string-to-nfa "short" :KEYWORD)) +(def try-nfa (string-to-nfa "try" :KEYWORD)) +(def catch-nfa (string-to-nfa "catch" :KEYWORD)) +(def interface-nfa (string-to-nfa "interface" :KEYWORD)) +(def static-nfa (string-to-nfa "static" :KEYWORD)) +(def void-nfa (string-to-nfa "void" :KEYWORD)) +(def char-nfa (string-to-nfa "char" :KEYWORD)) +(def finally-nfa (string-to-nfa "finally" :KEYWORD)) +(def final-nfa (string-to-nfa "final" :KEYWORD)) +(def long-nfa (string-to-nfa "long" :KEYWORD)) +(def strictfp-nfa (string-to-nfa "strictfp" :KEYWORD)) +(def volatile-nfa (string-to-nfa "volatile" :KEYWORD)) +(def class-nfa (string-to-nfa "class" :KEYWORD)) +(def float-nfa (string-to-nfa "float" :KEYWORD)) +(def native-nfa (string-to-nfa "native" :KEYWORD)) +(def super-nfa (string-to-nfa "super" :KEYWORD)) +(def while-nfa (string-to-nfa "while" :KEYWORD)) +(def const-nfa (string-to-nfa "const" :KEYWORD)) +(def for-nfa (string-to-nfa "for" :KEYWORD)) +(def new-nfa (string-to-nfa "new" :KEYWORD)) +(def switch-nfa (string-to-nfa "switch" :KEYWORD)) +(def continue-nfa (string-to-nfa "continue" :KEYWORD)) +(def goto-nfa (string-to-nfa "goto" :KEYWORD)) +(def package-nfa (string-to-nfa "package" :KEYWORD)) +(def synchronized-nfa (string-to-nfa "synchronized" :KEYWORD)) + +;; Keywords nfa (def keywords-nfa (let [stateS (gensym :S)] ;; use default constructor because we no longer have the merged accept-map (->NFA (into #{} ) - (merge-nfas-states + (union abstract-nfa default-nfa if-nfa @@ -970,6 +168,7 @@ import-nfa public-nfa throws-nfa + throw-nfa byte-nfa else-nfa instanceof-nfa @@ -986,6 +185,7 @@ void-nfa char-nfa finally-nfa + final-nfa long-nfa strictfp-nfa volatile-nfa @@ -1003,7 +203,7 @@ package-nfa synchronized-nfa) stateS - (merge-nfas + (merge (:accept-states abstract-nfa) (:accept-states default-nfa) (:accept-states if-nfa) @@ -1018,6 +218,7 @@ (:accept-states import-nfa) (:accept-states public-nfa) (:accept-states throws-nfa) + (:accept-states throw-nfa) (:accept-states byte-nfa) (:accept-states else-nfa) (:accept-states instanceof-nfa) @@ -1034,6 +235,7 @@ (:accept-states void-nfa) (:accept-states char-nfa) (:accept-states finally-nfa) + (:accept-states final-nfa) (:accept-states long-nfa) (:accept-states strictfp-nfa) (:accept-states volatile-nfa) @@ -1050,7 +252,7 @@ (:accept-states goto-nfa) (:accept-states package-nfa) (:accept-states synchronized-nfa)) - (merge-nfas + (merge (:transitions abstract-nfa) (:transitions default-nfa) (:transitions if-nfa) @@ -1065,6 +267,7 @@ (:transitions import-nfa) (:transitions public-nfa) (:transitions throws-nfa) + (:transitions throw-nfa) (:transitions byte-nfa) (:transitions else-nfa) (:transitions instanceof-nfa) @@ -1081,6 +284,7 @@ (:transitions void-nfa) (:transitions char-nfa) (:transitions finally-nfa) + (:transitions final-nfa) (:transitions long-nfa) (:transitions strictfp-nfa) (:transitions volatile-nfa) @@ -1111,6 +315,7 @@ [stateS (:start import-nfa) e] [stateS (:start public-nfa) e] [stateS (:start throws-nfa) e] + [stateS (:start throw-nfa) e] [stateS (:start byte-nfa) e] [stateS (:start else-nfa) e] [stateS (:start instanceof-nfa) e] @@ -1127,6 +332,7 @@ [stateS (:start void-nfa) e] [stateS (:start char-nfa) e] [stateS (:start finally-nfa) e] + [stateS (:start final-nfa) e] [stateS (:start long-nfa) e] [stateS (:start strictfp-nfa) e] [stateS (:start volatile-nfa) e] @@ -1143,7 +349,7 @@ [stateS (:start goto-nfa) e] [stateS (:start package-nfa) e] [stateS (:start synchronized-nfa) e]])) - (merge-nfas + (merge (:accept-priorities abstract-nfa) (:accept-priorities default-nfa) (:accept-priorities if-nfa) @@ -1158,6 +364,7 @@ (:accept-priorities import-nfa) (:accept-priorities public-nfa) (:accept-priorities throws-nfa) + (:accept-priorities throw-nfa) (:accept-priorities byte-nfa) (:accept-priorities else-nfa) (:accept-priorities instanceof-nfa) @@ -1174,6 +381,7 @@ (:accept-priorities void-nfa) (:accept-priorities char-nfa) (:accept-priorities finally-nfa) + (:accept-priorities final-nfa) (:accept-priorities long-nfa) (:accept-priorities strictfp-nfa) (:accept-priorities volatile-nfa) @@ -1191,12 +399,6 @@ (:accept-priorities package-nfa) (:accept-priorities synchronized-nfa))))) -;; java keywords -;; https://www.student.cs.uwaterloo.ca/~cs444/joos.html - -;; operators, can get the string later on -;; = assign, == EQ, <= LE, != NE - ;; Booleans (def boolean-nfa (let [stateS (gensym :S) @@ -1224,8 +426,6 @@ [statefal statefals \s] [statefals statefalse \e]])))) - - ;; complete nfa from all of the individual RE nfas ;; int-literal ;; operators @@ -1235,21 +435,21 @@ (let [stateS (gensym :S)] ;; use default constructor because we no longer have the merged accept-map (->NFA (into #{} ) - (merge-nfas-states integer-literal-nfa operators-nfa boolean-nfa keywords-nfa) + (union integer-literal-nfa operators-nfa boolean-nfa keywords-nfa) stateS - (merge-nfas (:accept-states integer-literal-nfa) + (merge (:accept-states integer-literal-nfa) (:accept-states operators-nfa) (:accept-states boolean-nfa) (:accept-states keywords-nfa)) - (merge-nfas (:transitions integer-literal-nfa) - (:transitions operators-nfa) - (:transitions boolean-nfa) - (:transitions keywords-nfa) - (make-transition-NFA [[stateS (:start integer-literal-nfa) e] - [stateS (:start operators-nfa) e] - [stateS (:start boolean-nfa) e] - [stateS (:start keywords-nfa) e]])) - (merge-nfas (:accept-priorities integer-literal-nfa) - (:accept-priorities operators-nfa) - (:accept-priorities boolean-nfa) - (:accept-priorities keywords-nfa))))) + (merge (:transitions integer-literal-nfa) + (:transitions operators-nfa) + (:transitions boolean-nfa) + (:transitions keywords-nfa) + (make-transition-NFA [[stateS (:start integer-literal-nfa) e] + [stateS (:start operators-nfa) e] + [stateS (:start boolean-nfa) e] + [stateS (:start keywords-nfa) e]])) + (merge (:accept-priorities integer-literal-nfa) + (:accept-priorities operators-nfa) + (:accept-priorities boolean-nfa) + (:accept-priorities keywords-nfa))))) diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj index 513054d..f508c19 100644 --- a/test/watcompiler/re_test.clj +++ b/test/watcompiler/re_test.clj @@ -4,12 +4,22 @@ [watcompiler.re :refer :all]) (:import [watcompiler.nfa NFA])) +;; Function formed nfa tests +(deftest function-test + (def int-nfa-test (string-to-nfa "int" :INT)) + (is :MAP int-nfa-test) + (is (= :INT (run-NFA int-nfa-test "int"))) + (def synchronized-nfa-test (string-to-nfa "synchronized" :KEYWORD)) + (is :MAP synchronized-nfa-test) + (is (= :KEYWORD (run-NFA synchronized-nfa-test "synchronized"))) + (is (= false (run-NFA synchronized-nfa-test "synchronize"))) + (is (= false (run-NFA synchronized-nfa-test "ynchronize")))) + ;; Individual NFA tests (deftest int-test (is (= :KEYWORD (run-NFA int-nfa "int"))) (is (= :INTEGER (run-NFA integer-literal-nfa "109")))) -;; Merged NFA test (deftest operator-test ;; Operators (is (= :OPERATOR (run-NFA operators-nfa "+"))) @@ -29,6 +39,7 @@ (is (= :OPERATOR (run-NFA operators-nfa "!"))) (is (= :OPERATOR (run-NFA operators-nfa "!=")))) +;; Booleans test (deftest boolean-test (is (= :BOOLEAN (run-NFA boolean-nfa "true"))) (is (= :BOOLEAN (run-NFA boolean-nfa "false"))) @@ -40,10 +51,21 @@ ;; Individual Keywords on their nfas (is (= :KEYWORD (run-NFA int-nfa "int"))) (is (= :KEYWORD (run-NFA abstract-nfa "abstract"))) - (is (= :KEYWORD (run-NFA default-nfa "default")))) + (is (= :KEYWORD (run-NFA default-nfa "default"))) + (is (= false (run-NFA synchronized-nfa-test "synchronize"))) + (is (= false (run-NFA synchronized-nfa-test "ynchronize")))) + +;; Test merging nfas from function +(deftest merged-function-nfa-test + (is :MAP complete-nfa) + (is (= :KEYWORD (run-NFA complete-nfa "int"))) + (is (= :KEYWORD (run-NFA complete-nfa "synchronized"))) + (is (= :INTEGER (run-NFA complete-nfa "109"))) + (is (= :OPERATOR (run-NFA complete-nfa "++"))) + (is (= :BOOLEAN (run-NFA complete-nfa "true"))) + (is (= :BOOLEAN (run-NFA complete-nfa "false")))) -(deftest all-test - ;; All Test +(deftest keywords-test (is (= :KEYWORD (run-NFA complete-nfa "abstract"))) (is (= :KEYWORD (run-NFA complete-nfa "default"))) (is (= :KEYWORD (run-NFA complete-nfa "if"))) From 44e132c294df3dcc3e5069366f6cf5c89a72337d Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Thu, 7 Mar 2019 21:23:35 -0500 Subject: [PATCH 05/13] functions for multiple strings to nfa, multiple nfas to nfa --- src/watcompiler/re.clj | 431 +++++++++-------------------------- test/watcompiler/re_test.clj | 18 +- 2 files changed, 120 insertions(+), 329 deletions(-) diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj index 4d52e9b..2c89772 100644 --- a/src/watcompiler/re.clj +++ b/src/watcompiler/re.clj @@ -4,28 +4,44 @@ [watcompiler.lang :refer :all]) (:import [watcompiler.nfa NFA])) -;; Helpers for merging nfas +;; Merging multiple nfas (defn merge-nfas - [& args] - (apply merge args)) + [& nfas] + (let + [stateS (gensym :s) + all-states (apply union (map :states nfas)) + all-accept-states (apply union (map :accept-states nfas)) + merged-transitions (apply merge (map :transitions nfas)) + all-transitions (merge + ;; Merged transitions from the nfas + merged-transitions + ;; Episilon transition to each nfa + (make-transition-NFA (into [] + (for [nfa-start (map :start nfas)] + [stateS nfa-start e])))) + all-accept-priorities (apply union (map :accept-priorities nfas))] + (->NFA (into #{} ) + all-states + stateS + all-accept-states + all-transitions + all-accept-priorities))) -;; parse each string to form the nfa and then form complete nfa -;; form the states in the nfa +;; Parses a string to form the nfa (defn string-to-nfa [word wordtype] (let [stateS (gensym :s) ;; List of substrings of word, stored as strings - states-map (rest (reductions str (str) word)) + states-map (set (rest (reductions str (str) word))) ;; Key: substring of word, Value: gensym associated with this state gensym-map (into (sorted-map) (for [c states-map] [c (gensym c)])) ;; Key: gensym value, Value: char to get to this state - states-char-map (into (sorted-map) - (for [pair (map list (vals gensym-map) (seq word))] - [(first pair) (second pair)])) + states-char-map (into (sorted-map) (for [pair (map list (vals gensym-map) (seq word))] + [(first pair) (second pair)])) ;; Accept states accept-states-map {(get gensym-map word) (list wordtype 0)} @@ -40,6 +56,36 @@ accept-states-map (make-transition-NFA transitions-map)))) + +;; Takes strings and forms nfas from them and links them into one nfa +(defn form-multiple-nfas + [& args] + (let + [stateS (gensym :s) + ;; Key: string for keyword, Value: NFA for that keyword + strings-nfas (into (sorted-map) (for [nfa-name args] + [nfa-name (string-to-nfa nfa-name :KEYWORD)])) + all-states (apply union (map :states (vals strings-nfas))) + all-accept-states (apply union (map :accept-states (vals strings-nfas))) + merged-transitions (apply merge (map :transitions (vals strings-nfas))) + all-transitions (merge + ;; Merged transitions from the nfas + merged-transitions + ;; Episilon transition to each nfa + (make-transition-NFA (into [] + (for [nfa-start (map :start (vals strings-nfas))] + [stateS nfa-start e])))) + all-accept-priorities (apply union (map :accept-priorities (vals strings-nfas)))] + (->NFA (into #{} ) + all-states + stateS + all-accept-states + all-transitions + all-accept-priorities))) + +;; NFAs for types + +;; Integer literal (def integer-literal-nfa (let [stateS (gensym :S) state1 (gensym :1) @@ -99,305 +145,56 @@ [state6 state11 \=] [state7 state11 \=]])))) -(def abstract-nfa (string-to-nfa "abstract" :KEYWORD)) -(def default-nfa (string-to-nfa "default" :KEYWORD)) -(def if-nfa (string-to-nfa "if" :KEYWORD)) -(def private-nfa (string-to-nfa "private" :KEYWORD)) -(def this-nfa (string-to-nfa "this" :KEYWORD)) -(def boolean-nfa (string-to-nfa "boolean" :KEYWORD)) -(def do-nfa (string-to-nfa "do" :KEYWORD)) -(def implements-nfa (string-to-nfa "implements" :KEYWORD)) -(def protected-nfa (string-to-nfa "protected" :KEYWORD)) -(def break-nfa (string-to-nfa "break" :KEYWORD)) -(def double-nfa (string-to-nfa "double" :KEYWORD)) -(def import-nfa (string-to-nfa "import" :KEYWORD)) -(def public-nfa (string-to-nfa "public" :KEYWORD)) -(def throws-nfa (string-to-nfa "throws" :KEYWORD)) -(def throw-nfa (string-to-nfa "throw" :KEYWORD)) -(def byte-nfa (string-to-nfa "byte" :KEYWORD)) -(def else-nfa (string-to-nfa "else" :KEYWORD)) -(def instanceof-nfa (string-to-nfa "instanceof" :KEYWORD)) -(def return-nfa (string-to-nfa "return" :KEYWORD)) -(def transient-nfa (string-to-nfa "transient" :KEYWORD)) -(def case-nfa (string-to-nfa "case" :KEYWORD)) -(def extends-nfa (string-to-nfa "extends" :KEYWORD)) -(def int-nfa (string-to-nfa "int" :KEYWORD)) -(def short-nfa (string-to-nfa "short" :KEYWORD)) -(def try-nfa (string-to-nfa "try" :KEYWORD)) -(def catch-nfa (string-to-nfa "catch" :KEYWORD)) -(def interface-nfa (string-to-nfa "interface" :KEYWORD)) -(def static-nfa (string-to-nfa "static" :KEYWORD)) -(def void-nfa (string-to-nfa "void" :KEYWORD)) -(def char-nfa (string-to-nfa "char" :KEYWORD)) -(def finally-nfa (string-to-nfa "finally" :KEYWORD)) -(def final-nfa (string-to-nfa "final" :KEYWORD)) -(def long-nfa (string-to-nfa "long" :KEYWORD)) -(def strictfp-nfa (string-to-nfa "strictfp" :KEYWORD)) -(def volatile-nfa (string-to-nfa "volatile" :KEYWORD)) -(def class-nfa (string-to-nfa "class" :KEYWORD)) -(def float-nfa (string-to-nfa "float" :KEYWORD)) -(def native-nfa (string-to-nfa "native" :KEYWORD)) -(def super-nfa (string-to-nfa "super" :KEYWORD)) -(def while-nfa (string-to-nfa "while" :KEYWORD)) -(def const-nfa (string-to-nfa "const" :KEYWORD)) -(def for-nfa (string-to-nfa "for" :KEYWORD)) -(def new-nfa (string-to-nfa "new" :KEYWORD)) -(def switch-nfa (string-to-nfa "switch" :KEYWORD)) -(def continue-nfa (string-to-nfa "continue" :KEYWORD)) -(def goto-nfa (string-to-nfa "goto" :KEYWORD)) -(def package-nfa (string-to-nfa "package" :KEYWORD)) -(def synchronized-nfa (string-to-nfa "synchronized" :KEYWORD)) - ;; Keywords nfa (def keywords-nfa - (let [stateS (gensym :S)] - ;; use default constructor because we no longer have the merged accept-map - (->NFA (into #{} ) - (union - abstract-nfa - default-nfa - if-nfa - private-nfa - this-nfa - boolean-nfa - do-nfa - implements-nfa - protected-nfa - break-nfa - double-nfa - import-nfa - public-nfa - throws-nfa - throw-nfa - byte-nfa - else-nfa - instanceof-nfa - return-nfa - transient-nfa - case-nfa - extends-nfa - int-nfa - short-nfa - try-nfa - catch-nfa - interface-nfa - static-nfa - void-nfa - char-nfa - finally-nfa - final-nfa - long-nfa - strictfp-nfa - volatile-nfa - class-nfa - float-nfa - native-nfa - super-nfa - while-nfa - const-nfa - for-nfa - new-nfa - switch-nfa - continue-nfa - goto-nfa - package-nfa - synchronized-nfa) - stateS - (merge - (:accept-states abstract-nfa) - (:accept-states default-nfa) - (:accept-states if-nfa) - (:accept-states private-nfa) - (:accept-states this-nfa) - (:accept-states boolean-nfa) - (:accept-states do-nfa) - (:accept-states implements-nfa) - (:accept-states protected-nfa) - (:accept-states break-nfa) - (:accept-states double-nfa) - (:accept-states import-nfa) - (:accept-states public-nfa) - (:accept-states throws-nfa) - (:accept-states throw-nfa) - (:accept-states byte-nfa) - (:accept-states else-nfa) - (:accept-states instanceof-nfa) - (:accept-states return-nfa) - (:accept-states transient-nfa) - (:accept-states case-nfa) - (:accept-states extends-nfa) - (:accept-states int-nfa) - (:accept-states short-nfa) - (:accept-states try-nfa) - (:accept-states catch-nfa) - (:accept-states interface-nfa) - (:accept-states static-nfa) - (:accept-states void-nfa) - (:accept-states char-nfa) - (:accept-states finally-nfa) - (:accept-states final-nfa) - (:accept-states long-nfa) - (:accept-states strictfp-nfa) - (:accept-states volatile-nfa) - (:accept-states class-nfa) - (:accept-states float-nfa) - (:accept-states native-nfa) - (:accept-states super-nfa) - (:accept-states while-nfa) - (:accept-states const-nfa) - (:accept-states for-nfa) - (:accept-states new-nfa) - (:accept-states switch-nfa) - (:accept-states continue-nfa) - (:accept-states goto-nfa) - (:accept-states package-nfa) - (:accept-states synchronized-nfa)) - (merge - (:transitions abstract-nfa) - (:transitions default-nfa) - (:transitions if-nfa) - (:transitions private-nfa) - (:transitions this-nfa) - (:transitions boolean-nfa) - (:transitions do-nfa) - (:transitions implements-nfa) - (:transitions protected-nfa) - (:transitions break-nfa) - (:transitions double-nfa) - (:transitions import-nfa) - (:transitions public-nfa) - (:transitions throws-nfa) - (:transitions throw-nfa) - (:transitions byte-nfa) - (:transitions else-nfa) - (:transitions instanceof-nfa) - (:transitions return-nfa) - (:transitions transient-nfa) - (:transitions case-nfa) - (:transitions extends-nfa) - (:transitions int-nfa) - (:transitions short-nfa) - (:transitions try-nfa) - (:transitions catch-nfa) - (:transitions interface-nfa) - (:transitions static-nfa) - (:transitions void-nfa) - (:transitions char-nfa) - (:transitions finally-nfa) - (:transitions final-nfa) - (:transitions long-nfa) - (:transitions strictfp-nfa) - (:transitions volatile-nfa) - (:transitions class-nfa) - (:transitions float-nfa) - (:transitions native-nfa) - (:transitions super-nfa) - (:transitions while-nfa) - (:transitions const-nfa) - (:transitions for-nfa) - (:transitions new-nfa) - (:transitions switch-nfa) - (:transitions continue-nfa) - (:transitions goto-nfa) - (:transitions package-nfa) - (:transitions synchronized-nfa) - (make-transition-NFA [[stateS (:start abstract-nfa) e] - [stateS (:start default-nfa) e] - [stateS (:start if-nfa) e] - [stateS (:start private-nfa) e] - [stateS (:start this-nfa) e] - [stateS (:start boolean-nfa) e] - [stateS (:start do-nfa) e] - [stateS (:start implements-nfa) e] - [stateS (:start protected-nfa) e] - [stateS (:start break-nfa) e] - [stateS (:start double-nfa) e] - [stateS (:start import-nfa) e] - [stateS (:start public-nfa) e] - [stateS (:start throws-nfa) e] - [stateS (:start throw-nfa) e] - [stateS (:start byte-nfa) e] - [stateS (:start else-nfa) e] - [stateS (:start instanceof-nfa) e] - [stateS (:start return-nfa) e] - [stateS (:start transient-nfa) e] - [stateS (:start case-nfa) e] - [stateS (:start extends-nfa) e] - [stateS (:start int-nfa) e] - [stateS (:start short-nfa) e] - [stateS (:start try-nfa) e] - [stateS (:start catch-nfa) e] - [stateS (:start interface-nfa) e] - [stateS (:start static-nfa) e] - [stateS (:start void-nfa) e] - [stateS (:start char-nfa) e] - [stateS (:start finally-nfa) e] - [stateS (:start final-nfa) e] - [stateS (:start long-nfa) e] - [stateS (:start strictfp-nfa) e] - [stateS (:start volatile-nfa) e] - [stateS (:start class-nfa) e] - [stateS (:start float-nfa) e] - [stateS (:start native-nfa) e] - [stateS (:start super-nfa) e] - [stateS (:start while-nfa) e] - [stateS (:start const-nfa) e] - [stateS (:start for-nfa) e] - [stateS (:start new-nfa) e] - [stateS (:start switch-nfa) e] - [stateS (:start continue-nfa) e] - [stateS (:start goto-nfa) e] - [stateS (:start package-nfa) e] - [stateS (:start synchronized-nfa) e]])) - (merge - (:accept-priorities abstract-nfa) - (:accept-priorities default-nfa) - (:accept-priorities if-nfa) - (:accept-priorities private-nfa) - (:accept-priorities this-nfa) - (:accept-priorities boolean-nfa) - (:accept-priorities do-nfa) - (:accept-priorities implements-nfa) - (:accept-priorities protected-nfa) - (:accept-priorities break-nfa) - (:accept-priorities double-nfa) - (:accept-priorities import-nfa) - (:accept-priorities public-nfa) - (:accept-priorities throws-nfa) - (:accept-priorities throw-nfa) - (:accept-priorities byte-nfa) - (:accept-priorities else-nfa) - (:accept-priorities instanceof-nfa) - (:accept-priorities return-nfa) - (:accept-priorities transient-nfa) - (:accept-priorities case-nfa) - (:accept-priorities extends-nfa) - (:accept-priorities int-nfa) - (:accept-priorities short-nfa) - (:accept-priorities try-nfa) - (:accept-priorities catch-nfa) - (:accept-priorities interface-nfa) - (:accept-priorities static-nfa) - (:accept-priorities void-nfa) - (:accept-priorities char-nfa) - (:accept-priorities finally-nfa) - (:accept-priorities final-nfa) - (:accept-priorities long-nfa) - (:accept-priorities strictfp-nfa) - (:accept-priorities volatile-nfa) - (:accept-priorities class-nfa) - (:accept-priorities float-nfa) - (:accept-priorities native-nfa) - (:accept-priorities super-nfa) - (:accept-priorities while-nfa) - (:accept-priorities const-nfa) - (:accept-priorities for-nfa) - (:accept-priorities new-nfa) - (:accept-priorities switch-nfa) - (:accept-priorities continue-nfa) - (:accept-priorities goto-nfa) - (:accept-priorities package-nfa) - (:accept-priorities synchronized-nfa))))) + (form-multiple-nfas "abstract" + "default" + "if" + "private" + "this" + "boolean" + "do" + "implements" + "protected" + "break" + "double" + "import" + "public" + "throws" + "throw" + "byte" + "else" + "instanceof" + "return" + "transient" + "case" + "extends" + "int" + "short" + "try" + "catch" + "interface" + "static" + "void" + "char" + "finally" + "final" + "long" + "strictfp" + "volatile" + "class" + "float" + "native" + "super" + "while" + "const" + "for" + "new" + "switch" + "continue" + "goto" + "package" + "synchronized")) ;; Booleans (def boolean-nfa @@ -432,24 +229,4 @@ ;; boolean ;; keywords (def complete-nfa - (let [stateS (gensym :S)] - ;; use default constructor because we no longer have the merged accept-map - (->NFA (into #{} ) - (union integer-literal-nfa operators-nfa boolean-nfa keywords-nfa) - stateS - (merge (:accept-states integer-literal-nfa) - (:accept-states operators-nfa) - (:accept-states boolean-nfa) - (:accept-states keywords-nfa)) - (merge (:transitions integer-literal-nfa) - (:transitions operators-nfa) - (:transitions boolean-nfa) - (:transitions keywords-nfa) - (make-transition-NFA [[stateS (:start integer-literal-nfa) e] - [stateS (:start operators-nfa) e] - [stateS (:start boolean-nfa) e] - [stateS (:start keywords-nfa) e]])) - (merge (:accept-priorities integer-literal-nfa) - (:accept-priorities operators-nfa) - (:accept-priorities boolean-nfa) - (:accept-priorities keywords-nfa))))) + (merge-nfas integer-literal-nfa operators-nfa boolean-nfa keywords-nfa)) diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj index f508c19..f10a145 100644 --- a/test/watcompiler/re_test.clj +++ b/test/watcompiler/re_test.clj @@ -4,7 +4,16 @@ [watcompiler.re :refer :all]) (:import [watcompiler.nfa NFA])) -;; Function formed nfa tests +;; Test forming multiple nfas from multiple strings +(deftest multiple-nfas-function-test + (def full-nfa (form-multiple-nfas "int" "if")) + (is (= :KEYWORD (run-NFA full-nfa "int"))) + (is (= :KEYWORD (run-NFA full-nfa "if"))) + (is (= false (run-NFA full-nfa "in"))) + (is (= false (run-NFA full-nfa "nt")))) + + +;; Test function forming individual nfa (deftest function-test (def int-nfa-test (string-to-nfa "int" :INT)) (is :MAP int-nfa-test) @@ -17,6 +26,7 @@ ;; Individual NFA tests (deftest int-test + (def int-nfa (string-to-nfa "int" :KEYWORD)) (is (= :KEYWORD (run-NFA int-nfa "int"))) (is (= :INTEGER (run-NFA integer-literal-nfa "109")))) @@ -49,13 +59,17 @@ ;; Keyword test (deftest keyword-test ;; Individual Keywords on their nfas + (def int-nfa (string-to-nfa "int" :KEYWORD)) + (def abstract-nfa (string-to-nfa "abstract" :KEYWORD)) + (def default-nfa (string-to-nfa "default" :KEYWORD)) + (def synchronize-nfa (string-to-nfa "synchronize" :KEYWORD)) (is (= :KEYWORD (run-NFA int-nfa "int"))) (is (= :KEYWORD (run-NFA abstract-nfa "abstract"))) (is (= :KEYWORD (run-NFA default-nfa "default"))) (is (= false (run-NFA synchronized-nfa-test "synchronize"))) (is (= false (run-NFA synchronized-nfa-test "ynchronize")))) -;; Test merging nfas from function +;; Test on a complete merged nfa (deftest merged-function-nfa-test (is :MAP complete-nfa) (is (= :KEYWORD (run-NFA complete-nfa "int"))) From 04bb5782b56acfc816c83a027b631bfff9563389 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Sat, 16 Mar 2019 00:40:40 -0400 Subject: [PATCH 06/13] cleaned up methods, brackets, identifiers --- src/watcompiler/re.clj | 110 +++++++++++++---------------------- test/watcompiler/re_test.clj | 6 +- 2 files changed, 43 insertions(+), 73 deletions(-) diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj index 2c89772..7ffc1aa 100644 --- a/src/watcompiler/re.clj +++ b/src/watcompiler/re.clj @@ -59,12 +59,14 @@ ;; Takes strings and forms nfas from them and links them into one nfa (defn form-multiple-nfas - [& args] + [& arguments] (let [stateS (gensym :s) + class (first arguments) + args (rest arguments) ;; Key: string for keyword, Value: NFA for that keyword strings-nfas (into (sorted-map) (for [nfa-name args] - [nfa-name (string-to-nfa nfa-name :KEYWORD)])) + [nfa-name (string-to-nfa nfa-name class)])) all-states (apply union (map :states (vals strings-nfas))) all-accept-states (apply union (map :accept-states (vals strings-nfas))) merged-transitions (apply merge (map :transitions (vals strings-nfas))) @@ -86,6 +88,7 @@ ;; NFAs for types ;; Integer literal +;; 0 and [1-9][0-9]* (def integer-literal-nfa (let [stateS (gensym :S) state1 (gensym :1) @@ -99,55 +102,14 @@ [state2 state2 DIGITS]])))) ;; Operators (def operators-nfa - (let [stateS (gensym :S) - state1 (gensym :1) ;; valid, but can add = - state2 (gensym :2) ;; valid, but can add <, = - state3 (gensym :3) ;; valid but can add >, = - state4 (gensym :4) ;; valid, but can add + - state5 (gensym :5) ;; valid, but can add - - state6 (gensym :6) ;; valid, but can add >, = - state7 (gensym :7) ;; valid, but can add >, = - state11 (gensym :11)] ;; Nothing else can be added to it - (make-NFA (into #{} ) - #{stateS state1 state2, state3, state4, state5, state6, state11} - stateS - {state11 (list :OPERATOR 0) - state1 (list :OPERATOR 1) - state2 (list :OPERATOR 2) - state3 (list :OPERATOR 3) - state4 (list :OPERATOR 4) - state5 (list :OPERATOR 5) - state6 (list :OPERATOR 6) - state7 (list :OPERATOR 7)} - (make-transition-NFA [[stateS state1 \=] - [stateS state2 \<] - [stateS state3 \>] - [stateS state1 \!] - [stateS state1 \:] - [stateS state1 \~] - [stateS state1 \?] - [stateS state1 \&] - [stateS state1 \|] - [stateS state1 \^] - [stateS state1 \%] - [stateS state4 \+] - [stateS state5 \-] - [stateS state1 \*] - [stateS state1 \/] - - [state1 state11 \=] - [state2 state1 \<] - [state3 state6 \>] - [state3 state11 \=] - [state4 state11 \+] - [state5 state11 \-] - [state6 state7 \>] - [state6 state11 \=] - [state7 state11 \=]])))) + (form-multiple-nfas :OPERATOR ">" "<" "<<" ">>" ">>>" "<<<" ">>>=" ">>=" + ">=" "<=" "&" "&=" "=" "==" "!" "!=" "^=" "^" "+" "+=" + "++" "-" "-=" "--" "*" "*=" "/" "/=" "%" "%=")) ;; Keywords nfa (def keywords-nfa - (form-multiple-nfas "abstract" + (form-multiple-nfas :KEYWORD + "abstract" "default" "if" "private" @@ -198,35 +160,43 @@ ;; Booleans (def boolean-nfa + (form-multiple-nfas :BOOLEAN "true" "false")) + +;; Brackets +(def bracket-nfa + (form-multiple-nfas :BRACKET "{" "}" "(" ")" "[" "]")) + + +;; Identifiers +;; [a-zA-Z][a-zA-Z0-9]* +(def identifier-nfa (let [stateS (gensym :S) - statet (gensym :t) - statetr (gensym :tr) - statetru (gensym :tru) - statetrue (gensym :true) - statef (gensym :f) - statefa (gensym :fa) - statefal (gensym :fal) - statefals (gensym :fals) - statefalse (gensym :false)] + state1 (gensym :s1) + state2 (gensym :s2)] (make-NFA (into #{} ) - #{stateS statet statetr statetru statetrue statef statefa statefal statefals statefalse} + #{stateS state1 state2} stateS - {statetrue (list :BOOLEAN 0) - statefalse (list :BOOLEAN 0)} - (make-transition-NFA [[stateS statet \t] - [statet statetr \r] - [statetr statetru \u] - [statetru statetrue \e] - [stateS statef \f] - [statef statefa \a] - [statefa statefal \l] - [statefal statefals \s] - [statefals statefalse \e]])))) + {state1 (list :IDENTIFIER 1) + state2 (list :IDENTIFIER 1)} + (make-transition-NFA [[stateS state1 UPPER-ALPHABET] + [stateS state1 LOWER-ALPHABET] + [state1 state2 UPPER-ALPHABET] + [state1 state2 LOWER-ALPHABET] + [state2 state2 UPPER-ALPHABET] + [state2 state2 LOWER-ALPHABET] + [state2 state2 DIGITS]])))) ;; complete nfa from all of the individual RE nfas ;; int-literal ;; operators ;; boolean ;; keywords +;; brackets +;; identifier (def complete-nfa - (merge-nfas integer-literal-nfa operators-nfa boolean-nfa keywords-nfa)) + (merge-nfas integer-literal-nfa + operators-nfa + boolean-nfa + keywords-nfa + bracket-nfa + identifier-nfa)) diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj index f10a145..87aa528 100644 --- a/test/watcompiler/re_test.clj +++ b/test/watcompiler/re_test.clj @@ -6,7 +6,7 @@ ;; Test forming multiple nfas from multiple strings (deftest multiple-nfas-function-test - (def full-nfa (form-multiple-nfas "int" "if")) + (def full-nfa (form-multiple-nfas :KEYWORD "int" "if")) (is (= :KEYWORD (run-NFA full-nfa "int"))) (is (= :KEYWORD (run-NFA full-nfa "if"))) (is (= false (run-NFA full-nfa "in"))) @@ -129,8 +129,8 @@ ;; Booleans (is (= :BOOLEAN (run-NFA complete-nfa "true"))) (is (= :BOOLEAN (run-NFA complete-nfa "false"))) - (is (= false (run-NFA complete-nfa "tru"))) - (is (= false (run-NFA complete-nfa "fals"))) + (is (= :IDENTIFIER (run-NFA complete-nfa "tru"))) + (is (= :IDENTIFIER (run-NFA complete-nfa "fals"))) ;; Integer (is (= :INTEGER (run-NFA complete-nfa "109"))) ;; Operators From d0c4212431ed8b3ec632a3eb062917ee618f4755 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Tue, 26 Mar 2019 12:53:03 -0400 Subject: [PATCH 07/13] form nfas from file text --- src/watcompiler/Tokens.txt | 8 +++ src/watcompiler/re.clj | 102 ++++++++++++++++++++++++----------- test/watcompiler/re_test.clj | 44 ++++++++------- 3 files changed, 104 insertions(+), 50 deletions(-) create mode 100644 src/watcompiler/Tokens.txt diff --git a/src/watcompiler/Tokens.txt b/src/watcompiler/Tokens.txt new file mode 100644 index 0000000..b85735b --- /dev/null +++ b/src/watcompiler/Tokens.txt @@ -0,0 +1,8 @@ +BRACKET { } ( ) [ ] +BOOLEAN true false +KEYWORD abstract default if private this boolean do implements protected break double import public throws throw byte else instanceof return transient case extends int short try catch interface static void char finally final long strictfp volatile class float native super while const for new switch continue goto package synchronized +UNARYOPERATOR + ++ - -- ! ~ +BINARYOPERATOR == * / % < << > >> >>> & ^ | != >= <= +ASSIGNMENTOPERATOR = *= /= %= += -= <<= >>= >>>= &= ^= |= +TERMINAL ; " , + diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj index 7ffc1aa..7fa82c3 100644 --- a/src/watcompiler/re.clj +++ b/src/watcompiler/re.clj @@ -1,7 +1,8 @@ (ns watcompiler.re (:require [clojure.set :refer :all] [watcompiler.nfa :refer :all] - [watcompiler.lang :refer :all]) + [watcompiler.lang :refer :all] + [clojure.string :as str]) (:import [watcompiler.nfa NFA])) ;; Merging multiple nfas @@ -62,7 +63,7 @@ [& arguments] (let [stateS (gensym :s) - class (first arguments) + class (keyword (first arguments)) ;; Conver to a keyword since it reads strings from a file args (rest arguments) ;; Key: string for keyword, Value: NFA for that keyword strings-nfas (into (sorted-map) (for [nfa-name args] @@ -85,6 +86,24 @@ all-transitions all-accept-priorities))) +;; Reading the file +(def readFile + (with-open [rdr (clojure.java.io/reader "src/watcompiler/Tokens.txt")] + (reduce conj [] (line-seq rdr)))) + +;; Splitting the lines by space +(def splitLines + (into [] + (for [x readFile] + (str/split x #" ")))) + +(def fileFormed-nfa + (let [nfas + (into [] + (for [x splitLines] + (apply form-multiple-nfas x)))] + (apply merge-nfas nfas))) + ;; NFAs for types ;; Integer literal @@ -100,12 +119,48 @@ (make-transition-NFA [[stateS state1 e] [state1 state2 DIGITS-NONZERO] [state2 state2 DIGITS]])))) + +;; String literal +;; \".*\" (\ shown for escaping ") +(def string-literal-nfa + (let [stateS (gensym :S) + state1 (gensym :1) + state2 (gensym :2)] + (make-NFA (into #{} ) + #{stateS state1 state2} + stateS + {state2 (list :STRING-LITERAL 0)} + (make-transition-NFA [[stateS state1 "\""] + [state1 state1 UPPER-ALPHABET] + [state1 state1 LOWER-ALPHABET] + [state1 state2 "\""]])))) + +;; Identifiers +;; [a-zA-Z][a-zA-Z0-9]* +(def identifier-nfa + (let [stateS (gensym :S) + state1 (gensym :s1) + state2 (gensym :s2)] + (make-NFA (into #{} ) + #{stateS state1 state2} + stateS + {state1 (list :IDENTIFIER 1) + state2 (list :IDENTIFIER 1)} + (make-transition-NFA [[stateS state1 UPPER-ALPHABET] + [stateS state1 LOWER-ALPHABET] + [state1 state2 UPPER-ALPHABET] + [state1 state2 LOWER-ALPHABET] + [state2 state2 UPPER-ALPHABET] + [state2 state2 LOWER-ALPHABET] + [state2 state2 DIGITS]])))) ;; Operators (def operators-nfa (form-multiple-nfas :OPERATOR ">" "<" "<<" ">>" ">>>" "<<<" ">>>=" ">>=" ">=" "<=" "&" "&=" "=" "==" "!" "!=" "^=" "^" "+" "+=" "++" "-" "-=" "--" "*" "*=" "/" "/=" "%" "%=")) +;; white space? + ;; Keywords nfa (def keywords-nfa (form-multiple-nfas :KEYWORD @@ -166,37 +221,20 @@ (def bracket-nfa (form-multiple-nfas :BRACKET "{" "}" "(" ")" "[" "]")) - -;; Identifiers -;; [a-zA-Z][a-zA-Z0-9]* -(def identifier-nfa - (let [stateS (gensym :S) - state1 (gensym :s1) - state2 (gensym :s2)] - (make-NFA (into #{} ) - #{stateS state1 state2} - stateS - {state1 (list :IDENTIFIER 1) - state2 (list :IDENTIFIER 1)} - (make-transition-NFA [[stateS state1 UPPER-ALPHABET] - [stateS state1 LOWER-ALPHABET] - [state1 state2 UPPER-ALPHABET] - [state1 state2 LOWER-ALPHABET] - [state2 state2 UPPER-ALPHABET] - [state2 state2 LOWER-ALPHABET] - [state2 state2 DIGITS]])))) - ;; complete nfa from all of the individual RE nfas ;; int-literal -;; operators -;; boolean -;; keywords -;; brackets -;; identifier +;; string-literal +;; identifiers +;; file specified nfas: +;; BRACKET +;; BOOLEAN +;; KEYWORD +;; UNARYOPERATOR +;; BINARYOPERATOR +;; ASSIGNMENTOPERATOR +;; TERMINAL (def complete-nfa (merge-nfas integer-literal-nfa - operators-nfa - boolean-nfa - keywords-nfa - bracket-nfa - identifier-nfa)) + string-literal-nfa + identifier-nfa + fileFormed-nfa)) diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj index 87aa528..613e9f4 100644 --- a/test/watcompiler/re_test.clj +++ b/test/watcompiler/re_test.clj @@ -4,6 +4,15 @@ [watcompiler.re :refer :all]) (:import [watcompiler.nfa NFA])) +;; Form the NFAs from a file +(deftest read-file + (def lines readFile) + (def formed fileFormed-nfa) + + (is (= :BRACKET (run-NFA formed "]"))) + (is (= :BOOLEAN (run-NFA formed "true"))) + (is (= :BRACKET (run-NFA formed "{")))) + ;; Test forming multiple nfas from multiple strings (deftest multiple-nfas-function-test (def full-nfa (form-multiple-nfas :KEYWORD "int" "if")) @@ -12,7 +21,6 @@ (is (= false (run-NFA full-nfa "in"))) (is (= false (run-NFA full-nfa "nt")))) - ;; Test function forming individual nfa (deftest function-test (def int-nfa-test (string-to-nfa "int" :INT)) @@ -75,7 +83,7 @@ (is (= :KEYWORD (run-NFA complete-nfa "int"))) (is (= :KEYWORD (run-NFA complete-nfa "synchronized"))) (is (= :INTEGER (run-NFA complete-nfa "109"))) - (is (= :OPERATOR (run-NFA complete-nfa "++"))) + (is (= :UNARYOPERATOR (run-NFA complete-nfa "++"))) (is (= :BOOLEAN (run-NFA complete-nfa "true"))) (is (= :BOOLEAN (run-NFA complete-nfa "false")))) @@ -134,19 +142,19 @@ ;; Integer (is (= :INTEGER (run-NFA complete-nfa "109"))) ;; Operators - (is (= :OPERATOR (run-NFA complete-nfa "+"))) - (is (= :OPERATOR (run-NFA complete-nfa "++"))) - (is (= :OPERATOR (run-NFA complete-nfa ">"))) - (is (= :OPERATOR (run-NFA complete-nfa ">="))) - (is (= :OPERATOR (run-NFA complete-nfa ">>"))) - (is (= :OPERATOR (run-NFA complete-nfa ">>="))) - (is (= :OPERATOR (run-NFA complete-nfa ">>>"))) - (is (= :OPERATOR (run-NFA complete-nfa ">>>="))) - (is (= :OPERATOR (run-NFA complete-nfa "&"))) - (is (= :OPERATOR (run-NFA complete-nfa "^="))) - (is (= :OPERATOR (run-NFA complete-nfa "^"))) - (is (= :OPERATOR (run-NFA complete-nfa "<<"))) - (is (= :OPERATOR (run-NFA complete-nfa "="))) - (is (= :OPERATOR (run-NFA complete-nfa "=="))) - (is (= :OPERATOR (run-NFA complete-nfa "!"))) - (is (= :OPERATOR (run-NFA complete-nfa "!=")))) + (is (= :UNARYOPERATOR (run-NFA complete-nfa "+"))) + (is (= :UNARYOPERATOR (run-NFA complete-nfa "++"))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa ">"))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa ">="))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa ">>"))) + (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa ">>="))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa ">>>"))) + (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa ">>>="))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa "&"))) + (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa "^="))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa "^"))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa "<<"))) + (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa "="))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa "=="))) + (is (= :UNARYOPERATOR (run-NFA complete-nfa "!"))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa "!=")))) From df42f7ae3049f92a55a7de71866175926178d5d3 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Wed, 27 Mar 2019 23:41:53 -0400 Subject: [PATCH 08/13] escapes, escapes --- src/watcompiler/Tokens.txt | 15 ++- src/watcompiler/lang.clj | 6 ++ src/watcompiler/nfa.clj | 2 +- src/watcompiler/re.clj | 177 ++++++++++++++--------------------- test/watcompiler/re_test.clj | 156 +++++++++++++----------------- 5 files changed, 151 insertions(+), 205 deletions(-) diff --git a/src/watcompiler/Tokens.txt b/src/watcompiler/Tokens.txt index b85735b..16bd3ba 100644 --- a/src/watcompiler/Tokens.txt +++ b/src/watcompiler/Tokens.txt @@ -1,8 +1,13 @@ BRACKET { } ( ) [ ] -BOOLEAN true false KEYWORD abstract default if private this boolean do implements protected break double import public throws throw byte else instanceof return transient case extends int short try catch interface static void char finally final long strictfp volatile class float native super while const for new switch continue goto package synchronized -UNARYOPERATOR + ++ - -- ! ~ -BINARYOPERATOR == * / % < << > >> >>> & ^ | != >= <= +UNARYOPERATOR ++ - -- ! ~ +BINARYOPERATOR == * / % < << > >> >>> & ^ | != >= <= + ASSIGNMENTOPERATOR = *= /= %= += -= <<= >>= >>>= &= ^= |= -TERMINAL ; " , - +TERMINAL ; , +BOOLEAN-LITERAL true false +NULL-LITERAL null +INT-LITERAL +STRING-LITERAL +CHARACTER-LITERAL +IDENTIFIER +WHITESPACE diff --git a/src/watcompiler/lang.clj b/src/watcompiler/lang.clj index d40a90f..51f112e 100644 --- a/src/watcompiler/lang.clj +++ b/src/watcompiler/lang.clj @@ -21,6 +21,12 @@ (def DIGITS-NONZERO (char-range \1 \9)) +(def ALL-ASCII + (char-range 32 126)) + +(def ESCAPABLE + [\b \t \n \f \r \" \' \\]) + (def S-PLUS \+) (def S-MINUS \-) (def S-STAR \*) diff --git a/src/watcompiler/nfa.clj b/src/watcompiler/nfa.clj index 715811d..bdface1 100644 --- a/src/watcompiler/nfa.clj +++ b/src/watcompiler/nfa.clj @@ -79,7 +79,7 @@ transition-map (let [[s-from s-to alphabets] (first remaining)] (recur (rest remaining) - (if (seq? alphabets) + (if (or (seq? alphabets) (vector? alphabets)) (reduce #(add-to-map %1 (list s-from %2) s-to) transition-map alphabets) diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj index 7fa82c3..0412ce3 100644 --- a/src/watcompiler/re.clj +++ b/src/watcompiler/re.clj @@ -30,10 +30,9 @@ ;; Parses a string to form the nfa (defn string-to-nfa - [word wordtype] + [wordtype word] (let [stateS (gensym :s) - ;; List of substrings of word, stored as strings states-map (set (rest (reductions str (str) word))) @@ -51,7 +50,7 @@ transitions-map (into #{ [stateS (get gensym-map (str (first (seq word))) \a) (first (seq word))] } (for [v (partition 2 1 (vals gensym-map))] [(first v) (second v) (get states-char-map (second v))]))] - (make-NFA (into #{} ) + (make-NFA (into #{} (concat (seq word))) states-map stateS accept-states-map @@ -67,7 +66,7 @@ args (rest arguments) ;; Key: string for keyword, Value: NFA for that keyword strings-nfas (into (sorted-map) (for [nfa-name args] - [nfa-name (string-to-nfa nfa-name class)])) + [nfa-name (string-to-nfa class nfa-name)])) all-states (apply union (map :states (vals strings-nfas))) all-accept-states (apply union (map :accept-states (vals strings-nfas))) merged-transitions (apply merge (map :transitions (vals strings-nfas))) @@ -88,21 +87,24 @@ ;; Reading the file (def readFile - (with-open [rdr (clojure.java.io/reader "src/watcompiler/Tokens.txt")] - (reduce conj [] (line-seq rdr)))) - -;; Splitting the lines by space -(def splitLines (into [] - (for [x readFile] - (str/split x #" ")))) + (for [line (str/split-lines (slurp "src/watcompiler/Tokens.txt"))] + (str/split line #" ")))) (def fileFormed-nfa - (let [nfas - (into [] - (for [x splitLines] - (apply form-multiple-nfas x)))] - (apply merge-nfas nfas))) + (let [nfas (into [] + (for [x readFile] + ;; Check for regex based NFAs + (if + (or + (if (= (first x) "IDENTIFIER") true false) + (if (= (first x) "INT-LITERAL") true false) + (if (= (first x) "STRING-LITERAL") true false) + (if (= (first x) "CHARACTER-LITERAL") true false) + (if (= (first x) "WHITESPACE") true false)) + nil + (apply form-multiple-nfas x))))] + (apply merge-nfas (remove nil? nfas)))) ;; NFAs for types @@ -112,28 +114,51 @@ (let [stateS (gensym :S) state1 (gensym :1) state2 (gensym :2)] - (make-NFA (into #{} ) + (make-NFA (into #{} (concat [\0] DIGITS DIGITS-NONZERO)) #{stateS state1 state2} stateS - {state2 (list :INTEGER 0)} - (make-transition-NFA [[stateS state1 e] - [state1 state2 DIGITS-NONZERO] + {state1 (list :INTEGER-LITERAL 0) + state2 (list :INTEGER-LITERAL 0)} + (make-transition-NFA [[stateS state1 \0] + [stateS state2 DIGITS-NONZERO] [state2 state2 DIGITS]])))) ;; String literal -;; \".*\" (\ shown for escaping ") +;; \"(\\[btnfr\"\'\\] | ALL-ASCII)*\" (\ shown for escaping ") +;; aka \"(.*)\" with escapes inside (def string-literal-nfa (let [stateS (gensym :S) state1 (gensym :1) - state2 (gensym :2)] - (make-NFA (into #{} ) - #{stateS state1 state2} + state2 (gensym :2) + state3 (gensym :3)] + (make-NFA (into #{} (concat [\'] ALL-ASCII [\\] ESCAPABLE)) + #{stateS state1 state2 state3} + stateS + {state3 (list :STRING-LITERAL 0)} + (make-transition-NFA [[stateS state1 \"] + [state1 state1 ALL-ASCII] + [state1 state2 \\] + [state2 state1 ESCAPABLE] + [state1 state3 \"]])))) + +;; Character literal +;; \'(\\ESCAPABLE | ALL-ASCII)*\' (\ shown for escaping ") +;; aka \'(.*)\' with escapes inside +(def character-literal-nfa + (let [stateS (gensym :S) + state1 (gensym :1) + state2 (gensym :2) + state3 (gensym :3) + state4 (gensym :4)] + (make-NFA (into #{} (concat [\'] ALL-ASCII [\\] ESCAPABLE)) + #{stateS state1 state2 state3 state4} stateS - {state2 (list :STRING-LITERAL 0)} - (make-transition-NFA [[stateS state1 "\""] - [state1 state1 UPPER-ALPHABET] - [state1 state1 LOWER-ALPHABET] - [state1 state2 "\""]])))) + {state4 (list :CHARACTER-LITERAL 0)} + (make-transition-NFA [[stateS state1 \'] + [state1 state3 ALL-ASCII] + [state1 state2 \\] + [state2 state3 ESCAPABLE] + [state3 state4 \']])))) ;; Identifiers ;; [a-zA-Z][a-zA-Z0-9]* @@ -141,7 +166,7 @@ (let [stateS (gensym :S) state1 (gensym :s1) state2 (gensym :s2)] - (make-NFA (into #{} ) + (make-NFA (into #{} (concat UPPER-ALPHABET LOWER-ALPHABET DIGITS)) #{stateS state1 state2} stateS {state1 (list :IDENTIFIER 1) @@ -153,88 +178,24 @@ [state2 state2 UPPER-ALPHABET] [state2 state2 LOWER-ALPHABET] [state2 state2 DIGITS]])))) -;; Operators -(def operators-nfa - (form-multiple-nfas :OPERATOR ">" "<" "<<" ">>" ">>>" "<<<" ">>>=" ">>=" - ">=" "<=" "&" "&=" "=" "==" "!" "!=" "^=" "^" "+" "+=" - "++" "-" "-=" "--" "*" "*=" "/" "/=" "%" "%=")) - -;; white space? - -;; Keywords nfa -(def keywords-nfa - (form-multiple-nfas :KEYWORD - "abstract" - "default" - "if" - "private" - "this" - "boolean" - "do" - "implements" - "protected" - "break" - "double" - "import" - "public" - "throws" - "throw" - "byte" - "else" - "instanceof" - "return" - "transient" - "case" - "extends" - "int" - "short" - "try" - "catch" - "interface" - "static" - "void" - "char" - "finally" - "final" - "long" - "strictfp" - "volatile" - "class" - "float" - "native" - "super" - "while" - "const" - "for" - "new" - "switch" - "continue" - "goto" - "package" - "synchronized")) - -;; Booleans -(def boolean-nfa - (form-multiple-nfas :BOOLEAN "true" "false")) - -;; Brackets -(def bracket-nfa - (form-multiple-nfas :BRACKET "{" "}" "(" ")" "[" "]")) + +;; Whitespace +;; [space tab newline]+ +(def whitespace-nfa + (let [stateS (gensym :S) + state1 (gensym :s1)] + (make-NFA (into #{} WHITESPACE) + #{stateS state1} + stateS + {state1 (list :WHITESPACE 0)} + (make-transition-NFA [[stateS state1 WHITESPACE] + [state1 state1 WHITESPACE]])))) ;; complete nfa from all of the individual RE nfas -;; int-literal -;; string-literal -;; identifiers -;; file specified nfas: -;; BRACKET -;; BOOLEAN -;; KEYWORD -;; UNARYOPERATOR -;; BINARYOPERATOR -;; ASSIGNMENTOPERATOR -;; TERMINAL (def complete-nfa (merge-nfas integer-literal-nfa string-literal-nfa + character-literal-nfa identifier-nfa + whitespace-nfa fileFormed-nfa)) diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj index 613e9f4..227bdc4 100644 --- a/test/watcompiler/re_test.clj +++ b/test/watcompiler/re_test.clj @@ -4,13 +4,41 @@ [watcompiler.re :refer :all]) (:import [watcompiler.nfa NFA])) +;; Regex NFA tests +(deftest integer-literal-tests + (is (= :INTEGER-LITERAL (run-NFA integer-literal-nfa "1010"))) + (is (= :INTEGER-LITERAL (run-NFA integer-literal-nfa "0"))) + (is (= :INTEGER-LITERAL (run-NFA integer-literal-nfa "1")))) + +(deftest string-literal-tests + (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\"s\""))) + (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\"thisis a string literal\""))) + (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\"[]~`!%^&*$(&^%#.][` @$g literal\""))) + (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\"\""))) + + (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\" \\b \\t \\n \\f \\r \\' \\\\ \""))) + (is (= false (run-NFA string-literal-nfa "needquotes")))) + +(deftest character-literal-tests + (is (= :CHARACTER-LITERAL (run-NFA character-literal-nfa "'s'"))) + (is (= :CHARACTER-LITERAL (run-NFA character-literal-nfa "'\\b'"))) + (is (= :CHARACTER-LITERAL (run-NFA character-literal-nfa "'0'"))) + (is (= false (run-NFA string-literal-nfa "'sa'")))) + +(deftest identifier-tests + (is (= :IDENTIFIER (run-NFA identifier-nfa "thisidentifier"))) + (is (= :IDENTIFIER (run-NFA identifier-nfa "a")))) + +(deftest whitespace-test + (is (= :WHITESPACE (run-NFA whitespace-nfa " \n\n")))) + ;; Form the NFAs from a file (deftest read-file (def lines readFile) (def formed fileFormed-nfa) (is (= :BRACKET (run-NFA formed "]"))) - (is (= :BOOLEAN (run-NFA formed "true"))) + (is (= :BOOLEAN-LITERAL (run-NFA formed "true"))) (is (= :BRACKET (run-NFA formed "{")))) ;; Test forming multiple nfas from multiple strings @@ -23,10 +51,10 @@ ;; Test function forming individual nfa (deftest function-test - (def int-nfa-test (string-to-nfa "int" :INT)) + (def int-nfa-test (string-to-nfa :INT "int")) (is :MAP int-nfa-test) (is (= :INT (run-NFA int-nfa-test "int"))) - (def synchronized-nfa-test (string-to-nfa "synchronized" :KEYWORD)) + (def synchronized-nfa-test (string-to-nfa :KEYWORD "synchronized")) (is :MAP synchronized-nfa-test) (is (= :KEYWORD (run-NFA synchronized-nfa-test "synchronized"))) (is (= false (run-NFA synchronized-nfa-test "synchronize"))) @@ -34,43 +62,33 @@ ;; Individual NFA tests (deftest int-test - (def int-nfa (string-to-nfa "int" :KEYWORD)) - (is (= :KEYWORD (run-NFA int-nfa "int"))) - (is (= :INTEGER (run-NFA integer-literal-nfa "109")))) + (def int-nfa (string-to-nfa :KEYWORD "int")) + (is (= :KEYWORD (run-NFA complete-nfa "int"))) + (is (= :INTEGER-LITERAL (run-NFA complete-nfa "109")))) (deftest operator-test ;; Operators - (is (= :OPERATOR (run-NFA operators-nfa "+"))) - (is (= :OPERATOR (run-NFA operators-nfa "++"))) - (is (= :OPERATOR (run-NFA operators-nfa ">"))) - (is (= :OPERATOR (run-NFA operators-nfa ">="))) - (is (= :OPERATOR (run-NFA operators-nfa ">>"))) - (is (= :OPERATOR (run-NFA operators-nfa ">>="))) - (is (= :OPERATOR (run-NFA operators-nfa ">>>"))) - (is (= :OPERATOR (run-NFA operators-nfa ">>>="))) - (is (= :OPERATOR (run-NFA operators-nfa "&"))) - (is (= :OPERATOR (run-NFA operators-nfa "^="))) - (is (= :OPERATOR (run-NFA operators-nfa "^"))) - (is (= :OPERATOR (run-NFA operators-nfa "<<"))) - (is (= :OPERATOR (run-NFA operators-nfa "="))) - (is (= :OPERATOR (run-NFA operators-nfa "=="))) - (is (= :OPERATOR (run-NFA operators-nfa "!"))) - (is (= :OPERATOR (run-NFA operators-nfa "!=")))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa "+"))) + (is (= :UNARYOPERATOR (run-NFA complete-nfa "++"))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa ">"))) + (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa ">>>="))) + (is (= :UNARYOPERATOR (run-NFA complete-nfa "!"))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa "!=")))) ;; Booleans test (deftest boolean-test - (is (= :BOOLEAN (run-NFA boolean-nfa "true"))) - (is (= :BOOLEAN (run-NFA boolean-nfa "false"))) - (is (= false (run-NFA boolean-nfa "tru"))) - (is (= false (run-NFA boolean-nfa "fals")))) + (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "true"))) + (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "false"))) + (is (= :IDENTIFIER (run-NFA complete-nfa "tru"))) + (is (= :IDENTIFIER (run-NFA complete-nfa "fals")))) ;; Keyword test (deftest keyword-test ;; Individual Keywords on their nfas - (def int-nfa (string-to-nfa "int" :KEYWORD)) - (def abstract-nfa (string-to-nfa "abstract" :KEYWORD)) - (def default-nfa (string-to-nfa "default" :KEYWORD)) - (def synchronize-nfa (string-to-nfa "synchronize" :KEYWORD)) + (def int-nfa (string-to-nfa :KEYWORD "int")) + (def abstract-nfa (string-to-nfa :KEYWORD "abstract")) + (def default-nfa (string-to-nfa :KEYWORD "default")) + (def synchronize-nfa (string-to-nfa :KEYWORD "synchronize")) (is (= :KEYWORD (run-NFA int-nfa "int"))) (is (= :KEYWORD (run-NFA abstract-nfa "abstract"))) (is (= :KEYWORD (run-NFA default-nfa "default"))) @@ -82,79 +100,35 @@ (is :MAP complete-nfa) (is (= :KEYWORD (run-NFA complete-nfa "int"))) (is (= :KEYWORD (run-NFA complete-nfa "synchronized"))) - (is (= :INTEGER (run-NFA complete-nfa "109"))) + (is (= :INTEGER-LITERAL (run-NFA complete-nfa "9"))) (is (= :UNARYOPERATOR (run-NFA complete-nfa "++"))) - (is (= :BOOLEAN (run-NFA complete-nfa "true"))) - (is (= :BOOLEAN (run-NFA complete-nfa "false")))) + (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "true"))) + (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "false")))) -(deftest keywords-test +(deftest complete-nfa-test (is (= :KEYWORD (run-NFA complete-nfa "abstract"))) (is (= :KEYWORD (run-NFA complete-nfa "default"))) - (is (= :KEYWORD (run-NFA complete-nfa "if"))) - (is (= :KEYWORD (run-NFA complete-nfa "private"))) - (is (= :KEYWORD (run-NFA complete-nfa "this"))) - (is (= :KEYWORD (run-NFA complete-nfa "boolean"))) - (is (= :KEYWORD (run-NFA complete-nfa "do"))) - (is (= :KEYWORD (run-NFA complete-nfa "implements"))) - (is (= :KEYWORD (run-NFA complete-nfa "protected"))) - (is (= :KEYWORD (run-NFA complete-nfa "break"))) - (is (= :KEYWORD (run-NFA complete-nfa "double"))) - (is (= :KEYWORD (run-NFA complete-nfa "import"))) - (is (= :KEYWORD (run-NFA complete-nfa "public"))) - (is (= :KEYWORD (run-NFA complete-nfa "throws"))) - (is (= :KEYWORD (run-NFA complete-nfa "byte"))) - (is (= :KEYWORD (run-NFA complete-nfa "else"))) - (is (= :KEYWORD (run-NFA complete-nfa "instanceof"))) - (is (= :KEYWORD (run-NFA complete-nfa "return"))) - (is (= :KEYWORD (run-NFA complete-nfa "transient"))) - (is (= :KEYWORD (run-NFA complete-nfa "case"))) - (is (= :KEYWORD (run-NFA complete-nfa "extends"))) - (is (= :KEYWORD (run-NFA complete-nfa "int"))) - (is (= :KEYWORD (run-NFA complete-nfa "short"))) - (is (= :KEYWORD (run-NFA complete-nfa "try"))) - (is (= :KEYWORD (run-NFA complete-nfa "catch"))) - (is (= :KEYWORD (run-NFA complete-nfa "interface"))) - (is (= :KEYWORD (run-NFA complete-nfa "static"))) - (is (= :KEYWORD (run-NFA complete-nfa "void"))) - (is (= :KEYWORD (run-NFA complete-nfa "char"))) - (is (= :KEYWORD (run-NFA complete-nfa "finally"))) - (is (= :KEYWORD (run-NFA complete-nfa "long"))) - (is (= :KEYWORD (run-NFA complete-nfa "strictfp"))) - (is (= :KEYWORD (run-NFA complete-nfa "volatile"))) - (is (= :KEYWORD (run-NFA complete-nfa "class"))) - (is (= :KEYWORD (run-NFA complete-nfa "float"))) - (is (= :KEYWORD (run-NFA complete-nfa "native"))) - (is (= :KEYWORD (run-NFA complete-nfa "super"))) - (is (= :KEYWORD (run-NFA complete-nfa "while"))) - (is (= :KEYWORD (run-NFA complete-nfa "const"))) - (is (= :KEYWORD (run-NFA complete-nfa "for"))) - (is (= :KEYWORD (run-NFA complete-nfa "new"))) - (is (= :KEYWORD (run-NFA complete-nfa "switch"))) - (is (= :KEYWORD (run-NFA complete-nfa "continue"))) - (is (= :KEYWORD (run-NFA complete-nfa "goto"))) (is (= :KEYWORD (run-NFA complete-nfa "package"))) (is (= :KEYWORD (run-NFA complete-nfa "synchronized"))) ;; Booleans - (is (= :BOOLEAN (run-NFA complete-nfa "true"))) - (is (= :BOOLEAN (run-NFA complete-nfa "false"))) + (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "true"))) + (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "false"))) (is (= :IDENTIFIER (run-NFA complete-nfa "tru"))) (is (= :IDENTIFIER (run-NFA complete-nfa "fals"))) ;; Integer - (is (= :INTEGER (run-NFA complete-nfa "109"))) + (is (= :INTEGER-LITERAL (run-NFA complete-nfa "109"))) ;; Operators - (is (= :UNARYOPERATOR (run-NFA complete-nfa "+"))) + (is (= :BINARYOPERATOR (run-NFA complete-nfa "+"))) (is (= :UNARYOPERATOR (run-NFA complete-nfa "++"))) (is (= :BINARYOPERATOR (run-NFA complete-nfa ">"))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa ">="))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa ">>"))) - (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa ">>="))) (is (= :BINARYOPERATOR (run-NFA complete-nfa ">>>"))) (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa ">>>="))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa "&"))) - (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa "^="))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa "^"))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa "<<"))) - (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa "="))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa "=="))) - (is (= :UNARYOPERATOR (run-NFA complete-nfa "!"))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa "!=")))) + ;; Terminal + (is (= :TERMINAL (run-NFA complete-nfa ";"))) + ;; null + (is (= :NULL-LITERAL (run-NFA complete-nfa "null")))) + +(deftest filter-regex-nfas + ;; INT-LITERAL in Tokens.txt + ;; shouldn't give a real matching to the text given + (is (= false (run-NFA complete-nfa "")))) From 565cc6f9cd78293e927a87832efe41864e5ec4b3 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Mon, 1 Apr 2019 18:19:34 -0400 Subject: [PATCH 09/13] builds, if check --- Makefile | 0 src/watcompiler/{Tokens.txt => Language.txt} | 11 +- src/watcompiler/re.clj | 285 +++++++++++-------- test/watcompiler/re_test.clj | 174 ++++++----- 4 files changed, 270 insertions(+), 200 deletions(-) create mode 100644 Makefile rename src/watcompiler/{Tokens.txt => Language.txt} (67%) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e69de29 diff --git a/src/watcompiler/Tokens.txt b/src/watcompiler/Language.txt similarity index 67% rename from src/watcompiler/Tokens.txt rename to src/watcompiler/Language.txt index 16bd3ba..7f7badd 100644 --- a/src/watcompiler/Tokens.txt +++ b/src/watcompiler/Language.txt @@ -1,13 +1,14 @@ BRACKET { } ( ) [ ] KEYWORD abstract default if private this boolean do implements protected break double import public throws throw byte else instanceof return transient case extends int short try catch interface static void char finally final long strictfp volatile class float native super while const for new switch continue goto package synchronized -UNARYOPERATOR ++ - -- ! ~ -BINARYOPERATOR == * / % < << > >> >>> & ^ | != >= <= + -ASSIGNMENTOPERATOR = *= /= %= += -= <<= >>= >>>= &= ^= |= +UNARY-OPERATOR ++ - -- ! ~ +BINARY-OPERATOR == * / % < << > >> >>> & ^ | != >= <= + +ASSIGNMENT-OPERATOR = *= /= %= += -= <<= >>= >>>= &= ^= |= TERMINAL ; , BOOLEAN-LITERAL true false NULL-LITERAL null INT-LITERAL -STRING-LITERAL -CHARACTER-LITERAL +STRING-LITERAL +CHARACTER-LITERAL IDENTIFIER WHITESPACE +COMMENT diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj index 0412ce3..12cde1f 100644 --- a/src/watcompiler/re.clj +++ b/src/watcompiler/re.clj @@ -5,120 +5,20 @@ [clojure.string :as str]) (:import [watcompiler.nfa NFA])) -;; Merging multiple nfas -(defn merge-nfas - [& nfas] - (let - [stateS (gensym :s) - all-states (apply union (map :states nfas)) - all-accept-states (apply union (map :accept-states nfas)) - merged-transitions (apply merge (map :transitions nfas)) - all-transitions (merge - ;; Merged transitions from the nfas - merged-transitions - ;; Episilon transition to each nfa - (make-transition-NFA (into [] - (for [nfa-start (map :start nfas)] - [stateS nfa-start e])))) - all-accept-priorities (apply union (map :accept-priorities nfas))] - (->NFA (into #{} ) - all-states - stateS - all-accept-states - all-transitions - all-accept-priorities))) - -;; Parses a string to form the nfa -(defn string-to-nfa - [wordtype word] - (let - [stateS (gensym :s) - ;; List of substrings of word, stored as strings - states-map (set (rest (reductions str (str) word))) - - ;; Key: substring of word, Value: gensym associated with this state - gensym-map (into (sorted-map) (for [c states-map] - [c (gensym c)])) - ;; Key: gensym value, Value: char to get to this state - states-char-map (into (sorted-map) (for [pair (map list (vals gensym-map) (seq word))] - [(first pair) (second pair)])) - - ;; Accept states - accept-states-map {(get gensym-map word) (list wordtype 0)} - - ;; transitions from previous substring gensym to next substring gensym - transitions-map (into #{ [stateS (get gensym-map (str (first (seq word))) \a) (first (seq word))] } - (for [v (partition 2 1 (vals gensym-map))] - [(first v) (second v) (get states-char-map (second v))]))] - (make-NFA (into #{} (concat (seq word))) - states-map - stateS - accept-states-map - (make-transition-NFA transitions-map)))) - - -;; Takes strings and forms nfas from them and links them into one nfa -(defn form-multiple-nfas - [& arguments] - (let - [stateS (gensym :s) - class (keyword (first arguments)) ;; Conver to a keyword since it reads strings from a file - args (rest arguments) - ;; Key: string for keyword, Value: NFA for that keyword - strings-nfas (into (sorted-map) (for [nfa-name args] - [nfa-name (string-to-nfa class nfa-name)])) - all-states (apply union (map :states (vals strings-nfas))) - all-accept-states (apply union (map :accept-states (vals strings-nfas))) - merged-transitions (apply merge (map :transitions (vals strings-nfas))) - all-transitions (merge - ;; Merged transitions from the nfas - merged-transitions - ;; Episilon transition to each nfa - (make-transition-NFA (into [] - (for [nfa-start (map :start (vals strings-nfas))] - [stateS nfa-start e])))) - all-accept-priorities (apply union (map :accept-priorities (vals strings-nfas)))] - (->NFA (into #{} ) - all-states - stateS - all-accept-states - all-transitions - all-accept-priorities))) - -;; Reading the file -(def readFile - (into [] - (for [line (str/split-lines (slurp "src/watcompiler/Tokens.txt"))] - (str/split line #" ")))) - -(def fileFormed-nfa - (let [nfas (into [] - (for [x readFile] - ;; Check for regex based NFAs - (if - (or - (if (= (first x) "IDENTIFIER") true false) - (if (= (first x) "INT-LITERAL") true false) - (if (= (first x) "STRING-LITERAL") true false) - (if (= (first x) "CHARACTER-LITERAL") true false) - (if (= (first x) "WHITESPACE") true false)) - nil - (apply form-multiple-nfas x))))] - (apply merge-nfas (remove nil? nfas)))) - ;; NFAs for types ;; Integer literal ;; 0 and [1-9][0-9]* -(def integer-literal-nfa +(defn build-integer-literal-nfa + [] (let [stateS (gensym :S) state1 (gensym :1) state2 (gensym :2)] (make-NFA (into #{} (concat [\0] DIGITS DIGITS-NONZERO)) #{stateS state1 state2} stateS - {state1 (list :INTEGER-LITERAL 0) - state2 (list :INTEGER-LITERAL 0)} + {state1 (list "INTEGER-LITERAL" 0) + state2 (list "INTEGER-LITERAL" 0)} (make-transition-NFA [[stateS state1 \0] [stateS state2 DIGITS-NONZERO] [state2 state2 DIGITS]])))) @@ -126,7 +26,8 @@ ;; String literal ;; \"(\\[btnfr\"\'\\] | ALL-ASCII)*\" (\ shown for escaping ") ;; aka \"(.*)\" with escapes inside -(def string-literal-nfa +(defn build-string-literal-nfa + [] (let [stateS (gensym :S) state1 (gensym :1) state2 (gensym :2) @@ -134,7 +35,7 @@ (make-NFA (into #{} (concat [\'] ALL-ASCII [\\] ESCAPABLE)) #{stateS state1 state2 state3} stateS - {state3 (list :STRING-LITERAL 0)} + {state3 (list "STRING-LITERAL" 0)} (make-transition-NFA [[stateS state1 \"] [state1 state1 ALL-ASCII] [state1 state2 \\] @@ -144,7 +45,8 @@ ;; Character literal ;; \'(\\ESCAPABLE | ALL-ASCII)*\' (\ shown for escaping ") ;; aka \'(.*)\' with escapes inside -(def character-literal-nfa +(defn build-character-literal-nfa + [] (let [stateS (gensym :S) state1 (gensym :1) state2 (gensym :2) @@ -153,7 +55,7 @@ (make-NFA (into #{} (concat [\'] ALL-ASCII [\\] ESCAPABLE)) #{stateS state1 state2 state3 state4} stateS - {state4 (list :CHARACTER-LITERAL 0)} + {state4 (list "CHARACTER-LITERAL" 0)} (make-transition-NFA [[stateS state1 \'] [state1 state3 ALL-ASCII] [state1 state2 \\] @@ -162,15 +64,16 @@ ;; Identifiers ;; [a-zA-Z][a-zA-Z0-9]* -(def identifier-nfa +(defn build-identifier-nfa + [] (let [stateS (gensym :S) state1 (gensym :s1) state2 (gensym :s2)] (make-NFA (into #{} (concat UPPER-ALPHABET LOWER-ALPHABET DIGITS)) #{stateS state1 state2} stateS - {state1 (list :IDENTIFIER 1) - state2 (list :IDENTIFIER 1)} + {state1 (list "IDENTIFIER" 1) + state2 (list "IDENTIFIER" 1)} (make-transition-NFA [[stateS state1 UPPER-ALPHABET] [stateS state1 LOWER-ALPHABET] [state1 state2 UPPER-ALPHABET] @@ -181,21 +84,165 @@ ;; Whitespace ;; [space tab newline]+ -(def whitespace-nfa +(defn build-whitespace-nfa + [] (let [stateS (gensym :S) state1 (gensym :s1)] (make-NFA (into #{} WHITESPACE) #{stateS state1} stateS - {state1 (list :WHITESPACE 0)} + {state1 (list "WHITESPACE" 0)} (make-transition-NFA [[stateS state1 WHITESPACE] [state1 state1 WHITESPACE]])))) +;; Comment +;; //.* +;; /*.**/ +(defn build-comment-nfa + [] + (let [stateS (gensym :S) + state1 (gensym :s1) + state2 (gensym :s2) + state3 (gensym :s3) + state4 (gensym :s4) + state5 (gensym :s5)] + (make-NFA (into #{} (concat ALL-ASCII [\*] [\/])) + #{stateS state1 state2 state3 state4 state5} + stateS + {state2 (list "COMMENT" 0) + state5 (list "COMMENT"0)} + (make-transition-NFA [[stateS state1 \/] + [state1 state2 \/] + [state2 state2 ALL-ASCII] + [state1 state3 \*] + [state3 state3 ALL-ASCII] + [state3 state4 \*] + [state4 state5 \/]])))) + +;; Merging multiple nfas +(defn merge-nfas + [& nfas] + (let + [stateS (gensym :s) + all-states (apply union (map :states nfas)) + all-accept-states (apply union (map :accept-states nfas)) + merged-transitions (apply merge (map :transitions nfas)) + all-transitions (merge + ;; Merged transitions from the nfas + merged-transitions + ;; Episilon transition to each nfa + (make-transition-NFA (into [] + (for [nfa-start (map :start nfas)] + [stateS nfa-start e])))) + all-accept-priorities (apply union (map :accept-priorities nfas))] + (->NFA (into #{} ) ;; collect the alphabet + all-states + stateS + all-accept-states + all-transitions + all-accept-priorities))) + +;; Parses a string to form the nfa +(defn string-to-nfa + [class-type word] + ;; case on the word, if it is a special token, pass the respective nfa + (let + [stateS (gensym :s) + ;; List of prefixes of word, stored as strings + states-map (set (rest (reductions str (str) word))) + + ;; Key: substring of word, Value: gensym associated with this state + gensym-map (into (sorted-map) (for [c states-map] + [c (gensym c)])) + ;; Key: gensym value, Value: char to get to this state + states-char-map (into (sorted-map) (for [pair (map list (vals gensym-map) (seq word))] + [(first pair) (second pair)])) + + ;; Accept states + accept-states-map {(get gensym-map word) (list class-type 0)} + + ;; transitions from previous substring gensym to next substring gensym + transitions-map (into #{ [stateS (get gensym-map (str (first (seq word))) \a) (first (seq word))] } + (for [v (partition 2 1 (vals gensym-map))] + [(first v) (second v) (get states-char-map (second v))]))] + (make-NFA (into #{} (concat (seq word))) + states-map + stateS + accept-states-map + (make-transition-NFA transitions-map)))) + +;; Acts as a wrapper to either get the made nfa or form it with string-to-nfa +(defn get-nfa + [class-type first-token arguments] + (case first-token + "" (build-integer-literal-nfa) + "" (build-string-literal-nfa) + "" (build-character-literal-nfa) + "" (build-identifier-nfa) + "" (build-whitespace-nfa) + "" (build-comment-nfa) + (string-to-nfa class-type first-token))) + +;; Takes strings and forms nfas from them and links them into one nfa +(defn form-multiple-nfas + [& arguments] + (let + [stateS (gensym :s) + class-type (first arguments) + args (rest arguments) + + ;; Letters of all of the words in arguments + alphabet (apply concat (for [x (map seq (map char-array arguments))] x)) + + ;; Key: string for keyword, Value: NFA for that keyword + strings-nfas (into (sorted-map) (for [nfa-name args] + [nfa-name (get-nfa class-type nfa-name args)])) + + ;; All of the states in the nfas + all-states (apply union (map :states (vals strings-nfas))) + + ;; All of the accept states in the nfas + all-accept-states (apply union (map :accept-states (vals strings-nfas))) + + ;; All of the transitions in the nfas + merged-transitions (apply merge (map :transitions (vals strings-nfas))) + + ;; Setting epsilon transitions to all of the nfas start states + all-transitions (merge + ;; Merged transitions from the nfas + merged-transitions + ;; Episilon transition to each nfa + (make-transition-NFA (into [] + (for [nfa-start (map :start (vals strings-nfas))] + [stateS nfa-start e])))) + all-accept-priorities (apply union (map :accept-priorities (vals strings-nfas)))] + (->NFA (into #{} (concat alphabet)) + all-states + stateS + all-accept-states + all-transitions + all-accept-priorities))) + +;; Reading the file +(def read-file ;; change that notation read-file + (into [] + (for [line (str/split-lines (slurp "src/watcompiler/Language.txt"))] + (str/split line #" ")))) + +(def file-formed-nfa + (let [nfas (into [] + (for [x read-file] + (apply form-multiple-nfas x)))] + (apply merge-nfas (remove nil? nfas)))) + ;; complete nfa from all of the individual RE nfas (def complete-nfa - (merge-nfas integer-literal-nfa - string-literal-nfa - character-literal-nfa - identifier-nfa - whitespace-nfa - fileFormed-nfa)) + (merge-nfas file-formed-nfa)) + + +;; TODO +;; sorted map in form-multiple-nfas, is it needed? YES + +;; slurp in file input + +;; Makefile, gulp compile into it diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj index 227bdc4..ff8192f 100644 --- a/test/watcompiler/re_test.clj +++ b/test/watcompiler/re_test.clj @@ -6,127 +6,149 @@ ;; Regex NFA tests (deftest integer-literal-tests - (is (= :INTEGER-LITERAL (run-NFA integer-literal-nfa "1010"))) - (is (= :INTEGER-LITERAL (run-NFA integer-literal-nfa "0"))) - (is (= :INTEGER-LITERAL (run-NFA integer-literal-nfa "1")))) + (let [integer-literal-nfa (build-integer-literal-nfa)] + + (is (= "INTEGER-LITERAL" (run-NFA integer-literal-nfa "1010"))) + (is (= "INTEGER-LITERAL" (run-NFA integer-literal-nfa "0"))) + (is (= "INTEGER-LITERAL" (run-NFA integer-literal-nfa "1"))))) (deftest string-literal-tests - (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\"s\""))) - (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\"thisis a string literal\""))) - (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\"[]~`!%^&*$(&^%#.][` @$g literal\""))) - (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\"\""))) + (let [string-literal-nfa (build-string-literal-nfa)] + + (is (= "STRING-LITERAL" (run-NFA string-literal-nfa "\"s\""))) + (is (= "STRING-LITERAL" (run-NFA string-literal-nfa "\"thisis a string literal\""))) + (is (= "STRING-LITERAL" (run-NFA string-literal-nfa "\"[]~`!%^&*$(&^%#.][` @$g literal\""))) + (is (= "STRING-LITERAL" (run-NFA string-literal-nfa "\"\""))) - (is (= :STRING-LITERAL (run-NFA string-literal-nfa "\" \\b \\t \\n \\f \\r \\' \\\\ \""))) - (is (= false (run-NFA string-literal-nfa "needquotes")))) + (is (= "STRING-LITERAL" (run-NFA string-literal-nfa "\" \\b \\t \\n \\f \\r \\' \\\\ \""))) + (is (= "STRING-LITERAL" (run-NFA string-literal-nfa "\"abc\\n\""))) + (is (= false (run-NFA string-literal-nfa "needquotes"))))) (deftest character-literal-tests - (is (= :CHARACTER-LITERAL (run-NFA character-literal-nfa "'s'"))) - (is (= :CHARACTER-LITERAL (run-NFA character-literal-nfa "'\\b'"))) - (is (= :CHARACTER-LITERAL (run-NFA character-literal-nfa "'0'"))) - (is (= false (run-NFA string-literal-nfa "'sa'")))) + (let [character-literal-nfa (build-character-literal-nfa)] + + (is (= "CHARACTER-LITERAL" (run-NFA character-literal-nfa "'s'"))) + (is (= "CHARACTER-LITERAL" (run-NFA character-literal-nfa "'\\b'"))) + (is (= "CHARACTER-LITERAL" (run-NFA character-literal-nfa "'0'"))) + (is (= false (run-NFA character-literal-nfa "'sa'"))))) + (deftest identifier-tests - (is (= :IDENTIFIER (run-NFA identifier-nfa "thisidentifier"))) - (is (= :IDENTIFIER (run-NFA identifier-nfa "a")))) + (let [identifier-nfa (build-identifier-nfa)] + + (is (= "IDENTIFIER" (run-NFA identifier-nfa "thisidentifier"))) + (is (= "IDENTIFIER" (run-NFA identifier-nfa "a"))))) (deftest whitespace-test - (is (= :WHITESPACE (run-NFA whitespace-nfa " \n\n")))) + (let [whitespace-nfa (build-whitespace-nfa)] + + (is (= "WHITESPACE" (run-NFA whitespace-nfa " \n\n"))))) + +(deftest comment-test + (let [comment-nfa (build-comment-nfa)] + + (is (= "COMMENT" (run-NFA comment-nfa "//this is a comment "))) + (is (= "COMMENT" (run-NFA comment-nfa "//"))) + (is (= "COMMENT" (run-NFA comment-nfa "/*multilinecomment\\n\\ncomment*/"))) + (is (= false (run-NFA comment-nfa "/*notmultiline"))) + (is (= false (run-NFA comment-nfa "/notacomment"))))) ;; Form the NFAs from a file -(deftest read-file - (def lines readFile) - (def formed fileFormed-nfa) +(deftest reading-file + (let [lines read-file + formed file-formed-nfa] - (is (= :BRACKET (run-NFA formed "]"))) - (is (= :BOOLEAN-LITERAL (run-NFA formed "true"))) - (is (= :BRACKET (run-NFA formed "{")))) + (is (= "BRACKET" (run-NFA formed "]"))) + (is (= "BOOLEAN-LITERAL" (run-NFA formed "true"))) + (is (= "BRACKET" (run-NFA formed "{"))))) ;; Test forming multiple nfas from multiple strings (deftest multiple-nfas-function-test - (def full-nfa (form-multiple-nfas :KEYWORD "int" "if")) - (is (= :KEYWORD (run-NFA full-nfa "int"))) - (is (= :KEYWORD (run-NFA full-nfa "if"))) - (is (= false (run-NFA full-nfa "in"))) - (is (= false (run-NFA full-nfa "nt")))) + (let [full-nfa (form-multiple-nfas "KEYWORD" "int" "if")] + (is (= "KEYWORD" (run-NFA full-nfa "int"))) + (is (= "KEYWORD" (run-NFA full-nfa "if"))) + (is (= false (run-NFA full-nfa "in"))) + (is (= false (run-NFA full-nfa "nt"))))) ;; Test function forming individual nfa (deftest function-test - (def int-nfa-test (string-to-nfa :INT "int")) + (let [int-nfa-test (string-to-nfa "INT" "int") + synchronized-nfa-test (string-to-nfa "KEYWORD" "synchronized")] + (is :MAP int-nfa-test) - (is (= :INT (run-NFA int-nfa-test "int"))) - (def synchronized-nfa-test (string-to-nfa :KEYWORD "synchronized")) + (is (= "INT" (run-NFA int-nfa-test "int"))) (is :MAP synchronized-nfa-test) - (is (= :KEYWORD (run-NFA synchronized-nfa-test "synchronized"))) + (is (= "KEYWORD" (run-NFA synchronized-nfa-test "synchronized"))) (is (= false (run-NFA synchronized-nfa-test "synchronize"))) - (is (= false (run-NFA synchronized-nfa-test "ynchronize")))) + (is (= false (run-NFA synchronized-nfa-test "ynchronize"))))) ;; Individual NFA tests (deftest int-test - (def int-nfa (string-to-nfa :KEYWORD "int")) - (is (= :KEYWORD (run-NFA complete-nfa "int"))) - (is (= :INTEGER-LITERAL (run-NFA complete-nfa "109")))) + (let [int-nfa (string-to-nfa "KEYWORD" "int")] + (is (= "KEYWORD" (run-NFA complete-nfa "int"))) + (is (= "INTEGER-LITERAL" (run-NFA complete-nfa "109"))))) (deftest operator-test ;; Operators - (is (= :BINARYOPERATOR (run-NFA complete-nfa "+"))) - (is (= :UNARYOPERATOR (run-NFA complete-nfa "++"))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa ">"))) - (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa ">>>="))) - (is (= :UNARYOPERATOR (run-NFA complete-nfa "!"))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa "!=")))) + (is (= "BINARY-OPERATOR" (run-NFA complete-nfa "+"))) + (is (= "UNARY-OPERATOR" (run-NFA complete-nfa "++"))) + (is (= "BINARY-OPERATOR" (run-NFA complete-nfa ">"))) + (is (= "ASSIGNMENT-OPERATOR" (run-NFA complete-nfa ">>>="))) + (is (= "UNARY-OPERATOR" (run-NFA complete-nfa "!"))) + (is (= "BINARY-OPERATOR" (run-NFA complete-nfa "!=")))) ;; Booleans test (deftest boolean-test - (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "true"))) - (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "false"))) - (is (= :IDENTIFIER (run-NFA complete-nfa "tru"))) - (is (= :IDENTIFIER (run-NFA complete-nfa "fals")))) + (is (= "BOOLEAN-LITERAL" (run-NFA complete-nfa "true"))) + (is (= "BOOLEAN-LITERAL" (run-NFA complete-nfa "false"))) + (is (= "IDENTIFIER" (run-NFA complete-nfa "tru"))) + (is (= "IDENTIFIER" (run-NFA complete-nfa "fals")))) ;; Keyword test (deftest keyword-test ;; Individual Keywords on their nfas - (def int-nfa (string-to-nfa :KEYWORD "int")) - (def abstract-nfa (string-to-nfa :KEYWORD "abstract")) - (def default-nfa (string-to-nfa :KEYWORD "default")) - (def synchronize-nfa (string-to-nfa :KEYWORD "synchronize")) - (is (= :KEYWORD (run-NFA int-nfa "int"))) - (is (= :KEYWORD (run-NFA abstract-nfa "abstract"))) - (is (= :KEYWORD (run-NFA default-nfa "default"))) - (is (= false (run-NFA synchronized-nfa-test "synchronize"))) - (is (= false (run-NFA synchronized-nfa-test "ynchronize")))) + (let [int-nfa (string-to-nfa "KEYWORD" "int") + abstract-nfa (string-to-nfa "KEYWORD" "abstract") + default-nfa (string-to-nfa "KEYWORD" "default") + synchronize-nfa (string-to-nfa "KEYWORD" "synchronize")] + (is (= "KEYWORD" (run-NFA int-nfa "int"))) + (is (= "KEYWORD" (run-NFA abstract-nfa "abstract"))) + (is (= "KEYWORD" (run-NFA default-nfa "default"))) + (is (= "KEYWORD" (run-NFA synchronize-nfa "synchronize"))) + (is (= false (run-NFA synchronize-nfa "ynchronize"))))) ;; Test on a complete merged nfa (deftest merged-function-nfa-test (is :MAP complete-nfa) - (is (= :KEYWORD (run-NFA complete-nfa "int"))) - (is (= :KEYWORD (run-NFA complete-nfa "synchronized"))) - (is (= :INTEGER-LITERAL (run-NFA complete-nfa "9"))) - (is (= :UNARYOPERATOR (run-NFA complete-nfa "++"))) - (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "true"))) - (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "false")))) + (is (= "KEYWORD" (run-NFA complete-nfa "int"))) + (is (= "KEYWORD" (run-NFA complete-nfa "synchronized"))) + (is (= "INTEGER-LITERAL" (run-NFA complete-nfa "9"))) + (is (= "UNARY-OPERATOR" (run-NFA complete-nfa "++"))) + (is (= "BOOLEAN-LITERAL" (run-NFA complete-nfa "true"))) + (is (= "BOOLEAN-LITERAL" (run-NFA complete-nfa "false")))) (deftest complete-nfa-test - (is (= :KEYWORD (run-NFA complete-nfa "abstract"))) - (is (= :KEYWORD (run-NFA complete-nfa "default"))) - (is (= :KEYWORD (run-NFA complete-nfa "package"))) - (is (= :KEYWORD (run-NFA complete-nfa "synchronized"))) + (is (= "KEYWORD" (run-NFA complete-nfa "abstract"))) + (is (= "KEYWORD" (run-NFA complete-nfa "default"))) + (is (= "KEYWORD" (run-NFA complete-nfa "package"))) + (is (= "KEYWORD" (run-NFA complete-nfa "synchronized"))) ;; Booleans - (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "true"))) - (is (= :BOOLEAN-LITERAL (run-NFA complete-nfa "false"))) - (is (= :IDENTIFIER (run-NFA complete-nfa "tru"))) - (is (= :IDENTIFIER (run-NFA complete-nfa "fals"))) + (is (= "BOOLEAN-LITERAL" (run-NFA complete-nfa "true"))) + (is (= "BOOLEAN-LITERAL" (run-NFA complete-nfa "false"))) + (is (= "IDENTIFIER" (run-NFA complete-nfa "tru"))) + (is (= "IDENTIFIER" (run-NFA complete-nfa "fals"))) ;; Integer - (is (= :INTEGER-LITERAL (run-NFA complete-nfa "109"))) + (is (= "INTEGER-LITERAL" (run-NFA complete-nfa "109"))) ;; Operators - (is (= :BINARYOPERATOR (run-NFA complete-nfa "+"))) - (is (= :UNARYOPERATOR (run-NFA complete-nfa "++"))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa ">"))) - (is (= :BINARYOPERATOR (run-NFA complete-nfa ">>>"))) - (is (= :ASSIGNMENTOPERATOR (run-NFA complete-nfa ">>>="))) + (is (= "BINARY-OPERATOR" (run-NFA complete-nfa "+"))) + (is (= "UNARY-OPERATOR" (run-NFA complete-nfa "++"))) + (is (= "BINARY-OPERATOR" (run-NFA complete-nfa ">"))) + (is (= "BINARY-OPERATOR" (run-NFA complete-nfa ">>>"))) + (is (= "ASSIGNMENT-OPERATOR" (run-NFA complete-nfa ">>>="))) ;; Terminal - (is (= :TERMINAL (run-NFA complete-nfa ";"))) + (is (= "TERMINAL" (run-NFA complete-nfa ";"))) ;; null - (is (= :NULL-LITERAL (run-NFA complete-nfa "null")))) + (is (= "NULL-LITERAL" (run-NFA complete-nfa "null")))) (deftest filter-regex-nfas ;; INT-LITERAL in Tokens.txt From 7946b1ca693e57ab0e1f424d2385f4f9c3f15d4e Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Thu, 4 Apr 2019 10:30:38 -0400 Subject: [PATCH 10/13] comment tests --- test/watcompiler/re_test.clj | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/watcompiler/re_test.clj b/test/watcompiler/re_test.clj index ff8192f..b1379d3 100644 --- a/test/watcompiler/re_test.clj +++ b/test/watcompiler/re_test.clj @@ -47,9 +47,11 @@ (deftest comment-test (let [comment-nfa (build-comment-nfa)] - (is (= "COMMENT" (run-NFA comment-nfa "//this is a comment "))) + (is (= "COMMENT" (run-NFA comment-nfa "///this is a comment "))) (is (= "COMMENT" (run-NFA comment-nfa "//"))) (is (= "COMMENT" (run-NFA comment-nfa "/*multilinecomment\\n\\ncomment*/"))) + (is (= "COMMENT" (run-NFA comment-nfa "/*multilinecomment\\n\\ncomment****/"))) + (is (= false (run-NFA comment-nfa "//dawda\na"))) (is (= false (run-NFA comment-nfa "/*notmultiline"))) (is (= false (run-NFA comment-nfa "/notacomment"))))) From d4fa2b8c4c566d2574dbda6e5b951bb16d67acd9 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Sun, 7 Apr 2019 22:13:45 -0400 Subject: [PATCH 11/13] makefile --- Makefile | 4 ++++ joosc | 1 + project.clj | 2 +- src/watcompiler/re.clj | 8 -------- test/watcompiler/sample.java | 1 + 5 files changed, 7 insertions(+), 9 deletions(-) create mode 100755 joosc create mode 100644 test/watcompiler/sample.java diff --git a/Makefile b/Makefile index e69de29..cd8d133 100644 --- a/Makefile +++ b/Makefile @@ -0,0 +1,4 @@ +make: + +clean: + $(RM) joosc diff --git a/joosc b/joosc new file mode 100755 index 0000000..6e63e5c --- /dev/null +++ b/joosc @@ -0,0 +1 @@ +java -jar target/uberjar/watcompiler-0.1.0-SNAPSHOT-standalone.jar $1 diff --git a/project.clj b/project.clj index af46f1c..721e4ca 100644 --- a/project.clj +++ b/project.clj @@ -4,6 +4,6 @@ :license {:name "EPL-2.0" :url "https://www.eclipse.org/legal/epl-2.0/"} :dependencies [[org.clojure/clojure "1.9.0"]] - :main ^:skip-aot watcompiler.core + :main watcompiler.core :target-path "target/%s" :profiles {:uberjar {:aot :all}}) diff --git a/src/watcompiler/re.clj b/src/watcompiler/re.clj index 12cde1f..ed4d705 100644 --- a/src/watcompiler/re.clj +++ b/src/watcompiler/re.clj @@ -238,11 +238,3 @@ ;; complete nfa from all of the individual RE nfas (def complete-nfa (merge-nfas file-formed-nfa)) - - -;; TODO -;; sorted map in form-multiple-nfas, is it needed? YES - -;; slurp in file input - -;; Makefile, gulp compile into it diff --git a/test/watcompiler/sample.java b/test/watcompiler/sample.java new file mode 100644 index 0000000..92fed23 --- /dev/null +++ b/test/watcompiler/sample.java @@ -0,0 +1 @@ +int a 0 From 38c1252e2cfe5547cfd8e91ad46fd4bbd4a267b8 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Sun, 7 Apr 2019 23:58:46 -0400 Subject: [PATCH 12/13] add back aot from project --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index 721e4ca..af46f1c 100644 --- a/project.clj +++ b/project.clj @@ -4,6 +4,6 @@ :license {:name "EPL-2.0" :url "https://www.eclipse.org/legal/epl-2.0/"} :dependencies [[org.clojure/clojure "1.9.0"]] - :main watcompiler.core + :main ^:skip-aot watcompiler.core :target-path "target/%s" :profiles {:uberjar {:aot :all}}) From 74bd42d8465f907b6ef1989dc7c6a8de38c2fb70 Mon Sep 17 00:00:00 2001 From: jonathantsang Date: Tue, 9 Apr 2019 12:40:03 -0400 Subject: [PATCH 13/13] lein uberjar later --- Makefile | 5 ++--- joosc | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index cd8d133..c58fc8a 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,3 @@ make: - -clean: - $(RM) joosc + + diff --git a/joosc b/joosc index 6e63e5c..bf86038 100755 --- a/joosc +++ b/joosc @@ -1 +1,2 @@ +lein uberjar java -jar target/uberjar/watcompiler-0.1.0-SNAPSHOT-standalone.jar $1