diff --git a/.directory b/.directory new file mode 100644 index 000000000..411e7e8e9 --- /dev/null +++ b/.directory @@ -0,0 +1,4 @@ +[Dolphin] +Timestamp=2022,2,19,10,32,56 +Version=4 +ViewMode=1 diff --git a/.gitignore b/.gitignore index 4acafde18..150b5982d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ + # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig # Created by https://www.gitignore.io/api/visualstudiocode,linux,latex,python @@ -291,6 +292,11 @@ TSWLatexianTemp* ### Python ### # Byte-compiled / optimized / DLL files __pycache__/ +src/__pycache__/ +test/__pycache__/ +src/cmp/__pycache__/ +src/cmp/tools/__pycache__/ +.directory/ *.py[cod] *$py.class @@ -407,4 +413,3 @@ dmypy.json # End of https://www.gitignore.io/api/visualstudiocode,linux,latex,python # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) - diff --git a/Readme.md b/Readme.md index a47d48b9e..663062365 100644 --- a/Readme.md +++ b/Readme.md @@ -1,6 +1,6 @@ # COOL: Proyecto de Compilación -> Proyecto base para el compilador de 4to año en Ciencia de la Computación. +> Forked from: https://github.com/matcom/cool-compiler-2021, proyecto base para el compilador de 4to año en Ciencia de la Computación. ## Generalidades @@ -10,158 +10,19 @@ funcional para el lenguaje _COOL_. _COOL (Classroom Object-Oriented Language)_ es un pequeño lenguaje que puede ser implementado con un esfuerzo razonable en un semestre del curso. Aun así, _COOL_ mantiene muchas de las características de los lenguajes de programación modernos, incluyendo orientación a objetos, tipado estático y manejo automático de memoria. -## Cómo comenzar (o terminar) - -El proyecto de Compilación será recogido y evaluado **únicamente** a través de Github. Es imprescindible tener una cuenta de Github para cada participante, y que su proyecto esté correctamente hosteado en esta plataforma. 
A continuación le damos las instrucciones mínimas necesarias para ello: - -### 1. Si no lo han hecho ya, regístrense en [Github](https://github.com) todos los miembros del equipo (es gratis). - -![](img/img1.png) - -### 2. Haga click en el botón **Fork** para hacer una copia del proyecto en el perfil de Github de uno de los miembros. - -Opcionalmente pueden [crear una organización](https://github.com/organizations/new) y copiar el proyecto en el perfil de la misma. - -![](img/img2.png) - -### 3. Una vez hecho esto, tendrá un nuevo repositorio en `github/`. - -Revise que el repositorio de su equipo está en su perfil. -En este ejemplo se ha copiado a la cuenta de `github.com/apiad`. - -Debe indicar bajo el nombre del repositorio: `"forked from matcom/cool-compiler-2021"`. - -![](img/img3.png) - -### 4. Clone este proyecto en un repositorio local. - -Busque la URL de su proyecto en la interfaz web de Github. - -Asegúrese de clonar **su copia** y no el proyecto original en `matcom/cool-compiler-2021`. - -![](img/img4.png) - -```bash -$ git clone git@github.com:/cool-compiler-2021.git -``` - -> Donde `` es posiblemente el nombre de su equipo o del miembro donde se hizo el _fork_. - -A partir de este punto debe tener un proyecto `cool-compiler-2021` local. -El siguiente paso depende de si usted ya tiene su código versionado con `git` o no. - -### 5.A. Si tiene su proyecto en git (y no quiere perder la historia): - -#### 5.1. Mezcle hacia el nuevo respositorio su repositorio anterior: - -```bash -$ cd cool-compiler-2021 -$ git pull --allow-unrelated-histories master -``` - -#### 5.2. Organice su proyecto, código fuente y documentación, de acuerdo a las instrucciones de este documento, y vuelva a hacer `commit`. - -```bash -$ mv src/ -$ git add . -$ git commit -a -m "Mezclado con el proyecto base" -``` - -#### 5.3. A partir de este punto puede hacer `push` cada vez que tenga cambios que subir. 
- -```bash -$ git push origin master -``` - -### 5.B Si aún no tiene su proyecto en git (o no le importa la historia): - -#### 5.1. Simplemente copie el código de su proyecto en la carpeta correspondiente `src` y haga su primer commit. - -```bash -$ mv src/ -$ git commit -a -m "Hello Git!" -``` - -#### 5.2. A partir de este punto asegúrese de hacer `commit` de forma regular para mantener su repositorio actualizado. - -Si necesita saber más sobre `git`, todo lo imprescindible está en [esta guía](doc/github-git-cheat-sheet.pdf). - -#### 5.3. A partir de este punto puede hacer `push` cada vez que tenga cambios que subir. - -```bash -$ git push origin master -``` - -## Entregas - -En este proyecto se realizarán entregas parciales a lo largo del curso. Para realizar una entrega, siga los siguientes pasos. - -### 1. Cree un pull request al proyecto original desde su copia. - -![](img/img5.png) - -### 2. Asegúrese de tener la siguiente configuración antes de hacer click en **Create pull request**. - -- **base repository**: `matcom/cool-compiler-2021` (repositorio original) - - **branch**: `master` -- **head repository**: `/cool-compiler-2021` (repositorio propio) - - **branch**: `master` (o la que corresponda) - -> Asegúrese que se indica **Able to merge**. De lo contrario, existen cambios en el repositorio original que usted no tiene, y debe actualizarlos. - -> **NOTA**: Asegúrese que el _pull request_ se hace a la rama `master`. - -![](img/img6.png) - -### 3. Introduzca un título y descripción adecuados, y haga click en **Create pull request**. - -![](img/img7.png) - -### 4. Espere mientras se ejecutan las pruebas. - -Verá la indicación **Some checks haven't completed yet**. - -![](img/img8.png) - -Es posible que tenga que actualizar los cambios que se hayan hecho en el repositorio original, por ejemplo, si se han agregado nuevos tests. 
En este caso obtendrá el siguiente mensaje: - -> **This branch is out-of-date with base branch** - -Haga click en **Update branch** y siga las instrucciones. -### 5. Verifique que no hubo errores en las pruebas. - -Si ve el mensaje **(All | Some) checks have failed**, significa que su código no pasó las pruebas. - -![](img/img9.png) - -Para ver los resultados de las pruebas haga click en el link **Details**. - -![](img/img10.png) - - -### 6. Arregle los errores y repita el paso 5 hasta que todas las pruebas pasen. - -Para cualquier modificación que haga a su proyecto, haga _commit_ y _push_ para **su repositorio personal** y automáticamente se actualizará el estado del _pull request_ y se volverán a ejecutar las pruebas. **No es necesario** abrir un _pull request_ nuevo por cada entrega, sino actualizar el anterior. - -> **Por favor asegúrese de mantener un solo _pull request_ activo por equipo**. En caso de abrir uno nuevo, cerrar el anterior. - ## Sobre la implementación -Ponga todo su código e instrucciones necesarias en la carpeta `src`. Más información en [`src/Readme.md`](src/Readme.md). +Todo el código e instrucciones necesarias se encuentran en la carpeta `src`. Más información en [`src/Readme.md`](src/Readme.md). ## Sobre la documentación -Usted debe presentar un reporte escrito documentando el proceso de construcción de su compilador y los detalles más importantes de su funcionamiento. Más información en [`doc/Readme.md`](doc/Readme.md). - -## Sobre los equipos de desarrollo - -Para desarrollar el compilador del lenguaje COOL se trabajará en equipos de 2 o 3 integrantes. +Se presenta un reporte escrito documentando el proceso de construcción del compilador y los detalles más importantes de su funcionamiento. Más información en [`doc/Readme.md`](doc/Readme.md). ## Sobre los casos de prueba -La carpeta `tests` contiene todos los casos de prueba que son obligatorios de pasar para que su proyecto tenga derecho a ser evaluado. 
+La carpeta `tests` contiene casos de prueba. -Estos tests se ejecutan automáticamente cada vez que hace un _pull request_ al repositorio `matcom/cool-compiler-2021`. Solo aquellos proyectos que pasen todas las pruebas con éxito serán evaluados. +Estos tests se ejecutan automáticamente cada vez que hace un _pull request_ al repositorio `matcom/cool-compiler-2021`. Para ejecutar las pruebas localmente, debe tener instalado `Python 3.7`, `pip` y `make` (normalmente viene con Linux). Ejecute: diff --git a/customized_tests/lexer/no_strings.cl b/customized_tests/lexer/no_strings.cl new file mode 100644 index 000000000..bedf7bd00 --- /dev/null +++ b/customized_tests/lexer/no_strings.cl @@ -0,0 +1,9 @@ +" +class Cons { + xcar : Int ; + xcdr : String ; + + isNill ( ) : Bool { + false + } ; +} \ No newline at end of file diff --git a/customized_tests/lexer/reserve_words.cl b/customized_tests/lexer/reserve_words.cl new file mode 100644 index 000000000..06bf72036 --- /dev/null +++ b/customized_tests/lexer/reserve_words.cl @@ -0,0 +1,113 @@ +(* Integers, Identifiers, and Special Notation *) + +0007 123 +1 -1 +90 -09 +11113 -4r *a *self* c++ +5! = 120, 2 + 2 = 5 or E = mc2; p + 1 @ p = 1: for x in range(len(b)) +new / <- <<==> {( Int: Objet, Bool; String.string SELF_TYPE isvoid }) +class Class if then else fi testing Testing ~007agent_bond james_007B0N3SS___ +loop pool while tRuE or noT faLsE let in case of ESAC + +(* +#3 case 0007 +#3 inherits 123 +#3 '+' +#3 ; 1 +#3 '-' +#3 , 1 +#3 '+' +#3 . 90 +#3 '-' +#3 ( ) 09 +#3 '+' +#3 { } 11113 +#3 '-' +#3 @ 4 +#3 + r +#3 '*' +#3 - a +#3 '*' +#3 * self +#3 '*' +#3 / c +#3 '+' +#3 '+' +#4 < > 5 +#4 ERROR "!" 
+#4 '=' +#4 = 120 +#4 ',' +#4 ~ 2 +#4 '+' +#4 INT_CONST 2 +#4 '=' +#4 INT_CONST 5 +#4 OBJECTID or +#4 TYPEID E +#4 '=' +#4 OBJECTID mc2 +#4 ';' +#4 OBJECTID p +#4 '+' +#4 INT_CONST 1 +#4 '@' +#4 OBJECTID p +#4 '=' +#4 INT_CONST 1 +#4 ':' +#4 OBJECTID for +#4 OBJECTID x +#4 IN +#4 OBJECTID range +#4 '(' +#4 OBJECTID len +#4 '(' +#4 OBJECTID b +#4 ')' +#4 ')' +#5 NEW +#5 '/' +#5 ASSIGN +#5 '<' +#5 LE +#5 DARROW +#5 '{' +#5 '(' +#5 TYPEID Int +#5 ':' +#5 TYPEID Objet +#5 ',' +#5 TYPEID Bool +#5 ';' +#5 TYPEID String +#5 '.' +#5 OBJECTID string +#5 TYPEID SELF_TYPE +#5 ISVOID +#5 '}' +#5 ')' +#6 CLASS +#6 CLASS +#6 IF +#6 THEN +#6 ELSE +#6 FI +#6 OBJECTID testing +#6 TYPEID Testing +#6 '~' +#6 INT_CONST 007 +#6 OBJECTID agent_bond +#6 OBJECTID james_007B0N3SS___ +#7 LOOP +#7 POOL +#7 WHILE +#7 BOOL_CONST true +#7 OBJECTID or +#7 NOT +#7 BOOL_CONST false +#7 LET +#7 IN +#7 CASE +#7 OF +#7 ESAC +*) + +"asd \ No newline at end of file diff --git a/customized_tests/lexer/test_comment.cl b/customized_tests/lexer/test_comment.cl new file mode 100644 index 000000000..6283574f5 --- /dev/null +++ b/customized_tests/lexer/test_comment.cl @@ -0,0 +1,15 @@ +--Any characters between two dashes “--” and the next newline +--(or EOF, if there is no next newline) are treated as comments + +(*(*(* +Comments may also be written by enclosing +text in (∗ . . . ∗). The latter form 8 of comment may be nested. +Comments cannot cross file boundaries. +*)*)*) + +class Error() { + + (* case There was once a comment, + that was quite long. + But, the. Now, reader; is the comment + ever gonna end? 
\ No newline at end of file diff --git a/customized_tests/lexer/tryNoSymbol.cl b/customized_tests/lexer/tryNoSymbol.cl new file mode 100644 index 000000000..d9a423eb5 --- /dev/null +++ b/customized_tests/lexer/tryNoSymbol.cl @@ -0,0 +1,9 @@ +# +class Cons { + xcar : Int ; + xcdr : String ; + + isNill ( ) : Bool { + false + } ; +} \ No newline at end of file diff --git a/customized_tests/parser/missingcpar.cl b/customized_tests/parser/missingcpar.cl new file mode 100644 index 000000000..f208d50b1 --- /dev/null +++ b/customized_tests/parser/missingcpar.cl @@ -0,0 +1,15 @@ + +class Main { + main(): Object { + (new Alpha).print() + }; +}; + + +class Test { + testing8(x: Int, y: Int): Bool { + let z: Int <- 3, w: Int <- 4 + -- Missing ')' + in isvoid (3 + a * (x / w + new Int) - y - (((if tRue = not faLSe then ~z else 3 <= 4 + "hey".length() fi + a)/(0)*(((4 * 4))))) + }; +}; \ No newline at end of file diff --git a/customized_tests/parser/sum_if_then_else_objid.cl b/customized_tests/parser/sum_if_then_else_objid.cl new file mode 100644 index 000000000..32478666b --- /dev/null +++ b/customized_tests/parser/sum_if_then_else_objid.cl @@ -0,0 +1,101 @@ +(* Cool has four binary arithmetic operations: +, -, *, /. 
*) + +class Main { + main(): Object { + (new Alpha).print() + }; +}; + +class Test { + test1: Object; + + testing1(): Int { + 2 + 2 + }; + + test2: Int <- 1; + + test3: String <- "1"; + + testing2(a: Alpha, b: Int): Int { + 2 + 2 + }; + + testing3(): String { + "2 + 2" + }; + + testing4(x: Int, y: Int): Test { + self + }; + + testing5(a: String, b: String): IO { + If a.length() < b.length() THeN + new IO.out_string("La cadena \"".concat(b).concat("\" es mas larga que la cadena \"").concat(a).concat("\".")) + eLSe + if a.length() = b.length() THeN + new IO.out_string("La cadena \"".concat(a).concat("\" mide igual que la cadena \"").concat(b).concat("\".")) + ElsE + new IO.out_string("La cadena \"".concat(a).concat("\" es mas larga que la cadena \"").concat(b).concat("\".")) + fI + Fi + }; + + testing6(a: Int): IO { + let count: Int <- 0, pow: Int + in { + -- count <- 0; + pow <- 1; + while pow < a + loop + { + count <- count + 1; + pow <- pow * 2; + } + pool; + new IO.out_string("El logaritmo en base 2 de ").out_int(a).out_string(" es ").out_int(count); + } + }; + + testing7(): Object { + case 2 + 2 of + x: Int => new IO.out_string("Es un entero!"); + y: String => new IO.out_string("Es una cadena!"); + z: Bool => new IO.out_string("Es un booleano!"); + esac + }; + + a: Int <- 1; + + testing8(x: Int, y: Int): Bool { + let z: Int <- 3, w: Int <- 4 + -- Object identifiers starts with a lowercase letter + in isvoid (3 + a * (x / w + new Int) - y - (((if tRue = not faLSe then ~mazinger_Z else 3 <= 4 + "hey".length() fi) + a)/(0)*(((4 * 4))))) + }; +}; + +class Test2 { + test1: Test <- new Test; + + testing1(): Test { + test1.testing4(1 + 1, 1 + 2).testing4(2 + 3, 3 + 5).testing4(5 + 8, 8 + 13) + }; + + testing2(x: Int, y: Int): Test2 { + self + }; + + testing3(): Test2 { + testing2(1 + 1, 1 + 2).testing2(2 + 3, 3 + 5).testing2(5 + 8, true + fALSE) + }; + + testing4(): Object { + test1@Object.copy() + }; +}; + +class Alpha inherits IO { + print() : Object { + 
out_string("reached!!\n") + }; +}; \ No newline at end of file diff --git a/customized_tests/semantics/assignment.cl b/customized_tests/semantics/assignment.cl new file mode 100644 index 000000000..0575c98aa --- /dev/null +++ b/customized_tests/semantics/assignment.cl @@ -0,0 +1,12 @@ +--Attributes are local to the class in which they are defined or inherited. + +class A { + a: Int <- 5; + test(x1: Int, y1: Int): Int { + let x: Int <- x1, y: Int <-y1 in { + x <- "x + a"; + f <- y + a; + if b then x + y else x - y fi; + } + }; +}; \ No newline at end of file diff --git a/customized_tests/semantics/attrDecl.cl b/customized_tests/semantics/attrDecl.cl new file mode 100644 index 000000000..644d55381 --- /dev/null +++ b/customized_tests/semantics/attrDecl.cl @@ -0,0 +1,10 @@ +class A { }; +class B inherits A { }; +class C inherits B { }; +class D inherits B { }; + +class Main inherits IO { + test1: X <- new Main; + + main(): IO { out_string("Hello World!")}; +}; \ No newline at end of file diff --git a/customized_tests/semantics/attrDecl2.cl b/customized_tests/semantics/attrDecl2.cl new file mode 100644 index 000000000..7bb18965f --- /dev/null +++ b/customized_tests/semantics/attrDecl2.cl @@ -0,0 +1,10 @@ +class A { }; +class B inherits A { }; +class C inherits B { }; +class D inherits B { }; + +class Main inherits W { + test1: X <- new Main; + + main(): G { out_string("Hello World!")}; +}; \ No newline at end of file diff --git a/customized_tests/semantics/inheritance.cl b/customized_tests/semantics/inheritance.cl new file mode 100644 index 000000000..5b9033e6e --- /dev/null +++ b/customized_tests/semantics/inheritance.cl @@ -0,0 +1,10 @@ +class A inherits B{ }; +class B inherits C { }; +class C inherits A { }; +class D inherits B { }; + +class Main inherits A { + test1: B <- new Main; + + main(): C { out_string("Hello World!")}; +}; \ No newline at end of file diff --git a/customized_tests/semantics/methods.cl b/customized_tests/semantics/methods.cl new file mode 
100644 index 000000000..7376722f1 --- /dev/null +++ b/customized_tests/semantics/methods.cl @@ -0,0 +1,3 @@ +class A { + f(x: Int, y: Int): Int { x + y }; +}; \ No newline at end of file diff --git a/doc/Readme.md b/doc/Readme.md index 3b2569f5c..0f99e034c 100644 --- a/doc/Readme.md +++ b/doc/Readme.md @@ -1,33 +1,29 @@ # Documentación -## Readme - -Modifique el contenido de este documento para documentar de forma clara y concisa los siguientes aspectos: +En el archivo Report.md se explican los detalles de la implementación del proyecto. A continuación se pueden encontrar indicaciones para su uso. -- Cómo ejecutar (y compilar si es necesario) su compilador. -- Requisitos adicionales, dependencias, configuración, etc. -- Opciones adicionales que tenga su compilador. - -## Sobre los Equipos de Desarrollo +## Readme -Para desarrollar el compilador del lenguaje COOL se trabajará en equipos de 2 o 3 integrantes. El proyecto de Compilación será recogido y evaluado únicamente a través de Github. Es imprescindible tener una cuenta de Github para cada participante, y que su proyecto esté correctamente hosteado en esta plataforma. +Para ejecutar el compilador de Cool que aquí se presenta existen ciertos requerimientos a tener en cuenta. En el archivo *requirements.txt* que se encuentra en el directorio raíz del proyecto se encuentran listados los mismos. -**⚠️ NOTA**: Debe completar el archivo `team.yml` con los datos correctos de cada miembro de su equipo. +Para una rápida instalación de todas estas dependencias se debe ejecutar el siguiente comando: -## Sobre los Materiales a Entregar +```bash +$ pip install -r requirements.txt +``` -Para la evaluación del proyecto Ud. debe entregar un informe en formato PDF (`report.pdf`) en esta carpeta, que resuma de manera organizada y comprensible la arquitectura e implementación de su compilador. -El documento no tiene límite de extensión. 
-En él explicará en más detalle su solución a los problemas que, durante la implementación de cada una de las fases del proceso de compilación, hayan requerido de Ud. especial atención. +Una vez añadidas las mismas se puede proceder a compilar un código dado en Cool a ensamblador. Con este fin, ejecute: -## Estructura del reporte +```bash +$ ./coolc.sh <archivo.cl> +``` -Usted es libre de estructurar su reporte escrito como más conveniente le parezca. A continuación le sugerimos algunas secciones que no deberían faltar, aunque puede mezclar, renombrar y organizarlas de la manera que mejor le parezca: +en una consola centrada en el directorio *src* que se encuentra en la raíz del proyecto. Aquí `<archivo.cl>` sería un archivo escrito en Cool. -- **Uso del compilador**: detalles sobre las opciones de líneas de comando, si tiene opciones adicionales (e.j., `--ast` genera un AST en JSON, etc.). Básicamente lo mismo que pondrá en este Readme. -- **Arquitectura del compilador**: una explicación general de la arquitectura, en cuántos módulos se divide el proyecto, cuantas fases tiene, qué tipo de gramática se utiliza, y en general, como se organiza el proyecto. Una buena imagen siempre ayuda. -- **Problemas técnicos**: detalles sobre cualquier problema teórico o técnico interesante que haya necesitado resolver de forma particular. +El *output* esperado es un archivo con el mismo nombre pero en .mips, el cual debe correrse con el correspondiente intérprete: -## Sobre la Fecha de Entrega +```bash +$ spim -file <archivo.mips> +``` -Se realizarán recogidas parciales del proyecto a lo largo del curso. En el Canal de Telegram se anunciará la fecha y requisitos de cada entrega. +que finalmente nos mostrará el resultado correspondiente al programa de entrada. 
\ No newline at end of file diff --git a/doc/Report.md b/doc/Report.md new file mode 100644 index 000000000..7c89841c2 --- /dev/null +++ b/doc/Report.md @@ -0,0 +1,242 @@ +## Introducción + +COOL (Classroom Object-Oriented Language) es un lenguaje de programación orientado a +objetos que, aunque pequeño, tiene muchas características relevantes de lenguajes modernos. Su orientación a objetos, su tipado estático y el resto de sus features lo hacen muy atractivo e ideal para un primer acercamiento al mundo de los Compiladores. + +En el presente trabajo se muestra una implementación de un Compilador funcional para COOL en el lenguaje de programación Python; lo llamamos: _El Compi_. + +En las próximas secciones se explica en detalle cada una de las fases en las que se divide el trabajo del mismo, cómo fue abordada cada una, las instrucciones para su uso y algunos otros detalles importantes a resaltar. + +## Requerimientos e instrucciones para la ejecución + +Para utilizar _El Compi_ y ejecutar un programa en el lenguaje COOL se han de seguir los siguientes pasos: + +1- Primero se debe verificar que se cuente con todas las dependencias necesarias. Para una rápida instalación de estas se puede correr el comando: + +```bash +$ pip install -r requirements.txt +``` + +2- Para compilar un código dado en COOL a MIPS, ejecute: + +```bash +$ ./coolc.sh <archivo.cl> +``` + +en una consola centrada en el directorio _src_ que se encuentra en la raíz del proyecto. Aquí `<archivo.cl>` sería un archivo escrito en Cool. + +3- El _output_ esperado (en caso de no haberse detectado ningún error) es un archivo con el mismo nombre pero en .mips, el cual debe correrse con el correspondiente intérprete: + +```bash +$ spim -file <archivo.mips> +``` + +que finalmente nos mostrará el resultado generado por el programa de entrada (spim se encuentra entre los requirements especificados). 
+ +Si se encuentran errores en el código de Cool proporcionado, el compilador los listará en consola, proporcionando detalles sobre su localización y tipo. Para más información sobre los errores de Cool manejados remitirse al Readme.md de la carpeta _src_. + +## Arquitectura del compilador: + +_El Compi_, para tener la funcionalidad completa de un compilador, transita por las fases fundamentales de: + +- Análisis sintáctico (Análisis léxico y parsing) + +- Análisis semántico (Recolección, creación y chequeo de tipos) + +- Generación de código (Traducción a un lenguaje intermedio y generación del correspondiente código mips) + +Más adelante se analiza a profundidad cada una. + +El código fuente del proyecto se encuentra en la carpeta **src**. En esta se hallan distribuidos los scripts por módulos según su funcionalidad. + +En el módulo de **parsing** se halla definida nuestra gramática, y se brindan herramientas para trabajar con el lexer de **PLY** y el parser LR1 definido. El módulo **semantic** implementa los nodos del ast de **Cool** con que se trabaja y los recorridos para las definiciones de tipos y el chequeo semántico en general. En **code_gen** encontramos todo lo referente al proceso de generación de código, desde los nodos de **CIL** hasta los de **MIPS** y el pipeline para hacer dichas conversiones. Finalmente, en **cmp** se mantienen algunos útiles proporcionados por el claustro de Compilación, algunos adaptados a los requerimientos del proyecto actual. + +Analicemos ahora los prometidos detalles de implementación y diseño tan anunciados. + +## Gramática + +El primer paso para acercarnos al lenguaje objeto de análisis fue definir una gramática adecuada. Siguiendo lo referido por el manual de Cool (el cual se encuentra adjunto en la carpeta _doc_, con el resto de la documentación), se diseñó una gramática que respetara la precedencia necesaria de los operadores y la estructura requerida. 
En el archivo _cool_grammar.py_ puede observarse cómo fue modelada la misma. + +Como ahí se puede apreciar, un programa de Cool +consiste en una serie de definiciones de clases. Cada +clase a su vez posee un conjunto de atributos y de funciones. Las expresiones que pueden formar parte de +dichas funciones son el corazón del lenguaje. +En la imagen _1_ se pueden apreciar varios niveles intermedios de esta gramática, los cuales, precisamente, definen diferentes tipos de expresiones: + +1. **< comp >** , que representa las operaciones de comparación entre elementos. + +2. **< arith >** , que engloba a las operaciones de suma y + resta. + +3. **< term >** , para la multiplicación y división. + +4. **< factor >** , como representación de los operadores + unarios isvoid y opuesto. + +5. **< element >** para las condicionales (_if-then-else_, _while_ y _case_), la instanciación con _new_, las expresiones entre paréntesis, los block, los dispatch. + +6. **< atom >** como el nivel más básico, donde se + encuentran los números, ids, las expresiones + booleanas y los strings. + +![Figura 1.](../img/grammar.png "Fragmento de la gramática de Cool.") + +## Análisis sintáctico + +### Tokenizer + +Para tokenizar la entrada se utilizó una herramienta bastante útil y práctica: [PLY](https://www.dabeaz.com/ply/ply.html), la cual consiste en una implementación en python de las herramientas de parsing _Lex_ y _Yacc_. Mediante el módulo lex que esta provee, es posible acceder a un analizador léxico ya implementado. + +Para utilizarlo, se definieron una serie de reglas que orientaran al tokenizador cómo trabajar en las cadenas de entrada. En el archivo +token rules se pueden observar las reglas utilizadas, las cuales consisten fundamentalmente en definiciones de los patrones que sigue cada token deseado, con la ayuda de expresiones regulares. En este sentido, se +trabajó fundamentalmente con el módulo re de python, el cual permite definir dichas expresiones. 
+ +Nótese que los _keywords_ no requieren de una regla específica para su detección, listarlos es suficiente para que _lex_ los tenga en cuenta en su análisis, pues son frases que se toman _as is_. Sin embargo, cabe destacar que para ser capaces de detectarlos independientemente del uso o no de mayúsculas y no confundirlos con types o identificadores, en las reglas de estos dos últimos se verifica la posibilidad de que la frase matcheada pertenezca a la familia de palabras claves del lenguaje y se actúa acorde. + +Para la especificación de comentarios de más de una línea se aprovechó la facilidad de _lex_ de definir estados alternativos al _INITIAL_ o por defecto. Así fue posible asegurar que los símbolos de inicio `(*` y cierre `*)` estuvieran balanceados con reglas específicas al estado _Comments_. + +Es importante destacar que los tokens de _lex_ registran la posición que ocupan en el texto de entrada, considerando el mismo como un array de caracteres. Esto, con la ayuda de una regla para la detección de saltos de línea, nos permite tener bien identificada la fila y la columna de un carácter en el script inicial, lo cual es sumamente importante en futuras fases del compilador para ubicar y reportar los errores detectados. + +### Parser + +En cambio, para el parser, no fue la variante de _Yacc_ la que se decidió utilizar. En este caso, nos mantuvimos fieles a la implementación efectuada por el equipo en proyectos pasados, la cual se puede apreciar en el archivo `shift_reduce_parsers`. Este cuenta con las modificaciones pertinentes para adaptarse a los nuevos requerimientos, por ejemplo, para la detección de fila y columna se realiza ahora el parseo sobre tokens del lenguaje, en lugar de sobre simples lexemas. + +Con el uso del parser LR1 que aquí se provee y la gramática atributada de _cool_grammar.py_, es posible parsear un texto en cool y obtener un árbol correspondiente a una derivación de extrema derecha. 
+ +La construcción de este árbol o ast (_abstract syntax tree_) es la base del resto del análisis que se efectúa por el compilador. A lo largo de la ejecución del proyecto se utilizan variaciones de estos árboles, pero este primero que se menciona está formado por los nodos que se encuentran en el archivo _ast_nodes.py_ y no es más que una representación bastante general de la jerarquía que sigue el programa parseado. + +Cada nodo posee los elementos que lo caracterizan, por ejemplo el _ClassDeclarationNode_ cuenta con tokens como: el _id_ que representa el nombre de la clase, un _parent_ o tipo del que hereda (que puede ser _None_ en caso de no existir), y la lista de _features_, o sea de definiciones de los métodos y atributos que posee. Además se añade un _token_ que permita ubicar el inicio de la declaración en el código de entrada. + +![Ast Nodes.](../img/classdec.png "ClassDeclarationNode in cool ast") + +## Chequeo semántico: + +Una vez construido el ast con la sintaxis adecuada, la fase siguiente consiste en verificar que el programa en cuestión esté correcto semánticamente. + +Con este fin se realizan 3 recorridos sobre el árbol, apoyándonos en el patrón visitor propuesto: + +-TypeCollector: Cuyo objetivo es registrar los tipos definidos por el programa. Aquí sólo se lanza un error cuando se intenta redefinir un tipo, o sea cuando aparece su definición más de una vez en el script de entrada. + +-TypeBuilder: Recorrido que busca asignar los métodos y definiciones de atributos a sus clases correspondientes, y detectar errores relacionados con referencias a tipos inexistentes. En este caso es necesario notar que, como Cool permite la herencia, se debe asegurar en este recorrido que no existan ciclos entre las definiciones de clases. Además, para poder garantizar que no se redefinan métodos ni atributos, se asegura que en el momento de definición de un hijo ya se haya visitado al padre, de modo que se tenga constancia de los valores heredados para el análisis. 
En esta pasada se verifica también que se cumplan los requerimientos de la definición de una clase _Main_ con su método _main_ que no reciba parámetros, la cual funciona como punto de inicio de cualquier programa en Cool. + +-TypeChecker: En este último recorrido sí se visita la totalidad de nodos del ast creado, no sólo los correspondientes a definiciones de clases, métodos o atributos como en las pasadas anteriores. A medida que se recorre el árbol (con el contexto ya poblado con los tipos correspondientes al programa y sus propiedades), se va chequeando que se haga un uso correcto de tipos a lo largo de las expresiones utilizadas, que no se referencien variables o atributos inexistentes o fuera de scope, etc., reportando siempre los errores encontrados. + +Cada tipo con sus atributos y métodos se registra a partir de la clase _Context_. A su vez, para poder ubicar cada definición de variable en un contexto específico se utiliza la clase _Scope_ que nos permite ir registrando scopes anidados. + +Si tras terminar estos recorridos no se encuentran errores es posible afirmar que el código .cl de entrada describe un programa correcto de Cool, o al menos hasta que un runtime error indique lo contrario ;-). Para poder obtener la salida esperada de este, es necesario entonces pasar a la última fase del proceso de compilación. + +## Generación de código: + +El paso de Cool a Mips es demasiado complejo, por ello se divide el proceso de generación de código en dos fases: + +### Paso de Cool a CIL: + +Para la generación de código intermedio nos auxiliamos del lenguaje intermedio CIL, que cuenta con capacidades orientadas a objetos y nos va a permitir generar código MIPS de manera más sencilla. + +El ast de CIL se obtiene a partir de un recorrido por el ast de Cool, para el cual nos apoyamos, una vez más, en el patrón visitor. El objetivo de este recorrido es desenrollar cada expresión para garantizar que su traducción a MIPS genere una cantidad constante de código. 
+ +CIL tiene 3 secciones: + +- TYPES: contiene declaraciones de tipos. + +- DATA: contiene todas las cadenas de texto constantes que serán usadas durante el programa. + +- CODE: contiene todas las funciones que serán usadas durante el programa. + +La primera sección que se construye es la sección .TYPES con las declaraciones de los tipos que se van a usar en el programa. + +En CIL no existe el concepto de herencia; la forma +de asegurar que un tipo pueda acceder a sus métodos y atributos heredados es declarándolos explícitamente en su definición. Además, es necesario garantizar que el orden en que se definen los mismos en el padre se conserve en los descendientes. Para ello a la hora de definir un tipo A se declaran en orden los atributos y métodos correspondientes comenzando por los de su ancestro más lejano hasta llegar a su padre y a los propios. Nótese que se hace necesario guardar el tipo al que pertenece el atributo o método originalmente; a continuación se explica por qué. + +Dado un tipo A que hereda de B, ¿qué pasa con los atributos heredados cuando vamos a crear una instancia de A? ¿Cómo accedemos a la expresión con que se inicializa cada atributo si se declaró en otro tipo? Después de un breve análisis, salta a la luz que es necesario que los atributos tengan constructores. Entonces, inicializar un atributo heredado se traduce a asignarle el valor que devuelve el constructor del mismo :D. Para hacer el llamado a dicho constructor es necesario saber el tipo donde fue declarado el atributo originalmente, por eso se guarda en el proceso de construcción del tipo antes descrito. Lo mismo sucede con los métodos. + +La sección .DATA se llena a medida que se visitan cadenas de texto literales, además se añaden algunas otras que nos serán útiles más adelante. Por ejemplo, se guardan los nombres de cada tipo declarado para poder acceder a ellos y devolverlos en la función **type_name**. 
+ +Expliquemos entonces de qué va la sección .CODE, que no por última es menos importante. De manera general, está conformada por las funciones de COOL que se traducen a CIL. En el cuerpo de estas funciones se encuentra la traducción de las expresiones de COOL. Este proceso se hace más complejo para ciertos tipos de expresiones, donde un poquito más de creatividad es a veces necesaria. Analicemos una de estas. + +Las expresiones **case** son de la siguiente forma: + +``` +case < expr0 > of + < id1 > : < type1 > => < expr1 >; + . . . + < idn > : < typen > => < exprn >; +esac +``` + +Esta expresión se utiliza para hacer pruebas sobre el tipo de los objetos en tiempo de ejecución. Con ese fin, se evalúa **expr0** y se guarda su tipo dinámico **C**. Luego se selecciona la rama cuyo tipo **typek** es el más cercano a **C** entre los tipos con que **C** se conforma y se devuelve el valor del **exprk** correspondiente. + +El tipo dinámico **C** no se conoce hasta el momento de ejecución, que es cuando se evalúa la expresión, por tanto, la decisión de por qué rama se debe decantar el **case** no se puede tomar desde CIL. La solución consiste entonces en indicarle a MIPS los pasos que debe tomar en esta situación. ¿Cómo se hace esto? Se genera el código CIL para cualquiera de los posibles tipos dinámicos de **expr0**, que no son más que todos los tipos que heredan del tipo estático de **expr0**. + +### De CIL a MIPS: + +Para la generación de código MIPS se definió un visitor sobre el ast de CIL generado en el paso anterior, este visitor generará a su vez un ast de MIPS que representa las secciones .data y .text con sus instrucciones; donde cada nodo conoce su representación en MIPS. Posteriormente se visitará el nodo principal del ast de MIPS y se producirá el código que será ejecutado por el emulador de SPIM. 
+ + Al visitar el cil.Program se visitarán los nodos de la sección dottype, para representar en .data la tabla de métodos virtuales, para cuando se produzcan llamadas a métodos no estáticos. Por cada TypeNode se registra en .data un label con el nombre del tipo, .word como tipo de almacenamiento, y una serie de labels, cada una correspondiente a un método del tipo. + +```mips +Object : .word, Object_abort, Object_copy, Object_type_name +``` + + Para acceder a un método específico de un tipo se busca en la dirección de memoria dada por el label correspondiente a este, sumada con el índice correspondiente al método, multiplicado por 4; este índice está dado por el orden en que se declararon los métodos. Allí se hallará un puntero al método deseado. + +El siguiente paso es visitar la sección dotdata, para registrar los strings declarados en el código de COOL, de la siguiente forma: + +``` +string_1 : .asciiz, "Hello, World.\n" +``` + +Finalmente se visitarán los nodos de la sección dotcode, que corresponden a las instrucciones del programa. + +Cada uno de estos nodos es un FunctionNode, en cada uno se van generando nodos del ast siguiendo la siguiente línea: + +- Se reserva el espacio de las variables locales correspondientes a la función. + +- Se actualiza el frame pointer $fp con el stack pointer. + +- Se guarda la dirección de retorno $ra en la pila. + +- Se guarda el frame pointer anterior en la pila. + +- Se visitan las instrucciones de la función. + +- Se restaura el puntero al bloque remplazándolo con el que había sido almacenado. + +- Se restaura la dirección de retorno. + +- Se libera el espacio que había sido reservado en la pila. + + Siempre se conocerá el offset, con respecto a $fp, correspondiente a las variables locales y parámetros que se utilizan en el cuerpo de una función. + + Para realizar llamadas a funciones que reciban argumentos es obligatorio guardar los argumentos en la pila antes de llamar a la función. 
+ + El recorrido por las instrucciones no presenta gran complejidad, es simplemente traducir sencillas expresiones de CIL a expresiones de MIPS, sin embargo hay algunos casos interesantes que vale la pena destacar. + + - La reserva dinámica de memoria para instanciar tipos se realiza mediante Allocate, el compilador reservará un espacio de tamaño (CantidadAtributos + 1) * 4. En los primeros 4 bytes se guarda la dirección del tipo de la instancia, y las siguientes palabras están reservadas para los atributos. + + La representación de las instancias de tipos en memoria se estructuró así: + + | Tipo | Atributo 1 | Atributo 2 | .... | Atributo n | + | :-------: | :---------: | :---------: | :--: | :-----------: | + | Dirección | Dirección+4 | Dirección+8 | | Dirección+4*n | + + - Existen dos tipos de llamados a funciones, llamado estático y dinámico. + + El llamado estático es muy sencillo: es simplemente saltar al label dado mediante la instrucción de MIPS **jal** y, al retornar, liberar el espacio en la pila correspondiente a los argumentos pasados a la función. + + Por otro lado el llamado dinámico es más complejo, pues dada la instancia y el índice del método, se busca en la pila la instancia, se toma la posición 0 que corresponde a la dirección (d) de su tipo, y a partir de esta se obtiene la función que está en d + 4 * i. Luego se salta al label de la función y por último se libera el espacio en la pila correspondiente a los argumentos pasados. + + Muchos nodos son importantes, entre ellos los que corresponden a la entrada y salida, a las operaciones sobre cadenas, y operaciones lógicas y aritméticas; estos llevan más trabajo con la lógica de MIPS que no consideramos necesario abordar. 
+ + + + + + + + + + + + + diff --git a/doc/Report.pdf b/doc/Report.pdf new file mode 100644 index 000000000..908046ce2 Binary files /dev/null and b/doc/Report.pdf differ diff --git a/doc/team.yml b/doc/team.yml index c16162532..2639b83fc 100644 --- a/doc/team.yml +++ b/doc/team.yml @@ -1,10 +1,10 @@ members: - - name: Nombre Apellido1 Apellido2 - github: github_id - group: CXXX - - name: Nombre Apellido1 Apellido2 - github: github_id - group: CXXX - - name: Nombre Apellido1 Apellido2 - github: github_id - group: CXXX + - name: Amalia Ibarra Rodríguez + github: amaliaibarra + group: C412 + - name: Gabriela B. Martínez Giraldo + github: matcom-chacha + group: C412 + - name: Sandra Martos Llanes + github: smartos99 + group: C412 diff --git a/img/classdec.png b/img/classdec.png new file mode 100644 index 000000000..657c51373 Binary files /dev/null and b/img/classdec.png differ diff --git a/img/grammar.png b/img/grammar.png new file mode 100644 index 000000000..850b47892 Binary files /dev/null and b/img/grammar.png differ diff --git a/requirements.txt b/requirements.txt index 9eb0cad1a..9c1764f20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,6 @@ pytest pytest-ordering +typer +ply +ipython +nbformat \ No newline at end of file diff --git a/src/Readme.md b/src/Readme.md index 1200371b5..bf4934828 100644 --- a/src/Readme.md +++ b/src/Readme.md @@ -1,14 +1,7 @@ # COOL: Proyecto de Compilación - -La evaluación de la asignatura Complementos de Compilación, inscrita en el programa del 4to año de la Licenciatura en Ciencia de la Computación de la Facultad de Matemática y Computación de la -Universidad de La Habana, consiste este curso en la implementación de un compilador completamente -funcional para el lenguaje _COOL_. - -_COOL (Classroom Object-Oriented Language)_ es un pequeño lenguaje que puede ser implementado con un esfuerzo razonable en un semestre del curso. 
Aun así, _COOL_ mantiene muchas de las características de los lenguajes de programación modernos, incluyendo orientación a objetos, tipado estático y manejo automático de memoria. - ### Sobre el Lenguaje COOL -Ud. podrá encontrar la especificación formal del lenguaje COOL en el documento _"COOL Language Reference Manual"_, que se distribuye junto con el presente texto. +Ud. podrá encontrar la especificación formal del lenguaje COOL en el documento _"COOL Language Reference Manual"_, en la carpeta *doc* en el directorio raiz del proyecto. ## Código Fuente diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/cmp/__init__.py b/src/cmp/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/cmp/ast.py b/src/cmp/ast.py new file mode 100644 index 000000000..9f2d775f5 --- /dev/null +++ b/src/cmp/ast.py @@ -0,0 +1,70 @@ +import cmp.visitor as visitor + + +class Node: + def evaluate(self): + raise NotImplementedError() + + +class AtomicNode(Node): + def __init__(self, lex): + self.lex = lex + + +class UnaryNode(Node): + def __init__(self, node): + self.node = node + + def evaluate(self): + value = self.node.evaluate() + return self.operate(value) + + @staticmethod + def operate(value): + raise NotImplementedError() + + +class BinaryNode(Node): + def __init__(self, left, right): + self.left = left + self.right = right + + def evaluate(self): + lvalue = self.left.evaluate() + rvalue = self.right.evaluate() + return self.operate(lvalue, rvalue) + + @staticmethod + def operate(lvalue, rvalue): + raise NotImplementedError() + + +def get_printer( + AtomicNode=AtomicNode, + UnaryNode=UnaryNode, + BinaryNode=BinaryNode, +): + class PrintVisitor(object): + @visitor.on("node") + def visit(self, node, tabs): + pass + + @visitor.when(UnaryNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__ {node.__class__.__name__}" + child = self.visit(node.node, tabs + 1) + return f"{ans}\n{child}" + + 
@visitor.when(BinaryNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__ {node.__class__.__name__} " + left = self.visit(node.left, tabs + 1) + right = self.visit(node.right, tabs + 1) + return f"{ans}\n{left}\n{right}" + + @visitor.when(AtomicNode) + def visit(self, node, tabs=0): + return "\t" * tabs + f"\\__ {node.__class__.__name__}: {node.lex}" + + printer = PrintVisitor() + return lambda ast: printer.visit(ast) diff --git a/src/cmp/automata.py b/src/cmp/automata.py new file mode 100644 index 000000000..4d2619364 --- /dev/null +++ b/src/cmp/automata.py @@ -0,0 +1,224 @@ +try: + import pydot +except: + pass + + +class State: + def __init__(self, state, final=False, formatter=lambda x: str(x), shape="circle"): + self.state = state + self.final = final + self.transitions = {} + self.epsilon_transitions = set() + self.tag = None + self.formatter = formatter + self.shape = shape + + # The method name is set this way from compatibility issues. + def set_formatter(self, value, attr="formatter", visited=None): + if visited is None: + visited = set() + elif self in visited: + return + + visited.add(self) + self.__setattr__(attr, value) + for destinations in self.transitions.values(): + for node in destinations: + node.set_formatter(value, attr, visited) + for node in self.epsilon_transitions: + node.set_formatter(value, attr, visited) + return self + + def has_transition(self, symbol): + return symbol in self.transitions + + def add_transition(self, symbol, state): + try: + self.transitions[symbol].append(state) + except: + self.transitions[symbol] = [state] + return self + + def add_epsilon_transition(self, state): + self.epsilon_transitions.add(state) + return self + + def recognize(self, string): + states = self.epsilon_closure + for symbol in string: + states = self.move_by_state(symbol, *states) + states = self.epsilon_closure_by_state(*states) + return any(s.final for s in states) + + def to_deterministic(self, formatter=lambda x: str(x)): + 
closure = self.epsilon_closure + start = State(tuple(closure), any(s.final for s in closure), formatter) + + closures = [closure] + states = [start] + pending = [start] + + while pending: + state = pending.pop() + symbols = {symbol for s in state.state for symbol in s.transitions} + + for symbol in symbols: + move = self.move_by_state(symbol, *state.state) + closure = self.epsilon_closure_by_state(*move) + + if closure not in closures: + new_state = State( + tuple(closure), any(s.final for s in closure), formatter + ) + closures.append(closure) + states.append(new_state) + pending.append(new_state) + else: + index = closures.index(closure) + new_state = states[index] + + state.add_transition(symbol, new_state) + + return start + + @staticmethod + def from_nfa(nfa, get_states=False): + states = [] + for n in range(nfa.states): + state = State(n, n in nfa.finals) + states.append(state) + + for (origin, symbol), destinations in nfa.map.items(): + origin = states[origin] + origin[symbol] = [states[d] for d in destinations] + + if get_states: + return states[nfa.start], states + return states[nfa.start] + + @staticmethod + def move_by_state(symbol, *states): + return { + s for state in states if state.has_transition(symbol) for s in state[symbol] + } + + @staticmethod + def epsilon_closure_by_state(*states): + closure = {state for state in states} + + l = 0 + while l != len(closure): + l = len(closure) + tmp = [s for s in closure] + for s in tmp: + for epsilon_state in s.epsilon_transitions: + closure.add(epsilon_state) + return closure + + @property + def epsilon_closure(self): + return self.epsilon_closure_by_state(self) + + @property + def name(self): + return self.formatter(self.state) + + def get(self, symbol): + target = self.transitions[symbol] + assert len(target) == 1 + return target[0] + + def __getitem__(self, symbol): + if symbol == "": + return self.epsilon_transitions + try: + return self.transitions[symbol] + except KeyError: + return None + + def 
__setitem__(self, symbol, value): + if symbol == "": + self.epsilon_transitions = value + else: + self.transitions[symbol] = value + + def __repr__(self): + return str(self) + + def __str__(self): + return str(self.state) + + def __hash__(self): + return hash(self.state) + + def __iter__(self): + yield from self._visit() + + def _visit(self, visited=None): + if visited is None: + visited = set() + elif self in visited: + return + + visited.add(self) + yield self + + for destinations in self.transitions.values(): + for node in destinations: + yield from node._visit(visited) + for node in self.epsilon_transitions: + yield from node._visit(visited) + + def graph(self): + G = pydot.Dot(rankdir="LR", margin=0.1) + G.add_node(pydot.Node("start", shape="plaintext", label="", width=0, height=0)) + + visited = set() + + def visit(start): + ids = id(start) + if ids not in visited: + visited.add(ids) + G.add_node( + pydot.Node( + ids, + label=start.name, + shape=self.shape, + style="bold" if start.final else "", + ) + ) + for tran, destinations in start.transitions.items(): + for end in destinations: + visit(end) + G.add_edge( + pydot.Edge(ids, id(end), label=tran, labeldistance=2) + ) + for end in start.epsilon_transitions: + visit(end) + G.add_edge(pydot.Edge(ids, id(end), label="ε", labeldistance=2)) + + visit(self) + G.add_edge(pydot.Edge("start", id(self), label="", style="dashed")) + + return G + + def _repr_svg_(self): + try: + return self.graph().create_svg().decode("utf8") + except: + pass + + def write_to(self, fname): + return self.graph().write_svg(fname) + + +def multiline_formatter(state): + return "\n".join(str(item) for item in state) + + +def lr0_formatter(state): + try: + return "\n".join(str(item)[:-4] for item in state) + except TypeError: + return str(state)[:-4] diff --git a/src/cmp/cil.py b/src/cmp/cil.py new file mode 100644 index 000000000..a0e8a7bb0 --- /dev/null +++ b/src/cmp/cil.py @@ -0,0 +1,467 @@ +import cmp.visitor as visitor + + +class Node: + 
pass + + +class ProgramNode(Node): + def __init__(self, dottypes, dotdata, dotcode): + self.dottypes = dottypes + self.dotdata = dotdata + self.dotcode = dotcode + + +class TypeNode(Node): + def __init__(self, name): + self.name = name + self.attributes = [] + self.methods = [] + + +class DataNode(Node): + def __init__(self, vname, value): + self.name = vname + self.value = value + + +class FunctionNode(Node): + def __init__(self, fname, params, localvars, instructions): + self.name = fname + self.params = params + self.localvars = localvars + self.instructions = instructions + + +class ParamNode(Node): + def __init__(self, name): + self.name = name + + +class LocalNode(Node): + def __init__(self, name): + self.name = name + + +class InstructionNode(Node): + pass + + +class AssignNode(InstructionNode): + def __init__(self, dest, source): + self.dest = dest + self.source = source + + +class ArithmeticNode(InstructionNode): + def __init__( + self, + dest, + left, + right, + ): + self.dest = dest + self.left = left + self.right = right + + +class PlusNode(ArithmeticNode): + pass + + +class MinusNode(ArithmeticNode): + pass + + +class StarNode(ArithmeticNode): + pass + + +class DivNode(ArithmeticNode): + pass + + +class LessNode(ArithmeticNode): + pass + + +class LessEqualNode(ArithmeticNode): + pass + + +class EqualNode(ArithmeticNode): + pass + + +class StrEqualNode(ArithmeticNode): + pass + + +class UnaryNode(InstructionNode): + def __init__(self, dest, expr): + self.dest = dest + self.expr = expr + + +class NotNode(UnaryNode): + pass + + +class IntComplementNode(UnaryNode): + def __init__(self, dest, source): + self.source = source + self.dest = dest + + +class GetAttribNode(InstructionNode): + def __init__(self, dest, instance, attr, typex): + self.dest = dest + self.instance = instance + self.attr = attr + self.type = typex + + +class SetAttribNode(InstructionNode): + def __init__(self, instance, attr, value, typex): + self.instance = instance + self.value = 
value + self.attr = attr + self.type = typex + + +class AllocateNode(InstructionNode): + def __init__(self, itype, dest): + self.type = itype + self.dest = dest + + +class TypeOfNode(InstructionNode): + def __init__(self, obj, dest, flag=False, typex=None): + self.obj = obj + self.dest = dest + self.flag = flag + self.type = typex + + +class LabelNode(InstructionNode): + def __init__(self, name): + self.name = name + + +class GotoNode(InstructionNode): + def __init__(self, label): + self.label = label + + +class GotoIfNode(InstructionNode): + def __init__(self, condition, label): + self.condition = condition + self.label = label + + +class StaticCallNode(InstructionNode): + def __init__(self, function, dest): + self.function = function + self.dest = dest + + +class DynamicCallNode(InstructionNode): + def __init__(self, instance_type, method_index, dest): + self.instance_type = instance_type + self.method_index = method_index + self.dest = dest + + +class ArgNode(InstructionNode): + def __init__(self, name): + self.name = name + + +class ReturnNode(InstructionNode): + def __init__(self, value=None): + self.value = value + + +class LoadNode(InstructionNode): + def __init__(self, dest, msg): + self.dest = dest + self.msg = msg + + +class LengthNode(InstructionNode): + def __init__(self, dest, source): + self.dest = dest + self.source = source + + +class ConcatNode(InstructionNode): + def __init__(self, dest, left, right): + self.dest = dest + self.left = left + self.right = right + + +class SubstringNode(InstructionNode): + def __init__(self, dest, source, index, length): + self.dest = dest + self.source = source + self.index = index + self.length = length + + +class ReadStringNode(InstructionNode): + def __init__(self, dest): + self.dest = dest + + +class ReadIntNode(InstructionNode): + def __init__(self, dest): + self.dest = dest + + +class RuntimeErrorNode(InstructionNode): + def __init__(self, msg): + self.msg = msg + + +class CopyNode(InstructionNode): + def 
__init__(self, dest, source): + self.dest = dest + self.source = source + + +class PrintStrNode(InstructionNode): + def __init__(self, str_addr): + self.str_addr = str_addr + + +class PrintIntNode(InstructionNode): + def __init__(self, int_addr): + self.int_addr = int_addr + + +class TypeNameNode(InstructionNode): + def __init__(self, dest, source): + self.dest = dest + self.source = source + + +class DefaultValueNode(InstructionNode): + def __init__(self, dest, typex): + self.dest = dest + self.type = typex + + +class IsVoidNode(InstructionNode): + def __init__(self, dest, value): + self.dest = dest + self.value = value + + +class CompareTypes(InstructionNode): + def __init__(self, dest, typeof, typex: str): + self.dest = dest + self.typeof = typeof + self.type = typex + + +class ExitNode(InstructionNode): + def __init__(self): + pass + + +class PrintVisitor(object): + @visitor.on("node") + def visit(self, node): + pass + + @visitor.when(ProgramNode) + def visit(self, node): + dottypes = "\n".join(self.visit(t) for t in node.dottypes) + dotdata = "\n".join(self.visit(t) for t in node.dotdata) + dotcode = "\n".join(self.visit(t) for t in node.dotcode) + + return f".TYPES\n{dottypes}\n\n.DATA\n{dotdata}\n\n.CODE\n{dotcode}" + + @visitor.when(TypeNode) + def visit(self, node): + attributes = "\n\t".join(f"attribute {x}" for x in node.attributes) + methods = "\n\t".join(f"method {x}: {y}" for x, y in node.methods) + + return f"type {node.name} {{\n\t{attributes}\n\n\t{methods}\n}}" + + @visitor.when(FunctionNode) + def visit(self, node): + params = "\n\t".join(self.visit(x) for x in node.params) + localvars = "\n\t".join(self.visit(x) for x in node.localvars) + instructions = "\n\t".join(self.visit(x) for x in node.instructions) + + return f"function {node.name} {{\n\t{params}\n\n\t{localvars}\n\n\t{instructions}\n}}" + + @visitor.when(DataNode) + def visit(self, node): + return f'{node.name} = "{node.value}"' + + @visitor.when(ParamNode) + def visit(self, node): + 
return f"PARAM {node.name}" + + @visitor.when(LocalNode) + def visit(self, node): + return f"LOCAL {node.name}" + + @visitor.when(AssignNode) + def visit(self, node): + return f"{node.dest} <- {node.source}" + + @visitor.when(PlusNode) + def visit(self, node): + return f"{node.dest} = {node.left} + {node.right}" + + @visitor.when(MinusNode) + def visit(self, node): + return f"{node.dest} = {node.left} - {node.right}" + + @visitor.when(StarNode) + def visit(self, node): + return f"{node.dest} = {node.left} * {node.right}" + + @visitor.when(DivNode) + def visit(self, node): + return f"{node.dest} = {node.left} / {node.right}" + + @visitor.when(LessNode) + def visit(self, node): + return f"{node.dest} = {node.left} < {node.right}" + + @visitor.when(LessEqualNode) + def visit(self, node): + return f"{node.dest} = {node.left} <= {node.right}" + + @visitor.when(EqualNode) + def visit(self, node): + return f"{node.dest} = {node.left} == {node.right}" + + @visitor.when(StrEqualNode) + def visit(self, node): + return f"{node.dest} = {node.left} == {node.right}" + + @visitor.when(NotNode) + def visit(self, node): + return f"{node.dest} = NOT {node.expr}" + + @visitor.when(IntComplementNode) + def visit(self, node): + return f"{node.dest} = ~ {node.source}" + + @visitor.when(LabelNode) + def visit(self, node): + return f"LABEL {node.name}" + + @visitor.when(GotoNode) + def visit(self, node): + return f"GOTO {node.label}" + + @visitor.when(GotoIfNode) + def visit(self, node): + return f"IF {node.condition} GOTO {node.label}" + + @visitor.when(AllocateNode) + def visit(self, node): + return f"{node.dest} = ALLOCATE {node.type}" + + @visitor.when(SetAttribNode) + def visit(self, node): + return f" SETATTR {node.instance} {node.attr} {node.value}" + + @visitor.when(GetAttribNode) + def visit(self, node): + return f" {node.dest} = GETATTR {node.instance} {node.attr}" + + @visitor.when(TypeOfNode) + def visit(self, node): + return f"{node.dest} = TYPEOF {node.obj}" + + 
@visitor.when(StaticCallNode) + def visit(self, node): + return f"{node.dest} = CALL {node.function}" + + @visitor.when(DynamicCallNode) + def visit(self, node): + return f"{node.dest} = VCALL {node.instance_type} {node.method_index}" + + @visitor.when(ArgNode) + def visit(self, node): + return f"ARG {node.name}" + + @visitor.when(ReturnNode) + def visit(self, node): + return f'RETURN {node.value if node.value is not None else ""}' + + @visitor.when(RuntimeErrorNode) + def visit(self, node): + return f"ABORT {node.msg}" + + @visitor.when(CopyNode) + def visit(self, node): + return f"{node.dest} = COPY {node.source}" + + @visitor.when(TypeNameNode) + def visit(self, node): + return f"{node.dest} = TYPE_NAME {node.source}" + + # @visitor.when(ToStrNode) + # def visit(self, node): + # return f"{node.dest} = STR {node.ivalue}" + + @visitor.when(ReadStringNode) + def visit(self, node): + return f"{node.dest} = READ STR" + + @visitor.when(ReadIntNode) + def visit(self, node): + return f"{node.dest} = READ INT" + + @visitor.when(PrintStrNode) + def visit(self, node): + return f"PRINT STR{node.str_addr}" + + @visitor.when(PrintIntNode) + def visit(self, node): + return f"PRINT INT {node.int_addr}" + + @visitor.when(LengthNode) + def visit(self, node): + return f"{node.dest} = LENGTH {node.source}" + + @visitor.when(LoadNode) + def visit(self, node): + return f"{node.dest} = LOAD {node.msg.name}" + + @visitor.when(ConcatNode) + def visit(self, node): + return f"{node.dest} = CONCAT {node.left} {node.right}" + + @visitor.when(SubstringNode) + def visit(self, node): + return f"{node.dest} = SUBSTRING {node.source} {node.index} {node.length}" + + @visitor.when(DefaultValueNode) + def visit(self, node): + return f"{node.dest} = DEFAULT {node.type}" + + @visitor.when(IsVoidNode) + def visit(self, node): + return f"{node.dest} = ISVOID {node.value}" + + @visitor.when(CompareTypes) + def visit(self, node): + return f"{node.dest} = {node.typeof} TYPE_EQUALS {node.type}" + + 
@visitor.when(ExitNode) + def visit(self, node): + return f"EXIT" + + +# printer = PrintVisitor() +# return lambda ast: printer.visit(ast) diff --git a/src/cmp/errors.py b/src/cmp/errors.py new file mode 100644 index 000000000..5c62b2733 --- /dev/null +++ b/src/cmp/errors.py @@ -0,0 +1,224 @@ +class CoolCompilerError(object): + """ + Errors reported by the compiler + """ + + def __init__(self, line, column, type, msg) -> None: + self.line = line + self.column = column + self.type = type + self.msg = msg + + def __str__(self) -> str: + return f"({self.line}, {self.column}) - {self.type}: {self.msg}" + + +# Compiler errors + + +class CompilerError(CoolCompilerError): + """ + Error reported when an anomaly in the compiler's input is detected. + For example, if the input file doesn't exist. + """ + + def __init__(self, msg) -> None: + CoolCompilerError.__init__(self, 0, 0, "CompilerError", msg) + + +class InvalidInputFileError(CompilerError): + """ + Reported when input file is invalid. + """ + + def __init__(self, path: str) -> None: + CompilerError.__init__(self, f"File `{path}` is not a valid file.") + + +# Lexicographic errors + + +class LexicographicError(CoolCompilerError): + """ + Error reported by lexer + """ + + def __init__(self, line: int, column: int, msg: str) -> None: + CoolCompilerError.__init__(self, line, column, "LexicographicError", msg) + + +class UnexpectedCharError(LexicographicError): + """ + Reported the lexer encounters an unexpected character. + """ + + def __init__(self, line: int, column: int, char: str) -> None: + LexicographicError.__init__(self, line, column, f"Unexpected `{char}`.") + + +# Syntactic errors + + +class SyntacticError(CoolCompilerError): + """ + Error reported by parser + """ + + def __init__(self, line: int, column: int, msg: str) -> None: + CoolCompilerError.__init__(self, line, column, "SyntacticError", msg) + + +class UnexpectedTokenError(SyntacticError): + """ + Reported the parser encounters an unexpected Token. 
+ """ + + def __init__(self, line: int, column: int, token: str) -> None: + SyntacticError.__init__( + self, + line, + column, + f"Unexpected token `{token}`.", + ) + + +class UnexpectedEOFError(SyntacticError): + """ + Reported the parser encounters end of file unexpectedly. + """ + + def __init__(self) -> None: + SyntacticError.__init__(self, 0, 0, "Unexpected EOF.") + + +# Semantic errors + + +class NameError(CoolCompilerError): + """ + Error reported when an identifier is referenced in a scope where it is not visible. + """ + + def __init__(self, line: int, column: int, msg: str) -> None: + CoolCompilerError.__init__(self, line, column, "NameError", msg) + + +class TypeError(CoolCompilerError): + """ + Error reported when a problem with types is detected. Includes: + - type incompatibility between `rvalue` and `lvalue`, + - undefined operation between objects of certain types + - referenced type is undefined + """ + + def __init__(self, line: int, column: int, msg: str) -> None: + CoolCompilerError.__init__(self, line, column, "TypeError", msg) + + +class AttributeError(CoolCompilerError): + """ + Error reported when an attribute or method is referenced but undefined. + """ + + def __init__(self, line: int, column: int, msg: str) -> None: + CoolCompilerError.__init__(self, line, column, "AttributeError", msg) + + +class SemanticError(CoolCompilerError): + """ + Any kind of semantic error (like class redefinition) + """ + + def __init__(self, line, column, msg) -> None: + CoolCompilerError.__init__(self, line, column, "SemanticError", msg) + + +class IncompatibleTypesError(SemanticError): + def __init__(self, line, column, type_a, type_b) -> None: + SemanticError.__init__( + self, line, column, f"Cannot convert {type_a} into {type_b}." 
+ ) + + +class WrongSignatureError(SemanticError): + def __init__(self, line, column, method) -> None: + SemanticError.__init__( + self, + line, + column, + f"Method {method} already defined with a different signature.", + ) + + +# ---------------------------------------------- +class Error(Exception): + "Base class for exceptions" + pass + + +class tokenizer_error(Error): + "raised when tokenizer got unespected sequences of characters" + + def __init__(self, text, line): + Error.__init__( + self, + f"Got {text} while analizing line {line}", + ) + + +class parsing_table_error(Error): + "raised when T[X,t] possess more than one production" + + def __init__(self, production1, production2, invalid_sentence): + Error.__init__( + self, + f"conflict betweeen {production1} and {production2}, invalid sentence: {invalid_sentence}", + ) + + +class shift_reduce_error(Error): + "raised when goto or action table in shift reduce parsers possess more than one production" + + def __init__(self, action1, action2, grammar, key=None): + if action1[0] == action2[0] == "REDUCE": + conflict = "Reduce-Reduce" + else: + conflict = "Shift-Reduce" + + Error.__init__( + self, + f"When analizing {key}, {conflict} conflict!!! betweeen {action1} and {action2}. Grammar given is not {grammar}", + ) + + +class invalid_sentence_error(Error): + "raised when w is not in G" + + def __init__( + self, + w, + pos, + actual_token, + expected_token=None, + message="", + output=None, + operations=None, + ): + if expected_token != None: + Error.__init__( + self, + f"Invalid sentence {w}. Expected {expected_token} at position {pos} but received {actual_token} instead. {message}", + ) + else: + Error.__init__( + self, + f"Unexpected token {actual_token} at position {pos}. Invalid sentence {w}. {message}. Secuencia de derivaciones: {output}. 
Operaciones: {operations}", + ) + + +class non_regular_production_error(Error): + def __init__(self, production): + Error.__init__( + self, + f"production {production} most be of the form: A -> a, A -> e or A -> aX", + ) diff --git a/src/cmp/evaluation.py b/src/cmp/evaluation.py new file mode 100644 index 000000000..269ff5225 --- /dev/null +++ b/src/cmp/evaluation.py @@ -0,0 +1,35 @@ +from cmp.pycompiler import EOF +from parsing.shift_reduce_parsers import ShiftReduceParser + + +def evaluate_reverse_parse(right_parse, operations, tokens): + if not right_parse or not operations or not tokens: + return + + right_parse = iter(right_parse) + tokens = iter(tokens) + stack = [] + for operation in operations: + if operation == ShiftReduceParser.SHIFT: + token = next(tokens) + stack.append(token) + elif operation == ShiftReduceParser.REDUCE: + production = next(right_parse) + head, body = production + attributes = production.attributes + assert all( + rule is None for rule in attributes[1:] + ), "There must be only synteticed attributes." + rule = attributes[0] + + if len(body): + synteticed = [None] + stack[-len(body) :] + value = rule(None, synteticed) + stack[-len(body) :] = [value] + else: + stack.append(rule(None, None)) + else: + raise Exception("Invalid action!!!") + assert len(stack) == 1 + assert isinstance(next(tokens).token_type, EOF) + return stack[0] diff --git a/src/cmp/nbpackage.py b/src/cmp/nbpackage.py new file mode 100644 index 000000000..5458e63b9 --- /dev/null +++ b/src/cmp/nbpackage.py @@ -0,0 +1,87 @@ +import io, os, sys, types + +from IPython import get_ipython +from nbformat import read +from IPython.core.interactiveshell import InteractiveShell + +def find_notebook(fullname, path=None): + """find a notebook, given its fully qualified name and an optional path + + This turns "foo.bar" into "foo/bar.ipynb" + and tries turning "Foo_Bar" into "Foo Bar" if Foo_Bar + does not exist. 
class NotebookFinder(object):
    """Meta-path finder that resolves imports to Jupyter notebooks.

    One ``NotebookLoader`` is created per distinct search path and cached in
    ``self.loaders``.

    Both finder protocols are implemented: the legacy ``find_module`` hook
    (kept for existing callers) and ``find_spec``, which is the only hook the
    import system still consults on Python 3.12 and later.
    """

    def __init__(self):
        # Maps a flattened search path (or None) to its NotebookLoader.
        self.loaders = {}

    def find_module(self, fullname, path=None):
        """Return the loader for ``fullname`` or ``None`` if no notebook matches."""
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None

        # Lists aren't hashable, so the search path is flattened into a
        # string key; a missing or empty path shares the ``None`` key.
        key = os.path.sep.join(path) if path else None

        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

    def find_spec(self, fullname, path=None, target=None):
        """Return a module spec wrapping the notebook loader, or ``None``.

        ``target`` is accepted for protocol compatibility and is not used.
        """
        loader = self.find_module(fullname, path)
        if loader is None:
            return None

        # Imported lazily so the block does not depend on a new file-level import.
        from importlib.util import spec_from_loader

        return spec_from_loader(fullname, loader)
class Symbol(object):
    """Base class of the grammar symbols.

    ``+`` concatenates symbols into a ``Sentence`` and ``|`` builds a
    ``SentenceList`` of alternatives, so productions can be written as
    expressions.

    Attributes are ``Name`` (the text printed for the symbol) and ``Grammar``
    (the grammar object passed at construction).
    """

    def __init__(self, name, grammar):
        self.Name = name
        self.Grammar = grammar

    def __str__(self):
        return self.Name

    def __repr__(self):
        return repr(self.Name)

    def __add__(self, other):
        """Concatenate this symbol with another symbol or a sentence.

        Raises:
            TypeError: if ``other`` is neither a ``Symbol`` nor a ``Sentence``.
        """
        # Symbol is tested first so an operand that is both a Symbol and a
        # Sentence keeps going through the Sentence constructor, which drops
        # epsilon symbols.
        if isinstance(other, Symbol):
            return Sentence(self, other)

        # Mirrors Sentence.__add__(Symbol): prepend this symbol to the sentence.
        if isinstance(other, Sentence):
            return Sentence(self, *other)

        raise TypeError(other)

    def __or__(self, other):
        """Build the alternatives ``self | other``.

        Raises:
            TypeError: if ``other`` is neither a ``Sentence`` nor a ``Symbol``.
        """
        # Sentence is tested first so an operand that is both a Sentence and
        # a Symbol is stored unchanged as an alternative.
        if isinstance(other, Sentence):
            return SentenceList(Sentence(self), other)

        # Mirrors Sentence.__or__(Symbol): a bare symbol is a one-symbol sentence.
        if isinstance(other, Symbol):
            return SentenceList(Sentence(self), Sentence(other))

        raise TypeError(other)

    @property
    def IsEpsilon(self):
        return False

    def __len__(self):
        return 1
class Sentence(object):
    """Immutable sequence of grammar symbols (the body of a production).

    Symbols whose ``IsEpsilon`` is true are dropped at construction. The hash
    is computed once from the stored tuple, so sentences can be used as
    dictionary keys and set members.
    """

    def __init__(self, *args):
        self._symbols = tuple(x for x in args if not x.IsEpsilon)
        self.hash = hash(self._symbols)

    def __len__(self):
        return len(self._symbols)

    def __add__(self, other):
        """Concatenate with a symbol or another sentence.

        Raises:
            TypeError: if ``other`` is neither a ``Symbol`` nor a ``Sentence``.
        """
        # Symbol must be tested first: an operand that is both a Symbol and a
        # Sentence is then filtered by the constructor instead of having its
        # ``_symbols`` read.
        if isinstance(other, Symbol):
            return Sentence(*(self._symbols + (other,)))

        if isinstance(other, Sentence):
            return Sentence(*(self._symbols + other._symbols))

        raise TypeError(other)

    def __or__(self, other):
        """Build the alternatives ``self | other``.

        Raises:
            TypeError: if ``other`` is neither a ``Sentence`` nor a ``Symbol``.
        """
        if isinstance(other, Sentence):
            return SentenceList(self, other)

        if isinstance(other, Symbol):
            return SentenceList(self, Sentence(other))

        raise TypeError(other)

    def __repr__(self):
        return str(self)

    def __str__(self):
        return " ".join(str(symbol) for symbol in self._symbols).strip()

    def __iter__(self):
        return iter(self._symbols)

    def __getitem__(self, index):
        # Slices come back as plain tuples of symbols.
        return self._symbols[index]

    def __eq__(self, other):
        # Operands without a symbol tuple (None, strings, ...) are "not
        # comparable" rather than an AttributeError, so ``==``, ``!=`` and
        # ``in`` work against arbitrary objects.
        other_symbols = getattr(other, "_symbols", None)
        if other_symbols is None:
            return NotImplemented
        return self._symbols == other_symbols

    def __hash__(self):
        return self.hash

    @property
    def IsEpsilon(self):
        return False
class Production(object):
    """A grammar rule: a left-hand side (``Left``) and a right-hand side (``Right``).

    Iterating a production yields ``Left`` then ``Right``, so it can be
    unpacked as ``head, body = production``. Equality and hashing are based
    on that same pair.
    """

    def __init__(self, nonTerminal, sentence):
        self.Left, self.Right = nonTerminal, sentence

    def __str__(self):
        sides = (self.Left, self.Right)
        return '%s := %s' % sides

    def __repr__(self):
        sides = (self.Left, self.Right)
        return '%s -> %s' % sides

    def __iter__(self):
        yield from (self.Left, self.Right)

    def __eq__(self, other):
        if not isinstance(other, Production):
            return False
        return self.Left == other.Left and self.Right == other.Right

    def __hash__(self):
        key = (self.Left, self.Right)
        return hash(key)

    @property
    def IsEpsilon(self):
        # Delegates to the right-hand side.
        body = self.Right
        return body.IsEpsilon
NonTerminal(name,self) + + if startSymbol: + + if self.startSymbol is None: + self.startSymbol = term + else: + raise Exception("Cannot define more than one start symbol.") + + self.nonTerminals.append(term) + self.symbDict[name] = term + return term + + def NonTerminals(self, names): + + ans = tuple((self.NonTerminal(x) for x in names.strip().split())) + + return ans + + + def Add_Production(self, production): + + if len(self.Productions) == 0: + self.pType = type(production) + + assert type(production) == self.pType, "The Productions most be of only 1 type." + + production.Left.productions.append(production) + self.Productions.append(production) + + + def Terminal(self, name): + + name = name.strip() + if not name: + raise Exception("Empty name") + + term = Terminal(name, self) + self.terminals.append(term) + self.symbDict[name] = term + return term + + def Terminals(self, names): + + ans = tuple((self.Terminal(x) for x in names.strip().split())) + + return ans + + + def __str__(self): + + mul = '%s, ' + + ans = 'Non-Terminals:\n\t' + + nonterminals = mul * (len(self.nonTerminals)-1) + '%s\n' + + ans += nonterminals % tuple(self.nonTerminals) + + ans += 'Terminals:\n\t' + + terminals = mul * (len(self.terminals)-1) + '%s\n' + + ans += terminals % tuple(self.terminals) + + ans += 'Productions:\n\t' + + ans += str(self.Productions) + + return ans + + def __getitem__(self, name): + try: + return self.symbDict[name] + except KeyError: + return None + + @property + def to_json(self): + + productions = [] + + for p in self.Productions: + head = p.Left.Name + + body = [] + + for s in p.Right: + body.append(s.Name) + + productions.append({'Head':head, 'Body':body}) + + d={'NonTerminals':[symb.Name for symb in self.nonTerminals], 'Terminals': [symb.Name for symb in self.terminals],\ + 'Productions':productions} + + # [{'Head':p.Left.Name, "Body": [s.Name for s in p.Right]} for p in self.Productions] + return json.dumps(d) + + @staticmethod + def from_json(data): + data = 
class Item:
    """An LR item: a production, a dot position and a set of lookaheads.

    ``pos`` is the index in ``production.Right`` of the symbol right after
    the dot. ``lookaheads`` is stored as a ``frozenset``, so items are
    hashable and two items differing only in lookahead order compare equal.
    """

    def __init__(self, production, pos, lookaheads=()):
        self.production = production
        self.pos = pos
        self.lookaheads = frozenset(lookaheads)

    def __str__(self):
        right = self.production.Right
        pieces = [str(self.production.Left), " -> "]
        for i, symbol in enumerate(right):
            if i == self.pos:
                pieces.append(".")
            pieces.append(str(symbol))
        # The dot closes the body when every symbol was consumed; an empty
        # body is always rendered as a single dot.
        if len(right) == 0 or self.pos == len(right):
            pieces.append(".")

        # Rendered as "{a, b}", or as nothing at all when there are no
        # lookaheads.
        if self.lookaheads:
            looks = "{" + ", ".join(repr(look) for look in self.lookaheads) + "}"
        else:
            looks = ""
        pieces.append(", " + looks)
        return "".join(pieces)

    def __repr__(self):
        return str(self)

    def __eq__(self, other):
        # Anything that is not an Item is "not comparable" instead of an
        # AttributeError on the missing fields.
        if not isinstance(other, Item):
            return NotImplemented
        return (
            self.pos == other.pos
            and self.production == other.production
            and self.lookaheads == other.lookaheads
        )

    def __hash__(self):
        return hash((self.production, self.pos, self.lookaheads))

    @property
    def IsReduceItem(self):
        """True when the dot is at the end of the production body."""
        return len(self.production.Right) == self.pos

    @property
    def NextSymbol(self):
        """Symbol right after the dot, or ``None`` for a reduce item."""
        if self.pos < len(self.production.Right):
            return self.production.Right[self.pos]
        return None

    def NextItem(self):
        """Item with the dot moved one symbol forward, or ``None`` at the end."""
        if self.pos < len(self.production.Right):
            return Item(self.production, self.pos + 1, self.lookaheads)
        return None

    def Preview(self, skip=1):
        """Return, per lookahead, the symbols ``skip`` places past the dot plus that lookahead.

        Relies on slicing ``production.Right`` yielding a tuple.
        """
        unseen = self.production.Right[self.pos + skip:]
        return [unseen + (lookahead,) for lookahead in self.lookaheads]

    def Center(self):
        """Return the same item without lookaheads."""
        return Item(self.production, self.pos)
class Type:
    """Semantic description of a class: its name, its members and its parent.

    ``attributes`` and ``methods`` hold only the members declared directly on
    this type; inherited members are reached by walking ``parent``.
    """

    def __init__(self, name: str):
        self.name = name
        self.attributes = []
        self.methods = []
        self.parent = None

    def set_parent(self, parent):
        """Record ``parent`` as the direct ancestor of this type.

        Raises:
            SemanticError: if a parent was already set, or if ``parent`` is
                named ``String``, ``Bool`` or ``Int``.
        """
        if self.parent is not None:
            raise SemanticError(f"Parent type is already set for {self.name}.")
        if parent.name in ("String", "Bool", "Int"):
            raise SemanticError(f"Is not possible to inherit from {parent.name}")
        self.parent = parent

    def get_attribute(self, name: str, index: int, visited=None):
        """Look up attribute ``name`` on this type or on an ancestor.

        Args:
            name: attribute name.
            index: depth assigned to this type; each parent hop adds one.
            visited: names of the types already inspected. It stops the walk
                when the parent chain loops back on itself.

        Returns:
            ``(attribute, depth)`` for the first match found.

        Raises:
            SemanticError: if no type in the chain declares the attribute.
                The message names the type the lookup was started on.
        """
        if visited is None:
            visited = []
        for attr in self.attributes:
            if attr.name == name:
                return attr, index

        visited.append(self.name)
        not_found = f'Attribute "{name}" is not defined in {self.name}.'
        if self.parent is None or self.parent.name in visited:
            raise SemanticError(not_found)
        try:
            return self.parent.get_attribute(name, index + 1, visited=visited)
        except SemanticError:
            # Re-raised so the message refers to this type, not the ancestor.
            raise SemanticError(not_found)

    def define_attribute(self, name: str, typex):
        """Declare a new attribute on this type and return it.

        Raises:
            SemanticError: if the attribute already exists here or in an
                ancestor.
        """
        try:
            _, depth = self.get_attribute(name, 0)
        except SemanticError:
            attribute = Attribute(name, typex)
            self.attributes.append(attribute)
            return attribute

        mssg = "an inherited class" if depth > 0 else self.name
        raise SemanticError(f'Attribute "{name}" is already defined in {mssg}.')

    def get_method(self, name: str, non_rec=False, visited=None):
        """Look up method ``name`` on this type or on an ancestor.

        Args:
            name: method name.
            non_rec: when true, only this type is searched.
            visited: names of the types already inspected (cycle guard).

        Raises:
            SemanticError: if the method is not found.
        """
        if visited is None:
            visited = []
        for method in self.methods:
            if method.name == name:
                return method

        visited.append(self.name)
        not_found = f'Method "{name}" is not defined in {self.name}.'
        if non_rec or self.parent is None or self.parent.name in visited:
            raise SemanticError(not_found)
        try:
            return self.parent.get_method(name, visited=visited)
        except SemanticError:
            # Re-raised so the message refers to this type, not the ancestor.
            raise SemanticError(not_found)

    def define_method(
        self, name: str, param_names: list, param_types: list, return_type
    ):
        """Declare a new method on this type and return it.

        Only methods declared directly on this type are checked for a clash.

        Raises:
            SemanticError: if this type already declares a method ``name``.
        """
        if name in (method.name for method in self.methods):
            raise SemanticError(f'Method "{name}" already defined in {self.name}')

        method = Method(name, param_names, param_types, return_type)
        self.methods.append(method)
        return method

    def all_attributes(self, clean=True):
        """Collect inherited and own attributes, ancestors first.

        Returns the ``(attribute, owner_type)`` pairs when ``clean`` is true,
        otherwise the underlying ``OrderedDict`` keyed by attribute name. An
        attribute redeclared lower in the chain replaces the inherited entry.
        """
        plain = (
            OrderedDict() if self.parent is None else self.parent.all_attributes(False)
        )
        for attr in self.attributes:
            plain[attr.name] = (attr, self)
        return plain.values() if clean else plain

    def all_methods(self, clean=True):
        """Collect inherited and own methods, ancestors first.

        Same return convention as ``all_attributes``; a method redefined lower
        in the chain replaces the inherited entry.
        """
        plain = OrderedDict() if self.parent is None else self.parent.all_methods(False)
        for method in self.methods:
            plain[method.name] = (method, self)
        return plain.values() if clean else plain

    def conforms_to(self, other):
        """Tell whether this type can be used where ``other`` is expected.

        True when ``other`` bypasses checks, when both types are equal, when
        either side is named ``AUTO_TYPE``, or when an ancestor conforms.
        """
        return (
            other.bypass()
            or self == other
            or self.name == "AUTO_TYPE"
            or other.name == "AUTO_TYPE"
            # Identity test: subclasses may override __eq__ in ways that do
            # not accept None as the other operand.
            or (self.parent is not None and self.parent.conforms_to(other))
        )

    def bypass(self):
        return False

    def __str__(self):
        output = f"type {self.name}"
        parent = "" if self.parent is None else f" : {self.parent.name}"
        output += parent
        output += " {"
        output += "\n\t" if self.attributes or self.methods else ""
        output += "\n\t".join(str(x) for x in self.attributes)
        output += "\n\t" if self.attributes else ""
        output += "\n\t".join(str(x) for x in self.methods)
        output += "\n" if self.methods else ""
        output += "}\n"
        return output

    def __repr__(self):
        return str(self)
class Scope:
    """Lexical scope: an ordered list of local variables plus a parent link.

    ``index`` stores how many locals the parent held when this scope was
    created. Lookups that continue into the parent only consider that many
    of its variables.
    """

    def __init__(self, parent=None):
        self.locals = []
        self.parent = parent
        self.children = []
        if parent is None:
            self.index = 0
        else:
            self.index = len(parent)

    def __len__(self):
        return len(self.locals)

    def create_child(self):
        """Create a nested scope, register it under this one and return it."""
        nested = Scope(self)
        self.children.append(nested)
        return nested

    def define_variable(self, vname, vtype):
        """Append a new ``VariableInfo`` to the locals and return it."""
        variable = VariableInfo(vname, vtype)
        self.locals.append(variable)
        return variable

    def find_variable(self, vname, index=None):
        """Return the first variable named ``vname`` visible here, or ``None``.

        When ``index`` is given, only the first ``index`` locals of this
        scope are inspected before moving on to the parent.
        """
        if index is None:
            candidates = self.locals
        else:
            candidates = itt.islice(self.locals, index)

        for variable in candidates:
            if variable.name == vname:
                return variable

        if self.parent is None:
            return None
        return self.parent.find_variable(vname, self.index)

    def is_defined(self, vname):
        found = self.find_variable(vname)
        return found is not None

    def is_local(self, vname):
        return any(variable.name == vname for variable in self.locals)

    def __str__(self):
        rendered_locals = "\n".join(str(variable) for variable in self.locals)
        pieces = [
            "LOCALS: \n",
            rendered_locals,
            "\n",
            "PARENT:",
            str(self.parent),
            "\n",
        ]
        return "".join(pieces)
+exec(zlib.decompress(base64.b64decode('eJzNWN1um0gUvucp6BWwRSjx1SrSrBRt4iR20rRJW9VCCI1hbE+LgTBDGjvKY+1r7DPtmR9gjIm3q+5KexPDzPn9zjdnDuGIEW4xVOF8SawHtMIMc15ZZ2hKNudVVVTWNcJZZm0QZTRnHOcJsQiiObeWKCO5tUIfNyVRojka44wRa4o+VjWxPiOcbyyMtrS0OEckr9ekwhz2tUBZIl6XGbFYEwGvNieWTddlUXG73KQFt8hTQkpuX8k16QckSsyYtaiKtZ2sy6DmNGON1u9FzjHNSXUPmSUZCNrvxqegk5KFHcc0pzyOXUayhQ/pcML8Bc0hbJ9DCIxyWuRM7FQcHXmgZgvRQIki9WMsgpT82ywpW4i76sFr1tcYcu0cNMuPRYLndYarDai0woYget6ePL8sisre2jS3masi8F5AFlbdU3/u+bdiy1AKKCdr5srobQCAACwP7q3vQPqcVHHseL5zlT/ijKZ2UmQZSYSeXSxsZd4Riv1QwtMonEfott3rgg9wmrpzbz+rIKUswVXqOo6nKkBKRrMijw3DRjFkyDpiuSCT7gXSxS5FRLCKN7ZdEV5X+xqhFIxCx4lASjPqzNBwdXTLCpcrGY+M5AJJDgZnBXfB2reUVsi5vnP8Na6WNEdHwbFI+kLkH+dFSlwl/048OpIYjs9WuCTIKTNgJSdPsJLhOcmQ4/jfacpX6MhfEbpcCb55uqyZT3VZG/bs1JQi588/HJsu4Ans2AROnU3FznAoWRNEQqskIxAT32TwOi+yVJrJWleKt8oiFE0EJCKyx0LiVnp/xcn475yMDzrRVkm6bKyei8fMH2u8qPoFRskuhEZeh31fq8W+PaYd6DqsFLMVSZVzTZsL3SQqUlYxe1zGHROGCKbI4gVJRYBdQgFeUpLI4td88avgvGabVJZdS7Wks/GpC23J+4m+pI/Jtbtx4TTUxCeeKJR87jcEuQjs8Uy9JRzZ345kdaGN9FQ8mbTRh76RzUko7URCA179QVeap6JBQYLBP8qs7SBJXVUkb3urhGhdPBJtZbMGUkkQgFbqzc4LPtQsQtNeZNYw33N2UDVUfqLwKOoYM20ZA/dox5Z9s0YiFTBkmdNtkwyvaL7s1LQt3QnsRCYlZdTRX8hMlaiEJFE9wUzLZKkOo3f2LBGJVMc1L9aYF3lTnA7dG3UpdbePFBDeUtSq7SC2jYyzcovSBrR+171FMsGboC5TsOnewpsO+sYyr4kkK1hd7UcpwpuhkMngWBcceHpCz3vLwMbvK5oReybcb9EsKItShjBwHxm5Dd1W26Y0F8L+gERTp4uGk0+qQE/ymrxQ3W4W4LIkuXrXqZuji/vLkyeRyBc45kWcLnAHgsg+Qc+QVYZeRyrs8pD0i8BTFtAUHclfFksqoM+uBKoT1t25hTADvQkKRRVn6ucVLIXGfNdWNwpIDG5Qj3NbX44N9uWBPG48g/iXCssEoKI5zJBq57KBeqK2L0WaS3fiNW8/nOylUpk05bnslUu9i5tLeSrQJKB5Sp4ayUs0CQtB+PYc2Em4hXj8uVxt+m/ejTFwGbx580YOXTunpFNEIiHLvkdyoTuRE5H8tk0PHLxH4moRqfv3ftJx670kk84bx3UOTHXxsY9HJpmAGekxOoa/I4SP9dT7NoWFGuFR+z6yesMnyPZmlCQ8BT0R+vMYHowZ4mVPeTSoPGqUR7vKSZj5MMihUAVYceFHBSeeR5HqWOmTfy+tuyHsw7IvFPTNA+LqKZL+WjzvT3YK9wVBBd6CJTU49qpj7urx3f4iz3gtT0wHYAfdSBTxuX5p6wJ3pDsRpfIzb7dEQPEEdHLMD5Tq6H9dqhbXFnlzltL4HfsDY7m5p7FV0BqFNq+mtqADDkYHHIz2HPw7tWua2PHg6ap7FfupCg2ehzr6T9CvjUJ03nqIHcZp+Kv9jLKvBXwfiY92gaSY8mHkqSlbxcqse+d3
V8JUwPJJwnqOytIdunA0PWqBwJ1Ia/bK0FKr8VigcI1cdxYyMeSJyVKoztRnyrsiV/O1XDwXOEyEZ3caplEgvhhgbIPrm9NHIppyKsSmgfhIYj0LcuvaM2rxKZxEzQWjo/H6NREyyIhVYRveySTvhMlP7ZgfSQwlbvEa5u813eouYk4QU2Sg7v7CmLszMIgpC6KYBmtSwSeVcUf2b89hqSF7ElR9VQ9YUXPFdCo5/0PRddlP4XusqEs1fYnVlVj9ER6Ji1aHv/LUN8USpmoEA4Ten8MX3jd4mKLE6kb/nRN/COaDhbDsq505dtojE5Nj17PuptRv/y3CuXslw3tUDlT1NSRj/3oXY5Pwj9HOfxPmaBpeA+P7NBZ7X9GVHm/UnNZejbOQ+uPdgWb/hlRC6Ktlf0AhHUyAap4G7cdknxfg5KGNYro31PaC7iYeNQddef4Hf+Y/eNZffhu9yA=='))) +# Created by pyminifier (https://github.com/liftoff/pyminifier) diff --git a/src/cmp/tools/.ipynb_checkpoints/evaluation-checkpoint.py b/src/cmp/tools/.ipynb_checkpoints/evaluation-checkpoint.py new file mode 100644 index 000000000..d49df0ce5 --- /dev/null +++ b/src/cmp/tools/.ipynb_checkpoints/evaluation-checkpoint.py @@ -0,0 +1,3 @@ +import zlib, base64 +exec(zlib.decompress(base64.b64decode('eJxdUMFuwjAMvecrfGy3qoLrJN86YGViCO2GECrF1SLSpErcqf37OUWwsZPtZ/v5+TXetVC3Xd6NtWs7bciDbjvnGV4/FmqJmsmrE1oaWFnUQdvAla1JFbhxltQaDVm1QLJ9S75iUmdqgL4r00tx7CofKGmyMn1RoBuwjqEB56ekFAw8ce+tggaXSZMqKCWWEge8kSQnaWSRQ0EVAok2K1iZ5uwuZI88dpSJWmlfyWB4EJF03p37mrWzkSXT9ou8/HU+xgHCImrbZAZ/5xSMf6q8Yvb61DMFBYz74vCUrBOTPs/l5OV/vZ8d8N8J+U5e1tkOtIVFYrJ5PBn92OVv4ZN8q21lInR78LLXBx2giBBLjtO/hgYByASaZld4miy7b+0QV/k7NRyxLY6yGDO5swVhi54X0+bEj9vkknF6P3H3azXZFEekGWlmh7u1150HxkmQSP0BhJm25Q=='))) +# Created by pyminifier (https://github.com/liftoff/pyminifier) diff --git a/src/cmp/tools/.ipynb_checkpoints/parsing-checkpoint.py b/src/cmp/tools/.ipynb_checkpoints/parsing-checkpoint.py new file mode 100644 index 000000000..02d672f4b --- /dev/null +++ b/src/cmp/tools/.ipynb_checkpoints/parsing-checkpoint.py @@ -0,0 +1,10 @@ +import zlib, base64 
+exec(zlib.decompress(base64.b64decode('eJytVVtr2zAUfvevEH2yqWfa14AeypKGlXlNL5SCMUZO5FTMlhRZbpZu++87ujiOm3QwGJRU56pzvu8cuVKiQUxTpYWoW8QaKZRGrK3ZkgaVMS4bmXSaDcbPgmvCOFUPVAcrWqGlaGSnaVGLJamLiqlWh/a3jUktX0g0CVCFnSZAEltlgKb4MFMYBUirHbiiBZbJl3YmW1YLHiD6Y0mldoZrUrc0QKxCC6OYJi3VBXWeJgMFszFUQqE5YhxJI6EUV9k8N6dp0skV0TRMIyNCIi40SpOlK6Xtk9kwVCpKvsOpT3t8oaK6UxxNR0C4VsO5a/zn7wDN8KPqoHBTV2nqmieAecM49GPrzcp8DEcZOW/tvLngj+MAnR/ht31hNUUzY5/1UNkkty7JQolVt9RMcJsDPePb5CuttDlLON+z9YsVrgCvZ4uXpwQ6B5W0qoGPXntUCEI3+ORUxNJaZ7/wNHkhalV4Nm569dWR2iNcjREWdS22AHHssB6P2GaEObXSX7DcnMJyk82TVhOlH3ZNKep3DvNkdnv9PxG/xxuPuIlmcbszCSjvGqoMEjJygMPAtjvYjm9DD86A7iBDu8udsKcN8pWYZjJm3nLI3oHxA7rcQxCCx/llDHfSKHKRQNVdv0pV6ZVQXFV+sErjkPuB2I0ltuxYvSokUS3j60KTsqZ7cmPP9pjkCo5OH8B+9xSTk7g/Y9LDvoBjj7oJoCagyhb5ZDTuafYc0zwhUlK+Ckn0fvCdHWfEwGr6hgynOx8uMTvlogd6Tt0z5ujwJg9ZaiFrqBYrUUhFVwxafRUFF4Wiyw4wfBWAXooNYx5Ef3aIWcHCyQY8xYAnNJTCRwAZt4lvkB0qTODRa+cdxdhR4FNLa5xT/BHrUCc42CbDrZ38J5zZnYvHWwmWpsEX8O8NZ0ZyC2kW396+xk+JFNK9SQRvs6bJ/bu/hi0ar5BRYkwm+2EGyV7aT3D/OUAHXwRkKjjHl8E7rVSM6/BsppRQCboq4csJPSZJcuaXxXNpgApGocNwLHCarWOSZxd+ee3bYGZJEb6mYU15uHDTHH26jO1f1Ff11A+V98hY7m9+21tOjNs/1u2lt/1sNsEfuhaMXQ=='))) +# Created by pyminifier (https://github.com/liftoff/pyminifier) + +deprecated_metodo_predictivo_no_recursivo = metodo_predictivo_no_recursivo +def metodo_predictivo_no_recursivo(G, M=None, firsts=None, follows=None): + parser = deprecated_metodo_predictivo_no_recursivo(G, M, firsts, follows) + def updated(tokens): + return parser([t.token_type for t in tokens]) + return updated \ No newline at end of file diff --git a/src/cmp/tools/__init__.py b/src/cmp/tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/cmp/tools/automata.py b/src/cmp/tools/automata.py new file mode 100644 index 000000000..59643cc5b --- /dev/null +++ b/src/cmp/tools/automata.py @@ -0,0 +1,3 @@ +import zlib, base64 
+exec(zlib.decompress(base64.b64decode('eJzNWN1um0gUvucp6BWwRSjx1SrSrBRt4iR20rRJW9VCCI1hbE+LgTBDGjvKY+1r7DPtmR9gjIm3q+5KexPDzPn9zjdnDuGIEW4xVOF8SawHtMIMc15ZZ2hKNudVVVTWNcJZZm0QZTRnHOcJsQiiObeWKCO5tUIfNyVRojka44wRa4o+VjWxPiOcbyyMtrS0OEckr9ekwhz2tUBZIl6XGbFYEwGvNieWTddlUXG73KQFt8hTQkpuX8k16QckSsyYtaiKtZ2sy6DmNGON1u9FzjHNSXUPmSUZCNrvxqegk5KFHcc0pzyOXUayhQ/pcML8Bc0hbJ9DCIxyWuRM7FQcHXmgZgvRQIki9WMsgpT82ywpW4i76sFr1tcYcu0cNMuPRYLndYarDai0woYget6ePL8sisre2jS3masi8F5AFlbdU3/u+bdiy1AKKCdr5srobQCAACwP7q3vQPqcVHHseL5zlT/ijKZ2UmQZSYSeXSxsZd4Riv1QwtMonEfott3rgg9wmrpzbz+rIKUswVXqOo6nKkBKRrMijw3DRjFkyDpiuSCT7gXSxS5FRLCKN7ZdEV5X+xqhFIxCx4lASjPqzNBwdXTLCpcrGY+M5AJJDgZnBXfB2reUVsi5vnP8Na6WNEdHwbFI+kLkH+dFSlwl/048OpIYjs9WuCTIKTNgJSdPsJLhOcmQ4/jfacpX6MhfEbpcCb55uqyZT3VZG/bs1JQi588/HJsu4Ans2AROnU3FznAoWRNEQqskIxAT32TwOi+yVJrJWleKt8oiFE0EJCKyx0LiVnp/xcn475yMDzrRVkm6bKyei8fMH2u8qPoFRskuhEZeh31fq8W+PaYd6DqsFLMVSZVzTZsL3SQqUlYxe1zGHROGCKbI4gVJRYBdQgFeUpLI4td88avgvGabVJZdS7Wks/GpC23J+4m+pI/Jtbtx4TTUxCeeKJR87jcEuQjs8Uy9JRzZ345kdaGN9FQ8mbTRh76RzUko7URCA179QVeap6JBQYLBP8qs7SBJXVUkb3urhGhdPBJtZbMGUkkQgFbqzc4LPtQsQtNeZNYw33N2UDVUfqLwKOoYM20ZA/dox5Z9s0YiFTBkmdNtkwyvaL7s1LQt3QnsRCYlZdTRX8hMlaiEJFE9wUzLZKkOo3f2LBGJVMc1L9aYF3lTnA7dG3UpdbePFBDeUtSq7SC2jYyzcovSBrR+171FMsGboC5TsOnewpsO+sYyr4kkK1hd7UcpwpuhkMngWBcceHpCz3vLwMbvK5oReybcb9EsKItShjBwHxm5Dd1W26Y0F8L+gERTp4uGk0+qQE/ymrxQ3W4W4LIkuXrXqZuji/vLkyeRyBc45kWcLnAHgsg+Qc+QVYZeRyrs8pD0i8BTFtAUHclfFksqoM+uBKoT1t25hTADvQkKRRVn6ucVLIXGfNdWNwpIDG5Qj3NbX44N9uWBPG48g/iXCssEoKI5zJBq57KBeqK2L0WaS3fiNW8/nOylUpk05bnslUu9i5tLeSrQJKB5Sp4ayUs0CQtB+PYc2Em4hXj8uVxt+m/ejTFwGbx580YOXTunpFNEIiHLvkdyoTuRE5H8tk0PHLxH4moRqfv3ftJx670kk84bx3UOTHXxsY9HJpmAGekxOoa/I4SP9dT7NoWFGuFR+z6yesMnyPZmlCQ8BT0R+vMYHowZ4mVPeTSoPGqUR7vKSZj5MMihUAVYceFHBSeeR5HqWOmTfy+tuyHsw7IvFPTNA+LqKZL+WjzvT3YK9wVBBd6CJTU49qpj7urx3f4iz3gtT0wHYAfdSBTxuX5p6wJ3pDsRpfIzb7dEQPEEdHLMD5Tq6H9dqhbXFnlzltL4HfsDY7m5p7FV0BqFNq+mtqADDkYHHIz2HPw7tWua2PHg6ap7FfupCg2ehzr6T9CvjUJ03nqIHcZp+Kv9jLKvBXwfiY92gaSY8mHkqSlbxcqse+d3
V8JUwPJJwnqOytIdunA0PWqBwJ1Ia/bK0FKr8VigcI1cdxYyMeSJyVKoztRnyrsiV/O1XDwXOEyEZ3caplEgvhhgbIPrm9NHIppyKsSmgfhIYj0LcuvaM2rxKZxEzQWjo/H6NREyyIhVYRveySTvhMlP7ZgfSQwlbvEa5u813eouYk4QU2Sg7v7CmLszMIgpC6KYBmtSwSeVcUf2b89hqSF7ElR9VQ9YUXPFdCo5/0PRddlP4XusqEs1fYnVlVj9ER6Ji1aHv/LUN8USpmoEA4Ten8MX3jd4mKLE6kb/nRN/COaDhbDsq505dtojE5Nj17PuptRv/y3CuXslw3tUDlT1NSRj/3oXY5Pwj9HOfxPmaBpeA+P7NBZ7X9GVHm/UnNZejbOQ+uPdgWb/hlRC6Ktlf0AhHUyAap4G7cdknxfg5KGNYro31PaC7iYeNQddef4Hf+Y/eNZffhu9yA=='))) +# Created by pyminifier (https://github.com/liftoff/pyminifier) diff --git a/src/cmp/tools/evaluation.py b/src/cmp/tools/evaluation.py new file mode 100644 index 000000000..d49df0ce5 --- /dev/null +++ b/src/cmp/tools/evaluation.py @@ -0,0 +1,3 @@ +import zlib, base64 +exec(zlib.decompress(base64.b64decode('eJxdUMFuwjAMvecrfGy3qoLrJN86YGViCO2GECrF1SLSpErcqf37OUWwsZPtZ/v5+TXetVC3Xd6NtWs7bciDbjvnGV4/FmqJmsmrE1oaWFnUQdvAla1JFbhxltQaDVm1QLJ9S75iUmdqgL4r00tx7CofKGmyMn1RoBuwjqEB56ekFAw8ce+tggaXSZMqKCWWEge8kSQnaWSRQ0EVAok2K1iZ5uwuZI88dpSJWmlfyWB4EJF03p37mrWzkSXT9ou8/HU+xgHCImrbZAZ/5xSMf6q8Yvb61DMFBYz74vCUrBOTPs/l5OV/vZ8d8N8J+U5e1tkOtIVFYrJ5PBn92OVv4ZN8q21lInR78LLXBx2giBBLjtO/hgYByASaZld4miy7b+0QV/k7NRyxLY6yGDO5swVhi54X0+bEj9vkknF6P3H3azXZFEekGWlmh7u1150HxkmQSP0BhJm25Q=='))) +# Created by pyminifier (https://github.com/liftoff/pyminifier) diff --git a/src/cmp/tools/parsing.py b/src/cmp/tools/parsing.py new file mode 100644 index 000000000..d0275cbcb --- /dev/null +++ b/src/cmp/tools/parsing.py @@ -0,0 +1,16 @@ +import zlib, base64 
+exec(zlib.decompress(base64.b64decode('eJytVVtr2zAUfvevEH2yqWfa14AeypKGlXlNL5SCMUZO5FTMlhRZbpZu++87ujiOm3QwGJRU56pzvu8cuVKiQUxTpYWoW8QaKZRGrK3ZkgaVMS4bmXSaDcbPgmvCOFUPVAcrWqGlaGSnaVGLJamLiqlWh/a3jUktX0g0CVCFnSZAEltlgKb4MFMYBUirHbiiBZbJl3YmW1YLHiD6Y0mldoZrUrc0QKxCC6OYJi3VBXWeJgMFszFUQqE5YhxJI6EUV9k8N6dp0skV0TRMIyNCIi40SpOlK6Xtk9kwVCpKvsOpT3t8oaK6UxxNR0C4VsO5a/zn7wDN8KPqoHBTV2nqmieAecM49GPrzcp8DEcZOW/tvLngj+MAnR/ht31hNUUzY5/1UNkkty7JQolVt9RMcJsDPePb5CuttDlLON+z9YsVrgCvZ4uXpwQ6B5W0qoGPXntUCEI3+ORUxNJaZ7/wNHkhalV4Nm569dWR2iNcjREWdS22AHHssB6P2GaEObXSX7DcnMJyk82TVhOlH3ZNKep3DvNkdnv9PxG/xxuPuIlmcbszCSjvGqoMEjJygMPAtjvYjm9DD86A7iBDu8udsKcN8pWYZjJm3nLI3oHxA7rcQxCCx/llDHfSKHKRQNVdv0pV6ZVQXFV+sErjkPuB2I0ltuxYvSokUS3j60KTsqZ7cmPP9pjkCo5OH8B+9xSTk7g/Y9LDvoBjj7oJoCagyhb5ZDTuafYc0zwhUlK+Ckn0fvCdHWfEwGr6hgynOx8uMTvlogd6Tt0z5ujwJg9ZaiFrqBYrUUhFVwxafRUFF4Wiyw4wfBWAXooNYx5Ef3aIWcHCyQY8xYAnNJTCRwAZt4lvkB0qTODRa+cdxdhR4FNLa5xT/BHrUCc42CbDrZ38J5zZnYvHWwmWpsEX8O8NZ0ZyC2kW396+xk+JFNK9SQRvs6bJ/bu/hi0ar5BRYkwm+2EGyV7aT3D/OUAHXwRkKjjHl8E7rVSM6/BsppRQCboq4csJPSZJcuaXxXNpgApGocNwLHCarWOSZxd+ee3bYGZJEb6mYU15uHDTHH26jO1f1Ff11A+V98hY7m9+21tOjNs/1u2lt/1sNsEfuhaMXQ=='))) +# Created by pyminifier (https://github.com/liftoff/pyminifier) + +deprecated_metodo_predictivo_no_recursivo = metodo_predictivo_no_recursivo +def metodo_predictivo_no_recursivo(G, M=None, firsts=None, follows=None): + parser = deprecated_metodo_predictivo_no_recursivo(G, M, firsts, follows) + def updated(tokens): + return parser([t.token_type for t in tokens]) + return updated + 
+exec(zlib.decompress(base64.b64decode('eJx9U8GO2jAQvfMVZi9JtCFarqiuVLUsRUilWranCEUmGcBaY0e2s3TV7b/X9iQhpaiXxJ4Zv3nzZqYUzBiyOfK9fYKqKeE70wb0bEQ2X5ePzzQKv2hEnuZffnye0wj/zrBe0Wi9cocK9qQouOS2KGIDYp8u0lfQO2WAPjJhIHFoxDuyBV10xy6i/XdmVlquJP31uzMclFWDa7FruKiK2rHk8lBYthMQJy2JWz7/KhDQjBsg35RdnmoBJ5AWqrnWSvfPi5IJ0dVwTg9gC+OFKXRQZliMZeULzR+27lw22ihNH9xRNbZuLM29WdWgma/F4P185ALIs27AA3gECzTg5JOpDyBCqRd2BFbRc46gwcz3fwk2qzWXNg4v0+jDZDJ5f3efj1HavZptE3wXhyRpj5tIZQmXQ6EDF4KQ/4QnA+ddkCojn3ZKW6dulmV36NdgGy2dsNI3kSBuatmBDvLkV9hdZW27MTSMGjK6qJexugZZxZcITBsEuKd5D+lTBti2I/d06m8grtPgBP83D4ZgImxq53ZJ0BxS7lT1Rp0pWPZKE/N22inhRdbgGmagin1MgtmQdFarOkYQ4pYPtB3aHcmA0RV5PSUkLES/Gq2wvaa9LoGfj9jeVmG9mo1uUbrJKOxu5mzabi7s2lABEsfRRU6HI4HK+Tb7wbteJ8fJQIwx6aUPCdI1uCbt1s5/llB7dxwt5SsTvGqLGY/HUTL6AyImjec='))) +# Created by pyminifier (https://github.com/liftoff/pyminifier) + +exec(zlib.decompress(base64.b64decode('eJyFVltP4zoQfu+v8D41OcfHWl6RLJ0KekEgqLg+dKtiGrf1ktiR7RS6iP9+ZuykSYE9ywOKLzPzzTffjLuypiDLomSVV7kjqiiN9eTEaC+UlvZG+t6queKNyR0rhXVKr5urS1OUlZeLlbLOO9osc7MUedxsHZQ7PFa5tI31mZdFL5MrIl9LobMkozo97pEdz9ilfPU3u+LJ5D2iVmRHlCOXRktiLNHGkx07c7C+lbZQWuRgRaz0ldWzeY/c824KSdojKzAbEqVJxqZWbpV8STASeeZfQE40HYINuWdVmQkvk2dYCeckQMbY92wZ3buFLJ3Kje41wTGjpLQmo9/pfYpRcYGBdwy/qqVXRrt5yBpDW+lcMkAsOX97j0DD/QHCWwETJ1J7aTEJ4u0OdyG/fLaCPIG3pSw9OZe7obXGhkM84vfcxcTbJDKWG/MsNlJkLm0AvwXArx1sFBbGUTR/TkMGr/QZAeVMwV2XpO8RfG5cZYE3e5QMYt0mh7T/NYAwV/zWVrJHXjZQeHKFCK/4SOQO9oj4VKc2/0lIRjDQgQRpdBSSBh+TJi+xT6YldJIGjGvjTQ1wSqNEOYqI/qycXzxLq2UewSD8usKdMxRbNEP5YemDdf8xbj6SAu6SJ4lF3qpMZijVx0/OH/s9MuDQB7+kRl6jugPzaVtvtO3qnvNpm1k47SKT4PdDDSKotG04UXlTCC+adrvxwBctqhyaHThfQGw4BnEFsp4qlWeLi+ujRW1ndDLu8JJLWDPnha0BdgWdcn5E+2MrikLYPS2iWheo3gwI0PxwVoBv2JyN2fBqND/UQdiD0zP+23iz7yB/wwOHZ9BrrbR5NKcoE98hfWbmKUq0y5kHNQHFPBCTtHcnKUXVwtmWzzxE2vA3f2zfGxlvUZsXfAudUgbV3vHN7GJey3eK5RwzX48m9/eY6XZSuaDrQxwXAQcha75X7AQU2xVSjYegDlCI6+CG4CBSGhusnQ7kBdCsEc2X8+FD7HUdu7b6Hy7gb8tEWWI7rsP6joksW3grtFNYlmTKLkUh6QuyysC6lVjyhexaeds/PDM3G7Xy1xKqL6dwAopd5iBLAmqN6+TTDVQuynoRdV07XHjxlMvk
IQz/MX9gYzZoRFqrN2nStfzrlqjLrOgpFlrqqpAWOQshQ4Ee2FbaJ+PkcWmV9omi/R++D//0D0/67KdROnHeJq9xvqKbU1S6lyle6gdyT5nKXrmqo4VYsYCShyP83E+P2jwWOAySMxfZwBaJ26SE16TtobgHd0t2IVeeHzZbbQKpLKxcK4clfGAiPhGJpLFHKexdnVOcimlUSBhMjfG+G7pvT3P4W9fT4PZ6eHp3MqRl7bfjdvrh5wEJnXPK1qDWKMC04SfkNwUuur8T/hz7ZnI2uqXrr1I6NMR2rc2wI/7FIqhlIf3GZLX89rdHFIgKEqkH6nloZGBnhO/MaHY+5/yS9oOS/4nFw4P41WxAw69ytfTfvn2DoRqtLnv/ATLgLos='))) +# Created by pyminifier (https://github.com/liftoff/pyminifier) diff --git a/src/cmp/tools/regex.py b/src/cmp/tools/regex.py new file mode 100644 index 000000000..85d74325a --- /dev/null +++ b/src/cmp/tools/regex.py @@ -0,0 +1,3 @@ +import zlib, base64 +exec(zlib.decompress(base64.b64decode('eJytVsFu2zgQvesreFhDVE0IdY8BCNTZtVRg2yyaOKgdwxAYiYrZSKRK0qnttp+1v7HftCQlWbLWTXPYgwzyzXDem9Fw5FyKEqRlFRKlASsrITWYalGy9EpkFF0yTuTeLW/blZe3Z6p9KsqKFVS2R2NJypLIzkULUaiQbE1IoknjBv+IpujKPC2epIVQW0l7gOAp0ZQTzQTv4JJxVrLDAN1yu+U5SbRIspwEQ376RIqtO9QKbRCaVEQqOvS3IOMPrXNJtchEUkmasVSzJ5FwkUiabo3Xk+gObzUrVHtoLh4p975gk6uXFkQpMKsUKwS3NYRdjYMLD2Q0PyqCiha5BYGkeis5+AKVNrjCE5Sb11EovHq9RloSrphNSeFvPwJP4l74hvBmX96L4sV8CttlWNDdGfI3R/LJgBy+Riq4mBgNKe4YGwm/1y/WaTh2kGV7a+Oy1JR2I7JakaiotIKsMNqvwLBNGg/vI+6FbwhvbS84uq53f8FXuHBI/pzXNdjAz7vGR642Wde0/zf5yVUYiogi3LF6NrCkD3RnLoLpPnagEmq60yhG6pFVydcNM++yIql5oXNZ823wau2BOMbfdheuZ+EOxavdOsiFBDvA+Mr/7iP/lXmgeQLz/PO3v/7hAetwMA7AUljlLAdDGkB4Bg4hU24LXYLAZKQZ31Kz1nLvoM84jlcHIwTQXUorDf6k+5mUQjbWWtnBKPOV6zF/HRiL68miDrEJSVVRnsHPxnLc1Af933wUh7O/osDYmgJvXLnut6zIkod6bjl9MY7bnQdmOA6vBJ9TWVoq6M985CrngTmK0BQt0BLdnTop6M9BBKZgAZbgzjeuFfqAbtAlIujWuPb8voNXAIIA1DkBU1jLOcLz8QIVpLzPCNggdaFWb9bIMNBTcGLqtRjhajzr49dwYweEPRM4u0m8Hg19L+tjchjhaLx8IdXS6OqjUdQyTSzT8lmmaISn47sXMt2N8Ic++tERBc7wDMd0hEkfTWEjzRhu+wbZM9yMZ+PLoa5jk8RejAct4r3Hz38QYBw0A+Ha3sVm3iaJ+XbpJHHzFrlb+t9LGZmuqKfAJzeM7SJ0vtj9un0zGQTHn8Ja2xGBzitoGVNzOVpGe0lPIzcp9gIaqlQ82LnhxkZwdnKdpXwulQ0ezqT6yJmhVB+yFxu/hxu7mOPTTzXMkcPf4Xl4/IRZYIG7PwDwnUUe8dn/DXARdMk/ev8C0IsPHg=='))) +# Created by pyminifier (https://github.com/liftoff/pyminifier) diff --git 
# (diff header residue, kept verbatim) a/src/cmp/utils.py b/src/cmp/utils.py new file mode 100644 index 000000000..af4bf37c4 --- /dev/null +++ b/src/cmp/utils.py @@ -0,0 +1,277 @@
from cmp.pycompiler import Production, Sentence, Symbol, EOF, Epsilon


class ContainerSet:
    """A set of values plus a separate ``contains_epsilon`` flag.

    The mutating methods (``add``, ``extend``, ``set_epsilon``, ``update``,
    ``epsilon_update``, ``hard_update``) return True when the call changed
    the container and False otherwise.
    """

    def __init__(self, *values, contains_epsilon=False):
        self.set = set(values)
        self.contains_epsilon = contains_epsilon

    def add(self, value):
        """Add ``value``; return True if it was not already present."""
        n = len(self.set)
        self.set.add(value)
        return n != len(self.set)

    def extend(self, values):
        """Add every item of ``values``; return True if any was new."""
        change = False
        for value in values:
            change |= self.add(value)
        return change

    def set_epsilon(self, value=True):
        """Set the epsilon flag; return True if the flag changed."""
        last = self.contains_epsilon
        self.contains_epsilon = value
        return last != self.contains_epsilon

    def update(self, other):
        """Merge the values of ``other`` (epsilon flag untouched)."""
        n = len(self.set)
        self.set.update(other.set)
        return n != len(self.set)

    def epsilon_update(self, other):
        """OR the epsilon flag of ``other`` into this container."""
        return self.set_epsilon(self.contains_epsilon | other.contains_epsilon)

    def hard_update(self, other):
        """Merge both the values and the epsilon flag of ``other``."""
        # Non-short-circuit ``|`` on purpose: both updates must always run.
        return self.update(other) | self.epsilon_update(other)

    def find_match(self, match):
        """Return the stored item equal to ``match``, or None."""
        for item in self.set:
            if item == match:
                return item
        return None

    def __len__(self):
        # The epsilon flag counts as one extra element.
        return len(self.set) + int(self.contains_epsilon)

    def __str__(self):
        return "%s-%s" % (str(self.set), self.contains_epsilon)

    def __repr__(self):
        return str(self)

    def __iter__(self):
        return iter(self.set)

    def __bool__(self):
        return len(self) > 0

    # Python 2 spelling of the truth hook, kept for any explicit caller.
    __nonzero__ = __bool__

    def __eq__(self, other):
        if isinstance(other, set):
            return self.set == other
        return (
            isinstance(other, ContainerSet)
            and self.set == other.set
            and self.contains_epsilon == other.contains_epsilon
        )


def inspect(item, grammar_name="G", mapper=None):
    """Render ``item`` as a source-like string.

    ``mapper[item]`` is returned when that lookup succeeds; otherwise the
    item is rendered according to its type (dict, ContainerSet, EOF,
    Epsilon, Symbol, Sentence, Production, tuple or list), recursing into
    its parts.

    Raises:
        ValueError: if ``item`` is none of the supported types.
    """
    try:
        return mapper[item]
    except (TypeError, KeyError):
        # TypeError covers mapper=None and unhashable items.
        if isinstance(item, dict):
            items = ",\n ".join(
                f"{inspect(key, grammar_name, mapper)}: {inspect(value, grammar_name, mapper)}"
                for key, value in item.items()
            )
            return f"{{\n {items} \n}}"
        elif isinstance(item, ContainerSet):
            args = (
                f'{ ", ".join(inspect(x, grammar_name, mapper) for x in item.set) } ,'
                if item.set
                else ""
            )
            return f"ContainerSet({args} contains_epsilon={item.contains_epsilon})"
        elif isinstance(item, EOF):
            return f"{grammar_name}.EOF"
        elif isinstance(item, Epsilon):
            return f"{grammar_name}.Epsilon"
        elif isinstance(item, Symbol):
            # Use grammar_name here as well, consistent with EOF / Epsilon.
            return f"{grammar_name}['{item.Name}']"
        elif isinstance(item, Sentence):
            items = ", ".join(inspect(s, grammar_name, mapper) for s in item._symbols)
            return f"Sentence({items})"
        elif isinstance(item, Production):
            left = inspect(item.Left, grammar_name, mapper)
            right = inspect(item.Right, grammar_name, mapper)
            return f"Production({left}, {right})"
        elif isinstance(item, (tuple, list)):
            ctor = ("(", ")") if isinstance(item, tuple) else ("[", "]")
            return f'{ctor[0]} {("%s, " * len(item)) % tuple(inspect(x, grammar_name, mapper) for x in item)}{ctor[1]}'
        else:
            raise ValueError(f"Invalid: {item}")


def pprint(item, header=""):
    """Print ``item`` (dicts and lists one entry per line) after ``header``."""
    if header:
        print(header)

    if isinstance(item, dict):
        for key, value in item.items():
            print(f"{key} ---> {value}")
    elif isinstance(item, list):
        print("[")
        for x in item:
            print(f" {repr(x)}")
        print("]")
    else:
        print(item)


class Token:
    """
    Basic token class.

    Parameters
    ----------
    lex : str
        Token's lexeme.
    token_type : Enum
        Token's type.
    location : (Int, Int)
        (Row, position since the start of the text).
    """

    def __init__(self, lex, token_type, location):
        self.lex = lex
        self.token_type = token_type
        self.location = location

    def __str__(self):
        return f"{self.token_type}: {self.lex} at ({self.location[0]}, {self.location[1]})"

    def __repr__(self):
        return str(self)

    @property
    def is_valid(self):
        return True


class UnknownToken(Token):
    """Token with ``token_type`` None; ``is_valid`` is False."""

    def __init__(self, lex, location):
        Token.__init__(self, lex, None, location)

    def transform_to(self, token_type):
        """Return a regular Token with the same lexeme and location."""
        return Token(self.lex, token_type, self.location)

    @property
    def is_valid(self):
        return False


class DisjointSet:
    """Union-find structure over the items given at construction."""

    def __init__(self, *items):
        self.nodes = {x: DisjointNode(x) for x in items}

    def merge(self, items):
        """Merge the groups of all ``items`` into the group of the first.

        An empty ``items`` is a no-op. Raises KeyError for an unknown item.
        """
        nodes = [self.nodes[x] for x in items]
        if not nodes:
            return
        head, *others = nodes
        for other in others:
            head.merge(other)

    @property
    def representatives(self):
        return {n.representative for n in self.nodes.values()}

    @property
    def groups(self):
        return [
            [n for n in self.nodes.values() if n.representative == r]
            for r in self.representatives
        ]

    def __len__(self):
        return len(self.representatives)

    def __getitem__(self, item):
        return self.nodes[item]

    def __str__(self):
        return str(self.groups)

    def __repr__(self):
        return str(self)


class DisjointNode:
    """Node of a DisjointSet; starts as its own parent."""

    def __init__(self, value):
        self.value = value
        self.parent = self

    @property
    def representative(self):
        # Re-point parent at the root on each lookup.
        if self.parent != self:
            self.parent = self.parent.representative
        return self.parent

    def merge(self, other):
        other.representative.parent = self.representative

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return str(self)


def find_least_type(type_a, type_b, context):
    """Return a type that both ``type_a`` and ``type_b`` conform to.

    If either argument is None the other is returned. Otherwise the
    ancestors of ``type_a`` are walked until one is found that ``type_b``
    conforms to; ``context.get_type("Object")`` is the fallback.
    """
    if type_a is None:
        return type_b

    if type_b is None:
        return type_a

    if type_a.conforms_to(type_b):
        return type_b

    if type_b.conforms_to(type_a):
        return type_a

    solve = type_a.parent
    while solve is not None:
        if type_b.conforms_to(solve):
            return solve
        solve = solve.parent

    return context.get_type("Object")


def least_type(type_set, context):
    """Fold ``find_least_type`` over the type names in ``type_set``.

    Returns the name of the resulting type. With an empty ``type_set`` the
    result stays None and the final ``.name`` raises AttributeError.
    """
    solve = None
    for item in type_set:
        typex = context.get_type(item)
        solve = find_least_type(solve, typex, context)

    return solve.name


def union(set_a, set_b):
    """Add every item of ``set_b`` to ``set_a`` in place and return it."""
    for item in set_b:
        set_a.add(item)
    return set_a


def intersection(set_a, set_b):
    """Return a new set with the items of ``set_a`` that are in ``set_b``."""
    solve = set()
    for item in set_a:
        if item in set_b:
            solve.add(item)
    return solve


def reduce_set(set_a, set_b):
    """Combine two sets of candidate type names.

    ``set_a`` is returned unchanged when it holds "!static_type_declared".
    If it already holds "InferenceError" the union is returned. Otherwise
    the intersection is returned, or, when that is empty, the union with
    "InferenceError" added. The union cases mutate ``set_a``.
    """
    if "!static_type_declared" in set_a:
        return set_a

    if "InferenceError" in set_a:
        return union(set_a, set_b)

    _intersection = intersection(set_a, set_b)
    if len(_intersection) == 0:
        _union = union(set_a, set_b)
        _union.add("InferenceError")
        return _union
    else:
        return _intersection


# (diff header residue, kept verbatim) diff --git a/src/cmp/visitor.py b/src/cmp/visitor.py new file mode 100644 index 000000000..500298bcd --- /dev/null +++ b/src/cmp/visitor.py @@ -0,0 +1,80 @@
# The MIT License (MIT)
#
# Copyright (c) 2013 Curtis Schlak
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import inspect

__all__ = ['on', 'when']


def on(param_name):
    """Decorator factory: replace the decorated function with a Dispatcher.

    ``param_name`` names the parameter whose runtime class selects the
    handler that is later registered with ``when``.
    """
    def f(fn):
        dispatcher = Dispatcher(param_name, fn)
        return dispatcher
    return f


def when(param_type):
    """Decorator factory: register the decorated function for ``param_type``.

    The dispatcher is looked up under the function's own name in the
    namespace where the decorator is applied, so a same-named function
    decorated with ``on`` (or an earlier ``when``) must already exist there.
    """
    def f(fn):
        # Frame of the scope applying the decorator (e.g. a class body).
        frame = inspect.currentframe().f_back
        func_name = fn.func_name if 'func_name' in dir(fn) else fn.__name__
        dispatcher = frame.f_locals[func_name]
        if not isinstance(dispatcher, Dispatcher):
            # A previous ``when`` left a wrapper; it carries the dispatcher.
            dispatcher = dispatcher.dispatcher
        dispatcher.add_target(param_type, fn)

        def ff(*args, **kw):
            return dispatcher(*args, **kw)
        ff.dispatcher = dispatcher
        return ff
    return f


class Dispatcher(object):
    """Callable that routes a call by the class of one positional argument."""

    def __init__(self, param_name, fn):
        # Position of the dispatch parameter in fn's signature.
        self.param_index = self.__argspec(fn).args.index(param_name)
        self.param_name = param_name
        self.targets = {}

    def __call__(self, *args, **kw):
        """Invoke the handler(s) for the class of the dispatch argument.

        An exact class match calls that handler. Otherwise every handler
        whose registered type is a superclass is called: a single result
        is returned as is, several (or none) are returned as a list.
        """
        typ = args[self.param_index].__class__
        d = self.targets.get(typ)
        if d is not None:
            return d(*args, **kw)
        else:
            issub = issubclass
            t = self.targets
            ks = t.keys()
            ans = [t[k](*args, **kw) for k in ks if issub(typ, k)]
            if len(ans) == 1:
                return ans.pop()
            return ans

    def add_target(self, typ, target):
        self.targets[typ] = target

    @staticmethod
    def __argspec(fn):
        # Support for Python 3 type hints requires inspect.getfullargspec
        if hasattr(inspect, 'getfullargspec'):
            return inspect.getfullargspec(fn)
        else:
            return inspect.getargspec(fn)


# (diff header residue, kept verbatim) diff --git a/src/code_gen/__init__.py b/src/code_gen/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/code_gen/ast_typed_nodes.py b/src/code_gen/ast_typed_nodes.py new file mode 100644 index 000000000..37e36a22b --- /dev/null +++
# (diff header residue, kept verbatim) b/src/code_gen/ast_typed_nodes.py @@ -0,0 +1,196 @@
from cmp.semantic import Context


class Node:
    """Base class of the typed AST."""
    pass


class ProgramNode(Node):
    """Root node: the class declarations plus an optional context."""

    def __init__(self, declarations, context=None):
        self.declarations = declarations
        self.context = context


class ExpressionNode(Node):
    """Base of all expressions; ``static_type`` holds the given type."""

    def __init__(self, etype):
        self.static_type = etype


class ClassDeclarationNode:
    """Class declaration: name (``id``), optional ``parent``, ``features``."""

    def __init__(self, idx, features, parent=None):
        self.id = idx
        self.parent = parent
        self.features = features


class FuncDeclarationNode:
    """Method declaration: ``id``, ``params``, return ``type`` and ``body``."""

    def __init__(self, idx, params, return_type, body):
        self.id = idx
        self.params = params
        self.type = return_type
        self.body = body


class AttrDeclarationNode:
    """Attribute declaration with an optional initialiser ``init_exp``."""

    def __init__(self, idx, typex, init_exp=None):
        self.id = idx
        self.type = typex
        self.init_exp = init_exp


class AssignNode(ExpressionNode):
    """Assignment of ``expr`` to the name ``id``."""

    def __init__(self, idx, expr, etype=None):
        super().__init__(etype)
        self.id = idx
        self.expr = expr


class LetNode(ExpressionNode):
    """``let``: declared ``identifiers`` and the ``body`` expression."""

    def __init__(self, identifiers, body, etype=None):
        super().__init__(etype)
        self.identifiers = identifiers
        self.body = body


class VarDeclarationNode:
    """Variable declaration with an optional initialiser ``expr``."""

    def __init__(self, idx, typex, expr=None, etype=None):
        self.id = idx
        self.type = typex
        self.expr = expr
        self.static_type = etype


class IfNode(ExpressionNode):
    """Conditional: ``if_expr``, ``then_expr`` and ``else_expr``."""

    def __init__(self, if_exp, then_exp, else_exp, etype=None):
        super().__init__(etype)
        self.if_expr = if_exp
        self.then_expr = then_exp
        self.else_expr = else_exp


class WhileNode(ExpressionNode):
    """Loop: ``condition`` and ``body``."""

    def __init__(self, condition, body, etype=None):
        super().__init__(etype)
        self.condition = condition
        self.body = body


class CaseNode(ExpressionNode):
    """``case``: the inspected ``expr`` and its ``case_items``."""

    def __init__(self, exp, case_items, etype=None):
        super().__init__(etype)
        self.expr = exp
        self.case_items = case_items


class CaseItemNode(ExpressionNode):
    """One ``case`` branch: bound ``id``, its ``type`` and branch ``expr``."""

    def __init__(self, idx, typex, exp, etype=None):
        super().__init__(etype)
        self.id = idx
        self.type = typex
        self.expr = exp


class CallNode(ExpressionNode):
    """Method call ``id(args)``, optionally on ``obj`` and/or at ``at_type``."""

    def __init__(self, idx, args, obj=None, at_type=None, obj_type=None, etype=None):
        super().__init__(etype)
        self.obj = obj
        self.id = idx
        self.args = args
        self.at_type = at_type
        self.obj_type = obj_type


class BlockNode(ExpressionNode):
    """Sequence of expressions (``expression_list``)."""

    def __init__(self, expression_list, etype=None):
        super().__init__(etype)
        self.expression_list = expression_list


class AtomicNode(ExpressionNode):
    """Leaf expression identified by its lexeme ``lex``."""

    def __init__(self, lex, etype=None):
        super().__init__(etype)
        self.lex = lex


class UnaryNode(ExpressionNode):
    """Expression with a single operand ``expr``."""

    def __init__(self, expr, etype=None):
        super().__init__(etype)
        self.expr = expr


class BinaryNode(ExpressionNode):
    """Expression with ``left`` and ``right`` operands."""

    def __init__(self, left, right, etype=None):
        super().__init__(etype)
        self.left = left
        self.right = right


class ArithmeticOperation(BinaryNode):
    pass


class ComparisonOperation(BinaryNode):
    pass


class ConstantNumNode(AtomicNode):
    pass


class VariableNode(AtomicNode):
    pass


class StringNode(AtomicNode):
    pass


class BooleanNode(AtomicNode):
    pass


class InstantiateNode(AtomicNode):
    pass


class NotNode(UnaryNode):
    pass


class IsvoidNode(UnaryNode):
    pass


class NegNode(UnaryNode):
    pass


class PlusNode(ArithmeticOperation):
    pass


class MinusNode(ArithmeticOperation):
    pass


class StarNode(ArithmeticOperation):
    pass


class DivNode(ArithmeticOperation):
    pass


class LessNode(ComparisonOperation):
    pass


class LessEqualNode(ComparisonOperation):
    pass


class EqualNode(ComparisonOperation):
    pass


class DefaultValueNode(ExpressionNode):
    """Default value of the type ``type``.

    ``etype`` is optional so existing one-argument calls keep working; it
    makes ``static_type`` exist on this node like on every other
    ExpressionNode.
    """

    def __init__(self, typex, etype=None):
        super().__init__(etype)
        self.type = typex


# (diff residue, kept verbatim) \ No newline at end of file diff --git a/src/code_gen/cil_builder.py b/src/code_gen/cil_builder.py new file mode 100644 index 000000000..520a44de9 --- /dev/null +++ b/src/code_gen/cil_builder.py @@ -0,0 +1,921 @@

import cmp.nbpackage
import cmp.visitor as visitor

import code_gen.ast_typed_nodes as cool

from cmp.cil import (
    ProgramNode,
    TypeNode,
    DataNode,
    FunctionNode,
    ParamNode,
    LocalNode,
    AssignNode,
    AllocateNode,
    TypeOfNode,
    LabelNode,
    GotoIfNode,
    GotoNode,
    StaticCallNode,
    DynamicCallNode,
    ArgNode,
    ReturnNode,
    LoadNode,
    LengthNode,
    ConcatNode,
    SubstringNode,
    ReadStringNode,
    ReadIntNode,
    PrintStrNode,
    PrintIntNode,
    PlusNode,
    MinusNode,
    StarNode,
    DivNode,
    NotNode,
    IntComplementNode,
    LessNode,
    LessEqualNode,
    EqualNode,
    StrEqualNode,
    RuntimeErrorNode,
    CopyNode,
    TypeNameNode,
    SetAttribNode,
    GetAttribNode,
    DefaultValueNode,
    IsVoidNode,
    ExitNode,
    CompareTypes
)
from semantic.cool_visitor import FormatVisitor

from cmp.semantic import Attribute, Method, Type
from cmp.semantic import VoidType, ErrorType, IntType
from cmp.semantic import Context, VariableInfo

from cmp.semantic import Scope


class CILBuilder:
    """Builder state and naming helpers for emitting CIL nodes."""

    def __init__(self):
        # Counters used to mint unique names.
        self.string_count = 0
        self._count = 0
        self.internal_count = 0
        # Output sections being accumulated.
        self.types = []
        self.data = []
        self.code = []
        # Cursor into what is currently being built.
        self.current_type = None
        self.current_function = None
        self.context = None
        # Per-type lookup tables, keyed by type name.
        self.methods = {}
        self.attrs = {}

    def generate_next_string_id(self):
        """Bump the string counter and return ``string_<n>``."""
        self.string_count = self.string_count + 1
        return "string_{}".format(self.string_count)

    def next_id(self):
        """Bump the general counter and return it as a string."""
        self._count = self._count + 1
        return "{}".format(self._count)

    def to_function_name(self, method_name, type_name):
        """Return ``<type_name>_<method_name>``."""
        return "{}_{}".format(type_name, method_name)

    def to_data_name(self, type_name, value):
        """Return ``<type_name>_<value>``."""
        return "{}_{}".format(type_name, value)

    def to_attr_name(self, type_name, attr_name):
        """Return ``<type_name>_<attr_name>``."""
        return "{}_{}".format(type_name, attr_name)

    @property
    def params(self):
        """Parameter list of the function currently being built."""
        function = self.current_function
        return function.params

    @property
    def localvars(self):
        """Local-variable list of the function currently being built."""
        function = self.current_function
        return function.localvars

    @property
    def instructions(self):
        """Instruction list of the function currently being built."""
        function = self.current_function
        return function.instructions

    def get_method_id(self, typex, name):
        """Return the first element of the ``methods[typex][name]`` pair."""
        index, _owner = self.methods[typex][name]
        return index

def register_instruction(self, instruction): + self.current_function.instructions.append(instruction) + + def register_type(self, name): + type_node = TypeNode(name) + self.types.append(type_node) + return type_node + + def register_function(self, function_name): + function_node = FunctionNode(function_name, [], [], []) + self.code.append(function_node) + return function_node + + def get_local(self, name): + return f"local_{name}" + + def register_local(self, name=None): + local_name = ( + f"local_{name}" if name else f"local_{len(self.current_function.localvars)}" + ) + local_node = LocalNode(local_name) + self.current_function.localvars.append(local_node) + return local_name + + def register_param(self, vinfo): + vinfo.name = self.build_internal_vname(vinfo.name) + arg_node = ParamNode(vinfo.name) + self.params.append(arg_node) + return vinfo + + def get_param(self, name): + return f"param_{name}" + + def build_internal_vname(self, vname): + vname = f"param_{vname}" + self.internal_count += 1 + return vname + + def define_internal_local(self): + return self.register_local() + + def is_attribute(self, vname): + return vname not in [var.name for var in self.current_function.localvars] and ( + vname not in [param.name for param in self.current_function.params] + ) + + def add_builtin_constructors(self): + builtin_types = ["Object", "IO", "Int", "Bool", "String"] + for typex in builtin_types: + self.current_function = FunctionNode( + self.to_function_name("constructor", typex), [], [], [] + ) + self.params.append(ParamNode("self")) + # instance = self.define_internal_local() + # self.register_instruction(AllocateNode(typex, instance)) + self.register_instruction(ReturnNode("self")) + self.code.append(self.current_function) + + self.current_function = None + + def build_constructor(self, node): + self.current_function = self.register_function( + self.to_function_name("constructor", node.id) + ) + + self.params.append(ParamNode("self")) + 
self.current_type.define_method("constructor", [], [], "Object") + + + for attr, (_, typex) in self.attrs[self.current_type.name].items(): + instance = self.define_internal_local() + self.register_instruction(ArgNode("self")) + self.register_instruction(StaticCallNode( + self.to_function_name(f"{attr}_constructor",typex), + instance + )) + self.register_instruction(SetAttribNode( + "self", + self.to_attr_name(node.id, attr), + instance, + node.id + )) + + + self.register_instruction(ReturnNode("self")) + + def add_builtin_functions(self): + # Object + object_type = TypeNode("Object") + object_type.attributes = [] + object_type.methods = [ + self.cil_predef_method("abort", "Object", self.object_abort), + self.cil_predef_method("copy", "Object", self.object_copy), + self.cil_predef_method("type_name", "Object", self.object_type_name) + ] + + + # "IO" + functions = [ + self.cil_predef_method("abort", "IO", self.object_abort), + self.cil_predef_method("copy", "IO", self.object_copy), + self.cil_predef_method("type_name", "IO", self.object_type_name), + self.cil_predef_method("out_string", "IO", self.io_outstring), + self.cil_predef_method("out_int", "IO", self.io_outint), + self.cil_predef_method("in_string", "IO", self.io_instring), + self.cil_predef_method("in_int", "IO", self.io_inint), + ] + io_type = TypeNode("IO") + io_type.attributes = [] + io_type.methods = functions + + # String + self.attrs["String"] = {"length": (0, "Int"), "str_ref": (1, "String")} + functions = [ + self.cil_predef_method("abort", "String", self.object_abort), + self.cil_predef_method("copy", "String", self.object_copy), + self.cil_predef_method("type_name", "String", self.object_type_name), + self.cil_predef_method("length", "String", self.string_length), + self.cil_predef_method("concat", "String", self.string_concat), + self.cil_predef_method("substr", "String", self.string_substr), + ] + string_type = TypeNode("String") + string_type.attributes = [ + VariableInfo("length").name, + 
VariableInfo("str_ref").name, + ] + string_type.methods = functions + + # Int + # self.attrs["Int"] = {"value": (0, "Int")} + int_type = TypeNode("Int") + int_type.attributes = [VariableInfo("value").name] + int_type.methods = [ + self.cil_predef_method("abort", "Int", self.object_abort), + self.cil_predef_method("copy", "Int", self.object_copy), + self.cil_predef_method("type_name", "Int", self.object_type_name), + ] + + # Bool + # self.attrs["Bool"] = {"value": (0, "Int")} + bool_type = TypeNode("Bool") + bool_type.attributes = [VariableInfo("value").name] + bool_type.methods = [ + self.cil_predef_method("abort", "Bool", self.object_abort), + self.cil_predef_method("copy", "Bool", self.object_copy), + self.cil_predef_method("type_name", "Bool", self.object_type_name), + ] + + for typex in [object_type, io_type, string_type, int_type, bool_type]: + self.types.append(typex) + + # predefined functions cil + def cil_predef_method(self, mname, cname, specif_code): + self.current_type = self.context.get_type(cname) + self.current_method = self.current_type.get_method(mname) + self.current_function = FunctionNode( + self.to_function_name(mname, cname), [], [], [] + ) + + #specif_code() + if mname == "abort": #Agregado por Sandra + specif_code(cname) + else: + specif_code() + + self.code.append(self.current_function) + self.current_function = None + self.current_type = None + + return (mname, self.to_function_name(mname, cname)) + + def register_abort(self): + self.current_function = FunctionNode( + self.to_function_name("abort", self.current_type.name), [], [], [] + ) + self.object_abort(self.current_type.name) + self.code.append(self.current_function) + self.current_function = None + + def register_copy(self): + self.current_function = FunctionNode( + self.to_function_name("copy", self.current_type.name), [], [], [] + ) + self.object_copy() + self.code.append(self.current_function) + self.current_function = None + + def register_type_name(self): + self.current_function 
= FunctionNode( + self.to_function_name("type_name", self.current_type.name), [], [], [] + ) + self.object_type_name() + self.code.append(self.current_function) + self.current_function = None + + def string_length(self): + self.params.append(ParamNode("self")) + + result = self.define_internal_local() + + self.register_instruction(LengthNode(result, "self")) + self.register_instruction(ReturnNode(result)) + + def string_concat(self): + self.params.append(ParamNode("self")) + other_arg = VariableInfo("other_arg") + self.register_param(other_arg) + + ret_vinfo = self.define_internal_local() + + self.register_instruction(ConcatNode(ret_vinfo, "self", other_arg.name)) + self.register_instruction(ReturnNode(ret_vinfo)) + + def string_substr(self): + self.params.append(ParamNode("self")) + idx_arg = VariableInfo("idx_arg") + self.register_param(idx_arg) + length_arg = VariableInfo("length_arg") + self.register_param(length_arg) + + ret_vinfo = self.define_internal_local() + + self.register_instruction( + SubstringNode(ret_vinfo, "self", idx_arg.name, length_arg.name) + ) + self.register_instruction(ReturnNode(ret_vinfo)) + + def object_abort(self,type): + self.data.append( + DataNode(f"abort_{type}", f"Abort called from class {type}\n") + ) + error = f"abort_{type}" + self.register_instruction(RuntimeErrorNode(error)) + + def object_copy(self): + self.params.append(ParamNode("self")) + copy_local = self.define_internal_local() + self.register_instruction(AllocateNode(self.current_type.name, copy_local)) + + for attr in self.attrs[self.current_type.name].keys(): + attr_copy_local = self.define_internal_local() + attr_name = ( + self.to_attr_name(self.current_type.name, attr) + if self.current_type.name not in ["Int", "String", "Bool"] + else attr + ) + self.register_instruction( + GetAttribNode( + attr_copy_local, + "self", + attr_name, + self.current_type.name, + ) + ) + self.register_instruction( + SetAttribNode( + copy_local, + attr_name, + attr_copy_local, + 
self.current_type.name, + ) + ) + + self.register_instruction(ReturnNode(copy_local)) + + def object_type_name(self): + self.params.append(ParamNode("self")) + self.data.append( + DataNode(f"type_name_{self.current_type.name}", f"{self.current_type.name}") + ) + type_name = self.define_internal_local() + self.register_instruction( + LoadNode( + type_name, + VariableInfo( + f"type_name_{self.current_type.name}", + None, + f"{self.current_type.name}", + ), + ) + ) + self.register_instruction(ReturnNode(type_name)) + + def io_outstring(self): + self.params.append(ParamNode("self")) + str_arg = VariableInfo("str") + self.register_param(str_arg) + self.register_instruction(PrintStrNode(str_arg.name)) + self.register_instruction(ReturnNode("self")) + + def io_outint(self): + self.params.append(ParamNode("self")) + int_arg = VariableInfo("int") + self.register_param(int_arg) + self.register_instruction(PrintIntNode(int_arg.name)) + self.register_instruction(ReturnNode("self")) + + def io_instring(self): + self.params.append(ParamNode("self")) + ret_vinfo = self.define_internal_local() + self.register_instruction(ReadStringNode(ret_vinfo)) + self.register_instruction(ReturnNode(ret_vinfo)) + + def io_inint(self): + self.params.append(ParamNode("self")) + ret_vinfo = self.define_internal_local() + self.register_instruction(ReadIntNode(ret_vinfo)) + self.register_instruction(ReturnNode(ret_vinfo)) + + def reset_state(self): + self.types = [] + self.code = [] + self.data = [] + self.current_type = None + self.current_function = None + self.string_count = 0 + self._count = 0 + self.context = None + + @visitor.on("node") + def visit(self, node=None, return_var=None): + pass + + @visitor.when(cool.ProgramNode) + def visit(self, node, return_var=None): + self.context = node.context + + for type in self.context.types.values(): + self.attrs[type.name] = { + attr.name: (i, htype.name) + for i, (attr, htype) in enumerate(type.all_attributes()) + } + self.methods[type.name] = { + 
method.name: (i, htype.name) + if htype.name != "Object" or method.name not in ["abort","type_name", "copy"] + else (i, type.name) + for i, (method, htype) in enumerate(type.all_methods()) + } + self.current_function = FunctionNode("main", [], [], []) + self.code.append(self.current_function) + + + + main_constructor = self.to_function_name("constructor", "Main") + main_method_name = self.to_function_name("main", "Main") + + # Get instance from constructor + a = self.define_internal_local() + self.register_instruction(AllocateNode("Main", a)) + self.register_instruction(ArgNode(a)) + instance= self.define_internal_local() + self.register_instruction(StaticCallNode(main_constructor, instance)) + + # Pass instance as parameter and call Main_main + result = self.define_internal_local() + self.register_instruction(ArgNode(instance)) + self.register_instruction(StaticCallNode(main_method_name, result)) + + # self.register_instruction(ReturnNode(0)) + self.register_instruction(ExitNode()) + + self.current_function = None + + self.add_builtin_functions() + self.add_builtin_constructors() + + for declaration in node.declarations: + self.visit(declaration) + + program_node = ProgramNode(self.types, self.data, self.code) + + self.reset_state() + + return program_node + + @visitor.when(cool.ClassDeclarationNode) + def visit(self, node, return_var=None): + self.current_type = self.context.get_type(node.id) + + self.register_abort() + self.register_copy() + self.register_type_name() + + type_node = self.register_type(self.current_type.name) + + current_type = self.current_type + while current_type is not None: + attributes = [ + (node.id + "_" + attr.name) for attr in current_type.attributes + ] + + type_node.attributes.extend(attributes[::-1]) + + current_type = current_type.parent + + type_node.attributes.reverse() + + type_node.methods = [(method_name, self.to_function_name(method_name, typex)) for method_name,(_, typex) in self.methods[node.id].items()] + 
print(type_node.methods) + self.build_constructor(node) + + + for feature in node.features: + self.visit(feature) + + @visitor.when(cool.AttrDeclarationNode) + def visit(self, node, return_var=None): + self.current_function = self.register_function(self.to_function_name(f"{node.id}_constructor", self.current_type.name)) + + self.params.append(ParamNode("self")) + + # Assign init_expr if not None + if node.init_exp: + init_expr_value = self.define_internal_local() + self.visit(node.init_exp, init_expr_value) + self.register_instruction(ReturnNode(init_expr_value)) + + else: # Assign default value + default_var = self.define_internal_local() + self.register_instruction(DefaultValueNode(default_var, node.type)) + self.register_instruction(ReturnNode(default_var)) + + self.current_function = None + + + @visitor.when(cool.FuncDeclarationNode) + def visit(self, node, return_var=None): + self.current_method = self.current_type.get_method(node.id) + + # Add function to .CODE + self.current_function = self.register_function( + self.to_function_name(node.id, self.current_type.name) + ) + + # Add params + self.current_function.params.append(ParamNode("self")) + for pname, _ in node.params: + self.register_param(VariableInfo(pname)) + + # Body + value = self.define_internal_local() + self.visit(node.body, value) + + # Return + if isinstance(self.current_method.return_type, VoidType): + value = None + + self.register_instruction(ReturnNode(value)) + + self.current_method = None + self.current_function = None + + @visitor.when(cool.AssignNode) + def visit(self, node, return_var): + self.visit(node.expr, return_var) + + local_id = self.get_local(node.id) + if any(local_id == l.name for l in self.current_function.localvars): + self.register_instruction(AssignNode(local_id, return_var)) + return + + param_id = self.get_param(node.id) + if any(param_id == p.name for p in self.current_function.params): + self.register_instruction(AssignNode(param_id, return_var)) + return + + 
self.register_instruction( + SetAttribNode( + "self", + self.to_attr_name(self.current_type.name, node.id), + return_var, + self.current_type.name, + ) + ) + + @visitor.when(cool.CallNode) + def visit(self, node, return_var): + obj_type = self.current_type.name + instance = self.define_internal_local() + if node.obj: + self.visit(node.obj, instance) + obj_type = node.obj.static_type.name + + else: + self.register_instruction(AssignNode(instance, "self")) + + instance_type = None + if not node.at_type: + instance_type = self.define_internal_local() + if obj_type in ["Int","Bool"]: + self.register_instruction(TypeOfNode(instance, instance_type,True,obj_type)) + else: + self.register_instruction(TypeOfNode(instance, instance_type)) + + args = [instance] + for arg in node.args: + arg_value = self.define_internal_local() + self.visit(arg, arg_value) + args.append(arg_value) + + for arg in args: + self.register_instruction(ArgNode(arg)) + + if node.at_type: + self.register_instruction( + StaticCallNode(self.to_function_name(node.id, node.at_type), return_var) + ) + + else: + method_index = self.get_method_id(obj_type, node.id) + self.register_instruction( + DynamicCallNode(instance_type, method_index, return_var) + ) + + @visitor.when(cool.IfNode) + def visit(self, node, return_var): + # IF condition GOTO label + condition_value = self.define_internal_local() + self.visit(node.if_expr, condition_value) + then_label = "THEN_" + self.next_id() + self.register_instruction(GotoIfNode(condition_value, then_label)) + + # Else + self.visit(node.else_expr, return_var) + + # GOTO end_label + end_label = "END_IF_" + self.next_id() # Example: END_IF_120 + self.register_instruction(GotoNode(end_label)) + + # Then label + self.register_instruction(LabelNode(then_label)) + self.visit(node.then_expr, return_var) + + # end_label + self.register_instruction(LabelNode(end_label)) + + @visitor.when(cool.WhileNode) + def visit(self, node, return_var): + # While label + while_label = 
"WHILE_" + self.next_id() + self.register_instruction(LabelNode(while_label)) + + # Condition + c = self.define_internal_local() + self.visit(node.condition, c) + + # If condition GOTO body_label + body_label = "BODY_" + self.next_id() + self.register_instruction(GotoIfNode(c, body_label)) + + # GOTO end_while label + end_while_label = "END_WHILE_" + self.next_id() + self.register_instruction(GotoNode(end_while_label)) + + # Body + self.register_instruction(LabelNode(body_label)) + self.visit(node.body, self.define_internal_local()) + + # GOTO while label + self.register_instruction(GotoNode(while_label)) + + # End while label + self.register_instruction(LabelNode(end_while_label)) + + self.register_instruction(DefaultValueNode(return_var, "Void")) + + @visitor.when(cool.BlockNode) + def visit(self, node, return_var): + for expr in node.expression_list: + self.visit(expr, return_var) + + @visitor.when(cool.LetNode) + def visit(self, node, return_var): + for var_dec in node.identifiers: + self.visit(var_dec) + + self.visit(node.body, return_var) + + @visitor.when(cool.VarDeclarationNode) + def visit(self, node, return_var=None): + # Add LOCAL variable + idx = self.get_local(node.id) + if not any(idx == l.name for l in self.current_function.localvars): + self.register_local(node.id) + + # Add Assignment Node + if node.expr: + self.visit(node.expr, idx) + else: + self.register_instruction(DefaultValueNode(idx, node.type)) + + @visitor.when(cool.CaseNode) + def visit(self, node, return_var=None): + def get_children(static_type): + children = [] + for t in self.context.types.values(): + if t.conforms_to(static_type) and t.name != "AUTO_TYPE": + children.append(t) + + return children + + def get_least_type(expr_dynamic_type): + case_item_types = [case_item.type for case_item in node.case_items] + solve = expr_dynamic_type + while solve is not None: + if solve.name in case_item_types: + return solve.name + solve = solve.parent + + return None + + def 
get_asserted_branch(least_type:str): + for case_item in node.case_items: + if case_item.type == least_type: + return case_item + return None + + expr_value = self.define_internal_local() + self.visit(node.expr, expr_value) + + possible_dynamic_types = get_children(node.expr.static_type) + + branch_labels = [] + for t in possible_dynamic_types: + dynamic_type = self.define_internal_local() + self.register_instruction(TypeOfNode(expr_value, dynamic_type)) + + label = "BRANCH" + self.next_id() + equals = self.define_internal_local() + self.register_instruction(CompareTypes(equals, dynamic_type, t.name)) + self.register_instruction(GotoIfNode(equals,label)) + + least_type = get_least_type(t) + asserted_branch = get_asserted_branch(least_type) + branch_labels.append((asserted_branch, label)) + + + self.data.append( + DataNode("runtime_error", "No branch can be selected for evaluation") + ) + error = "runtime_error" + self.register_instruction(RuntimeErrorNode(error)) + + end_case_label = "END_CASE_" + self.next_id() + for branch, label in branch_labels: + if not branch: + continue + self.register_instruction(LabelNode(label)) + new_local = self.register_local(branch.id) + self.register_instruction(AssignNode(new_local, expr_value)) + + self.visit(branch.expr, return_var) + self.register_instruction(GotoNode(end_case_label)) + + self.register_instruction(LabelNode(end_case_label)) + + @visitor.when(cool.CaseItemNode) + def visit(self, node, return_var=None): + pass + + # Arithmetic and comparison operators + @visitor.when(cool.PlusNode) + def visit(self, node, return_var): + left = self.define_internal_local() + self.visit(node.left, left) + + right = self.define_internal_local() + self.visit(node.right, right) + + self.register_instruction(PlusNode(return_var, left, right)) + + @visitor.when(cool.MinusNode) + def visit(self, node, return_var): + left = self.define_internal_local() + self.visit(node.left, left) + + right = self.define_internal_local() + 
self.visit(node.right, right) + + self.register_instruction(MinusNode(return_var, left, right)) + + @visitor.when(cool.StarNode) + def visit(self, node, return_var): + left = self.define_internal_local() + self.visit(node.left, left) + + right = self.define_internal_local() + self.visit(node.right, right) + + self.register_instruction(StarNode(return_var, left, right)) + + @visitor.when(cool.DivNode) + def visit(self, node, return_var): + left = self.define_internal_local() + self.visit(node.left, left) + + right = self.define_internal_local() + self.visit(node.right, right) + + self.register_instruction(DivNode(return_var, left, right)) + + @visitor.when(cool.LessEqualNode) + def visit(self, node, return_var): + left = self.define_internal_local() + self.visit(node.left, left) + + right = self.define_internal_local() + self.visit(node.right, right) + + self.register_instruction(LessEqualNode(return_var, left, right)) + + @visitor.when(cool.LessNode) + def visit(self, node, return_var): + left = self.define_internal_local() + self.visit(node.left, left) + + right = self.define_internal_local() + self.visit(node.right, right) + + self.register_instruction(LessNode(return_var, left, right)) + + @visitor.when(cool.EqualNode) + def visit(self, node, return_var): + left = self.define_internal_local() + self.visit(node.left, left) + + right = self.define_internal_local() + self.visit(node.right, right) + + if node.left.static_type.name == "String": + self.register_instruction(StrEqualNode(return_var, left, right)) + else: + self.register_instruction(EqualNode(return_var, left, right)) + + # Unary operators + @visitor.when(cool.InstantiateNode) # NewNode + def visit(self, node, return_var): + _self = self.define_internal_local() + self.register_instruction(AllocateNode(node.lex, _self)) + self.register_instruction(ArgNode(_self)) + self.register_instruction( + StaticCallNode(self.to_function_name("constructor", node.lex), return_var) + ) + + @visitor.when(cool.IsvoidNode) 
+ def visit(self, node, return_var): + value = self.define_internal_local() + self.visit(node.expr, value) + self.register_instruction(IsVoidNode(return_var, value)) + + @visitor.when(cool.NotNode) + def visit(self, node, return_var): + value = self.define_internal_local() + self.visit(node.expr, value) + constant = self.define_internal_local() + self.register_instruction( + StaticCallNode(self.to_function_name("constructor", "Bool"), constant) + ) + self.register_instruction(AssignNode(constant, 1)) + self.register_instruction(MinusNode(return_var, constant, value)) + + @visitor.when(cool.NegNode) + def visit(self, node, return_var): + value = self.define_internal_local() + self.visit(node.expr, value) + self.register_instruction(IntComplementNode(return_var, value)) + + @visitor.when(cool.ConstantNumNode) + def visit(self, node, return_var): + self.register_instruction(AssignNode(return_var, int(node.lex))) + + @visitor.when(cool.VariableNode) + def visit(self, node, return_var): + + if node.lex == "self": + self.register_instruction(AssignNode(return_var, "self")) + return + + local_id = self.get_local(node.lex) + if any(local_id == l.name for l in self.current_function.localvars): + self.register_instruction(AssignNode(return_var, local_id)) + return + + param_id = self.get_param(node.lex) + if any(param_id == p.name for p in self.current_function.params): + self.register_instruction(AssignNode(return_var, param_id)) + return + + self.register_instruction( + GetAttribNode( + return_var, + "self", + self.to_attr_name(self.current_type.name, node.lex), + self.current_type.name, + ) + ) + + @visitor.when(cool.StringNode) + def visit(self, node, return_var): + idx = self.generate_next_string_id() + self.data.append(DataNode(idx, node.lex)) + self.register_instruction( + LoadNode(return_var, VariableInfo(idx, None, node.lex)) + ) + + @visitor.when(cool.BooleanNode) + def visit(self, node, return_var): + self.register_instruction( + AssignNode(return_var, 1 if 
node.lex == "true" else 0) + ) + + @visitor.when(cool.DefaultValueNode) + def visit(self, node, return_var): + self.register_instruction(DefaultValueNode(return_var, node.type)) diff --git a/src/code_gen/cil_nodes.py b/src/code_gen/cil_nodes.py new file mode 100644 index 000000000..5d0e66952 --- /dev/null +++ b/src/code_gen/cil_nodes.py @@ -0,0 +1,99 @@ +class CIL_Node: + pass + + +class ProgramCil(CIL_Node): + def __init__(self, types, data, code): + self.types = types + self.data = data + self.code = code + + +class TypeCil(CIL_Node): + def __init__(self, idx, attributes=[], methods=[]): + self.id = idx + self.attributes = attributes + self.methods = methods + + +class AttributeCil(CIL_Node): + def __init__(self, idx): + self.id = idx + + +class MethodCil(CIL_Node): + def __init__(self, idx, ref): + self.id = idx + self.ref = ref + + +class FunctionCil(CIL_Node): + def __init__(self, idx, args=[], localsx=[], body=[]): + self.id = idx + self.args = args + self.locals = localsx + self.body = body + + +class IfCil(CIL_Node): + def __init__(self, condition, label): + self.condition = condition + self.label = label + + +class ArgCil(CIL_Node): + def __init__(self, idx): + self.id = idx + + +class LocalCil(CIL_Node): + def __init__(self, idx): + self.id = idx + + +class AssignmentCil(CIL_Node): + def __init__(self, idx, expr): + self.id = idx + self.expr = expr + + +class StringCil(CIL_Node): + def __init__(self, idx: str, text: str): + self.id = idx + self.text = text + + +class LabelCil(CIL_Node): + def __init__(self, idx): + self.id = idx + + +class GotoCil(CIL_Node): + def __init__(self, label): + self.label = label + + +class GetAttrCil(CIL_Node): + def __init__(self, typex, attr): + self.type = typex + self.attr = attr + + +class SetAttr(CIL_Node): + def __init__(self, typex, attr, value): + self.type = typex + self.attr = attr + self.value = value + + +class GetIndex(CIL_Node): + def __init__(self, array, index): + self.array = array + self.index = index + + 
+class SetIndex(CIL_Node): + def __init__(self, array, index, value): + self.array = array + self.index = index + self.value = value diff --git a/src/code_gen/mips_builder.py b/src/code_gen/mips_builder.py new file mode 100644 index 000000000..2f6cb178e --- /dev/null +++ b/src/code_gen/mips_builder.py @@ -0,0 +1,1124 @@ +import cmp.visitor as visitor +import cmp.cil as cil +import random +import enum + +from code_gen import mips_nodes as mips + +# type_info offsets +TYPENAME_OFFSET = 0 +FUNCTION_OFFSET = 4 +RA_OFFSET = 8 +OLD_FP_OFFSET = 4 +TYPEINFO_ATTR_OFFSET = 0 + +# str attributes offsets +LENGTH_ATTR_OFFSET = 4 +CHARS_ATTR_OFFSET = 8 + +FP_ARGS_DISTANCE = 3 # how far finishes $fp from arguments in method call +FP_LOCALS_DISTANCE = 0 # how far finishes $fp from localvars in method call + +ABORT_SIGNAL = "ABORT_SIGNAL" # CIL +CASE_MISSMATCH = "case_missmatch" # CIL +CASE_VOID = "case_on_void" # MIPS +DISPATCH_VOID = "dispatch_on_void" # MIPS +ZERO_DIVISION = "division_by_zero" # MIPS +SUBSTR_OUT_RANGE = "substr_out_of_range" # MIPS +HEAP_OVERFLOW = "heap_overflow" +STRING_SIZE = 12 +VOID = "Void" +STR_CMP = "string_comparer" +EMPTY_STRING = "empty_string" +LENGTH = "length" +COPY = "copy" +INPUT_STR_BUFFER = "input_str_buffer" +BUFFER_SIZE = 1024 + +# temporary registers +t0 = "$t0" +t1 = "$t1" +t2 = "$t2" +t3 = "$t3" +t4 = "$t4" +t5 = "$t5" +t6 = "$t6" # convenios +t7 = "$t7" # convenios +t8 = "$t8" +t9 = "$t9" + +# Arguments Registers +a0 = "$a0" +a1 = "$a1" +a2 = "$a2" +a3 = "$a3" + +# frame pointer +fp = "$fp" +# stack pointer +sp = "$sp" + +ra = "$ra" +lo = "lo" +hi = "hi" +v0 = "$v0" +s0 = "$s0" +s1 = "$s1" +s2 = "$s2" +s3 = "$s3" +zero = "$zero" + +SYSCALL_PRINT_INT = 1 +SYSCALL_PRINT_STR = 4 +SYSCALL_READ_INT = 5 +SYSCALL_READ_STR = 8 +SYSCALL_SBRK = 9 +SYSCALL_EXIT = 10 + +SELF_TYPE = "SELF_TYPE" +INT = "Int" +BOOL = "Bool" +STRING = "String" +OBJECT = "Object" +IO = "IO" + + +class MemoryManager: + def __init__(self): + self.all_reg = [t0, t1, t2, t3, 
t4, t5, t9] + + self.used_reg = [] + self.stored = [] + + def get_unused_reg(self): + unused = list(set(self.all_reg) - set(self.used_reg)) + list( + set(self.used_reg) - set(self.all_reg) + ) + reg = random.choice(unused) + self.used_reg.append(reg) + return reg + + def clean(self): + self.used_reg = self.stored + self.stored = [] + + def save(self): + self.stored = self.used_reg.copy() + + +class MIPSBuilder: + def __init__(self): + self.mips_code = "" + self.main_size = 0 + self.text = [] + self.data = [] + self.params = [] + self.locals = [] + self.types = {} + self.attr_offset = {} + self.memo = MemoryManager() + self.pushed_args = 0 + + + def get_offset(self,x): + if x in self.locals: + index = self.locals.index(x) + return 4 * index + elif x in self.params: + index = self.params.index(x) + return 4 * (-len(self.params) + index) + + def register_instruction(self, instruction_type, *args): + instruction = instruction_type(*args) + self.current_procedure.instructions.append(instruction) + + def register_data(self, data_type, *args): + data = data_type(*args) + self.data.append(data) + + def register_push(self, reg): + self.register_instruction(mips.StoreWordNode, reg, 0, sp) + self.register_instruction(mips.AddiNode, sp, sp, 4) + + def register_pop(self, reg): + self.register_instruction(mips.LoadWordNode, reg, 4, sp) + self.register_instruction(mips.AddiNode, sp, sp, 4) + + def generate_exception_messages(self): + self.register_data( + mips.DataTypeNode, ".asciiz", ABORT_SIGNAL, ['"Program execution aborted"'] + ) + self.register_data( + mips.DataTypeNode, + ".asciiz", + CASE_MISSMATCH, + ['"Execution of a case statement without a matching branch"'], + ) + self.register_data(mips.DataTypeNode, ".asciiz", CASE_VOID, ['"Case on void"']) + self.register_data( + mips.DataTypeNode, ".asciiz", DISPATCH_VOID, ['"Dispatch on void"'] + ) + self.register_data( + mips.DataTypeNode, ".asciiz", ZERO_DIVISION, ['"Division by zero"'] + ) + self.register_data( + 
mips.DataTypeNode, ".asciiz", SUBSTR_OUT_RANGE, ['"Substring out of range"'] + ) + self.register_data( + mips.DataTypeNode, ".asciiz", HEAP_OVERFLOW, ['"Heap overflow"'] + ) + + def generate_extra_static_labels(self): + self.register_data(mips.DataTypeNode, ".word", VOID, [-1]) + self.register_data(mips.DataTypeNode, ".asciiz", EMPTY_STRING, ['"\"\""']) + self.register_data(mips.DataTypeNode, ".space", INPUT_STR_BUFFER, [BUFFER_SIZE]) + + def generate_attr_offset(self, type): + attributes = self.types[type].attributes + self.attr_offset[type] = {} + for i, attr in enumerate(attributes): + self.attr_offset[type][attr] = 4 * (i + 1) + + def generate_str_length(self): + # calculates the length of the null-terminated char array referenced by $a0 and stores it in $a0 + self.memo.save() + reg1 = self.memo.get_unused_reg() + reg2 = self.memo.get_unused_reg() + + self.current_procedure = mips.ProcedureNode(LENGTH) + + self.register_instruction(mips.LoadInmediate, reg1, 0) + + self.register_instruction(mips.Label, "length_loop") + self.register_instruction(mips.LoadByteNode, reg2, 0, a0) + self.register_instruction(mips.BranchOnEqualNode, zero, reg2, "length_end") + + self.register_instruction(mips.AddiNode, reg1, reg1, 1) + self.register_instruction(mips.AddiNode, a0, a0, 1) + self.register_instruction(mips.Jump, "length_loop") + + self.register_instruction(mips.Label, "length_end") + self.register_instruction(mips.MoveNode, a0, reg1) + self.register_instruction(mips.Jump, ra) + + self.text.append(self.current_procedure) + self.memo.clean() + + def generate_copy(self): + # copies from t1 to t6 a0 bytes + self.memo.save() + self.current_procedure = mips.ProcedureNode(COPY) + + self.register_instruction(mips.Label, "copy_loop") + self.register_instruction(mips.BranchOnEqualNode, zero, a0, "copy_end") + self.register_instruction(mips.LoadByteNode, t8, 0, t7) + self.register_instruction(mips.StoreByteNode, t8, 0, t6) + self.register_instruction(mips.AddiNode, t6, t6, 1) + 
self.register_instruction(mips.AddiNode, t7, t7, 1) + self.register_instruction(mips.AddiNode, a0, a0, -1) + self.register_instruction(mips.Jump, "copy_loop") + + self.register_instruction(mips.Label, "copy_end") + self.register_instruction(mips.Jump, ra) + + self.text.append(self.current_procedure) + self.memo.clean() + + + def generate_input(self): + # copies from t7 to t6 a0 bytes + self.memo.save() + reg4 = self.memo.get_unused_reg() + reg5 = self.memo.get_unused_reg() + + self.current_procedure = mips.ProcedureNode("Input") + + self.register_instruction(mips.Label, "input_loop") + + self.register_instruction(mips.LoadByteNode, t8, 0, t7) + self.register_instruction(mips.StoreByteNode, t8, 0, t6) + + self.register_instruction(mips.AddiNode, t6, t6, 1) + self.register_instruction(mips.AddiNode, t7, t7, 1) + self.register_instruction(mips.BranchOnGreaterZero,t8,"input_loop") + self.register_instruction(mips.AddiNode,t6,t6,-2) + + self.register_instruction(mips.LoadInmediate,reg4,10) + self.register_instruction(mips.LoadByteNode,reg5,0,t6) + self.register_instruction(mips.BranchOnNotEqualNode,reg4,reg5,"input_end") + self.register_instruction(mips.LoadInmediate,t8,0) + self.register_instruction(mips.StoreByteNode,t8,0,t6) + + self.register_instruction(mips.Label, "input_end") + self.register_instruction(mips.Jump, ra) + + self.text.append(self.current_procedure) + + + + + def generate_str_cmp(self): + self.current_procedure = mips.ProcedureNode(STR_CMP) + + + #comparing char by char + self.register_instruction(mips.CommentNode, "Comparing char by char") + self.register_instruction(mips.LoadWordNode, s0, CHARS_ATTR_OFFSET, t6) #char array pointer offset + self.register_instruction(mips.LoadWordNode, s1, CHARS_ATTR_OFFSET, t7) + + #char by char loop + self.register_instruction(mips.Label, "strcmp_loop") + self.register_instruction(mips.LoadInmediate,s2,0) + self.register_instruction(mips.LoadByteNode, s2, 0, s0) + + self.register_instruction(mips.LoadInmediate,s3,0) 
+ self.register_instruction(mips.LoadByteNode, s3, 0, s1) + + self.register_instruction(mips.SetEq,a0,s2,s3) + self.register_instruction(mips.BranchOnEqZero,a0,"end_loop") + + + self.register_instruction(mips.BranchOnEqZero,s3, "end_loop") + self.register_instruction(mips.BranchOnEqZero,s2, "end_loop") + self.register_instruction(mips.AddiNode,s0,s0,1) + self.register_instruction(mips.AddiNode,s1,s1,1) + + self.register_instruction(mips.Jump, "strcmp_loop") + + self.register_instruction(mips.Label, "end_loop") + self.register_instruction(mips.JumpRegister, ra) + + self.text.append(self.current_procedure) + + def generate_auxiliar_procedures(self): + self.generate_str_length() + self.generate_copy() + self.generate_str_cmp() + self.generate_input() + + @visitor.on("node") + def visit(self, node=None): + pass + + @visitor.when(cil.ProgramNode) + def visit(self, node): + for type in node.dottypes: + self.visit(type) + self.generate_attr_offset(type.name) + + self.generate_extra_static_labels() + self.generate_exception_messages() + + self.generate_auxiliar_procedures() + + for str_data in node.dotdata: + self.visit(str_data) + + for instruction in node.dotcode: + self.visit(instruction) + + return mips.ProgramNode(self.data, self.text) + + @visitor.when(cil.TypeNode) + def visit(self, node): + self.types[node.name] = node + if node.name == "Main": + self.main_size = (len(node.attributes) + 1) * 4 + values = [] + for func in node.methods: + values.append(func[1]) + + self.register_data(mips.DataTypeNode, ".word", node.name, values) + self.register_data( + mips.DataTypeNode, ".asciiz", f"{node.name}_cname", [f'"{node.name}"'] + ) + + @visitor.when(cil.DataNode) + def visit(self, node): + self.register_data(mips.DataTypeNode, ".asciiz", node.name, [f'"{node.value}"']) + + + + @visitor.when(cil.ArgNode) + def visit(self, node): + self.memo.save() + self.register_instruction(mips.CommentNode, f"Receiving Arg {node.name}") + reg = self.memo.get_unused_reg() + + offset = 
self.get_offset(node.name) + self.register_instruction(mips.LoadWordNode, reg, offset, fp) + self.register_push(reg) + self.pushed_args += 1 + + self.memo.clean() + + @visitor.when(cil.FunctionNode) + def visit(self, node): + self.memo.save() + locals_save = self.locals + params_save = self.params + self.locals, self.params = [], [] + self.current_procedure = mips.ProcedureNode(node.name) + + saved_fp = self.memo.get_unused_reg() + self.register_instruction(mips.MoveNode, saved_fp, fp) + + self.register_instruction(mips.CommentNode, "New $fp") + self.register_instruction(mips.MoveNode, fp, sp) + + self.register_instruction(mips.CommentNode, "Reserving space for locals") + self.register_instruction(mips.AddiNode, sp, sp, 4 * len(node.localvars)) + + self.register_instruction(mips.CommentNode, "Pushing $ra") + self.register_push(ra) + + self.register_instruction(mips.CommentNode, "Saving $fp") + self.register_push(saved_fp) + self.memo.clean() + + for local in node.localvars: + self.locals.append(local.name) + + for param in node.params: + self.params.append(param.name) + + self.register_instruction(mips.CommentNode, "Executing instructions") + for inst in node.instructions: + self.visit(inst) + + self.text.append(self.current_procedure) + self.locals = locals_save + self.params = params_save + + @visitor.when(cil.LoadNode) + def visit(self, node: cil.LoadNode): + self.memo.save() + self.register_instruction(mips.CommentNode, "Executing Load") + _size = STRING_SIZE + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_SBRK) + self.register_instruction(mips.LoadInmediate, a0, _size) + self.register_instruction(mips.SyscallNode) + + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, v0, dest_offset, fp) + + reg = self.memo.get_unused_reg() + self.register_instruction(mips.LoadAddress, reg, STRING) + self.register_instruction(mips.StoreWordNode, reg, 0, v0) + + # storing string length + 
self.register_instruction(mips.LoadInmediate, reg, len(node.msg.data)) + self.register_instruction(mips.StoreWordNode, reg, LENGTH_ATTR_OFFSET, v0) + + # storing string chars ref + self.register_instruction(mips.LoadAddress, reg, node.msg.name) + self.register_instruction(mips.StoreWordNode, reg, CHARS_ATTR_OFFSET, v0) + self.memo.clean() + + @visitor.when(cil.LengthNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing Length") + self.memo.save() + reg = self.memo.get_unused_reg() + source_offset = self.get_offset(node.source) + self.register_instruction(mips.LoadWordNode, reg, source_offset, fp) + self.register_instruction(mips.LoadWordNode, reg, LENGTH_ATTR_OFFSET, reg) + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, reg,dest_offset, fp) + self.memo.clean() + + + #All return value is saved in register a1 + @visitor.when(cil.ReturnNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing Return") + if isinstance(node.value, int): + self.register_instruction(mips.LoadInmediate, a1, node.value) + else: + offset = self.get_offset(node.value) + self.register_instruction(mips.LoadWordNode, a1, offset, fp) + + self.register_instruction(mips.CommentNode, "Restoring saved $fp") + self.register_instruction(mips.AddiNode, sp, sp, -4) + self.register_instruction(mips.LoadWordNode, fp, 0, sp) + + self.register_instruction(mips.CommentNode, "Restoring saved $ra") + self.register_instruction(mips.AddiNode, sp, sp, -4) + self.register_instruction(mips.LoadWordNode, ra, 0, sp) + + AR = -4 * (len(self.locals)) + + self.register_instruction(mips.CommentNode, "Cleaning stack after call") + self.register_instruction(mips.AddiNode, sp, sp, AR) + + self.register_instruction(mips.CommentNode, "Return jump") + self.register_instruction(mips.JumpRegister, ra) + + @visitor.when(cil.GotoNode) + def visit(self,node): + self.register_instruction(mips.CommentNode,"Executing Goto") + 
self.register_instruction(mips.Jump,node.label) + + @visitor.when(cil.GotoIfNode) + def visit(self,node): + self.memo.save() + + reg = self.memo.get_unused_reg() + cond_offset = self.get_offset(node.condition) + + self.register_instruction(mips.LoadWordNode,reg,cond_offset,fp) + + self.register_instruction(mips.BranchOnNotEqZero,reg,node.label) + self.memo.clean() + + + + + @visitor.when(cil.AllocateNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing Allocate") + self.memo.save() + _size = (len(self.types[node.type].attributes) + 1) * 4 + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_SBRK) + self.register_instruction(mips.LoadInmediate, a0, _size) + self.register_instruction(mips.SyscallNode) + + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, v0, dest_offset, fp) + + reg = self.memo.get_unused_reg() + self.register_instruction(mips.LoadAddress, reg, node.type) + self.register_instruction(mips.StoreWordNode, reg, 0, v0) + self.memo.clean() + + @visitor.when(cil.AssignNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing Assign") + self.memo.save() + + reg = self.memo.get_unused_reg() + + if isinstance(node.source, int): + self.register_instruction(mips.LoadInmediate, reg, node.source) + else: + source_offset = self.get_offset(node.source) + self.register_instruction(mips.LoadWordNode, reg, source_offset, fp) + + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, reg, dest_offset, fp) + + self.memo.clean() + + @visitor.when(cil.RuntimeErrorNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing RuntimeError") + self.register_instruction(mips.CommentNode, "Printing Abort Message") + self.register_instruction(mips.LoadAddress, a0, node.msg) + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_PRINT_STR) + self.register_instruction(mips.SyscallNode) + + 
self.register_instruction(mips.CommentNode, "Aborting execution") + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_EXIT) + self.register_instruction(mips.SyscallNode) + + @visitor.when(cil.StaticCallNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing Static Call") + self.register_instruction(mips.JumpAndLink, node.function) + + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, a1, dest_offset, fp) + self.register_instruction(mips.AddiNode, sp, sp, self.pushed_args * -4) + self.pushed_args = 0 + + @visitor.when(cil.DynamicCallNode) + def visit(self, node): + # self.print(node.instance_type) + self.memo.save() + self.register_instruction(mips.CommentNode, "Executing Dynamic Call") + + reg1 = self.memo.get_unused_reg() + inst_offset = self.get_offset(node.instance_type) + self.register_instruction(mips.LoadWordNode, reg1, inst_offset, fp) + + # getting function + reg2 = self.memo.get_unused_reg() + self.register_instruction(mips.LoadWordNode, reg2, node.method_index * 4, reg1) + + self.register_instruction(mips.JumpRegister, reg2) + + # putting the return vslue in destination + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, a1, dest_offset, fp) + self.register_instruction(mips.AddiNode, sp, sp, self.pushed_args * -4) + self.pushed_args = 0 + self.memo.clean() + + @visitor.when(cil.GetAttribNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing GetAttr") + self.memo.save() + reg1 = self.memo.get_unused_reg() + reg2 = self.memo.get_unused_reg() + + instance_offset = self.get_offset(node.instance) + self.register_instruction(mips.LoadWordNode, reg1, instance_offset, fp) + + attr_offs = self.attr_offset[node.type][node.attr] + self.register_instruction(mips.LoadWordNode, reg2, attr_offs, reg1) + + dest_offs = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, reg2, dest_offs, fp) + 
self.memo.clean() + + @visitor.when(cil.SetAttribNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing SetAttr") + self.memo.save() + reg1 = self.memo.get_unused_reg() + reg2 = self.memo.get_unused_reg() + + instance_offset = self.get_offset(node.instance) + self.register_instruction(mips.LoadWordNode, reg1, instance_offset, fp) + + value_offset = self.get_offset(node.value) + self.register_instruction(mips.LoadWordNode, reg2, value_offset, fp) + + attr_os = self.attr_offset[node.type][node.attr] + self.register_instruction(mips.StoreWordNode, reg2, attr_os, reg1) + + self.memo.clean() + + @visitor.when(cil.DefaultValueNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing DefaultValue") + self.memo.save() + reg = self.memo.get_unused_reg() + dest_offset = self.get_offset(node.dest) + if node.type in [INT, BOOL]: + self.register_instruction(mips.LoadInmediate, reg, 0) + self.register_instruction(mips.StoreWordNode, reg, dest_offset, fp) + elif node.type == STRING: + _size = STRING_SIZE + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_SBRK) + self.register_instruction(mips.LoadInmediate, a0, _size) + self.register_instruction(mips.SyscallNode) + + self.register_instruction(mips.StoreWordNode, v0, dest_offset, fp) + reg = self.memo.get_unused_reg() + self.register_instruction(mips.LoadAddress, reg, STRING) + self.register_instruction(mips.StoreWordNode, reg, 0, v0) + + self.register_instruction(mips.LoadInmediate, reg, 0) + self.register_instruction( + mips.StoreWordNode, reg, LENGTH_ATTR_OFFSET, v0 + ) # pq en vo esta el allocate + self.register_instruction(mips.LoadAddress, reg, EMPTY_STRING) + self.register_instruction(mips.StoreWordNode, reg, CHARS_ATTR_OFFSET, v0) + elif node.type != VOID: + _size = (len(self.types[node.type].attributes) + 1) * 4 + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_SBRK) + self.register_instruction(mips.LoadInmediate, a0, _size) + 
self.register_instruction(mips.SyscallNode) + + self.register_instruction(mips.StoreWordNode, v0, dest_offset, fp) + reg = self.memo.get_unused_reg() + self.register_instruction(mips.LoadAddress, reg, node.type) + self.register_instruction(mips.StoreWordNode, reg, 0, v0) + else: + self.register_instruction(mips.LoadAddress, reg, VOID) + self.register_instruction(mips.StoreWordNode, reg, dest_offset, fp) + + self.memo.clean() + + @visitor.when(cil.PlusNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing Plus Operation") + self.memo.save() + reg_l = self.memo.get_unused_reg() + reg_r = self.memo.get_unused_reg() + reg_dest = self.memo.get_unused_reg() + + left_offset = self.get_offset(node.left) + right_offset = self.get_offset(node.right) + + self.register_instruction(mips.LoadWordNode, reg_l, left_offset, fp) + self.register_instruction(mips.LoadWordNode, reg_r, right_offset, fp) + + self.register_instruction(mips.AddNode, reg_dest, reg_l, reg_r) + + offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, reg_dest, offset, fp) + + self.memo.clean() + + @visitor.when(cil.MinusNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing Minus Operation") + self.memo.save() + reg_l = self.memo.get_unused_reg() + reg_r = self.memo.get_unused_reg() + reg_dest = self.memo.get_unused_reg() + + left_offset = self.get_offset(node.left) + right_offset = self.get_offset(node.right) + + self.register_instruction(mips.LoadWordNode, reg_l, left_offset, fp) + self.register_instruction(mips.LoadWordNode, reg_r, right_offset, fp) + + self.register_instruction(mips.SubNode, reg_dest, reg_l, reg_r) + + offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, reg_dest, offset, fp) + + self.memo.clean() + + @visitor.when(cil.StarNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing Star Operation") + self.memo.save() + reg1 = 
self.memo.get_unused_reg() + reg2 = self.memo.get_unused_reg() + + left_offset = self.get_offset(node.left) + right_offset = self.get_offset(node.right) + + self.register_instruction(mips.LoadWordNode, reg1, left_offset, fp) + self.register_instruction(mips.LoadWordNode, reg2, right_offset, fp) + + self.register_instruction(mips.MultNode, reg1, reg2) + + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.MoveFromLo, reg1) + self.register_instruction(mips.StoreWordNode, reg1, dest_offset, fp) + + self.memo.clean() + + @visitor.when(cil.DivNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "Executing Div Operation") + self.memo.save() + reg1 = self.memo.get_unused_reg() + reg2 = self.memo.get_unused_reg() + + left_offset = self.get_offset(node.left) + right_offset = self.get_offset(node.right) + + self.register_instruction(mips.LoadWordNode, reg1, left_offset, fp) + self.register_instruction(mips.LoadWordNode, reg2, right_offset, fp) + + self.register_instruction(mips.DivideNode, reg1, reg2) + + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.MoveFromLo, reg1) + self.register_instruction(mips.StoreWordNode, reg1, dest_offset, fp) + + self.memo.clean() + + @visitor.when(cil.IntComplementNode) + def visit(self,node): + self.register_instruction(mips.CommentNode,"Executing Int Complement") + self.memo.save() + + source_offset = self.get_offset(node.source) + dest_offset = self.get_offset(node.dest) + + reg1 = self.memo.get_unused_reg() + reg2 = self.memo.get_unused_reg() + + self.register_instruction(mips.LoadWordNode, reg1, source_offset, fp) + self.register_instruction(mips.NotNode, reg2, reg1) + self.register_instruction(mips.AddiNode, reg2, reg2, 1) + self.register_instruction(mips.StoreWordNode, reg2, dest_offset, fp) + self.memo.clean() + + + @visitor.when(cil.LessNode) + def visit(self,node): + self.register_instruction(mips.CommentNode,"Executing Less Operation") + self.memo.save() + 
r_left = self.memo.get_unused_reg() + r_right = self.memo.get_unused_reg() + r_dest = self.memo.get_unused_reg() + + if isinstance(node.left,int): + self.register_instruction(mips.LoadInmediate,r_left,node.left) + else: + left_off = self.get_offset(node.left) + self.register_instruction(mips.LoadWordNode,r_left,left_off,fp) + + if isinstance(node.right,int): + self.register_instruction(mips.LoadInmediate,r_right,node.right) + else: + right_off = self.get_offset(node.right) + self.register_instruction(mips.LoadWordNode,r_right,right_off,fp) + + self.register_instruction(mips.SetOnLessThan,r_dest,r_left,r_right) + + dest_off = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode,r_dest,dest_off,fp) + + self.memo.clean() + + @visitor.when(cil.LessEqualNode) + def visit(self,node): + self.register_instruction(mips.CommentNode,"Executing Less Equal Operation") + self.memo.save() + r_left = self.memo.get_unused_reg() + r_right = self.memo.get_unused_reg() + r_dest = self.memo.get_unused_reg() + + if isinstance(node.left,int): + self.register_instruction(mips.LoadInmediate,r_left,node.left) + else: + left_off = self.get_offset(node.left) + self.register_instruction(mips.LoadWordNode,r_left,left_off,fp) + + if isinstance(node.right,int): + self.register_instruction(mips.LoadInmediate,r_right,node.right) + else: + right_off = self.get_offset(node.right) + self.register_instruction(mips.LoadWordNode,r_right,right_off,fp) + + self.register_instruction(mips.SetOnLessOrEq,r_dest,r_left,r_right) + + dest_off = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode,r_dest,dest_off,fp) + + self.memo.clean() + + @visitor.when(cil.EqualNode) + def visit(self,node): + self.register_instruction(mips.CommentNode,"Executing Equal Operation") + self.memo.save() + r_left = self.memo.get_unused_reg() + r_right = self.memo.get_unused_reg() + r_dest = self.memo.get_unused_reg() + + if isinstance(node.left,int): + 
self.register_instruction(mips.LoadInmediate,r_left,node.left) + else: + left_off = self.get_offset(node.left) + self.register_instruction(mips.LoadWordNode,r_left,left_off,fp) + + if isinstance(node.right,int): + self.register_instruction(mips.LoadInmediate,r_right,node.right) + else: + right_off = self.get_offset(node.right) + self.register_instruction(mips.LoadWordNode,r_right,right_off,fp) + + self.register_instruction(mips.SetEq,r_dest,r_left,r_right) + + dest_off = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode,r_dest,dest_off,fp) + + self.memo.clean() + + @visitor.when(cil.CompareTypes) + def visit(self,node): + self.memo.save() + reg1 = self.memo.get_unused_reg() + reg2 = self.memo.get_unused_reg() + + typeof_offset = self.get_offset(node.typeof) + self.register_instruction(mips.LoadWordNode,reg1,typeof_offset,fp) + self.register_instruction(mips.LoadAddress,reg2,node.type) + + self.register_instruction(mips.SetEq,a2,reg1,reg2) + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode,a2,dest_offset,fp) + + self.memo.clean() + + + + @visitor.when(cil.StrEqualNode) + def visit(self,node): + self.register_instruction(mips.CommentNode,"Executing StrEqual Operation") + left_off = self.get_offset(node.left) + right_off = self.get_offset(node.right) + self.register_instruction(mips.LoadWordNode, t6, left_off, fp) + self.register_instruction(mips.LoadWordNode, t7, right_off, fp) + + self.register_instruction(mips.JumpAndLink, STR_CMP) + dest_off = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, a0, dest_off, fp) + + + @visitor.when(cil.SubstringNode) + def visit(self, node): + self.register_instruction(mips.CommentNode,"Executing Substring") + + #allocating new char array + if isinstance(node.length,int): + self.register_instruction(mips.LoadInmediate,s0,node.length) + else: + length_off = self.get_offset(node.length) + self.register_instruction(mips.LoadWordNode, s0, length_off, 
fp)#salvando el length del substr + self.register_instruction(mips.MoveNode, a0, s0) + self.register_instruction(mips.AddiNode, a0, a0, 1) + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_SBRK) + self.register_instruction(mips.SyscallNode) + self.register_instruction(mips.MoveNode, t6, v0)#saving the dest char arr in t6 + + #loading ref to char array of source string + self.register_instruction(mips.CommentNode,"Loading reference to char array of source string") + + source_off = self.get_offset(node.source) + self.register_instruction(mips.LoadWordNode, t7, source_off, fp) + self.register_instruction(mips.LoadWordNode, t7, CHARS_ATTR_OFFSET, t7) + + if isinstance(node.index,int): + self.register_instruction(mips.LoadInmediate,s2,node.index) + else: + index_offset = self.get_offset(node.index) + self.register_instruction(mips.LoadWordNode,s2,index_offset,fp) + + self.register_instruction(mips.AddNode, t7, t7, s2)#saving the source char arr in t7 + + self.register_instruction(mips.MoveNode, s1, t6) + + #this copies from t7 to t6 a0 bytes + self.register_instruction(mips.CommentNode,"Copying bytes from one char array to another") + self.register_instruction(mips.MoveNode, a0, s0) + self.register_instruction(mips.JumpAndLink, COPY) + + self.register_instruction(mips.CommentNode,"Null-terminating the string") + self.register_instruction(mips.StoreByteNode, zero, 0, t6) + + self.register_instruction(mips.CommentNode,"Allocating new String instance") + dest_offset = self.get_offset(node.dest) + _size = STRING_SIZE + self.register_instruction(mips.LoadInmediate,v0,SYSCALL_SBRK) + self.register_instruction(mips.LoadInmediate,a0,_size) + self.register_instruction(mips.SyscallNode) + + self.register_instruction(mips.StoreWordNode,v0,dest_offset,fp) + reg = self.memo.get_unused_reg() + self.register_instruction(mips.LoadAddress,reg,STRING) + self.register_instruction(mips.StoreWordNode,reg,0,v0) + + + #storing string length + 
self.register_instruction(mips.CommentNode,"Storing length and reference to char array") + if isinstance(node.length,int): + self.register_instruction(mips.LoadInmediate,s0,node.length) + else: + length_off = self.get_offset(node.length) + self.register_instruction(mips.LoadWordNode, s0, length_off, fp) + self.register_instruction(mips.StoreWordNode, s0, LENGTH_ATTR_OFFSET, v0) + + #storing string chars ref + self.register_instruction(mips.StoreWordNode, s1, CHARS_ATTR_OFFSET, v0) + + @visitor.when(cil.ConcatNode) + def visit(self, node): + self.memo.save() + left_offset = self.get_offset(node.left) + right_offset = self.get_offset(node.right) + #cargar los length + self.register_instruction(mips.CommentNode,"Loading length") + self.register_instruction(mips.LoadWordNode, s1, left_offset, fp) + self.register_instruction(mips.LoadWordNode, s1, LENGTH_ATTR_OFFSET, s1) + self.register_instruction(mips.LoadWordNode, s2, right_offset, fp) + self.register_instruction(mips.LoadWordNode, s2, LENGTH_ATTR_OFFSET, s2) + + reg1 = self.memo.get_unused_reg() #sum of lengths + self.register_instruction(mips.AddNode, reg1, s1, s2) + + #crear el nuevo array de bytes + self.register_instruction(mips.CommentNode,"Allocating new char array") + self.register_instruction(mips.MoveNode, a0, reg1) + self.register_instruction(mips.AddiNode, a0, a0, 1) + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_SBRK) + self.register_instruction(mips.SyscallNode) + self.register_instruction(mips.MoveNode, t6, v0)#saving the dest char arr in t6 + + reg2 = self.memo.get_unused_reg() + self.register_instruction(mips.MoveNode, reg2, v0) + + self.register_instruction(mips.CommentNode,"Copying bytes from first string") + self.register_instruction(mips.LoadWordNode, t7, left_offset, fp) + self.register_instruction(mips.LoadWordNode, t7, CHARS_ATTR_OFFSET, t7) + self.register_instruction(mips.MoveNode, a0, s1) + self.register_instruction(mips.JumpAndLink, COPY) + + 
self.register_instruction(mips.CommentNode,"Copying bytes from second string") + self.register_instruction(mips.LoadWordNode, t7, right_offset, fp) + self.register_instruction(mips.LoadWordNode, t7, CHARS_ATTR_OFFSET, t7) + self.register_instruction(mips.MoveNode, a0, s2) + self.register_instruction(mips.JumpAndLink, COPY) + + self.register_instruction(mips.CommentNode,"Null-terminating the string") + self.register_instruction(mips.StoreByteNode, zero, 0, t6) + + self.register_instruction(mips.CommentNode,"Allocating new String instance") + dest_offset = self.get_offset(node.dest) + _size = STRING_SIZE + self.register_instruction(mips.LoadInmediate,v0,SYSCALL_SBRK) + self.register_instruction(mips.LoadInmediate,a0,_size) + self.register_instruction(mips.SyscallNode) + + self.register_instruction(mips.StoreWordNode,v0,dest_offset,fp) + reg3 = self.memo.get_unused_reg() + self.register_instruction(mips.LoadAddress,reg3,STRING) + self.register_instruction(mips.StoreWordNode,reg3,0,v0) + + #storing string length + self.register_instruction(mips.CommentNode,"Storing length and reference to char array") + self.register_instruction(mips.StoreWordNode, reg1, LENGTH_ATTR_OFFSET, v0) + + #storing string chars ref + self.register_instruction(mips.StoreWordNode, reg2, CHARS_ATTR_OFFSET, v0) + + self.memo.clean() + + @visitor.when(cil.ReadIntNode) + def visit(self, node): + dest_off = self.get_offset(node.dest) + self.register_instruction(mips.CommentNode,"ReadIntNode") + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_READ_INT) + self.register_instruction(mips.SyscallNode) + self.register_instruction(mips.StoreWordNode, v0, dest_off, fp) + + + + @visitor.when(cil.ReadStringNode) + def visit(self,node): + self.memo.save() + self.register_instruction(mips.CommentNode, "ReadStrNode") + self.register_instruction(mips.CommentNode, "Reading String to buffer") + self.register_instruction(mips.LoadAddress, a0, INPUT_STR_BUFFER) + self.register_instruction(mips.LoadInmediate, 
a1, BUFFER_SIZE) + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_READ_STR) + self.register_instruction(mips.SyscallNode) + + self.register_instruction(mips.CommentNode, "Saving reference to read string") + reg1 = self.memo.get_unused_reg() + self.register_instruction(mips.MoveNode, t7, a0) + + self.register_instruction(mips.CommentNode, "Calculating str length") + self.register_instruction(mips.JumpAndLink, LENGTH) + + reg4 = self.memo.get_unused_reg() + self.register_instruction(mips.MoveNode, reg4, a0) # saving length + self.register_instruction( + mips.CommentNode, "Allocating char array for new string" + ) + self.register_instruction(mips.AddNode, a0, a0, 1) ####?????????? + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_SBRK) + self.register_instruction(mips.SyscallNode) + + self.register_instruction(mips.MoveNode, t6, v0) + + reg3 = self.memo.get_unused_reg() + self.register_instruction( + mips.MoveNode, reg3, v0 + ) # saving pointer to char array + + self.register_instruction( + mips.CommentNode, "Copying bytes from one char array to another" + ) + self.register_instruction(mips.JumpAndLink, "Input") + + self.register_instruction(mips.CommentNode, "Allocating new String instance") + _size = STRING_SIZE + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_SBRK) + self.register_instruction(mips.LoadInmediate, a0, _size) + self.register_instruction(mips.SyscallNode) + + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, v0, dest_offset, fp) + + reg = self.memo.get_unused_reg() + self.register_instruction(mips.LoadAddress, reg, STRING) + self.register_instruction(mips.StoreWordNode, reg, 0, v0) + + # storing string length + self.register_instruction( + mips.CommentNode, "Storing length and reference to char array" + ) + self.register_instruction(mips.AddiNode,reg4,reg4,-1) + self.register_instruction(mips.StoreWordNode, reg4, LENGTH_ATTR_OFFSET, v0) + + # storing string chars ref + 
self.register_instruction(mips.StoreWordNode, reg3, CHARS_ATTR_OFFSET, v0) + self.memo.clean() + + + + + + @visitor.when(cil.PrintStrNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "PrintStringNode") + straddr_offset = self.get_offset(node.str_addr) + self.register_instruction(mips.LoadWordNode, a0, straddr_offset, fp) + self.register_instruction(mips.LoadWordNode, a0, CHARS_ATTR_OFFSET, a0) + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_PRINT_STR) + self.register_instruction(mips.SyscallNode) + + @visitor.when(cil.PrintIntNode) + def visit(self, node): + self.register_instruction(mips.CommentNode, "PrintIntNode") + if isinstance(node.int_addr, int): + self.register_instruction(mips.LoadInmediate, a0, node.int_addr) + else: + int_offset = self.get_offset(node.int_addr) + self.register_instruction(mips.LoadWordNode, a0, int_offset, fp) + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_PRINT_INT) + self.register_instruction(mips.SyscallNode) + + @visitor.when(cil.TypeOfNode) + def visit(self, node): + self.memo.save() + if node.flag: + self.register_instruction(mips.CommentNode, "Executing typeof") + reg1 = self.memo.get_unused_reg() + self.register_instruction(mips.LoadAddress,reg1,node.type) + dest_offset = self.get_offset(node.dest) + self.register_instruction(mips.StoreWordNode, reg1, dest_offset, fp) + else: + obj_offset = self.get_offset(node.obj) + dest_offset = self.get_offset(node.dest) + reg1 = self.memo.get_unused_reg() + self.register_instruction(mips.CommentNode, "Executing typeof") + self.register_instruction(mips.LoadWordNode, reg1, obj_offset, fp) + self.register_instruction(mips.LoadWordNode, reg1, TYPEINFO_ATTR_OFFSET, reg1) + self.register_instruction(mips.StoreWordNode, reg1, dest_offset, fp) + + self.memo.clean() + + @visitor.when(cil.IsVoidNode) + def visit(self, node): + self.register_instruction(mips.CommentNode,"Executing IsVoid") + self.memo.save() + reg1 = self.memo.get_unused_reg() + reg2 
= self.memo.get_unused_reg() + source_off = self.get_offset(node.value) + dest_off = self.get_offset(node.dest) + + self.register_instruction(mips.LoadWordNode, reg1, source_off, fp) + self.register_instruction(mips.LoadAddress, reg2, VOID) + self.register_instruction(mips.SetEq, reg1, reg1, reg2) + + self.register_instruction(mips.StoreWordNode, reg1, dest_off, fp) + self.memo.clean() + + + @visitor.when(cil.ExitNode) + def visit(self, node): + self.register_instruction(mips.LoadInmediate, v0, SYSCALL_EXIT) + self.register_instruction(mips.SyscallNode) + + @visitor.when(cil.LabelNode) + def visit(self,node): + self.register_instruction(mips.Label,node.name) + + @visitor.when(cil.CopyNode) + def visit(self, node): + pass + + @visitor.when(cil.LocalNode) + def visit(self,node): + pass diff --git a/src/code_gen/mips_nodes.py b/src/code_gen/mips_nodes.py new file mode 100644 index 000000000..2e8288b89 --- /dev/null +++ b/src/code_gen/mips_nodes.py @@ -0,0 +1,294 @@ +# from re import L +# from soupsieve import select + +# from src.cmp.cil import InstructionNode + + +# from matplotlib.pyplot import cla + + +class MIPS_Node: + pass + + +class ProgramNode(MIPS_Node): + def __init__(self, data, code): + self.data = data + self.text = code + + +class DataNode(MIPS_Node): + pass + + +class InstructionNode(MIPS_Node): + pass + + +class DataTransferNode(InstructionNode): + pass + + +class ProcedureNode(InstructionNode): + def __init__(self, label): + self.label = label + self.instructions = [] + + +class DataTransferWithOffset(DataTransferNode): + def __init__(self, source, offset, dest): + self.source = source + self.offset = offset + self.destination = dest + + +class LoadWordNode(DataTransferWithOffset): + def __str__(self): + return f"lw {self.source}, {str(self.offset)}({self.destination})" + + +class LoadByteNode(DataTransferWithOffset): + def __str__(self): + return f"lb {self.source}, {str(self.offset)}({self.destination})" + + +class 
StoreWordNode(DataTransferWithOffset): + def __str__(self): + return f"sw {self.source}, {str(self.offset)}({self.destination})" + + +class StoreByteNode(DataTransferWithOffset): + def __str__(self): + return f"sb {self.source}, {str(self.offset)}({self.destination})" + + +class LoadNode(DataTransferNode): + def __init__(self, dest, value): + self.destination = dest + self.value = value + + +class LoadInmediate(LoadNode): + def __str__(self): + return f"li {self.destination}, {self.value}" + + +class LoadAddress(LoadNode): + def __str__(self): + return f"la {self.destination}, {self.value}" + + +class MoveNode(DataTransferNode): + def __init__(self, destination, source): + self.destination = destination + self.source = source + + def __str__(self): + return f"move {self.destination} {self.source}" + + +class DataTypeNode(DataNode): + def __init__(self, datatype, name, vt_values): + self.datatype = datatype + self.name = name + self.vt_values = vt_values + + def __str__(self): + values = "" + for value in self.vt_values: + values += f", {value}" + return f"{self.name} : {self.datatype}{values}" + + +class NotNode(InstructionNode): + def __init__(self, dest, source): + self.dest = dest + self.source = source + + def __str__(self): + return f"not {self.dest}, {self.source}" + + +class NegNode(InstructionNode): + def __init__(self, dest, source): + self.dest = dest + self.source = source + + def __str__(self): + return f"neg {self.dest}, {self.source}" + +class ArithAnfLogicNode(InstructionNode): + def __init__(self, destination, left, right): + self.destination = destination + self.left = left + self.right = right + + +class AddNode(ArithAnfLogicNode): + def __str__(self): + return f"add {self.destination}, {self.left}, {self.right}" + + +class AddiNode(ArithAnfLogicNode): + def __str__(self): + return f"addi {self.destination}, {self.left}, {self.right}" + + +class SubNode(ArithAnfLogicNode): + def __str__(self): + return f"sub {self.destination}, {self.left}, 
{self.right}" + + +class HiLoOperationNode(InstructionNode): + def __init__(self, left, right): + self.left = left + self.right = right + + +class MultNode(HiLoOperationNode): + def __str__(self): + return f"mult {self.left}, {self.right}" + + +class DivideNode(HiLoOperationNode): + def __str__(self): + return f"div {self.left}, {self.right}" + + +class MoveFromHi(InstructionNode): + def __init__(self, register): + self.register = register + + def __str__(self): + return f"mfhi {self.register}" + + +class MoveFromLo(InstructionNode): + def __init__(self, register): + self.register = register + + def __str__(self): + return f"mflo {self.register}" + + +class ConditionalBranch(InstructionNode): + def __init__(self, c1, c2, jump): + self.c1 = c1 + self.c2 = c2 + self.jump = jump + + +class BranchOnEqualNode(ConditionalBranch): + def __str__(self): + return f"beq {self.c1}, {self.c2}, {self.jump}" + + +class BranchOnNotEqualNode(ConditionalBranch): + def __str__(self): + return f"bne {self.c1}, {self.c2}, {self.jump}" + + +class BranchOnGreaterThanNode(ConditionalBranch): + def __str__(self): + return f"bgt {self.c1}, {self.c2}, {self.jump}" + + +class BranchOnGreaterOrEqNode(ConditionalBranch): + def __str__(self): + return f"bge {self.c1}, {self.c2}, {self.jump}" + + +class BranchOnLessThanNode(ConditionalBranch): + def __str__(self): + return f"blt {self.c1}, {self.c2}, {self.jump}" + + +class BranchOnLessOrEqNode(ConditionalBranch): + def __str__(self): + return f"ble {self.c1}, {self.c2}, {self.jump}" + +class BranchOnNotEqZero(InstructionNode): + def __init__(self,reg,label): + self.reg = reg + self.label = label + + def __str__(self): + return f'bnez {self.reg}, {self.label}' + +class BranchOnEqZero(InstructionNode): + def __init__(self,reg,label): + self.reg = reg + self.label = label + + def __str__(self): + return f'beqz {self.reg}, {self.label}' + +class BranchOnGreaterZero(InstructionNode): + def __init__(self,reg,label): + self.reg = reg + self.label = 
label + + def __str__(self): + return f'bgtz {self.reg}, {self.label}' + + + +class ComparisonNode(InstructionNode): + def __init__(self, dest,m1, m2 ): + self.m1 = m1 + self.m2 = m2 + self.destination = dest + + +class SetOnLessThan(ComparisonNode): + def __str__(self): + return f"slt {self.destination}, {self.m1}, {self.m2}" + + +class SetOnLessOrEq(ComparisonNode): + def __str__(self): + return f"sle {self.destination}, {self.m1}, {self.m2}" + +class SetEq(ComparisonNode): + def __str__(self): + return f"seq {self.destination}, {self.m1}, {self.m2}" + +class UnconditionalJumpNode(InstructionNode): + def __init__(self, jump): + self.jump = jump + + +class Jump(UnconditionalJumpNode): + def __str__(self): + return f"j {self.jump}" + + +class JumpRegister(UnconditionalJumpNode): + def __str__(self): + return f"jalr {self.jump}" + + +class JumpAndLink(UnconditionalJumpNode): + def __str__(self): + return f"jal {self.jump}" + + +class Label(InstructionNode): + def __init__(self, label): + self.label = label + + def __str__(self): + return f"{self.label}:" + + +class SyscallNode(InstructionNode): + def __str__(self): + return f"syscall" + + +class CommentNode(MIPS_Node): + def __init__(self, text): + self.text = text + + def __str__(self): + return f"#{self.text}" diff --git a/src/code_gen/mips_writer.py b/src/code_gen/mips_writer.py new file mode 100644 index 000000000..5801ed400 --- /dev/null +++ b/src/code_gen/mips_writer.py @@ -0,0 +1,36 @@ +import code_gen.mips_nodes as mips +import cmp.visitor as visitor + +class MIPSWriter(object): + def __init__(self): + self.tabs = 0 + self.output = [] + + def emit(self, msg): + self.output.append(self.tabs*" " + msg) + + def black(self): + self.output.append('') + + def visit(self, node:mips.ProgramNode): + self.emit(".data") + self.black() + for data in node.data: + self.emit(str(data)) + + self.black() + self.emit(".text") + self.emit(".globl main") + self.black() + for proc in node.text: + self.emit(f'{proc.label}:') + 
self.tabs += 4 + for inst in proc.instructions: + self.emit(str(inst)) + self.tabs -= 4 + + + + + + \ No newline at end of file diff --git a/src/code_gen/util_values.py b/src/code_gen/util_values.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/coolc.sh b/src/coolc.sh index 3088de4f9..1f7bffc38 100755 --- a/src/coolc.sh +++ b/src/coolc.sh @@ -4,8 +4,8 @@ INPUT_FILE=$1 OUTPUT_FILE=${INPUT_FILE:0: -2}mips # Si su compilador no lo hace ya, aquí puede imprimir la información de contacto -echo "LINEA_CON_NOMBRE_Y_VERSION_DEL_COMPILADOR" # TODO: Recuerde cambiar estas -echo "Copyright (c) 2019: Nombre1, Nombre2, Nombre3" # TODO: líneas a los valores correctos +echo "EL_COMPI 1.0" +echo "Copyright (c) 2022: Amalia_Ibarra, Sandra_Martos, Gabriela_Martinez" # Llamar al compilador -echo "Compiling $INPUT_FILE into $OUTPUT_FILE" +exec python3 main.py $INPUT_FILE diff --git a/src/main.py b/src/main.py new file mode 100644 index 000000000..4520a750b --- /dev/null +++ b/src/main.py @@ -0,0 +1,119 @@ +from parsing.lexical_analizer import tokenize_cool_text +from parsing.cool_grammar import define_cool_grammar +from semantic.cool_visitor import FormatVisitorST +from parsing.visitor_type_ast import FormatVisitorTypedAst + +from semantic.type_collector import TypeCollector +from semantic.type_builder import TypeBuilder +from semantic.type_checker import TypeChecker + +from parsing.shift_reduce_parsers import LR1Parser, DerivationTree +from cmp.errors import parsing_table_error, Error + +from cmp.evaluation import evaluate_reverse_parse +from pathlib import Path +from cmp.errors import InvalidInputFileError +from semantic.cool_visitor import FormatVisitor +from code_gen.cil_builder import CILBuilder +from code_gen.mips_builder import MIPSBuilder +from code_gen.mips_writer import MIPSWriter +from cmp.cil import PrintVisitor +import typer + + +def report_and_exit(errors): + if len(errors) == 0: + raise typer.Exit(code=0) + + # typer.echo(errors[0]) + for error in 
errors: + typer.echo(error) + raise typer.Exit(code=1) + + +def pipeline(input_file: Path, output_file: Path = None): + errors = [] + + if not input_file.is_file: + errors.append(InvalidInputFileError(str(input_file))) + + if len(errors) > 0: + report_and_exit(errors) + + text = input_file.read_text() + + # main_error1 = ["A class Main with a method main most be provided"] + # main_error2 = ['"main" method in class Main does not receive any parameters'] + + # define grammar + grammar, idx, type_id, string, num = define_cool_grammar() + + tokens = tokenize_cool_text(grammar, idx, type_id, string, num, text, errors) + + if len(errors) > 0: + report_and_exit(errors) + parser = LR1Parser(grammar, errors) + + if len(errors) > 0: + report_and_exit(errors) + + parse, operations = parser(tokens) + + if len(errors) > 0: + report_and_exit(errors) + + # get parsing tree + ast = evaluate_reverse_parse(parse, operations, tokens) + + # print("-------------------------------Initial AST-------------------------------") + # formatter = FormatVisitorST() + # tree = formatter.visit(ast) + # print(tree) + + visitors = [TypeCollector(errors), TypeBuilder(errors)] + for visitor in visitors: + ast = visitor.visit(ast) + + type_checker = TypeChecker(errors) + scope, typed_ast = type_checker.visit(ast) + + # formatter = FormatVisitorTypedAst() + # print("-------------------------------Typed AST-------------------------------") + # tree = formatter.visit(typed_ast) + # print(tree) + + if len(errors) > 0: + report_and_exit(errors) + + cool_to_cil_visitor = CILBuilder() + cil_ast = cool_to_cil_visitor.visit(typed_ast) + + formatter = PrintVisitor() + tree = formatter.visit(cil_ast) + print(tree) + + cil_to_mips_visitor = MIPSBuilder() + mips_ast = cil_to_mips_visitor.visit(cil_ast) + + mips_writer = MIPSWriter() + output = mips_writer.visit(mips_ast) + + output = '\n'.join(mips_writer.output) + + if output_file is None: + output_file = input_file.with_suffix(".mips") + + with 
output_file.open("w") as file: + print(output, file=file) + + #with open(f'{input_file[:-3]}.mips','w') as f: + # f.write(f'{output}') + #output_file.write_text(output) + + +if __name__ == "__main__": + #input_file = Path("/home/sandra/Desktop/FinalProjects/Compiler/cool-compiler-2021/customized_tests/code_gen/test_goto_if.cl") + #output_file = Path("/home/sandra/Desktop/FinalProjects/Compiler/cool-compiler-2021/customized_tests/test_hello_world.mips") + + #pipeline() + typer.run(pipeline) diff --git a/src/parsing/__init__.py b/src/parsing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/parsing/cool_grammar.py b/src/parsing/cool_grammar.py new file mode 100644 index 000000000..60eeae8c3 --- /dev/null +++ b/src/parsing/cool_grammar.py @@ -0,0 +1,188 @@ +from cmp.pycompiler import Grammar +from semantic.ast_nodes import ( + ProgramNode, + ClassDeclarationNode, + FuncDeclarationNode, + AttrDeclarationNode, + IfNode, + WhileNode, + LetNode, + CaseNode, + IsvoidNode, + AssignNode, + VarDeclarationNode, + CaseItemNode, + NotNode, + LessNode, + LessEqualNode, + EqualNode, + PlusNode, + MinusNode, + StarNode, + DivNode, + NegNode, + InstantiateNode, + BlockNode, + CallNode, + ConstantNumNode, + VariableNode, + BooleanNode, + StringNode, +) + + +def define_cool_grammar(print_grammar=False): + # grammar + G = Grammar() + + # non-terminals + program = G.NonTerminal("", startSymbol=True) + class_list, def_class = G.NonTerminals(" ") + feature_list, def_attr, def_func = G.NonTerminals( + " " + ) + param_list, param_list_rest, param = G.NonTerminals(" ") + expr, not_exp, comp, arith, term, factor, element, atom = G.NonTerminals( + " " + ) + identifiers_list, identifier_init = G.NonTerminals(" ") + block, case_block, case_item = G.NonTerminals(" ") + func_call, arg_list, arg_list_rest = G.NonTerminals(" ") + + # terminals + classx, inherits, notx, isvoid = G.Terminals("class inherits not isvoid") + let, inx = G.Terminals("let in") + ifx, then, elsex, 
fi = G.Terminals("if then else fi") + whilex, loop, pool = G.Terminals("while loop pool") + case, of, esac = G.Terminals("case of esac") + semi, colon, comma, dot, opar, cpar, ocur, ccur, at, larrow, rarrow = G.Terminals( + "; : , . ( ) { } @ <- =>" + ) + equal, plus, minus, star, div, less, equal, lesseq, neg = G.Terminals( + "= + - * / < = <= ~" + ) + idx, type_id, num, new, string, true, false = G.Terminals("id type_id int new string true false") + + # productions + program %= class_list, lambda h, s: ProgramNode(s[1]) + + class_list %= def_class + class_list, lambda h, s: [s[1]] + s[2] + class_list %= def_class, lambda h, s: [s[1]] + + def_class %= ( + classx + type_id + ocur + feature_list + ccur + semi, + lambda h, s: ClassDeclarationNode(s[2], s[4], s[1]), + ) + def_class %= ( + classx + type_id + inherits + type_id + ocur + feature_list + ccur + semi, + lambda h, s: ClassDeclarationNode(s[2], s[6], s[1], s[4]), + ) + + feature_list %= def_attr + semi + feature_list, lambda h, s: [s[1]] + s[3] + feature_list %= def_func + semi + feature_list, lambda h, s: [s[1]] + s[3] + feature_list %= G.Epsilon, lambda h, s: [] + + def_attr %= ( + idx + colon + type_id + larrow + expr, + lambda h, s: AttrDeclarationNode(s[1], s[3], s[5], s[4]), + ) + def_attr %= idx + colon + type_id, lambda h, s: AttrDeclarationNode(s[1], s[3], token = s[2]) + + def_func %= ( + idx + opar + param_list + cpar + colon + type_id + ocur + expr + ccur, + lambda h, s: FuncDeclarationNode(s[1], s[3], s[6], s[8], s[2]), + ) + + param_list %= param + param_list_rest, lambda h, s: [s[1]] + s[2] + param_list %= param, lambda h, s: [s[1]] + param_list %= G.Epsilon, lambda h, s: [] + + param_list_rest %= comma + param + param_list_rest, lambda h, s: [s[2]] + s[3] + param_list_rest %= comma + param, lambda h, s: [s[2]] + param %= idx + colon + type_id, lambda h, s: (s[1], s[3]) + + expr %= idx + larrow + expr, lambda h, s: AssignNode(s[1], s[3], s[2]) + expr %= let + identifiers_list + inx + expr, 
lambda h, s: LetNode(s[2], s[4], s[1]) + expr %= notx + comp, lambda h, s: NotNode(s[2], s[1]) + expr %= comp, lambda h, s: s[1] + + identifiers_list %= ( + identifier_init + comma + identifiers_list, + lambda h, s: [s[1]] + s[3], + ) + identifiers_list %= identifier_init, lambda h, s: [s[1]] + + identifier_init %= ( + idx + colon + type_id + larrow + expr, + lambda h, s: VarDeclarationNode(s[1], s[3], s[5]), + ) + identifier_init %= idx + colon + type_id, lambda h, s: VarDeclarationNode(s[1], s[3]) + + comp %= arith + less + arith, lambda h, s: LessNode(s[1], s[3], s[2]) + comp %= arith + less + notx + expr, lambda h, s: LessNode(s[1], NotNode(s[4], s[3]) , s[2]) + comp %= arith + equal + arith, lambda h, s: EqualNode(s[1], s[3], s[2]) + comp %= arith + equal + notx + expr, lambda h, s: EqualNode(s[1], NotNode(s[4], s[3]) , s[2]) + comp %= arith + lesseq + arith, lambda h, s: LessEqualNode(s[1], s[3], s[2]) + comp %= arith + lesseq + notx + expr, lambda h, s: LessEqualNode(s[1], NotNode(s[4], s[3]) , s[2]) + comp %= arith, lambda h, s: s[1] + + arith %= arith + plus + term, lambda h, s: PlusNode(s[1], s[3], s[2]) + arith %= arith + minus + term, lambda h, s: MinusNode(s[1], s[3], s[2]) + arith %= term, lambda h, s: s[1] + + term %= term + star + factor, lambda h, s: StarNode(s[1], s[3], s[2]) + term %= term + div + factor, lambda h, s: DivNode(s[1], s[3], s[2]) + term %= factor, lambda h, s: s[1] + + factor %= isvoid + element, lambda h, s: IsvoidNode(s[2], s[1]) + factor %= neg + element, lambda h, s: NegNode(s[2], s[1]) + factor %= element, lambda h, s: s[1] + + element %= ( + ifx + expr + then + expr + elsex + expr + fi, + lambda h, s: IfNode(s[2], s[4], s[6], s[1]), + ) + element %= whilex + expr + loop + expr + pool, lambda h, s: WhileNode(s[2], s[4], s[1]) + element %= case + expr + of + case_block + esac, lambda h, s: CaseNode(s[2], s[4], s[1]) + element %= new + type_id, lambda h, s: InstantiateNode(s[2], s[1]) + element %= opar + expr + cpar, lambda h, s: 
s[2] + element %= ocur + block + ccur, lambda h, s: BlockNode(s[2], s[1]) + element %= (element + dot + func_call, lambda h, s: CallNode(*s[3], obj=s[1], token = s[2])) + element %= ( + element + at + type_id + dot + func_call, + lambda h, s: CallNode(*s[5], obj=s[1], at_type=s[3], token = s[2]), + ) + element %= func_call, lambda h, s: CallNode(*s[1],) + element %= atom, lambda h, s: s[1] + + case_block %= case_item + case_block, lambda h, s: [s[1]] + s[2] + case_block %= case_item, lambda h, s: [s[1]] + case_item %= ( + idx + colon + type_id + rarrow + expr + semi, + lambda h, s: CaseItemNode(s[1], s[3], s[5], s[4]), + ) + + atom %= num, lambda h, s: ConstantNumNode(s[1]) + atom %= idx, lambda h, s: VariableNode(s[1]) + atom %= ( + true, + lambda h, s: BooleanNode(s[1]), + ) + atom %= false, lambda h, s: BooleanNode(s[1]) + atom %= string, lambda h, s: StringNode(s[1]) + + block %= expr + semi, lambda h, s: [s[1]] + block %= expr + semi + block, lambda h, s: [s[1]] + s[3] + + func_call %= idx + opar + arg_list + cpar, lambda h, s: (s[1], s[3]) + + arg_list %= expr + arg_list_rest, lambda h, s: [s[1]] + s[2] + arg_list %= expr, lambda h, s: [s[1]] + arg_list %= G.Epsilon, lambda h, s: [] + + arg_list_rest %= comma + expr + arg_list_rest, lambda h, s: [s[2]] + s[3] + arg_list_rest %= comma + expr, lambda h, s: [s[2]] + + if print_grammar: + print(G) + return (G, idx, type_id, string, num) diff --git a/src/parsing/cool_tokenizer.py b/src/parsing/cool_tokenizer.py new file mode 100644 index 000000000..6b474cab3 --- /dev/null +++ b/src/parsing/cool_tokenizer.py @@ -0,0 +1,38 @@ +from cmp.utils import Token, tokenizer + + +def tokenize_cool_text(G, text, idx, num, print_tokens=False): + fixed_tokens = { + t.Name: Token(t.Name, t) for t in G.terminals if t not in {idx, num} + } + + @tokenizer(G, fixed_tokens) + def tokenize_text(token): + lex = token.lex + try: + float(lex) + return token.transform_to(num) + except ValueError: # verificar los string + return 
token.transform_to(idx) + + # (do something like if(lex[0] == " and lex[-1] ==")) + tokens = tokenize_text(text) + if print_tokens: + pprint_tokens(tokens) + return tokens + + +# pie co los lex, arreglar como toca +def pprint_tokens(tokens): + indent = 0 + pending = [] + for token in tokens: + pending.append(token) + if token.lex in {"{", "}", ";"}: + if token.lex == "}": + indent -= 1 + print(" " * indent + " ".join(str(t.token_type) for t in pending)) + pending.clear() + if token.lex == "{": + indent += 1 + print(" ".join([str(t.token_type) for t in pending])) diff --git a/src/parsing/lexical_analizer.py b/src/parsing/lexical_analizer.py new file mode 100644 index 000000000..27f2dd339 --- /dev/null +++ b/src/parsing/lexical_analizer.py @@ -0,0 +1,78 @@ +import ply.lex as lex +import parsing.tokens_rules as tokens_rules +from cmp.utils import Token + +def pprint_tokens(tokens): + indent = 0 + pending = [] + for token in tokens: + pending.append(token) + if token.lex in {"{", "}", ";"}: + if token.lex == "}": + indent -= 1 + print(" " * indent + " ".join(str(t.token_type) for t in pending)) + pending.clear() + if token.lex == "{": + indent += 1 + print(" ".join([str(t.token_type) for t in pending])) + +def find_column(input, lexpos): + line_start = input.rfind('\n', 0, lexpos) + 1 + return (lexpos - line_start) + 1 + +def tokenize_cool_text(grammar, idx, type_id, string, num, data, errors, printing=False): + # lexer starts with: lexpos = 0, lineno = 1, last_new_line = 0 + # lexpos: Within token rule functions, this points to the first character after the matched text. 
+ lexer = lex.lex(module = tokens_rules) + lexer.last_new_line_pos = 0 + lexer.errors = errors + + # Give the lexer some input + lexer.input(data) + + lessequal = grammar.__getitem__("<=") + rarrow = grammar.__getitem__("=>") + larrow = grammar.__getitem__("<-") + + fixed_tokens_names = { + t.Name: (t.Name, t) + for t in grammar.terminals + if t not in {idx, type_id, string, num, lessequal, rarrow, larrow} + } + + fixed_tokens_names["larrow"] = ("<-", larrow) + fixed_tokens_names["rarrow"] = ("=>", rarrow) + fixed_tokens_names["lessequal"] = ("<=", lessequal) + + tokens = [] + pos_data = [] + # Tokenize + while True: + tok = lexer.token() + if not tok: # append EOF + if len(pos_data) > 0: + last_lineno, last_col = pos_data[-1] + col = last_col + len(tokens[-1].lex) + else: # empty program + last_lineno = 0 + col = -1 + tokens.append(Token("$", grammar.EOF, (last_lineno, find_column(data, col)))) + break # No more input + else: + try: + tval, ttype = fixed_tokens_names[tok.type] + except: + tval = tok.value + if tok.type == "string": + ttype = string + elif tok.type == "id": + ttype = idx + elif tok.type == "type_id": + ttype = type_id + else: + ttype = num + tokens.append(Token(tval, ttype, (tok.lineno, find_column(data, tok.lexpos)))) + + if printing: + pprint_tokens(tokens) + return tokens diff --git a/src/parsing/methods.py b/src/parsing/methods.py new file mode 100644 index 000000000..dd8ca0947 --- /dev/null +++ b/src/parsing/methods.py @@ -0,0 +1,148 @@ +from cmp.pycompiler import ( + Symbol, + NonTerminal, + Terminal, + EOF, + Sentence, + SentenceList, + Epsilon, + Production, + Grammar, +) +from cmp.utils import ContainerSet +from cmp.errors import parsing_table_error, invalid_sentence_error +from cmp.automata import State + + +# Computes First(alpha), given First(Vt) and First(Vn) +# alpha in (Vt U Vn)* +def compute_local_first(firsts, alpha): + first_alpha = ContainerSet() + + try: + alpha_is_epsilon = alpha.IsEpsilon + except: + alpha_is_epsilon = False + 
+ ################################################### + # alpha == epsilon ? First(alpha) = { epsilon } + ################################################### + if alpha_is_epsilon or len(alpha) == 0: + first_alpha.set_epsilon() + return first_alpha + ################################################### + + ################################################### + # alpha = X1 ... XN + # First(Xi) subconjunto First(alpha) + # epsilon pertenece a First(X1)...First(Xi) ? First(Xi+1) subconjunto de First(X) y First(alpha) + # epsilon pertenece a First(X1)...First(XN) ? epsilon pertence a First(X) y al First(alpha) + ################################################### + if alpha[0].IsTerminal: + first_alpha.add(alpha[0]) + return first_alpha + + # if alpha[0].IsNonTerminal: + # first_alpha.update(firsts[alpha[0]]) + + for item in alpha: + if firsts[item].contains_epsilon: + first_alpha.update(firsts[item]) + else: + first_alpha.update(firsts[item]) + break + + else: + first_alpha.set_epsilon() + + ################################################### + + # First(alpha) + return first_alpha + + +# Computes First(Vt) U First(Vn) U First(alpha) +# P: X -> alpha +def compute_firsts(G): + firsts = {} + change = True + + # init First(Vt) + for terminal in G.terminals: + firsts[terminal] = ContainerSet(terminal) + + # init First(Vn) + for nonterminal in G.nonTerminals: + firsts[nonterminal] = ContainerSet() + + while change: + change = False + + # P: X -> alpha + for production in G.Productions: + X = production.Left + alpha = production.Right + + # get current First(X) + first_X = firsts[X] + + # init First(alpha) + try: + first_alpha = firsts[alpha] + except: + first_alpha = firsts[alpha] = ContainerSet() + + # CurrentFirst(alpha)??? 
+ local_first = compute_local_first(firsts, alpha) + + # update First(X) and First(alpha) from CurrentFirst(alpha) + change |= first_alpha.hard_update(local_first) + change |= first_X.hard_update(local_first) + + # First(Vt) + First(Vt) + First(RightSides) + return firsts + + +def compute_follows(G, firsts): + follows = {} + change = True + + # local_firsts = {} + + # init Follow(Vn) + for nonterminal in G.nonTerminals: + follows[nonterminal] = ContainerSet() + follows[G.startSymbol] = ContainerSet(G.EOF) + + while change: + change = False + + # P: X -> alpha + for production in G.Productions: + X = production.Left + alpha = production.Right + + follow_X = follows[X] + + ################################################### + # X -> zeta Y beta + # First(beta) - { epsilon } subset of Follow(Y) + # beta ->* epsilon or X -> zeta Y ? Follow(X) subset of Follow(Y) + ################################################### + + for i in range(0, len(alpha) - 1): + if alpha[i].IsNonTerminal: + beta = Sentence(*alpha[i + 1 :]) + firsts_beta = compute_local_first(firsts, beta) + change |= follows[alpha[i]].update(firsts_beta) + + if firsts_beta.contains_epsilon: + change |= follows[alpha[i]].update(follow_X) + + if not alpha.IsEpsilon and alpha[-1].IsNonTerminal: + change |= follows[alpha[-1]].update(follow_X) + + ################################################### + + # Follow(Vn) + return follows diff --git a/src/parsing/parser_automatons.py b/src/parsing/parser_automatons.py new file mode 100644 index 000000000..32f2124a0 --- /dev/null +++ b/src/parsing/parser_automatons.py @@ -0,0 +1,225 @@ +from cmp.pycompiler import Item +from cmp.automata import State, lr0_formatter, multiline_formatter +from cmp.utils import ContainerSet +from parsing.methods import compute_firsts, compute_local_first, compute_follows + +# LR0 automaton -> for SLR and LALR parsers +def build_LR0_automaton(G): + assert len(G.startSymbol.productions) == 1, "Grammar must be augmented" + + start_production = 
G.startSymbol.productions[0] + start_item = Item(start_production, 0) + + automaton = State(start_item, True) + + pending = [start_item] + visited = {start_item: automaton} + + while pending: + current_item = pending.pop() + if current_item.IsReduceItem: + continue + + # (Decide which transitions to add) + # agregar las epsilon transiciones + # a estados donde el item posee producciones a partir del simbolo actual en la posicion 0 + # y agregar la transicion a partir del simbolo siguiente + + next_item = current_item.NextItem() + try: + next_state = visited[next_item] + except KeyError: + next_state = State(next_item, True) + visited[next_item] = next_state + pending.append(next_item) + + if current_item.NextSymbol.IsNonTerminal: + epsilon_productions = current_item.NextSymbol.productions + else: + epsilon_productions = None + + current_state = visited[current_item] + # (Adding the decided transitions) + current_state.add_transition(current_item.NextSymbol.Name, next_state) + + if epsilon_productions: + for eproduction in epsilon_productions: + epItem = Item(eproduction, 0) + try: + epState = visited[epItem] + except KeyError: + epState = State(epItem, True) + visited[epItem] = epState + pending.append(epItem) + current_state.add_epsilon_transition(epState) + + return automaton + + +# LR1 automaton +def expand(item, firsts): + next_symbol = item.NextSymbol + if next_symbol is None or not next_symbol.IsNonTerminal: + return [] + + lookaheads = ContainerSet() + # (Compute lookahead for child items) + previews = item.Preview() + for preview in previews: + lookaheads.update(compute_local_first(firsts, preview)) + + assert not lookaheads.contains_epsilon + # (Build and return child items) + items = [] + for production in next_symbol.productions: + items.append(Item(production, 0, lookaheads)) + + return items + + +def compress(items): + centers = {} + + for item in items: + center = item.Center() + try: + lookaheads = centers[center] + except KeyError: + centers[center] 
= lookaheads = set() + lookaheads.update(item.lookaheads) + + return { + Item(x.production, x.pos, set(lookahead)) for x, lookahead in centers.items() + } + + +def closure_lr1(items, firsts): + closure = ContainerSet(*items) + + changed = True + while changed: + changed = False + + new_items = ContainerSet() + # Your code here!!! + for item in closure: + new_items.extend(expand(item, firsts)) + + changed = closure.update(new_items) + + return compress(closure) + + +def goto_lr1(items, symbol, firsts=None, just_kernel=False): + assert ( + just_kernel or firsts is not None + ), "`firsts` must be provided if `just_kernel=False`" + items = frozenset(item.NextItem() for item in items if item.NextSymbol == symbol) + return items if just_kernel else closure_lr1(items, firsts) + + +def build_LR1_automaton(G): + assert len(G.startSymbol.productions) == 1, "Grammar must be augmented" + + firsts = compute_firsts(G) + firsts[G.EOF] = ContainerSet(G.EOF) + + start_production = G.startSymbol.productions[0] + start_item = Item(start_production, 0, lookaheads=(G.EOF,)) + start = frozenset([start_item]) # como cabecera solo queda el kernel + + closure = closure_lr1(start, firsts) + automaton = State( + frozenset(closure), True + ) # en visited si se guarda el estado completo + + pending = [start] + visited = {start: automaton} + + while pending: + current = pending.pop() + current_state = visited[current] + + closure = closure_lr1(current, firsts) + for symbol in G.terminals + G.nonTerminals: + # (Get/Build `next_state`) + # closure = closure_lr1(current,firsts) + goto = goto_lr1(closure, symbol, firsts, True) + + if not goto: + continue + + try: + next_state = visited[goto] + except KeyError: + next_state = visited[goto] = State( + frozenset(closure_lr1(goto, firsts)), True + ) + pending.append(goto) + + current_state.add_transition(symbol.Name, next_state) + + automaton.set_formatter(multiline_formatter) + return automaton + + +def build_LALR_automaton(G): + assert 
len(G.startSymbol.productions) == 1, "Grammar must be augmented" + + lr1_automaton = build_LR1_automaton(G) + + same_kernel = {} + for node in lr1_automaton: + just_center = frozenset([item.Center() for item in node.state]) + try: + same_kernel[just_center].append(node) + except KeyError: + same_kernel[just_center] = [node] + + start = frozenset( + [item.Center() for item in lr1_automaton.state] + ) # como cabecera solo quedan los items sin lookahead + automaton = State( + lr1_automaton.state, True + ) # en visited se guarda el estado que corresponde a la fusion de estaods ocn el mismo nucleo + + pending = [start] + visited = {start: automaton} + + while pending: + current = pending.pop() + current_state = visited[current] # se van a actualizar + # todos los estados con los que el estado actual tiene alguna transicion + lr1_state = same_kernel[current][0] + + # chequear que cada estado del cjto analizado tenga esa transicion + for symbol in G.terminals + G.nonTerminals: + if lr1_state.has_transition(symbol.Name): + state = lr1_state.transitions[symbol.Name][0] + center_items = frozenset([item.Center() for item in state.state]) + try: + next_state = visited[center_items] + except KeyError: + kernel_set = same_kernel[center_items] + items_with_lookahead = {} + for node in kernel_set: + for item in node.state: + try: + current_item = items_with_lookahead[item.Center()] + except KeyError: + current_item = items_with_lookahead[ + item.Center() + ] = set() + current_item.update(item.lookaheads) + completed_items = [ + Item(item.production, item.pos, lookaheads) + for item, lookaheads in items_with_lookahead.items() + ] + next_state = State(frozenset(completed_items), True) + visited[center_items] = next_state + pending.append(center_items) + + current_state.add_transition(symbol.Name, next_state) + + automaton.set_formatter(multiline_formatter) + return automaton diff --git a/src/parsing/shift_reduce_parsers.py b/src/parsing/shift_reduce_parsers.py new file mode 100644 
index 000000000..47353ce52 --- /dev/null +++ b/src/parsing/shift_reduce_parsers.py @@ -0,0 +1,274 @@ +from parsing.parser_automatons import ( + build_LR0_automaton, + build_LR1_automaton, + build_LALR_automaton, +) +from parsing.methods import compute_firsts, compute_local_first, compute_follows +from cmp.automata import State +from cmp.errors import shift_reduce_error, invalid_sentence_error, SyntacticError + +class ShiftReduceParser: + SHIFT = "SHIFT" + REDUCE = "REDUCE" + OK = "OK" + + def __init__(self, G, errors, verbose=False): + self.G = G + self.verbose = verbose + self.action = {} + self.goto = {} + self.automaton = self._build_parsing_table() + self.errors = errors + + def _build_parsing_table(self): + raise NotImplementedError() + + def __call__(self, w): + stack = [0] + cursor = 0 + output = [] + operations = [] + + while True: + state = stack[-1] + lookahead = w[cursor].token_type + if self.verbose: + print(stack, "<---||--->", w[cursor:]) + + # Detect error + try: + action, tag = self.action[state, lookahead] + + except KeyError: + current_token = w[cursor] + self.errors.append( + SyntacticError( + current_token.location[0], + current_token.location[1], + "ERROR at or near "+ str(current_token.lex) + ) + ) + + return output, operations + + # Shift case + if action == self.SHIFT: + operations.append(action) + stack.append(tag) + cursor += 1 + + # Reduce case + elif action == self.REDUCE: + operations.append(action) + for _ in range(len(tag.Right)): + stack.pop() + output.append(tag) + stack.append(self.goto[stack[-1], tag.Left]) + + # OK case + elif action == self.OK: + return output, operations + # Invalid case + else: + current_token = w[cursor] + self.errors.append( + SyntacticError( + current_token.location[0], + current_token.location[1], + "ERROR at or near"+ str(current_token.lex) + ) + ) + return output, operations + # "Invalid case. 
Sentence given does not belong to the grammar", + + if cursor >= len(w): # or not stack + current_token = w[cursor] + self.errors.append( + SyntacticError( + current_token.location[0], + current_token.location[1], + "Exceed word length while looking for a viable derivation. Sentence given does not belong to the grammar", + ) + ) + return output, operations + + +class SLR1Parser(ShiftReduceParser): + def _build_parsing_table(self): + G = self.G.AugmentedGrammar(True) + firsts = compute_firsts(G) + follows = compute_follows(G, firsts) + + automaton = build_LR0_automaton(G).to_deterministic() + for i, node in enumerate(automaton): + if self.verbose: + print(i, "\t", "\n\t ".join(str(x) for x in node.state), "\n") + node.idx = i + + for node in automaton: + idx = node.idx + for state in node.state: + item = state.state + # - Filling `self.Action` and `self.Goto` according to `item`) + # - Using `self._register(...)`) + if item.IsReduceItem: + if item.production.Left == G.startSymbol: + self._register(self.action, (idx, G.EOF), (self.OK, None)) + else: + for symbol in follows[item.production.Left]: + self._register( + self.action, + (idx, symbol), + (self.REDUCE, item.production), + ) + else: + if item.NextSymbol.IsTerminal: + self._register( + self.action, + (idx, item.NextSymbol), + (self.SHIFT, node[item.NextSymbol.Name][0].idx), + ) + else: + self._register( + self.goto, + (idx, item.NextSymbol), + node[item.NextSymbol.Name][0].idx, + ) + return automaton + + @staticmethod + def _register(table, key, value): + # assert ( + # key not in table or table[key] == value + # ), "Shift-Reduce or Reduce-Reduce conflict!!!" 
+ if key in table and table[key] != value: + raise shift_reduce_error(table[key], value, "SLR") + table[key] = value + + +class LR1Parser(ShiftReduceParser): + def _build_parsing_table(self): + G = self.G.AugmentedGrammar(True) + + automaton = build_LR1_automaton(G) + for i, node in enumerate(automaton): + if self.verbose: + print(i, "\t", "\n\t ".join(str(x) for x in node.state), "\n") + node.idx = i + + # print("automatons states") + for node in automaton: + idx = node.idx + for item in node.state: + # print("item", item) + # - Fill `self.Action` and `self.Goto` according to `item`) + # - Feel free to use `self._register(...)`) + if item.IsReduceItem: + if item.production.Left == G.startSymbol: + self._register(self.action, (idx, G.EOF), (self.OK, None)) + else: + for symbol in item.lookaheads: + self._register( + self.action, + (idx, symbol), + (self.REDUCE, item.production), + ) + else: + if item.NextSymbol.IsTerminal: + self._register( + self.action, + (idx, item.NextSymbol), + (self.SHIFT, node[item.NextSymbol.Name][0].idx), + ) + else: + self._register( + self.goto, + (idx, item.NextSymbol), + node[item.NextSymbol.Name][0].idx, + ) + return automaton + + @staticmethod + def _register(table, key, value): + if key in table and table[key] != value: + raise shift_reduce_error(table[key], value, "LR", key) + table[key] = value + + +class LALR_Parser(ShiftReduceParser): + def _build_parsing_table(self): + G = self.G.AugmentedGrammar(True) + + automaton = build_LALR_automaton(G) + + for i, node in enumerate(automaton): + if self.verbose: + print(i, "\t", "\n\t ".join(str(x) for x in node.state), "\n") + node.idx = i + + for node in automaton: + idx = node.idx + for item in node.state: + # - Fill `self.Action` and `self.Goto` according to `item`) + # - Feel free to use `self._register(...)`) + if item.IsReduceItem: + if item.production.Left == G.startSymbol: + self._register(self.action, (idx, G.EOF), (self.OK, None)) + else: + for symbol in item.lookaheads: + 
self._register( + self.action, + (idx, symbol), + (self.REDUCE, item.production), + ) + else: + if item.NextSymbol.IsTerminal: + self._register( + self.action, + (idx, item.NextSymbol), + (self.SHIFT, node[item.NextSymbol.Name][0].idx), + ) + else: + self._register( + self.goto, + (idx, item.NextSymbol), + node[item.NextSymbol.Name][0].idx, + ) + + return automaton + + @staticmethod + def _register(table, key, value): + if key in table and table[key] != value: + raise shift_reduce_error(table[key], value, "LALR") + table[key] = value + + +# ----------------------derivation tree-------------------------# +def DerivationTree(derivation, G): + lent = len(derivation) + + nonTerminalstack = [] + root = State(G.startSymbol.Name) + nonTerminalstack.append(root) + + while lent > 0: + lent -= 1 + next_production = derivation[lent] + print("next_production", next_production) + currentNode = nonTerminalstack.pop() + # assert currentNode.state == next_production.Left.Name, "Wrong derivation" + + if next_production.IsEpsilon: + currentNode.add_transition(" ", State("epsilon", True)) + + for symbol in next_production.Right: + if symbol.IsTerminal: + currentNode.add_transition(" ", State(symbol.Name, True)) + else: + nonTerminalstack.append(State(symbol.Name)) + currentNode.add_transition( + " ", nonTerminalstack[len(nonTerminalstack) - 1] + ) + + return root diff --git a/src/parsing/tokens_rules.py b/src/parsing/tokens_rules.py new file mode 100644 index 000000000..13a358020 --- /dev/null +++ b/src/parsing/tokens_rules.py @@ -0,0 +1,243 @@ +# https://www.dabeaz.com/ply/ply.html +# file for PLY rules + +from cmp.errors import ( + tokenizer_error, + LexicographicError, + UnexpectedCharError, + UnexpectedEOFError, + UnexpectedTokenError, +) + +# Declare the states +states = (("comments", "exclusive"),) + + +# All lexers must provide a list tokens that defines all of the possible token names +# that can be produced by the lexer. 
+first_tokens = [ + "larrow", + "rarrow", + "lessequal", + "id", + "type_id", + "int", + "string", +] +# Add "ccom" to test comments + +reserved = { + "class": "class", + "inherits": "inherits", + "not": "not", + "isvoid": "isvoid", + "let": "let", + "in": "in", + "if": "if", + "then": "then", + "else": "else", + "fi": "fi", + "loop": "loop", + "pool": "pool", + "case": "case", + "of": "of", + "esac": "esac", + "while": "while", + "new": "new", + "true": "true", + "false": "false", +} + +literals = [ + ";", + ":", + ",", + ".", + "(", + ")", + "{", + "}", + "@", + "+", + "-", + "*", + "/", + "<", + "=", + "~", +] + +tokens = first_tokens + list(reserved.values()) + +# Match the first (*. Enter comments state. +def t_begin_comments(t): + r"\(\*" + t.lexer.code_start = t.lexer.lexpos # Record the starting position + t.lexer.level = 1 # Initial level + t.lexer.begin("comments") # Enter 'comments' state + + +# Rules for the comments state +# Comments starting symbol +def t_comments_opsymb(t): + r"\(\*" + t.lexer.level += 1 + +# Define a rule so we can track line numbers +def t_comments_newline(t): + r"\n" + t.lexer.last_new_line_pos = t.lexer.lexpos + t.lexer.lineno += 1 + +# end comments +def t_comments_ccom(t): + r"\*\)" + t.lexer.level -= 1 + + if t.lexer.level == 0: + # t.value = t.lexer.lexdata[t.lexer.code_start : t.lexer.lexpos -2] # comments should not be returned, just skipped + # t.type = "ccom" + # return t + t.lexer.begin("INITIAL") + +# For bad characters. In this case we just skip over everything but (* or *) +def t_comments_error(t): + t.lexer.skip(1) + +def t_comments_anycharacter(t): + "." 
+ +# EOF handling rule +def t_comments_eof(t): + if t.lexer.level > 0: # guardar este error y actuar acorde + t.lexer.errors.append(LexicographicError(t.lexer.lineno, t.lexer.lexpos - t.lexer.last_new_line_pos + 1, "EOF in comment")) + return None + # t.lexer.skip(1) + + +# Rules for initial state (default state) +def t_comment1(t): + r"\--.*" + pass + # No return value. Token discarded + + +def t_string(t):# se va a develve el string vacio cada vez que no se puede matchear el string completo + r'\"'#xq habria que seguir analizando el string cuando se ha encontrado un caracter null y se ha de parar en otros casos? + string_list = [] + text = t.lexer.lexdata + initial = t.lexer.lexpos + index = t.lexer.lexpos + final = len(text) + while index < final and text[index] != '\"': + if text[index] == '\\': + if text[index + 1] in ["t", "b", "f", "n"]: + string_list.append(text[index : index + 2]) # \t,\b,\f, \n + elif text[index + 1] == '\n': # \n whith \ before + t.lexer.lineno +=1 + t.lexer.last_new_line_pos = index + 2# saving last \n + string_list.append('\n') + else: + string_list.append(# ESTO SE AHCE DOS VECES< COMO TRATAR DIFERENTE EL \t por ejempli + text[index : index + 2] + ) # ]character c: take the character in \c + index += 2 + + elif text[index] == '\n': # non scape \n (whithout and extra \) is not allowed + t.lexer.errors.append( + LexicographicError( + t.lexer.lineno, + index - t.lexer.last_new_line_pos + 1, + "Unterminated string constant", + ) + ) + t.lexer.lineno +=1 + t.lexer.last_new_line_pos = index + 1# saving last \n + t.lexer.lexpos = index + 1 + return t + elif text[index] == '\0': # null character \0 is not allowed + t.lexer.errors.append( + LexicographicError( + t.lexer.lineno, + index - t.lexer.last_new_line_pos + 1, + "String contains null character", + ) + ) + index += 1 + # return t + else: + string_list.append(text[index]) + index += 1 + + if index == final: # String may not cross file boundaries + t.lexer.errors.append( + 
LexicographicError( + t.lexer.lineno, + index - t.lexer.last_new_line_pos + 1, + "EOF in string constant", + ) + ) + t.lexer.lexpos = index + return t + else: + index += 1#jumping '\"' character (character for closing coments) + + t.value = "".join(string_list) + t.type = "string" + t.lexer.lexpos = index + return t + + +#Object identifiers +def t_id(t): + r'[a-z][a-zA-Z_0-9]*' + t.type = reserved.get( + t.value.lower(), "id" + ) # Check for reserved words. If it isn't a reserved word is categorized as identifier + return t + +#Type identifiers +def t_type_id(t): + r'[A-Z][a-zA-Z_0-9]*' + value_in_lowercase = t.value.lower() + if value_in_lowercase != "false" and value_in_lowercase != "true": + t.type = reserved.get( + value_in_lowercase, "type_id" + ) # Check for reserved words. If it isn't a reserved word is categorized as identifier + else: + t.type = "type_id"#this may be extra as t.type is already setted as type_id + # t.lexpos = t.lexpos - t.lexer.last_new_line_pos + 1 + return t + +# matching int numbers +def t_int(t): + r"\d+" + t.value = int(t.value) + # r'\d+(\.\d*)?' 
float numbers + # t.value = float(t.value) + return t + +# Define a rule so we can track line numbers +def t_newline(t): + r'\n' + t.lexer.last_new_line_pos = t.lexer.lexpos + t.lexer.lineno += 1 + +t_larrow = r"<-" +t_rarrow = r"=>" +t_lessequal = r"<=" + +# A string containing ignored characters (spaces and tabs) +t_ignore = " \t" + + +# Error handling rule +def t_error(t):#At the moment of entering this method lexpos is the current character (instead of the last matched character) because nothing could've been matched + t.lexer.errors.append( + LexicographicError( + t.lexer.lineno, + t.lexer.lexpos - t.lexer.last_new_line_pos + 1, + f"ERROR {t.value[0]}", + ) + ) + t.lexer.skip(1) diff --git a/src/parsing/visitor_type_ast.py b/src/parsing/visitor_type_ast.py new file mode 100644 index 000000000..a51fdd601 --- /dev/null +++ b/src/parsing/visitor_type_ast.py @@ -0,0 +1,215 @@ +import cmp.visitor as visitor +from code_gen.ast_typed_nodes import ( + ProgramNode, + ClassDeclarationNode, + FuncDeclarationNode, + AttrDeclarationNode, + IfNode, + WhileNode, + LetNode, + CaseNode, + AssignNode, + VarDeclarationNode, + CaseItemNode, + InstantiateNode, + BlockNode, + CallNode, + BinaryNode, + AtomicNode, + UnaryNode, + ArithmeticOperation, + ComparisonOperation, + ConstantNumNode, + VariableNode, + StringNode, + BooleanNode, + InstantiateNode, + NotNode, + IsvoidNode, + NegNode, + PlusNode, + MinusNode, + StarNode, + DivNode, + LessNode, + LessEqualNode, + EqualNode, +) + +class FormatVisitorTypedAst(object): + tree = [] + + @visitor.on("node") + def visit(self, node, tabs=0): + pass + + @visitor.when(ProgramNode) + def visit(self, node, tabs=0): + self.tree = [] + ans = "\\__\\__" * tabs + f"\\__ProgramNode [< class > ... 
< class >]" + self.tree.append(ans) + for child in node.declarations: + self.visit(child, tabs + 1) + return self.tree + + @visitor.when(ClassDeclarationNode) + def visit(self, node, tabs=0): + parent = "" if node.parent is None else f"inherits {node.parent}" + ans = ( + "\\__\\__" * tabs + + f"\\__ClassDeclarationNode: class {node.id} {parent} {{ ... }}" + ) + self.tree.append(ans) + for child in node.features: + self.visit(child, tabs + 1) + return + + @visitor.when(AttrDeclarationNode) + def visit(self, node, tabs=0): + ans = ( + "\\__\\__" * tabs + + f"\\__AttrDeclarationNode: {node.id} : {node.type} <- " + ) + exp = "\\__\\__" * (tabs + 1) + "\\__NONE" + self.tree.append(ans) + if not node.init_exp is None: + self.visit(node.init_exp, tabs + 1) + else: + self.tree.append(exp) + + return + + @visitor.when(VarDeclarationNode) + def visit(self, node, tabs=0): + ans = ( + "\\__\\__" * tabs + + f"\\__VarDeclarationNode: {node.id} : {node.type} <- " + ) + expr = "\\__\\__" * (tabs + 1) + "\\__NONE" + self.tree.append(ans) + + if not node.expr is None: + expr = self.visit(node.expr, tabs + 1) + else: # esto estaba antes sin el else + self.tree.append(expr) + + return + + @visitor.when(AssignNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__AssignNode: {node.id} <- " + self.tree.append(ans) + self.visit(node.expr, tabs + 1) + return + + @visitor.when(FuncDeclarationNode) + def visit(self, node, tabs=0): + # params = ", ".join(":".join(param) for param in node.params) + params = ", ".join(":".join(param_name).join(param_type) for param_name, param_type in node.params) + ans = ( + "\\__\\__" * tabs + + f"\\__FuncDeclarationNode: {node.id}({params}) : {node.type} {{ }}" + ) + self.tree.append(ans) + self.visit(node.body, tabs + 1) + return + + @visitor.when(BinaryNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__ {node.__class__.__name__} " + self.tree.append(ans) + self.visit(node.left, tabs + 1) + self.visit(node.right, 
tabs + 1) + return + + @visitor.when(AtomicNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__ {node.__class__.__name__}: {node.lex}" + self.tree.append(ans) + return + + @visitor.when(UnaryNode) + def visit(self, node, tabs=0): + class_name = node.__class__.__name__.split("Node")[0] + ans = "\\__\\__" * tabs + f"\\__ {node.__class__.__name__}: {class_name} " + self.tree.append(ans) + self.visit(node.expr, tabs + 1) + return + + @visitor.when(CallNode) + def visit(self, node, tabs=0): + if not node.obj is None: + if not node.at_type is None: + ans = ( + "\\__\\__" * tabs + + f"\\__CallNode: @{node.at_type}.{node.id}(, ..., )" + ) + else: + ans = ( + "\\__\\__" * tabs + + f"\\__CallNode: .{node.id}(, ..., )" + ) + self.tree.append(ans) + self.visit(node.obj, tabs + 1) + else: + ans = "\\__\\__" * tabs + f"\\__CallNode: {node.id}(, ..., )" + self.tree.append(ans) + for arg in node.args: + self.visit(arg, tabs + 1) + return + + @visitor.when(InstantiateNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__ InstantiateNode: new {node.lex}()" + self.tree.append(ans) + return + + @visitor.when(BlockNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__BlockNode: {{; ... 
;}}" + self.tree.append(ans) + for child in node.expression_list: + self.visit(child, tabs + 1) + return + + @visitor.when(IfNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__IfNode: if then else fi" + self.tree.append(ans) + self.visit(node.if_expr, tabs + 1) + self.visit(node.then_expr, tabs + 1) + self.visit(node.else_expr, tabs + 1) + return + + @visitor.when(WhileNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__WhileNode: while loop pool" + self.tree.append(ans) + self.visit(node.condition, tabs + 1) + self.visit(node.body, tabs + 1) + return + + @visitor.when(LetNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__LetNode: let in " + self.tree.append(ans) + for child in node.identifiers: + self.visit(child, tabs + 1) + self.visit(node.body, tabs + 1) + return + + @visitor.when(CaseNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__CaseNode: case of esac" + self.tree.append(ans) + self.visit(node.expr, tabs + 1) + for child in node.case_items: + self.visit(child, tabs + 1) + return + + @visitor.when(CaseItemNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__CaseItemNode: {node.id} : {node.type} => ;" + self.tree.append(ans) + self.visit(node.expr, tabs + 1) + return diff --git a/src/semantic/__init__.py b/src/semantic/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/semantic/ast_nodes.py b/src/semantic/ast_nodes.py new file mode 100644 index 000000000..bb0e00c79 --- /dev/null +++ b/src/semantic/ast_nodes.py @@ -0,0 +1,206 @@ +from cmp.semantic import Context +from cmp.utils import Token + +class Node: + def __init__(self, token): + self.token = token + + +class ProgramNode(Node): + def __init__(self, declarations, context=None): + super().__init__(Token("", "", (0,0))) # symbolic initial token + self.declarations = declarations + self.context = context + + +class ExpressionNode(Node): + pass + + +class ClassDeclarationNode: 
+ def __init__(self, idx, features, token, parent=None): + self.id = idx + self.parent = parent + self.features = features + self.token = token + + +class FuncDeclarationNode: + def __init__(self, idx, params, return_type, body, token): + self.id = idx + self.params = params + self.type = return_type + self.body = body + self.token = token + + +class AttrDeclarationNode: + def __init__(self, idx, typex, init_exp=None, token = Token("", "", (0,0))): + self.id = idx + self.type = typex + self.init_exp = init_exp + self.token = token + + +class AssignNode(ExpressionNode): + def __init__(self, idx, expr, token): + self.id = idx + self.expr = expr + self.token = token + + +class LetNode(ExpressionNode): + def __init__(self, identifiers, body, token): + self.identifiers = identifiers + self.body = body + self.token = token + + +#No tiene uno asi +class VarDeclarationNode: + def __init__(self, token, typex, expr=None): + self.id = token.lex + self.type = typex + self.expr = expr + self.token = token + + +class IfNode(ExpressionNode): + def __init__(self, if_exp, then_exp, else_exp, token): + self.if_expr = if_exp + self.then_expr = then_exp + self.else_expr = else_exp + self.token = token + + +class WhileNode(ExpressionNode): + def __init__(self, condition, body, token): + self.condition = condition + self.body = body + self.token = token + + +class CaseNode(ExpressionNode): + def __init__(self, exp, case_items, token): + self.expr = exp + self.case_items = case_items + self.token = token + +class CaseItemNode(ExpressionNode): + def __init__(self, idx, typex, exp, token): + self.id = idx + self.type = typex + self.expr = exp + self.token = token + + +class CallNode(ExpressionNode): + def __init__(self, idx, args, obj=None, at_type=None, token = Token("", "", (-1,-1))): + self.obj = obj + self.id = idx + self.args = args + self.at_type = at_type + if token.location[0] == -1: + self.token = idx + else: + self.token = token + + +class BlockNode(ExpressionNode): + def 
__init__(self, expression_list, token): + self.expression_list = expression_list + self.token = token + + +class AtomicNode(ExpressionNode): + def __init__(self, token): + self.lex = token.lex + self.token = token + + +class UnaryNode(ExpressionNode): + def __init__(self, expr, token): + self.expr = expr + self.token = token + + +class BinaryNode(ExpressionNode): + def __init__(self, left, right, token): + self.left = left + self.right = right + self.token = token + + +class ArithmeticOperation(BinaryNode): + pass + + +class ComparisonOperation(BinaryNode): + pass + + +class ConstantNumNode(AtomicNode): + pass + + +class VariableNode(AtomicNode): + pass + + +class StringNode(AtomicNode): + pass + + +class BooleanNode(AtomicNode): + pass + + +class InstantiateNode(AtomicNode): + def __init__(self, lex, token): + self.lex = lex + self.token = token + + +class NotNode(UnaryNode): + pass + + +class IsvoidNode(UnaryNode): + pass + + +class NegNode(UnaryNode): + pass + + +class PlusNode(ArithmeticOperation): + pass + + +class MinusNode(ArithmeticOperation): + pass + + +class StarNode(ArithmeticOperation): + pass + + +class DivNode(ArithmeticOperation): + pass + + +class LessNode(ComparisonOperation): + pass + + +class LessEqualNode(ComparisonOperation): + pass + + +class EqualNode(ComparisonOperation): + pass + + +class DefaultValueNode(ExpressionNode): + def __init__(self, typex): + self.type = typex diff --git a/src/semantic/cool_visitor.py b/src/semantic/cool_visitor.py new file mode 100644 index 000000000..42afa90d6 --- /dev/null +++ b/src/semantic/cool_visitor.py @@ -0,0 +1,533 @@ +import cmp.visitor as visitor +from semantic.ast_nodes import ( + ProgramNode, + ClassDeclarationNode, + FuncDeclarationNode, + AttrDeclarationNode, + IfNode, + WhileNode, + LetNode, + CaseNode, + AssignNode, + VarDeclarationNode, + CaseItemNode, + InstantiateNode, + BlockNode, + CallNode, + BinaryNode, + AtomicNode, + UnaryNode, + ArithmeticOperation, + ComparisonOperation, + 
ConstantNumNode, + VariableNode, + StringNode, + BooleanNode, + InstantiateNode, + NotNode, + IsvoidNode, + NegNode, + PlusNode, + MinusNode, + StarNode, + DivNode, + LessNode, + LessEqualNode, + EqualNode, +) + + +class FormatVisitor(object): + @visitor.on("node") + def visit(self, node, tabs=0): + pass + + @visitor.when(ProgramNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__ProgramNode [ ... ]" + statements = "\n".join( + self.visit(child, tabs + 1) for child in node.declarations + ) + return f"{ans}\n{statements}" + + @visitor.when(ClassDeclarationNode) + def visit(self, node, tabs=0): + parent = "" if node.parent is None else f"inherits {node.parent}" + ans = ( + "\t" * tabs + + f"\\__ClassDeclarationNode: class {node.id} {parent} {{ ... }}" + ) + features = "\n".join(self.visit(child, tabs + 1) for child in node.features) + return f"{ans}\n{features}" + + @visitor.when(AttrDeclarationNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__AttrDeclarationNode: {node.id} : {node.type} <- " + exp = "\t" * (tabs + 1) + "__NONE" + if not node.init_exp is None: + exp = "\n".join(self.visit(node.init_exp, tabs + 1)) + return f"{ans}\n{exp}" + + @visitor.when(VarDeclarationNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__VarDeclarationNode: {node.id} : {node.type} <- " + expr = "\t" * (tabs + 1) + "__NONE" + if not node.expr is None: + expr = self.visit(node.expr, tabs + 1) + return f"{ans}\n{expr}" + + @visitor.when(AssignNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__AssignNode: {node.id} <- " + expr = self.visit(node.expr, tabs + 1) + return f"{ans}\n{expr}" + + @visitor.when(FuncDeclarationNode) + def visit(self, node, tabs=0): + params = ", ".join(":".join(param) for param in node.params) + ans = ( + "\t" * tabs + + f"\\__FuncDeclarationNode: {node.id}({params}) : {node.type} {{ }}" + ) + body = self.visit(node.body, tabs + 1) + return f"{ans}\n{body}" + + @visitor.when(BinaryNode) + def 
visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__ {node.__class__.__name__} " + left = self.visit(node.left, tabs + 1) + right = self.visit(node.right, tabs + 1) + return f"{ans}\n{left}\n{right}" + + @visitor.when(AtomicNode) + def visit(self, node, tabs=0): + return "\t" * tabs + f"\\__ {node.__class__.__name__}: {node.lex}" + + @visitor.when(UnaryNode) + def visit(self, node, tabs=0): + class_name = node.__class__.__name__.split("Node")[0] + ans = "\t" * tabs + f"\\__ {node.__class__.__name__}: {class_name} " + expr = self.visit(node.expr, tabs + 1) + return f"{ans}\n{expr}" + + @visitor.when(CallNode) + def visit(self, node, tabs=0): + args = "\n".join(self.visit(arg, tabs + 1) for arg in node.args) + if not node.obj is None: + obj = self.visit(node.obj, tabs + 1) + if not node.at_type is None: + ans = ( + "\t" * tabs + + f"\\__CallNode: @{node.at_type}.{node.id}(, ..., )" + ) + else: + ans = ( + "\t" * tabs + f"\\__CallNode: .{node.id}(, ..., )" + ) + return f"{ans}\n{obj}\n{args}" + else: + ans = "\t" * tabs + f"\\__CallNode: {node.id}(, ..., )" + return f"{ans}\n{args}" + + @visitor.when(InstantiateNode) + def visit(self, node, tabs=0): + return "\t" * tabs + f"\\__ InstantiateNode: new {node.lex}()" + + @visitor.when(BlockNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__BlockNode: {{; ... 
;}}" + body = "\n".join(self.visit(child, tabs + 1) for child in node.expression_list) + return f"{ans}\n{body}" + + @visitor.when(IfNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__IfNode: if then else fi" + if_expr = self.visit(node.if_expr, tabs + 1) + then_expr = self.visit(node.then_expr, tabs + 1) + else_expr = self.visit(node.else_expr, tabs + 1) + return f"{ans}\n{if_expr}\n{then_expr}\n{else_expr}" + + @visitor.when(WhileNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__WhileNode: while loop pool" + condition = self.visit(node.condition, tabs + 1) + body = self.visit(node.body, tabs + 1) + return f"{ans}\n{condition}\n{body}" + + @visitor.when(LetNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__LetNode: let in " + ident_list = "\n".join( + self.visit(child, tabs + 1) for child in node.identifiers + ) + body = self.visit(node.body, tabs + 1) + return f"{ans}\n{ident_list}\n{body}" + + @visitor.when(CaseNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__CaseNode: case of esac" + case_block = "\n".join(self.visit(child, tabs + 1) for child in node.case_items) + expr = self.visit(node.expr, tabs + 1) + return f"{ans}\n{expr}\n{case_block}" + + @visitor.when(CaseItemNode) + def visit(self, node, tabs=0): + ans = "\t" * tabs + f"\\__CaseItemNode: {node.id} : {node.type} => ;" + expr = self.visit(node.expr, tabs + 1) + return f"{ans}\n{expr}" + + +class FormatVisitorST(object): + tree = [] + + @visitor.on("node") + def visit(self, node, tabs=0): + pass + + @visitor.when(ProgramNode) + def visit(self, node, tabs=0): + self.tree = [] + ans = "\\__\\__" * tabs + f"\\__ProgramNode [< class > ... 
< class >]" + self.tree.append(ans) + for child in node.declarations: + self.visit(child, tabs + 1) + return self.tree + + @visitor.when(ClassDeclarationNode) + def visit(self, node, tabs=0): + parent = "" if node.parent is None else f"inherits {node.parent}" + ans = ( + "\\__\\__" * tabs + + f"\\__ClassDeclarationNode: class {node.id} {parent} {{ ... }}" + ) + self.tree.append(ans) + for child in node.features: + self.visit(child, tabs + 1) + return + + @visitor.when(AttrDeclarationNode) + def visit(self, node, tabs=0): + ans = ( + "\\__\\__" * tabs + + f"\\__AttrDeclarationNode: {node.id} : {node.type} <- " + ) + exp = "\\__\\__" * (tabs + 1) + "\\__NONE" + self.tree.append(ans) + if not node.init_exp is None: + self.visit(node.init_exp, tabs + 1) + else: + self.tree.append(exp) + + return + + @visitor.when(VarDeclarationNode) + def visit(self, node, tabs=0): + ans = ( + "\\__\\__" * tabs + + f"\\__VarDeclarationNode: {node.id} : {node.type} <- " + ) + expr = "\\__\\__" * (tabs + 1) + "\\__NONE" + self.tree.append(ans) + + if not node.expr is None: + expr = self.visit(node.expr, tabs + 1) + else: # esto estaba antes sin el else + self.tree.append(expr) + + return + + @visitor.when(AssignNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__AssignNode: {node.id} <- " + self.tree.append(ans) + self.visit(node.expr, tabs + 1) + return + + @visitor.when(FuncDeclarationNode) + def visit(self, node, tabs=0): + # params = ", ".join(":".join(param) for param in node.params) + params = ", ".join(":".join(param_name.lex).join(param_type.lex) for param_name, param_type in node.params) + ans = ( + "\\__\\__" * tabs + + f"\\__FuncDeclarationNode: {node.id}({params}) : {node.type} {{ }}" + ) + self.tree.append(ans) + self.visit(node.body, tabs + 1) + return + + @visitor.when(BinaryNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__ {node.__class__.__name__} " + self.tree.append(ans) + self.visit(node.left, tabs + 1) + 
self.visit(node.right, tabs + 1) + return + + @visitor.when(AtomicNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__ {node.__class__.__name__}: {node.lex}" + self.tree.append(ans) + return + + @visitor.when(UnaryNode) + def visit(self, node, tabs=0): + class_name = node.__class__.__name__.split("Node")[0] + ans = "\\__\\__" * tabs + f"\\__ {node.__class__.__name__}: {class_name} " + self.tree.append(ans) + self.visit(node.expr, tabs + 1) + return + + @visitor.when(CallNode) + def visit(self, node, tabs=0): + if not node.obj is None: + if not node.at_type is None: + ans = ( + "\\__\\__" * tabs + + f"\\__CallNode: @{node.at_type}.{node.id}(, ..., )" + ) + else: + ans = ( + "\\__\\__" * tabs + + f"\\__CallNode: .{node.id}(, ..., )" + ) + self.tree.append(ans) + self.visit(node.obj, tabs + 1) + else: + ans = "\\__\\__" * tabs + f"\\__CallNode: {node.id}(, ..., )" + self.tree.append(ans) + for arg in node.args: + self.visit(arg, tabs + 1) + return + + @visitor.when(InstantiateNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__ InstantiateNode: new {node.lex}()" + self.tree.append(ans) + return + + @visitor.when(BlockNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__BlockNode: {{; ... 
;}}" + self.tree.append(ans) + for child in node.expression_list: + self.visit(child, tabs + 1) + return + + @visitor.when(IfNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__IfNode: if then else fi" + self.tree.append(ans) + self.visit(node.if_expr, tabs + 1) + self.visit(node.then_expr, tabs + 1) + self.visit(node.else_expr, tabs + 1) + return + + @visitor.when(WhileNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__WhileNode: while loop pool" + self.tree.append(ans) + self.visit(node.condition, tabs + 1) + self.visit(node.body, tabs + 1) + return + + @visitor.when(LetNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__LetNode: let in " + self.tree.append(ans) + for child in node.identifiers: + self.visit(child, tabs + 1) + self.visit(node.body, tabs + 1) + return + + @visitor.when(CaseNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__CaseNode: case of esac" + self.tree.append(ans) + self.visit(node.expr, tabs + 1) + for child in node.case_items: + self.visit(child, tabs + 1) + return + + @visitor.when(CaseItemNode) + def visit(self, node, tabs=0): + ans = "\\__\\__" * tabs + f"\\__CaseItemNode: {node.id} : {node.type} => ;" + self.tree.append(ans) + self.visit(node.expr, tabs + 1) + return + + +class CopyVisitor(object): + @visitor.on("node") + def visit(self, node): + pass + + @visitor.when(ProgramNode) + def visit(self, node): + declarations = [] + for child in node.declarations: + declarations.append(self.visit(child)) + return ProgramNode(declarations) + + @visitor.when(ClassDeclarationNode) + def visit(self, node): + features = [] + for feat in node.features: + features.append(self.visit(feat)) + return ClassDeclarationNode(node.id, features, node.token, node.parent) + + @visitor.when(AttrDeclarationNode) + def visit(self, node): + init_exp = None + if not node.init_exp is None: + init_exp = self.visit(node.init_exp) + + return AttrDeclarationNode(node.id, node.type, 
init_exp, node.token) + + @visitor.when(VarDeclarationNode) + def visit(self, node): + expr = None + if not node.expr is None: + expr = self.visit(node.expr) + + return VarDeclarationNode(node.token, node.type, expr) + + @visitor.when(AssignNode) + def visit(self, node): + expr = self.visit(node.expr) + return AssignNode(node.id, expr, node.token) + + @visitor.when(FuncDeclarationNode) + def visit(self, node): + params = [p for p in node.params] + body = self.visit(node.body) + return FuncDeclarationNode(node.id, params, node.type, body, node.token) + + @visitor.when(CallNode) + def visit(self, node): + obj = None + if not node.obj is None: + obj = self.visit(node.obj) + + args = [] + for arg in node.args: + args.append(self.visit(arg)) + + return CallNode(node.id, args, obj, node.at_type, node.token) + + @visitor.when(BlockNode) + def visit(self, node): + expression_list = [] + for child in node.expression_list: + expression_list.append(self.visit(child)) + return BlockNode(expression_list, node.token) + + @visitor.when(IfNode) + def visit(self, node): + if_expr = self.visit(node.if_expr) + then_expr = self.visit(node.then_expr) + else_expr = self.visit(node.else_expr) + return IfNode(if_expr, then_expr, else_expr, node.token) + + @visitor.when(WhileNode) + def visit(self, node): + condition = self.visit(node.condition) + body = self.visit(node.body) + return WhileNode(condition, body, node.token) + + @visitor.when(LetNode) + def visit(self, node): + identifiers = [] + for child in node.identifiers: + identifiers.append(self.visit(child)) + body = self.visit(node.body) + return LetNode(identifiers, body, node.token) + + @visitor.when(CaseNode) + def visit(self, node): + expr = self.visit(node.expr) + case_items = [] + for child in node.case_items: + case_items.append(self.visit(child)) + return CaseNode(expr, case_items, node.token) + + @visitor.when(CaseItemNode) + def visit(self, node): + expr = self.visit(node.expr) + return CaseItemNode(node.id, node.type, 
expr, node.token) + + @visitor.when(PlusNode) + def visit(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + return PlusNode(left, right, node.token) + + @visitor.when(MinusNode) + def visit(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + return MinusNode(left, right, node.token) + + @visitor.when(StarNode) + def visit(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + return StarNode(left, right, node.token) + + @visitor.when(DivNode) + def visit(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + return DivNode(left, right, node.token) + + @visitor.when(LessNode) + def visit(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + return LessNode(left, right, node.token) + + @visitor.when(LessEqualNode) + def visit(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + return LessEqualNode(left, right, node.token) + + @visitor.when(EqualNode) + def visit(self, node): + left = self.visit(node.left) + right = self.visit(node.right) + return EqualNode(left, right, node.token) + + @visitor.when(ConstantNumNode) + def visit(self, node): + return ConstantNumNode(node.token) + + @visitor.when(VariableNode) + def visit(self, node): + return VariableNode(node.token) + + @visitor.when(StringNode) + def visit(self, node): + return StringNode(node.token) + + @visitor.when(BooleanNode) + def visit(self, node): + return BooleanNode(node.token) + + @visitor.when(InstantiateNode) + def visit(self, node): + return InstantiateNode(node.lex, node.token) + + @visitor.when(NotNode) + def visit(self, node): + expr = self.visit(node.expr) + return NotNode(expr, node.token) + + @visitor.when(IsvoidNode) + def visit(self, node): + expr = self.visit(node.expr) + return IsvoidNode(expr, node.token) + + @visitor.when(NegNode) + def visit(self, node): + expr = self.visit(node.expr) + return NegNode(expr, node.token) diff --git 
class Tset:
    """Tree of scopes mapping identifiers to their candidate type sets.

    Every node stores:

    * ``locals``   -- dict from identifier to a collection of type names
      (the values are copied with ``.copy()`` and edited with ``.remove()``,
      so they are expected to be sets or lists);
    * ``parent``   -- the enclosing ``Tset`` or ``None`` for a root;
    * ``children`` -- dict from the object that opened a nested scope
      (typically an AST node) to the nested ``Tset``.
    """

    def __init__(self, parent=None):
        self.locals = {}
        self.parent = parent
        self.children = {}

    def create_child(self, node):
        """Create a nested scope, register it under ``node`` and return it."""
        child = Tset(self)
        self.children[node] = child
        return child

    def find_set(self, idx):
        """Return the ``locals`` dict of the nearest scope that defines ``idx``.

        The search starts at this scope and walks up through ``parent`` links.
        Returns ``None`` when no scope in the chain defines ``idx``.
        """
        scope = self
        while scope is not None:
            if idx in scope.locals:
                return scope.locals
            scope = scope.parent
        return None

    def __str__(self):
        """Render this scope and, recursively, every nested scope."""
        parts = []
        for key, value in self.locals.items():
            parts.append("\t" + str(key) + ":" + str(value) + "\n")
        for key, child in self.children.items():
            try:
                ident = key.id
            except AttributeError:
                # Keys without an ``id`` attribute get a generic label.
                header = "let or case --->"
            else:
                # An id may be a plain string or a token-like object exposing
                # ``.lex``; concatenating a token directly would raise.
                header = str(getattr(ident, "lex", ident)) + "--->"
            parts.append("\n" + header + "\n" + str(child))
        return "".join(parts)

    def clone(self):
        """Return a copy of this subtree.

        Type collections are copied one level deep (``.copy()``) and nested
        scopes are cloned recursively.  The clone keeps this scope's parent,
        while every cloned child is re-parented to its cloned parent so that
        ``find_set`` on the copy never reaches back into the original tree.
        """
        solve = Tset(self.parent)
        for idx, typex in self.locals.items():
            solve.locals[idx] = typex.copy()

        for key, value in self.children.items():
            child_copy = value.clone()
            child_copy.parent = solve
            solve.children[key] = child_copy

        return solve

    def compare(self, other):
        """Return ``True`` when both subtrees hold the same data.

        The comparison is by content: identifiers and scope keys are matched
        by key, not by dictionary insertion order.
        """
        if self.locals != other.locals:
            return False
        if len(self.children) != len(other.children):
            return False

        for key, child in self.children.items():
            try:
                other_child = other.children[key]
            except KeyError:
                return False
            if not child.compare(other_child):
                return False
        return True

    def clean(self):
        """Strip the inference marker entries from every type collection.

        Removes ``"InferenceError"`` and ``"!static_type_declared"`` in place,
        in this scope and in all nested scopes.
        """
        for typex in self.locals.values():
            if "InferenceError" in typex:
                typex.remove("InferenceError")
            if "!static_type_declared" in typex:
                typex.remove("!static_type_declared")
        for child in self.children.values():
            child.clean()
b/src/semantic/type_builder.py @@ -0,0 +1,264 @@ +import copy +from cmp.semantic import SemanticError as SError +from cmp.semantic import Attribute, Method, Type +from cmp.semantic import VoidType, IntType, ErrorType, StringType, BoolType +from cmp.semantic import Context +from semantic.ast_nodes import ( + ProgramNode, + ClassDeclarationNode, + AttrDeclarationNode, + FuncDeclarationNode, +) +import cmp.visitor as visitor +from semantic.tset import Tset +from collections import deque +from semantic.cool_visitor import CopyVisitor +from cmp.errors import SemanticError, TypeError + +class TypeBuilder: + def __init__(self, errors=[]): + self.context = None + self.current_type = None + self.errors = errors + + @visitor.on("node") + def visit(self, node): + pass + + @visitor.when(ProgramNode) + def visit(self, node): + self.context = copy.copy(node.context) + + io_type = self.context.get_type("IO") + self_type = self.context.get_type("SELF_TYPE") + int_type = self.context.get_type("Int") + string_type = self.context.get_type("String") + object_type = self.context.get_type("Object") + + # Object + parent_tset = Tset() + parent_tset.locals["abort"] = {"Object"} + parent_tset.locals["copy"] = {"SELF_TYPE"} + parent_tset.locals["type_name"] = {"String"} + + method = object_type.define_method("abort", [], [], object_type) + method.tset = Tset(parent_tset) + + method = object_type.define_method("copy", [], [], self_type) + method.tset = Tset(parent_tset) + + method = object_type.define_method("type_name", [], [], string_type) + method.tset = Tset(parent_tset) + + # IO + parent_tset = Tset() + parent_tset.locals["out_string"] = {"SELF_TYPE"} + parent_tset.locals["out_int"] = {"SELF_TYPE"} + parent_tset.locals["in_string"] = {"String"} + parent_tset.locals["in_int"] = {"Int"} + + method = io_type.define_method("out_string", ["x"], [string_type], self_type) + method.tset = Tset(parent_tset) + method.tset.locals["x"] = {"String"} + + method = io_type.define_method("out_int", 
["x"], [int_type], self_type) + method.tset = Tset(parent_tset) + method.tset.locals["x"] = {"Int"} + + method = io_type.define_method("in_string", [], [], string_type) + method.tset = Tset(parent_tset) + + method = io_type.define_method("in_int", [], [], int_type) + method.tset = Tset(parent_tset) + + # String + parent_tset = Tset() + parent_tset.locals["length"] = {"Int"} + parent_tset.locals["concat"] = {"String"} + parent_tset.locals["substr"] = {"String"} + + method = string_type.define_method("length", [], [], int_type) + method.tset = Tset(parent_tset) + + method = string_type.define_method("concat", ["s"], [string_type], string_type) + method.tset = Tset(parent_tset) + method.tset.locals["s"] = {"String"} + + method = string_type.define_method( + "substr", ["i", "l"], [int_type, int_type], string_type + ) + method.tset = Tset(parent_tset) + method.tset.locals["i"] = {"Int"} + method.tset.locals["l"] = {"Int"} + + # ------checking for in order definitions and cyclic heritage + self.check_cycles(node.declarations) + + parent_child_dict = {} + queue = deque() + visited = {} + not_visited = [] # ++ + for class_declaration in node.declarations: + not_visited.append(class_declaration) + try: + parent_type = class_declaration.parent.lex + self.context.get_type(parent_type) + try: + parent_child_dict[parent_type].append(class_declaration) + except: # KeyError + parent_child_dict[parent_type] = [class_declaration] + except Exception: # parent is None or not definition provided + queue.append(class_declaration) + + while not_visited: # ++ + while queue: + class_declaration = queue.popleft() + try: # avoid redefining classes involved in a ciclyc heritage + class_visited = visited[class_declaration] # .id + except: + not_visited.remove(class_declaration) + try: + children = parent_child_dict[class_declaration.id.lex] + for declaration in children: + queue.append(declaration) + except: # no one inherits from this class + pass + + self.visit(class_declaration) + 
visited[class_declaration] = True # .id + + if not_visited: + queue.append(not_visited[0]) + + try: + main_meth = self.context.get_type("Main").get_method("main", non_rec=True) + if len(main_meth.param_names) > 0: + self.errors.append( + SemanticError(0, 0, '"main" method in class Main does not receive any parameters') + ) + # modify in semantic get_method in order to get some ancestor where the method is already defined + except SError: + self.errors.append(SemanticError(0, 0 ,"A class Main with a method main most be provided")) + + copy_visitor = CopyVisitor() + newAst = copy_visitor.visit(node) + newAst.context = self.context + + # Reset state + self.context = None + self.current_type = None + self.errors = None + + return newAst + + @visitor.when(ClassDeclarationNode) + def visit(self, node): + self.current_type = self.context.get_type(node.id.lex) + + if node.parent is not None: + try: + parent_type = self.get_type(node.parent, f"declared as {node.id.lex}'s parent") + self.current_type.set_parent(parent_type) # set parent type if defined + except SError as error: + node_row, node_col = node.parent.location + self.errors.append(SemanticError(node_row, node_col, error.text)) + else: + object_type = self.context.get_type("Object") + try: + self.current_type.set_parent(object_type) + except SError as error: # this is actually an intern error, a class parent most not be setted twice (is valid to note that the intention to inherit from a prohibited class is considered a semantic error) + node_row, node_col = node.token.location + self.errors.append(SemanticError(node_row, node_col, error.text)) + + for feature in node.features: + self.visit(feature) + + @visitor.when(FuncDeclarationNode) + def visit(self, node): + param_names = [fname.lex for fname, ftype in node.params] + + try: + param_types = [self.get_type(ftype, f"of formal parameter {fname.lex}") for fname, ftype in node.params] + return_type = self.get_type(node.type, f"marked in '{node.id.lex}' as return 
type") + self.current_type.define_method( + node.id.lex, param_names, param_types, return_type + ) + except SError as error: # method already defined + node_row, node_col = node.id.location + self.errors.append(SemanticError(node_row, node_col,error.text)) + + @visitor.when(AttrDeclarationNode) + def visit(self, node): + if node.id.lex == "self": + node_row, node_col = node.id.location + self.errors.append(SemanticError(node_row, node_col,"'self' cannot be the name of an attribute.")) + return + try: + attr_type = self.get_type(node.type, f"of attribute {node.id.lex}") + self.current_type.define_attribute(node.id.lex, attr_type) + except SError as error: # attribute already defined + node_row, node_col = node.id.location + self.errors.append(SemanticError(node_row, node_col,error.text)) + + def get_type(self, ntype, comp_error_mesg): + try: + return self.context.get_type(ntype.lex) + except SError as error: + node_row, node_col = ntype.location + self.errors.append(TypeError(node_row, node_col, f"Type {ntype.lex} " + comp_error_mesg + " is not defined.")) + return ErrorType() + + def check_cycles(self, class_declarations): + # checking for cycles + paths = [] + modified_paths = paths + + for class_declaration in class_declarations: + if not (class_declaration.parent is None): + d = class_declaration.id.lex + p = class_declaration.parent.lex + + modified_paths = paths + + already_in_some_path = False + for i in range(0,len(paths)): + path = paths[i] + if path[-1] == d: + if not (p in path): + modified_paths[i] = path + [p] + # add parent to last pos + else: + # error + node_row, node_col = class_declaration.parent.location + self.errors.append( + SemanticError(node_row, node_col, f"Class {class_declaration.id.lex}, or an ancestor of {class_declaration.id.lex}, is involved in an inheritance cycle.") + ) + already_in_some_path = True + + elif path[0] == p: + if not (d in path): + # add himself to first pos + modified_paths[i] = [d] + path + else: + # error + node_row, 
node_col = class_declaration.parent.location + self.errors.append( + SemanticError(node_row, node_col, f"Class {class_declaration.id.lex}, or an ancestor of {class_declaration.id.lex}, is involved in an inheritance cycle.") + ) + already_in_some_path = True + + elif p in path: + # duplicate list + indx = path.index(p) + modified_paths = modified_paths + [[d] + path[indx:len(path)]] + already_in_some_path = True + + if not already_in_some_path: + if d != p: + modified_paths = paths + [[d, p]] + else: # class inherits from itself + node_row, node_col = class_declaration.parent.location + self.errors.append( + SemanticError(node_row, node_col, f"Class {class_declaration.id.lex}, or an ancestor of {class_declaration.id.lex}, is involved in an inheritance cycle.") + ) + paths = modified_paths \ No newline at end of file diff --git a/src/semantic/type_checker.py b/src/semantic/type_checker.py new file mode 100644 index 000000000..33f0887aa --- /dev/null +++ b/src/semantic/type_checker.py @@ -0,0 +1,650 @@ +from email import message +import cmp.nbpackage +import cmp.visitor as visitor + +from semantic.ast_nodes import LessEqualNode, LessNode, Node, ProgramNode, ExpressionNode +from semantic.ast_nodes import ClassDeclarationNode, FuncDeclarationNode, AttrDeclarationNode +from semantic.ast_nodes import VarDeclarationNode, AssignNode, CallNode +from semantic.ast_nodes import ( + AtomicNode, + BinaryNode, + ArithmeticOperation, + ComparisonOperation, + IfNode, + LetNode, + CaseNode, + CaseItemNode, + WhileNode, + BlockNode, + IsvoidNode, +) +from semantic.ast_nodes import ( + ConstantNumNode, + VariableNode, + InstantiateNode, + PlusNode, + MinusNode, + StarNode, + DivNode, + NegNode, + NotNode, + EqualNode, + BooleanNode, + StringNode, +) +from semantic.cool_visitor import FormatVisitor + +from cmp.semantic import SemanticError as SError +from cmp.semantic import Attribute, Method, Type +from cmp.semantic import VoidType, ErrorType, IntType +from cmp.semantic import Context 
from cmp.semantic import Scope
from cmp.utils import find_least_type

import copy
from cmp.errors import TypeError, NameError, SemanticError, AttributeError
import code_gen.ast_typed_nodes as cool_type_nodes

# some predefined errors
WRONG_SIGNATURE = 'Method "%s" already defined in "%s" with a different signature.'
SELF_IS_READONLY = 'Variable "self" is read-only.'
LOCAL_ALREADY_DEFINED = 'Variable "%s" is already defined in method "%s".'
INCOMPATIBLE_TYPES = 'Cannot convert "%s" into "%s".'
VARIABLE_NOT_DEFINED = 'Variable "%s" is not defined in "%s".'
INVALID_OPERATION = 'Operation is not defined between "%s" and "%s".'


class TypeChecker:
    """Type-check a COOL AST and build the typed AST (``cool_type_nodes``).

    Class and feature visitors return the typed node (class visitors append it
    to ``self.tast_class_nodes``); every expression visitor returns a
    ``(static_type, typed_node)`` pair. Problems are appended to
    ``self.errors`` instead of being raised.
    """

    def __init__(self, errors=None):
        self.context = None
        self.current_type = None
        self.current_method = None
        # Keep the caller's list when one is given (even an empty one) so the
        # caller sees the collected errors; never share a default list.
        self.errors = [] if errors is None else errors

    # ------------------------------------------------------------------ helpers

    def _enclosing_name(self):
        """Name used in 'not defined in ...' messages.

        ``current_method`` is None while an attribute initializer is being
        checked; fall back to the class name instead of dereferencing None.
        """
        if self.current_method is not None:
            return self.current_method.name
        return self.current_type.name

    def _visit_int_operands(self, node, scope):
        """Visit both operands of a binary Int operator and report a
        TypeError unless each side is Int or AUTO_TYPE.

        Returns the typed nodes ``(left, right)``.
        """
        int_type = self.context.get_type("Int")
        left_type, left_exp_node = self.visit(node.left, scope)
        right_type, right_exp_node = self.visit(node.right, scope)

        left_bad = left_type != int_type and left_type.name != "AUTO_TYPE"
        right_bad = right_type != int_type and right_type.name != "AUTO_TYPE"
        if left_bad or right_bad:
            node_row, node_col = node.token.location
            self.errors.append(
                TypeError(node_row, node_col, INVALID_OPERATION % (left_type.name, right_type.name))
            )
        return left_exp_node, right_exp_node

    # ----------------------------------------------------------------- visitors

    @visitor.on("node")
    def visit(self, node, scope=None, parent_children_dict=None):
        pass

    @visitor.when(ProgramNode)
    def visit(self, node):
        """Visit every class, parents before children, and return
        ``(root_scope, typed ProgramNode)``."""
        scope = Scope()
        self.context = copy.copy(node.context)

        # Classes are visited from the roots of the hierarchy to the leaves.
        parent_children_dict = {}
        initial_nodes = []
        seen = set()
        self.class_to_visit = []
        self.class_visited = {}
        for declaration in node.declarations:
            name = declaration.id.lex
            if name in seen:
                continue  # repeated class name: only the first declaration is checked
            seen.add(name)
            self.class_visited[name] = False
            self.class_to_visit.append(declaration)

            # No parent (or a built-in parent): visit it first, later on.
            if declaration.parent is None or declaration.parent.lex in ("IO", "Int", "String", "Bool"):
                initial_nodes.append(declaration)
                continue

            parent_name = declaration.parent.lex
            try:
                self.context.get_type(parent_name)
            except SError:
                # Parent type is not defined: treat the class as a root.
                initial_nodes.append(declaration)
            else:
                parent_children_dict.setdefault(parent_name, []).append(declaration)

        # Typed ClassDeclarationNodes are accumulated here by the class visitor.
        self.tast_class_nodes = []

        for declaration in initial_nodes:  # first visit root nodes
            self.visit(declaration, scope.create_child(), parent_children_dict)

        # Whatever is still pending was not reached from a root
        # (e.g. classes involved in cyclic inheritance).
        while self.class_to_visit:
            declaration = self.class_to_visit[0]
            self.visit(declaration, scope.create_child(), parent_children_dict)

        program_node = (
            scope,
            cool_type_nodes.ProgramNode(self.tast_class_nodes, copy.copy(self.context)),
        )

        self.context = None
        self.current_type = None
        self.current_method = None

        return program_node

    @visitor.when(ClassDeclarationNode)
    def visit(self, node, scope, parent_children_dict):
        """Check one class, then its not-yet-visited children in child scopes."""
        self.class_to_visit.remove(node)
        self.class_visited[node.id.lex] = True  # mark class as visited

        self.current_type = self.context.get_type(node.id.lex)
        scope.define_variable("self", self.current_type)

        for attr in self.current_type.attributes:
            scope.define_variable(attr.name, attr.type)

        new_features = [self.visit(feature, scope) for feature in node.features]

        # A missing key just means "no children". Failures raised while
        # checking a child are no longer swallowed by a bare except.
        for child in parent_children_dict.get(node.id.lex, ()):
            if not self.class_visited[child.id.lex]:
                self.visit(child, scope.create_child(), parent_children_dict)

        parent = None if node.parent is None else node.parent.lex
        self.tast_class_nodes.append(
            cool_type_nodes.ClassDeclarationNode(node.id.lex, new_features, parent)
        )

    @visitor.when(AttrDeclarationNode)
    def visit(self, node, scope):
        # Attribute initializers are not inside any method.
        self.current_method = None

        typex = None
        try:
            typex = self.context.get_type(node.type.lex)
            if typex.name == "SELF_TYPE":
                typex = self.current_type
        except SError:
            # ERROR already reported in type builder
            typex = None

        init_exp = None
        if node.init_exp is not None:
            init_expr_type, init_exp = self.visit(node.init_exp, scope)
            # Conformance can only be checked when the declared type exists.
            if typex is not None and not init_expr_type.conforms_to(typex):
                line, col = node.token.location
                self.errors.append(
                    TypeError(line, col, INCOMPATIBLE_TYPES % (init_expr_type.name, typex.name))
                )
        return cool_type_nodes.AttrDeclarationNode(node.id.lex, node.type.lex, init_exp)

    @visitor.when(FuncDeclarationNode)
    def visit(self, node, scope):
        self.current_method = self.current_type.get_method(node.id.lex)
        method_return_type = self.current_method.return_type
        if method_return_type.name == "SELF_TYPE":
            method_return_type = self.current_type

        child_scope = scope.create_child()

        # ------------ parameters must have different names ------------
        param_names = self.current_method.param_names
        param_types = self.current_method.param_types
        seen_params = set()
        new_params = []

        for i, param_name in enumerate(param_names):
            param_n, param_t = node.params[i]
            new_params.append((param_n.lex, param_t.lex))
            if param_name == "self":
                node_row, node_col = param_n.location  # location of param name
                self.errors.append(
                    SemanticError(node_row, node_col, "'self' cannot be the name of a formal parameter.")
                )
            if param_name in seen_params:
                node_row, node_col = param_n.location
                self.errors.append(
                    SemanticError(
                        node_row,
                        node_col,
                        f"Formal parameter '{param_name}' multiply defined in method '{node.id.lex}'",
                    )
                )
            else:
                seen_params.add(param_name)
                child_scope.define_variable(param_name, param_types[i])
        # --------------------------------------------------------------

        body_type, body_exp = self.visit(node.body, child_scope)

        if not body_type.conforms_to(method_return_type):
            node_row, node_col = node.body.token.location
            self.errors.append(
                TypeError(
                    node_row,
                    node_col,
                    f"Inferred return type '{body_type.name}' of method '{node.id.lex}' (the type of the last expression) does not conform to declared return type '{method_return_type.name}'.",
                )
            )

        parent_method = None
        if self.current_type.parent is not None:
            try:
                parent_method = self.current_type.parent.get_method(self.current_method.name)
            except SError:
                parent_method = None  # parent has no method named like this

        if parent_method is not None:
            # ensure same return type of redefined method
            if parent_method.return_type != self.current_method.return_type:
                node_row, node_col = node.type.location
                self.errors.append(
                    SemanticError(
                        node_row,
                        node_col,
                        f"In redefined method '{node.id.lex}', return type {self.current_method.return_type.name} is different from original return type {parent_method.return_type.name}.",
                    )
                )

            # redefined method must have same number of parameters
            len_parent_params = len(parent_method.param_names)
            len_current_params = len(self.current_method.param_names)
            if len_parent_params != len_current_params:
                node_row, node_col = node.id.location
                self.errors.append(
                    SemanticError(
                        node_row,
                        node_col,
                        f"Incompatible number of formal parameters in redefined method '{node.id.lex}'.",
                    )
                )

            # check that each param has the same type as in the original
            # method (only over the positions both signatures share)
            for i in range(min(len_parent_params, len_current_params)):
                if self.current_method.param_types[i] != parent_method.param_types[i]:
                    param_i_name, _ = node.params[i]
                    node_row, node_col = param_i_name.location
                    self.errors.append(
                        SemanticError(
                            node_row,
                            node_col,
                            f"In redefined method '{node.id.lex}', type {self.current_method.param_types[i].name} of parameter {param_i_name.lex} is different from original type {parent_method.param_types[i].name}.",
                        )
                    )

        return cool_type_nodes.FuncDeclarationNode(node.id.lex, new_params, node.type.lex, body_exp)

    @visitor.when(AssignNode)
    def visit(self, node, scope):
        if node.id.lex == "self":
            node_row, node_col = node.token.location
            self.errors.append(
                SemanticError(node_row, node_col, "Cannot assign to 'self'. " + SELF_IS_READONLY)
            )

        if not scope.is_defined(node.id.lex):
            node_row, node_col = node.id.location
            self.errors.append(
                NameError(node_row, node_col, VARIABLE_NOT_DEFINED % (node.id.lex, self._enclosing_name()))
            )
            var_type = ErrorType()
        else:
            var_type = scope.find_variable(node.id.lex).type

        expr_type, exp_node = self.visit(node.expr, scope)
        if not expr_type.conforms_to(var_type):
            node_row, node_col = node.token.location
            self.errors.append(
                TypeError(
                    node_row,
                    node_col,
                    f"Inferred type {expr_type.name} of assigned expression does not conforms to type {var_type.name} of variable '{node.id.lex}'",
                )
            )

        return (expr_type, cool_type_nodes.AssignNode(node.id.lex, exp_node, expr_type))

    @visitor.when(CallNode)
    def visit(self, node, scope):
        auto_type = self.context.get_type("AUTO_TYPE")
        if node.obj is not None:
            typex, obj_exp = self.visit(node.obj, scope)
        else:
            typex, obj_exp = self.current_type, None

        # Arguments are visited before any early return so the typed node
        # always carries them.
        new_args = []
        arg_types = []
        for arg in node.args:
            arg_type, arg_node = self.visit(arg, scope)
            new_args.append(arg_node)
            arg_types.append(arg_type)

        at_type = None if node.at_type is None else node.at_type.lex

        if node.obj is not None and typex == auto_type:
            # Receiver type unknown: nothing to look up. Return the usual
            # (type, node) pair; callers unpack two values.
            return (
                auto_type,
                cool_type_nodes.CallNode(node.id.lex, new_args, obj_exp, at_type, typex, auto_type),
            )

        try:
            if at_type is not None:
                node_at_type = self.context.get_type(at_type)
                method = node_at_type.get_method(node.id.lex)
                if not typex.conforms_to(node_at_type):
                    node_row, node_col = node.at_type.location  # maybe in node.obj
                    self.errors.append(
                        TypeError(
                            node_row,
                            node_col,
                            f"Expression type {typex.name} does not conform to declared static dispatch type {node_at_type.name}.",
                        )
                    )
                    return (
                        ErrorType(),
                        cool_type_nodes.CallNode(node.id.lex, new_args, obj_exp, at_type, typex, ErrorType()),
                    )
            else:
                method = typex.get_method(node.id.lex)
        except SError as error:
            node_row, node_col = node.token.location
            self.errors.append(AttributeError(node_row, node_col, error.text))
            return (
                ErrorType(),
                cool_type_nodes.CallNode(node.id.lex, new_args, obj_exp, at_type, typex, ErrorType()),
            )

        if len(method.param_names) != len(node.args):
            node_row, node_col = node.id.location
            self.errors.append(
                SemanticError(
                    node_row,
                    node_col,
                    f"There is no definition of {method.name} that takes {len(node.args)} arguments ",
                )
            )

        # Only the positions present in both the call and the signature.
        for arg, arg_type, ptype in zip(node.args, arg_types, method.param_types):
            if not arg_type.conforms_to(ptype):
                node_row, node_col = arg.token.location
                self.errors.append(
                    TypeError(
                        node_row,
                        node_col,
                        f"In call of method {node.id.lex} parameter of type {arg_type.name} does not conforms to declared type {ptype.name}",
                    )
                )

        if method.return_type == self.context.get_type("SELF_TYPE"):
            return (typex, cool_type_nodes.CallNode(node.id.lex, new_args, obj_exp, at_type, typex, typex))

        return (
            method.return_type,
            cool_type_nodes.CallNode(node.id.lex, new_args, obj_exp, at_type, typex, method.return_type),
        )

    @visitor.when(IfNode)
    def visit(self, node, scope):
        predicate_type, if_node = self.visit(node.if_expr, scope)
        then_type, then_node = self.visit(node.then_expr, scope)
        else_type, else_node = self.visit(node.else_expr, scope)

        if predicate_type.name != "Bool" and predicate_type.name != "AUTO_TYPE":
            node_row, node_col = node.if_expr.token.location
            self.errors.append(
                TypeError(
                    node_row,
                    node_col,
                    f"Expression after 'if' must be Bool, current type is {predicate_type.name}",
                )
            )
            return (ErrorType(), cool_type_nodes.IfNode(if_node, then_node, else_node, ErrorType()))

        least_type = find_least_type(then_type, else_type, self.context)
        return (least_type, cool_type_nodes.IfNode(if_node, then_node, else_node, least_type))

    @visitor.when(WhileNode)
    def visit(self, node, scope):
        condition_type, condition_node = self.visit(node.condition, scope)
        _, body_node = self.visit(node.body, scope)
        bool_type = self.context.get_type("Bool")

        if condition_type != bool_type and condition_type.name != "AUTO_TYPE":
            node_row, node_col = node.condition.token.location
            self.errors.append(
                TypeError(
                    node_row,
                    node_col,
                    f"Expression in 'while' condition must be bool, current type is {condition_type.name}",
                )
            )
            return (ErrorType(), cool_type_nodes.WhileNode(condition_node, body_node, ErrorType()))

        obj_type = self.context.get_type("Object")
        return (obj_type, cool_type_nodes.WhileNode(condition_node, body_node, obj_type))

    @visitor.when(BlockNode)
    def visit(self, node, scope):
        # The block's type is the type of its last expression.
        typex = None
        new_exp_list = []
        for expr in node.expression_list:
            typex, node_exp = self.visit(expr, scope)
            new_exp_list.append(node_exp)

        return (typex, cool_type_nodes.BlockNode(new_exp_list, typex))

    @visitor.when(LetNode)
    def visit(self, node, scope):
        child_scope = scope.create_child()

        new_var_list = []
        for var_dec in node.identifiers:
            _, var_node = self.visit(var_dec, child_scope)
            new_var_list.append(var_node)

        exp_type, body_exp = self.visit(node.body, child_scope)
        return (exp_type, cool_type_nodes.LetNode(new_var_list, body_exp, exp_type))

    @visitor.when(VarDeclarationNode)
    def visit(self, node, scope):
        # NOTE(review): node.id is used directly here (not node.id.lex as in
        # the other visitors) -- presumably a plain string on this node type;
        # confirm against the AST definition.
        if node.id == "self":
            node_row, node_col = node.token.location
            self.errors.append(
                SemanticError(
                    node_row,
                    node_col,
                    "'self' cannot be bound in a 'let' expression. " + SELF_IS_READONLY,
                )
            )

        try:
            static_type = self.context.get_type(node.type.lex)
            if static_type.name == "SELF_TYPE":
                static_type = self.current_type
        except SError as e:
            node_row, node_col = node.type.location
            self.errors.append(TypeError(node_row, node_col, e.text))
            static_type = ErrorType()

        node_exp = None
        if node.expr is not None:
            typex, node_exp = self.visit(node.expr, scope)
            if not typex.conforms_to(static_type):
                line, col = node.expr.token.location
                self.errors.append(
                    TypeError(line, col, INCOMPATIBLE_TYPES % (typex.name, static_type.name))
                )

        # Defined after the initializer is checked, so the initializer
        # cannot see the variable it initializes.
        scope.define_variable(node.id, static_type)
        return (
            static_type,
            cool_type_nodes.VarDeclarationNode(node.id, node.type.lex, node_exp, static_type),
        )

    @visitor.when(CaseNode)
    def visit(self, node, scope):
        _, node_exp = self.visit(node.expr, scope)
        new_case_items = []
        current_case_type = None
        case_types_found = set()
        for item in node.case_items:
            if item.type.lex in case_types_found:
                line, col = item.type.location
                self.errors.append(
                    SemanticError(line, col, f"Duplicate branch {item.type.lex} in case statement")
                )
                continue

            case_types_found.add(item.type.lex)
            case_item_type, item_node = self.visit(item, scope.create_child())
            new_case_items.append(item_node)
            current_case_type = find_least_type(current_case_type, case_item_type, self.context)

        return (
            current_case_type,
            cool_type_nodes.CaseNode(node_exp, new_case_items, current_case_type),
        )

    @visitor.when(CaseItemNode)
    def visit(self, node, scope):
        if node.id.lex == "self":
            node_row, node_col = node.id.location
            self.errors.append(
                SemanticError(
                    node_row,
                    node_col,
                    "'self' cannot be bound in a 'case' expression. " + SELF_IS_READONLY,
                )
            )

        try:
            static_type = self.context.get_type(node.type.lex)
            scope.define_variable(node.id.lex, static_type)
        except SError:
            node_row, node_col = node.type.location
            self.errors.append(
                TypeError(node_row, node_col, f"Type {node.type.lex} of case branch is undefined.")
            )

        typex, node_exp = self.visit(node.expr, scope)
        return (typex, cool_type_nodes.CaseItemNode(node.id.lex, node.type.lex, node_exp, typex))

    @visitor.when(InstantiateNode)  # NewNode
    def visit(self, node, scope):
        try:
            typex = self.context.get_type(node.lex.lex)
        except SError:
            node_row, node_col = node.lex.location
            self.errors.append(
                TypeError(node_row, node_col, f"Type {node.lex.lex} of 'new' expression is not defined.")
            )
            return (ErrorType(), cool_type_nodes.InstantiateNode(node.lex.lex, ErrorType()))

        if typex.name == "SELF_TYPE":
            # `new SELF_TYPE` has the type of the enclosing class; still
            # return a (type, node) pair like every other expression.
            typex = self.current_type
        return (typex, cool_type_nodes.InstantiateNode(node.lex.lex, typex))

    @visitor.when(IsvoidNode)
    def visit(self, node, scope):
        _, node_exp = self.visit(node.expr, scope)
        bool_type = self.context.get_type("Bool")
        return (bool_type, cool_type_nodes.IsvoidNode(node_exp, bool_type))

    @visitor.when(PlusNode)
    def visit(self, node, scope):
        left, right = self._visit_int_operands(node, scope)
        int_type = self.context.get_type("Int")
        return (int_type, cool_type_nodes.PlusNode(left, right, int_type))

    @visitor.when(MinusNode)
    def visit(self, node, scope):
        left, right = self._visit_int_operands(node, scope)
        int_type = self.context.get_type("Int")
        return (int_type, cool_type_nodes.MinusNode(left, right, int_type))

    @visitor.when(StarNode)
    def visit(self, node, scope):
        left, right = self._visit_int_operands(node, scope)
        int_type = self.context.get_type("Int")
        return (int_type, cool_type_nodes.StarNode(left, right, int_type))

    @visitor.when(DivNode)
    def visit(self, node, scope):
        left, right = self._visit_int_operands(node, scope)
        int_type = self.context.get_type("Int")
        return (int_type, cool_type_nodes.DivNode(left, right, int_type))

    @visitor.when(LessNode)
    def visit(self, node, scope):
        left, right = self._visit_int_operands(node, scope)
        bool_type = self.context.get_type("Bool")
        return (bool_type, cool_type_nodes.LessNode(left, right, bool_type))

    @visitor.when(LessEqualNode)
    def visit(self, node, scope):
        left, right = self._visit_int_operands(node, scope)
        bool_type = self.context.get_type("Bool")
        return (bool_type, cool_type_nodes.LessEqualNode(left, right, bool_type))

    @visitor.when(EqualNode)
    def visit(self, node, scope):
        int_type = self.context.get_type("Int")
        string_type = self.context.get_type("String")
        bool_type = self.context.get_type("Bool")
        built_in_types = [int_type, string_type, bool_type]

        left_type, left_exp_node = self.visit(node.left, scope)
        right_type, right_exp_node = self.visit(node.right, scope)

        # Int/String/Bool may only be compared with the very same type.
        if left_type in built_in_types or right_type in built_in_types:
            if (
                left_type != right_type
                and left_type.name != "AUTO_TYPE"
                and right_type.name != "AUTO_TYPE"
            ):
                node_row, node_col = node.token.location
                self.errors.append(
                    TypeError(
                        node_row,
                        node_col,
                        f"One of the expressions of '=' operator is of type Int, String or Bool, the other must have the same static type. Left type: {left_type.name}. Right type: {right_type.name}",
                    )
                )

        return (bool_type, cool_type_nodes.EqualNode(left_exp_node, right_exp_node, bool_type))

    @visitor.when(NotNode)
    def visit(self, node, scope):
        bool_type = self.context.get_type("Bool")
        typex, exp_node = self.visit(node.expr, scope)

        if typex != bool_type and typex.name != "AUTO_TYPE":
            line, col = node.expr.token.location
            self.errors.append(
                TypeError(line, col, f"Expression after 'not' must be Bool, current is {typex.name}")
            )
            return (ErrorType(), cool_type_nodes.NotNode(exp_node, ErrorType()))

        return (bool_type, cool_type_nodes.NotNode(exp_node, bool_type))

    @visitor.when(NegNode)
    def visit(self, node, scope):
        int_type = self.context.get_type("Int")
        typex, exp_node = self.visit(node.expr, scope)

        if typex != int_type and typex.name != "AUTO_TYPE":
            node_row, node_col = node.expr.token.location
            self.errors.append(
                TypeError(node_row, node_col, f"Expression after '~' must be Int, current is {typex.name}")
            )
            return (ErrorType(), cool_type_nodes.NegNode(exp_node, ErrorType()))

        return (int_type, cool_type_nodes.NegNode(exp_node, int_type))

    @visitor.when(ConstantNumNode)
    def visit(self, node, scope):
        node_type = self.context.get_type("Int")
        return (node_type, cool_type_nodes.ConstantNumNode(node.lex, node_type))

    @visitor.when(VariableNode)
    def visit(self, node, scope):
        var = scope.find_variable(node.lex)
        if var is None:
            node_row, node_col = node.token.location
            self.errors.append(
                NameError(node_row, node_col, VARIABLE_NOT_DEFINED % (node.lex, self._enclosing_name()))
            )
            return (ErrorType(), cool_type_nodes.VariableNode(node.lex, ErrorType()))

        return (var.type, cool_type_nodes.VariableNode(node.lex, var.type))

    @visitor.when(StringNode)
    def visit(self, node, scope):
        node_type = self.context.get_type("String")
        return (node_type, cool_type_nodes.StringNode(node.lex, node_type))

    @visitor.when(BooleanNode)
    def visit(self, node, scope):
        node_type = self.context.get_type("Bool")
        return (node_type, cool_type_nodes.BooleanNode(node.lex, node_type))


# ---------------------------------------------------------------------------
# src/semantic/type_collector.py
# ---------------------------------------------------------------------------
from cmp.semantic import SemanticError as SError
from cmp.semantic import Attribute, Method, Type
from cmp.semantic import (
    VoidType,
    IntType,
    ErrorType,
    StringType,
    BoolType,
    AutoType,
    ObjectType,
    SelfType,
    IOType,
)
from cmp.semantic import Context
from semantic.ast_nodes import ProgramNode, ClassDeclarationNode
import cmp.visitor as visitor
from semantic.cool_visitor import CopyVisitor
from cmp.errors import SemanticError


class TypeCollector(object):
    """Register the built-in types and every declared class in a new Context.

    Visiting a ProgramNode returns a copy of the AST (made with CopyVisitor)
    whose ``context`` attribute holds the collected types.
    """

    def __init__(self, errors=None):
        self.context = None
        # Keep the caller's list when given; never share a default list.
        self.errors = [] if errors is None else errors

    @visitor.on("node")
    def visit(self, node):
        pass

    @visitor.when(ProgramNode)
    def visit(self, node):
        self.context = Context()
        self.context.types["Object"] = ObjectType()
        self.context.types["Int"] = IntType()
        self.context.types["String"] = StringType()
        self.context.types["Bool"] = BoolType()
        self.context.types["AUTO_TYPE"] = AutoType()
        self.context.types["SELF_TYPE"] = SelfType()
        self.context.types["IO"] = IOType()

        # Every built-in type except Object itself inherits from Object.
        object_type = self.context.get_type("Object")
        for typex in self.context.types.values():
            if typex == object_type:
                continue
            typex.set_parent(object_type)

        for declaration in node.declarations:
            self.visit(declaration)

        copy_visitor = CopyVisitor()
        newAst = copy_visitor.visit(node)
        newAst.context = self.context

        # Reset the per-run state. The error list is kept: setting it to None
        # discarded the results and made a second run crash on append().
        self.context = None

        return newAst

    @visitor.when(ClassDeclarationNode)
    def visit(self, node):
        try:
            self.context.create_type(node.id.lex)
        except SError as error:  # class already defined
            node_row, node_col = node.id.location
            self.errors.append(SemanticError(node_row, node_col, error.text))
file mode 100644 index 000000000..e69de29bb diff --git a/tests/code_gen/hello_world_output.txt b/tests/code_gen/hello_world_output.txt new file mode 100644 index 000000000..349db2bfe --- /dev/null +++ b/tests/code_gen/hello_world_output.txt @@ -0,0 +1 @@ +Hello, World. diff --git a/tests/code_gen/is_void.cl b/tests/code_gen/is_void.cl new file mode 100644 index 000000000..5b6327b91 --- /dev/null +++ b/tests/code_gen/is_void.cl @@ -0,0 +1,12 @@ +class Main inherits IO{ + a : Int <- 0; + b : Int <- 3; + main (): Object { + { + if isvoid while a < b loop { + a <- a + 1; + } + pool then out_string("YEI") else out_string("Ou nou") fi; + } + }; +}; \ No newline at end of file diff --git a/tests/code_gen/is_void_input.txt b/tests/code_gen/is_void_input.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/code_gen/is_void_output.txt b/tests/code_gen/is_void_output.txt new file mode 100644 index 000000000..2433fb4b4 --- /dev/null +++ b/tests/code_gen/is_void_output.txt @@ -0,0 +1 @@ +YEI \ No newline at end of file diff --git a/tests/code_gen/kk.cl b/tests/code_gen/kk.cl new file mode 100644 index 000000000..223a18c53 --- /dev/null +++ b/tests/code_gen/kk.cl @@ -0,0 +1,23 @@ +class Main inherits IO { + boolop : BoolOp <- new BoolOp; + main(): Object { + + let a:Int <- 0 ,line : String <- in_string() in + while (boolop.and(not line="\n", not line.length()=1)) loop { + a <- a+1; + out_int(line.length()); + line <- in_string(); + + } pool + + }; +}; + +class BoolOp { + + and(b1 : Bool, b2 : Bool) : Bool { + if b1 then b2 else false fi + }; + + +}; diff --git a/tests/code_gen/kk_input.txt b/tests/code_gen/kk_input.txt new file mode 100644 index 000000000..b67d90210 --- /dev/null +++ b/tests/code_gen/kk_input.txt @@ -0,0 +1,5 @@ +1 2,100 +2 3,200 1,150 +3 2,10 +4 3,55 5,100 +5 1,1 2,2 3,3 4,4 5,5 diff --git a/tests/code_gen/kk_output.txt b/tests/code_gen/kk_output.txt new file mode 100644 index 000000000..162a54bf5 --- /dev/null +++ 
b/tests/code_gen/kk_output.txt @@ -0,0 +1,3 @@ +a +d + diff --git a/tests/code_gen/object_methods.cl b/tests/code_gen/object_methods.cl new file mode 100644 index 000000000..41dbc160a --- /dev/null +++ b/tests/code_gen/object_methods.cl @@ -0,0 +1,13 @@ + +class Main inherits IO{ + c : String <- "First sentence.\n"; + d : Int <- 1; + main (): Object { + { + out_string(c.copy()); + out_string(c.type_name()); + + + } + }; +}; diff --git a/tests/code_gen/object_methods_input.txt b/tests/code_gen/object_methods_input.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/code_gen/object_methods_output.txt b/tests/code_gen/object_methods_output.txt new file mode 100644 index 000000000..0c61dbc68 --- /dev/null +++ b/tests/code_gen/object_methods_output.txt @@ -0,0 +1,2 @@ +First sentence. +String \ No newline at end of file diff --git a/tests/code_gen/point.cl b/tests/code_gen/point.cl new file mode 100644 index 000000000..fb0e4f25b --- /dev/null +++ b/tests/code_gen/point.cl @@ -0,0 +1,15 @@ +class Main inherits IO{ + main (): Object { + out_int((new Point).init(5, 6)) + + }; +}; + +class Point{ + x: Int; + y: Int; + + init(x0: AUTO_TYPE, y0: AUTO_TYPE): AUTO_TYPE { + x0 + y0 + }; +}; diff --git a/tests/code_gen/point_input.txt b/tests/code_gen/point_input.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/code_gen/point_output.txt b/tests/code_gen/point_output.txt new file mode 100644 index 000000000..9d607966b --- /dev/null +++ b/tests/code_gen/point_output.txt @@ -0,0 +1 @@ +11 \ No newline at end of file diff --git a/tests/code_gen/string_methods.cl b/tests/code_gen/string_methods.cl new file mode 100644 index 000000000..15136ddf7 --- /dev/null +++ b/tests/code_gen/string_methods.cl @@ -0,0 +1,13 @@ + +class Main inherits IO{ + a : Int; + b : Int; + c : String <- "First sentence."; + d : String <- "Second sentence."; + main (): Object { + { + out_string(c.concat(d)); + out_string(c.substr(0, c.length())); + } + }; +}; diff --git 
a/tests/code_gen/string_methods_input.txt b/tests/code_gen/string_methods_input.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/code_gen/string_methods_output.txt b/tests/code_gen/string_methods_output.txt new file mode 100644 index 000000000..42bc7029f --- /dev/null +++ b/tests/code_gen/string_methods_output.txt @@ -0,0 +1 @@ +First sentence.Second sentence.First sentence. \ No newline at end of file diff --git a/tests/code_gen/test_goto_if.cl b/tests/code_gen/test_goto_if.cl new file mode 100644 index 000000000..57e2f1f25 --- /dev/null +++ b/tests/code_gen/test_goto_if.cl @@ -0,0 +1,13 @@ +class Main inherits IO{ + a : Int <- 1; + b : Int <- 2; + main (): Object { + { + a + b; + a - b; + a / b; + out_int(b * a); + } + + }; +}; \ No newline at end of file diff --git a/tests/code_gen/test_goto_if_input.txt b/tests/code_gen/test_goto_if_input.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/code_gen/test_goto_if_output.txt b/tests/code_gen/test_goto_if_output.txt new file mode 100644 index 000000000..d8263ee98 --- /dev/null +++ b/tests/code_gen/test_goto_if_output.txt @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/tests/code_gen/unary_nodes.cl b/tests/code_gen/unary_nodes.cl new file mode 100644 index 000000000..98a12e453 --- /dev/null +++ b/tests/code_gen/unary_nodes.cl @@ -0,0 +1,11 @@ +class Main inherits IO{ + a : Int <- 2; + b : Int; + c: Bool; + main (): Object { + { + (* out_string(if not a <= b then "True \n" else "False \n" fi); *) + out_int(~a); + } + }; +}; diff --git a/tests/code_gen/unary_nodes_input.txt b/tests/code_gen/unary_nodes_input.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/code_gen/unary_nodes_output.txt b/tests/code_gen/unary_nodes_output.txt new file mode 100644 index 000000000..06056678e --- /dev/null +++ b/tests/code_gen/unary_nodes_output.txt @@ -0,0 +1 @@ +-2 \ No newline at end of file diff --git a/tests/code_gen/utils/__init__.py 
"""Helpers for the code-generation tests: run the compiler (and SPIM) and
compare the reported errors / program output with the expected ones."""
import subprocess
import re


COMPILER_TIMEOUT = 'El compilador tarda mucho en responder.'
SPIM_TIMEOUT = 'El spim tarda mucho en responder.'
TEST_MUST_FAIL = 'El test %s debe fallar al compilar'
TEST_MUST_COMPILE = 'El test %s debe compilar'
# NOTE(review): the placeholder text inside "(,) - :" and the indentation of
# the second line were lost in the copy this was rebuilt from -- confirm
# against the upstream test utilities.
BAD_ERROR_FORMAT = '''El error no esta en formato: (,) - :
 o no se encuentra en la 3ra linea\n\n%s'''
UNEXPECTED_ERROR = 'Se esperaba un %s en (%d, %d). Su error fue un %s en (%d, %d)'
UNEXPECTED_OUTPUT = 'La salida de %s no es la esperada:\n%s\nEsperada:\n%s'

ERROR_FORMAT = r'^\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)\s*-\s*(\w+)\s*:(.*)$'

SPIM_HEADER = r'''^SPIM Version .+ of .+
Copyright .+\, James R\. Larus\.
All Rights Reserved\.
See the file README for a full copyright notice\.
(?:Loaded: .+\n)*'''


def parse_error(error: str):
    """Split ``"(line, column) - ErrorType: text"`` into
    ``(line, column, error_type, text)``.

    Raises AssertionError (with BAD_ERROR_FORMAT) when *error* does not have
    that shape. A tuple is returned so the result can be unpacked and also
    inspected more than once.
    """
    merror = re.fullmatch(ERROR_FORMAT, error)
    assert merror, BAD_ERROR_FORMAT % error

    line, column, error_type, text = merror.groups()
    return (int(line), int(column), error_type, text)


def _first_line(lines: list) -> str:
    """First element of *lines*, or '' so that an empty output fails in
    parse_error with a readable message instead of an IndexError."""
    return lines[0] if lines else ''


def first_error(compiler_output: list, errors: list):
    """Assert that the first reported error matches the first expected one
    in line, column and error type."""
    line, column, error_type, _ = parse_error(_first_line(errors))

    oline, ocolumn, oerror_type, _ = parse_error(_first_line(compiler_output))

    assert line == oline and column == ocolumn and error_type == oerror_type,\
        UNEXPECTED_ERROR % (error_type, line, column, oerror_type, oline, ocolumn)


def first_error_only_line(compiler_output: list, errors: list):
    """Like first_error, but the column is not compared."""
    line, column, error_type, _ = parse_error(_first_line(errors))

    oline, ocolumn, oerror_type, _ = parse_error(_first_line(compiler_output))

    assert line == oline and error_type == oerror_type,\
        UNEXPECTED_ERROR % (error_type, line, column, oerror_type, oline, ocolumn)


def get_file_name(path: str):
    """Return what follows the last '/' in *path* (the whole path if none)."""
    return path.rsplit('/', 1)[-1]


def strip_spim_header(stdout: str) -> str:
    """Return *stdout* without the SPIM start-up banner.

    When the banner is not present the text is returned unchanged.
    """
    mo = re.match(SPIM_HEADER, stdout)
    return stdout[mo.end():] if mo else stdout


def compare_errors(compiler_path: str, cool_file_path: str, error_file_path: str, cmp=first_error, timeout=100):
    """Compile *cool_file_path*, require exit code 1, and compare the
    compiler's errors (from its third output line on) with the expected
    errors file using *cmp*."""
    try:
        sp = subprocess.run(['bash', compiler_path, cool_file_path], capture_output=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        raise AssertionError(COMPILER_TIMEOUT)
    return_code, output = sp.returncode, sp.stdout.decode()

    assert return_code == 1, TEST_MUST_FAIL % get_file_name(cool_file_path)

    with open(error_file_path, 'r') as fd:
        errors = fd.read().split('\n')

    # checking the errors of compiler (the first two lines are skipped)
    compiler_output = output.split('\n')
    cmp(compiler_output[2:], errors)


def compare_outputs(compiler_path: str, cool_file_path: str, input_file_path: str, output_file_path: str, timeout=100):
    """Compile *cool_file_path*, run the generated ``.mips`` file in SPIM
    feeding it *input_file_path*, and compare its output (banner removed)
    with the contents of *output_file_path*."""
    try:
        sp = subprocess.run(['bash', compiler_path, cool_file_path], capture_output=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        raise AssertionError(COMPILER_TIMEOUT)
    assert sp.returncode == 0, TEST_MUST_COMPILE % get_file_name(cool_file_path)

    # "<name>.cl" -> "<name>.mips"
    spim_file = cool_file_path[:-2] + 'mips'

    with open(input_file_path, 'rb') as fd:
        program_input = fd.read()
    try:
        sp = subprocess.run(['spim', '-file', spim_file], input=program_input, capture_output=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        raise AssertionError(SPIM_TIMEOUT)
    # Previously `output` stayed unbound when the banner did not match.
    output = strip_spim_header(sp.stdout.decode())

    with open(output_file_path, 'r') as fd:
        eoutput = fd.read()

    assert output == eoutput, UNEXPECTED_OUTPUT % (spim_file, repr(output), repr(eoutput))
pool; + } + }; +}; \ No newline at end of file diff --git a/tests/code_gen/while_input.txt b/tests/code_gen/while_input.txt new file mode 100644 index 000000000..e69de29bb diff --git a/tests/code_gen/while_output.txt b/tests/code_gen/while_output.txt new file mode 100644 index 000000000..de2cad9cc --- /dev/null +++ b/tests/code_gen/while_output.txt @@ -0,0 +1,3 @@ +0iteration +1iteration +2iteration