diff --git a/.gitignore b/.gitignore index 4acafde18..7f1414274 100644 --- a/.gitignore +++ b/.gitignore @@ -408,3 +408,8 @@ dmypy.json # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) +env/ +.idea/ + +settings.json +code.mips diff --git a/doc/MIPS_Instruction_Set.pdf b/doc/MIPS_Instruction_Set.pdf new file mode 100644 index 000000000..d34ae7c62 Binary files /dev/null and b/doc/MIPS_Instruction_Set.pdf differ diff --git a/doc/Readme.md b/doc/Readme.md index 3b2569f5c..ee4fcc19a 100644 --- a/doc/Readme.md +++ b/doc/Readme.md @@ -1,33 +1,41 @@ # Documentación -## Readme +## Cómo usar el compilador -Modifique el contenido de este documento para documentar de forma clara y concisa los siguientes aspectos: +Para compilar un fichero de COOL se puede usar el comando: -- Cómo ejecutar (y compilar si es necesario) su compilador. -- Requisitos adicionales, dependencias, configuración, etc. -- Opciones adicionales que tenga su compilador. +``` +python3 coolc.py "path/to/file.cl" +``` -## Sobre los Equipos de Desarrollo +Este compila el fichero `file.cl` y almacena el código generado en un fichero del mismo nombre pero con extensión `.mips`. -Para desarrollar el compilador del lenguaje COOL se trabajará en equipos de 2 o 3 integrantes. El proyecto de Compilación será recogido y evaluado únicamente a través de Github. Es imprescindible tener una cuenta de Github para cada participante, y que su proyecto esté correctamente hosteado en esta plataforma. +Se pueden consultar las demás opciones de la línea de comandos para el compilador ejecutando `python3 coolc.py -h`. -**⚠️ NOTA**: Debe completar el archivo `team.yml` con los datos correctos de cada miembro de su equipo. +``` +usage: coolc.py [-h] [--out OUT] [--run | --no-run] [--verbose | --no-verbose] file -## Sobre los Materiales a Entregar +positional arguments: + file COOL source file. -Para la evaluación del proyecto Ud.
debe entregar un informe en formato PDF (`report.pdf`) en esta carpeta, que resuma de manera organizada y comprensible la arquitectura e implementación de su compilador. -El documento no tiene límite de extensión. -En él explicará en más detalle su solución a los problemas que, durante la implementación de cada una de las fases del proceso de compilación, hayan requerido de Ud. especial atención. +optional arguments: + -h, --help show this help message and exit + --out OUT Name for .mips generated file after compilation. + --run, --no-run Execute the file compiled with SPIM. (default: False) + --verbose, --no-verbose + Verbose output. (default: False) +``` -## Estructura del reporte +- `--out`: modifica el fichero de salida para el `.mips` generado, que por defecto es creado con el mismo nombre del fichero de COOL. +- `--run`: facilita la ejecución del programa escrito en COOL ejecutando automáticamente el fichero `.mips` de salida. Para realizar dicha acción es necesario tener `spim` instalado y en el path. +- `--verbose`: imprime en consola el AST generado después de concluido el análisis léxico, parsing y análisis semántico, así como el código intermedio (CIL) generado previamente a la generación de código final. -Usted es libre de estructurar su reporte escrito como más conveniente le parezca. A continuación le sugerimos algunas secciones que no deberían faltar, aunque puede mezclar, renombrar y organizarlas de la manera que mejor le parezca: +## Requerimientos -- **Uso del compilador**: detalles sobre las opciones de líneas de comando, si tiene opciones adicionales (e.j., `--ast` genera un AST en JSON, etc.). Básicamente lo mismo que pondrá en este Readme. -- **Arquitectura del compilador**: una explicación general de la arquitectura, en cuántos módulos se divide el proyecto, cuantas fases tiene, qué tipo de gramática se utiliza, y en general, como se organiza el proyecto. Una buena imagen siempre ayuda.
-- **Problemas técnicos**: detalles sobre cualquier problema teórico o técnico interesante que haya necesitado resolver de forma particular. +### Ambiente de ejecución -## Sobre la Fecha de Entrega +El proyecto fue desarrollado y probado bajo un ambiente en `Python 3.9.5`, así que se espera compatibilidad con esta versión y superiores (`3.9+`). No se garantiza la correctitud del compilador o que este sea ejecutable en versiones inferiores. -Se realizarán recogidas parciales del proyecto a lo largo del curso. En el Canal de Telegram se anunciará la fecha y requisitos de cada entrega. +### Dependencias + +La única dependencia del compilador es `ply==3.11`, la cual puede ser instalada ejecutando el comando `python3 -m pip install ply==3.11`. diff --git a/doc/SPIM_Manual.pdf b/doc/SPIM_Manual.pdf new file mode 100644 index 000000000..b785613c8 Binary files /dev/null and b/doc/SPIM_Manual.pdf differ diff --git a/doc/report.md b/doc/report.md new file mode 100644 index 000000000..c1f5bfcdd --- /dev/null +++ b/doc/report.md @@ -0,0 +1,161 @@ +# Proyecto de compilación + +## Integrantes + +- Enmanuel Verdesia Suárez C-411 +- Samuel David Suárez Rodríguez C-412 + +## Instalación / Ejecución + +Para compilar un fichero de COOL se puede usar el comando: + +``` +python3 coolc.py "path/to/file.cl" +``` + +Este compila el fichero `file.cl` y almacena el código generado en un fichero del mismo nombre pero con extensión `.mips`. + +Se pueden consultar las demás opciones de la línea de comandos para el compilador ejecutando `python3 coolc.py -h`. + +``` +usage: coolc.py [-h] [--out OUT] [--run | --no-run] [--verbose | --no-verbose] file + +positional arguments: + file COOL source file. + +optional arguments: + -h, --help show this help message and exit + --out OUT Name for .mips generated file after compilation. + --run, --no-run Execute the file compiled with SPIM. (default: False) + --verbose, --no-verbose + Verbose output.
(default: False) +``` + +- `--out`: modifica el fichero de salida para el `.mips` generado, que por defecto es creado con el mismo nombre del fichero de COOL. +- `--run`: facilita la ejecución del programa escrito en COOL ejecutando automáticamente el fichero `.mips` de salida. Para realizar dicha acción es necesario tener `spim` instalado y en el path. +- `--verbose`: imprime en consola el AST generado después de concluido el análisis léxico, parsing y análisis semántico, así como el código intermedio (CIL) generado previamente a la generación de código final. + +### Requerimientos + +El proyecto fue desarrollado y probado bajo un ambiente en `Python 3.9.5`, así que se espera compatibilidad con esta versión y superiores (`3.9+`). No se garantiza la correctitud del compilador o que este sea ejecutable en versiones inferiores. + +La única dependencia del compilador es `ply==3.11`, la cual puede ser instalada ejecutando el comando `python3 -m pip install ply==3.11`. + + +## Arquitectura + +Para la implementación del compilador de `COOL` se dividió el proceso de desarrollo en las etapas siguientes: + +- Análisis Sintáctico + - Lexing + - Parsing +- Análisis Semántico + - Recolección de tipos (Type collection) + - Construcción de tipos (Type building) + - Chequeo de tipos (Type checking) +- Generación de código (Code generation) + - COOL -> CIL + - CIL -> MIPS + +## Lexing + +Para el análisis léxico se utilizó el módulo `ply` de `Python`, el cual permite generar esta parte del proceso de manera automática simplemente definiendo el conjunto de tokens del lenguaje. + +Se emplearon dos estados exclusivos para el autómata además del inicial, uno para tokenizar los strings y otro para los comentarios que ocupan múltiples líneas. + +Al concluir esta fase se obtuvo cada uno de los tokens relevantes involucrados en el código fuente, estos almacenan su lexema y tipo de token.
+ +## Parsing + +De igual manera se utilizó `ply` para la fase de parsing debido a que soporta varios tipos de parsers como el parser `LALR(1)` que resuelve de manera eficiente la gramática de `COOL`, esta gramática se definió en base al manual oficial de `COOL` [cool-manual](./cool-manual.pdf), definida en su página 16. + +En esta fase se estableció la precedencia de los operadores de acuerdo al manual en la sección 11.1. Se establecieron las reglas de la gramática de forma apropiada para obtener el AST una vez finalizado el proceso de parsing de los tokens y asegurar que cada nodo almacena la línea y columna correspondiente al token, esto permite mejorar la información de los errores en el chequeo semántico. + +## Recolección de tipos + +Esta fase se encarga de recorrer el AST generado previamente y definir los tipos del lenguaje. Entre estos tipos tenemos los BUILT_IN (Bool, String, Int, IO, Object), así como los tipos definidos en el código por el usuario en las declaraciones de clases. En un recorrido posterior se realiza el chequeo y asignación de padres de tipos, debido a que en el lenguaje las declaraciones de clases se pueden encontrar en cualquier orden, por lo que es necesario recolectar los tipos primeramente. En esta fase se resuelven también las excepciones de herencia cíclica. + +La lógica para esta fase se implementó en el archivo `collector.py` + +## Construcción de tipos + +En esta fase se recorre el AST con el objetivo de visitar cada `feature` (método o atributo) de las clases para asignarla a cada tipo y chequear la existencia de una clase `Main` con el método `main`. + +La lógica para esta fase se implementó en el archivo `builder.py` + +## Chequeo de tipos + +Esta fase es la encargada de validar el uso correcto de los tipos definidos en el programa y detectar otros errores definidos dentro de la semántica de COOL, estos errores con su correspondiente descripción pueden ser encontrados bajo el fichero `errors.py`.
+ +Por ejemplo entre ellos se encuentran: +- Verificar que las asignaciones de las variables sea el tipo adecuado a su definición, así como los argumentos de funciones. +- `self` es de solo lectura. +- no violar el número de argumentos y los tipos de estos cuando se sobreescribe una función. +- No usar variables que no estén definidas previamente en el contexto. +- Evitar herencia de los tipos Int, String y Bool. +- Verificar el correcto uso de los operadores +, -, /, *, <, <=, =, etc. +- Evitar ramas duplicadas en el `case of`. + +En este recorrido sobre el AST además se crea el scope del programa para cada una de las clases, funciones, el `let in` y el `case of`. Los scopes establecen una jerarquía de herencia, de tal forma que el scope de una clase es hijo del scope de la clase que esta hereda, el scope de una función es hijo del scope de la clase en que esta se encuentra definida y los scopes de los `let in` y `case of` son hijos del scope del contexto en que se encuentren. + +Los scopes permiten ocultar las definiciones de variables de los contextos superiores. La salida de este recorrido sobre el AST es el scope raíz resultante de la visita a cada uno de los nodos. + +La lógica para esta fase se implementó en el archivo `checker.py` + +## Generación de código intermedio COOL -> CIL + +Para compilar el código en `COOL` a un lenguaje de bajo nivel como `MIPS`, se utilizó un lenguaje intermedio para disminuir la dificultad en la generación de código entre estas dos partes. Para ello definimos un pseudolenguaje (`CIL`) que posee elementos similares al estudiado en clase más algunos agregados y permiten controlar de una manera más sencilla el flujo del programa al generar el código en `MIPS`. Entre estos añadidos tenemos: + +- Abort + + `ABORT`: Termina el programa con un mensaje indicando el tipo desde el cual se llamó esta instrucción. 
+ +- Errores en tiempo de ejecución: + + `CASE_MATCH_RUNTIME_ERROR`: Devuelve un error en tiempo de ejecución con el mensaje `"RuntimeError: Case statement without a match branch"`. Se usa en las expresiones de tipo case of cuando ninguna rama conforma el resultado de la expresión. + + `EXPR_VOID_RUNTIME_ERROR`: Devuelve un error en tiempo de ejecución con el mensaje: `"RuntimeError: Expression is void"`. Usado para controlar excepciones con expresiones de tipo `void` + +- Conforms + + ` = CONFORMS `: Usada para saber si el resultado de `` conforma el tipo ``. Se creó por la necesidad de saber en tiempo de ejecución si el tipo que retorna la expresión en un `case of` podía ser asignado a una rama de un tipo dado. + +Dicho esto, se implementaron 2 visitors encargados de generar el código en `CIL`. El primero se encuentra en el archivo `types_data_visitor.py` y es el encargado de definir las secciones `.TYPES` y `.DATA`, en donde se alojarán los tipos definidos en `COOL` y los datos constantes (mensajes de excepción, strings definidos en el código, ...) respectivamente. El segundo visitor encontrado en el archivo `code_visitor.py` se encarga de generar la sección `.CODE` en la cual se encuentra toda la lógica del programa. Ambos visitors fueron encargados de devolver un AST de `CIL` para el próximo paso de generación de código. + +### Manejo de `case of` + +Este tipo de expresión tuvo un tratamiento especial debido a que la evaluación del tipo de la expresión dentro del `case` se debe realizar en tiempo de ejecución, y en base a esta seleccionar la rama correspondiente como valor de retorno. Para resolver esto hacemos uso de la instrucción definida anteriormente en `CIL`: `CONFORMS`, sin embargo, con esto solo sabemos si el tipo de este resultado conforma el tipo de la rama, pero según la definición del `case of` queremos el tipo más específico que lo cumpla. 
Esto lo resolvimos reordenando las ramas del case en función de su profundidad en el árbol de tipos de mayor a menor, quedando como primeras ramas las más específicas. De esta manera, la primera rama que cumpla que `CONFORMS ` sea verdadero, es la seleccionada, y será la más específica para ese tipo puesto que las de mayor profundidad ya fueron visitadas, por tanto se hace un salto para esa rama. + +## Generación de código final: CIL -> MIPS + +Una vez tenemos el AST de `CIL`, solo resta el paso final, generar el código final `MIPS` que será el que se ejecutará desde el emulador `spim`. Para esto definimos un visitor encargado de traducir cada instrucción. + +### Representación de Instancias en Memoria + +Para representar los objetos de `CIL` en memoria desde `MIPS` usamos la siguiente notación: + +``` +Type: # Nombre del tipo + .word 4 # Espacio necesario para una instancia de este tipo + .word # Label del tipo padre + + # Métodos de la instancia + .word Type__init + .word Type__abort + . + . + . + # + .word Type_type_name # Label para direccionar el string del typename + .asciiz "Type" # Typename (String del tipo) +``` + +Esta definición se encuentra en la sección del `.data` de MIPS y se puede decir que sirve de esqueleto para las instancias. + +Por lo visto anteriormente cada tipo contiene información relacionada con este, como la lista de atributos, métodos, el string correspondiente a su typename, así como una referencia al address del label de su padre. Todos los tipos son tratados como instancias de clases, incluyendo los `BUILT_IN` como `Int, String y Bool` en los que podemos ver su valor en el offset 4 de su dirección. Los métodos se representan con labels que son definidos posteriormente en la sección `.text` de `MIPS`. Existe un label especial llamado `main` que será el encargado de correr el programa, su función es instanciar el tipo `Main` y ejecutar sus instrucciones.
+ +Cada una de las instancias posee como primer atributo un puntero a su tipo correspondiente (ubicado en `.data`). En los bytes continuos almacena cada uno de sus atributos. + +Para resolver el método de una instancia primeramente se resuelve el tipo de la instancia, al cual apunta el primer byte de esta. Este puntero más el offset del método permite encontrar el label de la función correspondiente en mips e invocarla. + +Todos los procedimientos en mips esperan recibir las instancias por referencia, por tanto realizan internamente el unboxing de los atributos de estas. De igual forma al retornar un tipo Int, String, Bool, se retorna una nueva instancia con su tipo y valor correctamente asignados, no el valor que esta contiene. diff --git a/doc/report.pdf b/doc/report.pdf new file mode 100644 index 000000000..55741fca2 Binary files /dev/null and b/doc/report.pdf differ diff --git a/doc/team.yml b/doc/team.yml index c16162532..8c31ce0d4 100644 --- a/doc/team.yml +++ b/doc/team.yml @@ -1,10 +1,7 @@ members: - - name: Nombre Apellido1 Apellido2 - github: github_id - group: CXXX - - name: Nombre Apellido1 Apellido2 - github: github_id - group: CXXX - - name: Nombre Apellido1 Apellido2 - github: github_id - group: CXXX + - name: Samuel David Suarez Rodriguez + github: samueldsr99 + group: C412 + - name: Enmanuel Verdesia Suarez + github: svex99 + group: C411 diff --git a/requirements.txt b/requirements.txt index 9eb0cad1a..da29e1ebf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ pytest pytest-ordering +ply==3.11 diff --git a/src/coolc.py b/src/coolc.py new file mode 100644 index 000000000..8faa314fb --- /dev/null +++ b/src/coolc.py @@ -0,0 +1,93 @@ +""" +Main entry point of COOL compiler +""" +import os +import subprocess + +from coolcmp.codegen.cil2mips.mips_formatter import MIPSFormatter +from coolcmp.lexing_parsing.lexer import errors as lexer_errors +from coolcmp.lexing_parsing.parser import parser, errors as parser_errors +from 
coolcmp.semantics import check_semantics +from coolcmp.codegen.cool2cil import build_cil + +from coolcmp.codegen.cil2mips import build_mips +from coolcmp.utils.ast_formatter import ASTFormatter +from coolcmp.utils.cil_formatter import CILFormatter + +from coolcmp.utils.cil import ProgramNode + + +def main(filename: str, cool_code: str, run: bool, verbose: bool): + ast = parser.parse(cool_code) + + if verbose: + ast_str = ASTFormatter().visit(ast) + print(ast_str) + + if lexer_errors: + for error in lexer_errors: + print(error) + exit(1) + + if parser_errors: + for error in parser_errors: + print(error) + exit(1) + + sem_errors, ctx, scope = check_semantics(ast) + if sem_errors: + for error in sem_errors: + print(error) + exit(1) + + cil: ProgramNode + cil = build_cil(ast, ctx, scope) + + if verbose: + cil_str = CILFormatter().visit(cil) + print(cil_str) + + mips = build_mips(cil, None, None) + mips_str = MIPSFormatter().visit(mips) + + mips_file = filename + '.mips' + + with open(mips_file, 'w') as fd: + fd.write(mips_str) + + if run: + print('=' * 20, 'Running SPIM', '=' * 20) + subprocess.run(['spim', '-f', mips_file]) + + +if __name__ == "__main__": + from argparse import ArgumentParser, FileType, BooleanOptionalAction + + arg_parser = ArgumentParser() + arg_parser.add_argument( + '--out', required=False, + help='Name for .mips generated file after compilation.' + ) + arg_parser.add_argument( + '--run', default=False, action=BooleanOptionalAction, + help='Execute the file compiled with SPIM.' + ) + arg_parser.add_argument( + '--verbose', default=False, action=BooleanOptionalAction, + help='Verbose output.' + ) + arg_parser.add_argument( + 'file', type=FileType(mode='r', encoding='utf8'), + help='COOL source file.' 
+ ) + args = arg_parser.parse_args() + + full_name = args.file.name + filename = full_name[:full_name.rfind('.')] + cool_code = args.file.read() + main( + filename=args.out or filename, + cool_code=cool_code, + run=args.run, + verbose=args.verbose + ) diff --git a/src/coolc.sh b/src/coolc.sh index 3088de4f9..8662c2ad4 100755 --- a/src/coolc.sh +++ b/src/coolc.sh @@ -1,11 +1,9 @@ -# Incluya aquí las instrucciones necesarias para ejecutar su compilador - INPUT_FILE=$1 OUTPUT_FILE=${INPUT_FILE:0: -2}mips -# Si su compilador no lo hace ya, aquí puede imprimir la información de contacto -echo "LINEA_CON_NOMBRE_Y_VERSION_DEL_COMPILADOR" # TODO: Recuerde cambiar estas -echo "Copyright (c) 2019: Nombre1, Nombre2, Nombre3" # TODO: líneas a los valores correctos +echo "COOL-Compiler v1.0.0" +echo "Copyright (c) 2021: Samuel David Suárez Rodríguez, Enmanuel Verdesia Suárez" + +# echo "Compiling $INPUT_FILE into $OUTPUT_FILE" -# Llamar al compilador -echo "Compiling $INPUT_FILE into $OUTPUT_FILE" +python3 coolc.py $INPUT_FILE diff --git a/src/coolcmp/__init__.py b/src/coolcmp/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/coolcmp/codegen/__init__.py b/src/coolcmp/codegen/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/coolcmp/codegen/cil2mips/__init__.py b/src/coolcmp/codegen/cil2mips/__init__.py new file mode 100644 index 000000000..cba961fa9 --- /dev/null +++ b/src/coolcmp/codegen/cil2mips/__init__.py @@ -0,0 +1,5 @@ +from .cil2mips_visitor import CILToMipsVisitor + + +def build_mips(ast, context, scope): + return CILToMipsVisitor().visit(ast) diff --git a/src/coolcmp/codegen/cil2mips/cil2mips_visitor.py b/src/coolcmp/codegen/cil2mips/cil2mips_visitor.py new file mode 100644 index 000000000..0150874e4 --- /dev/null +++ b/src/coolcmp/codegen/cil2mips/cil2mips_visitor.py @@ -0,0 +1,815 @@ +from __future__ import annotations +from audioop import add +from typing import Dict + +from coolcmp.utils import cil, visitor 
+from coolcmp.utils import mips, registers +from coolcmp.utils import extract_class_name +from coolcmp.utils.registers import ( + t0, + t1, + t2, + + s0, + s1, + s2, + + a0, + a1, + a2, + dw, + + v0, + fp, + sp, + ra, + zero +) + + +class CILToMipsVisitor: + def __init__(self): + self.cil_root: cil.ProgramNode | None = None + self.data: Dict[str, mips.Node] = {} + self.types: Dict[str, mips.Type] = {} + self.functions: Dict[str, mips.FunctionNode] = {} + self.cur_function: mips.FunctionNode | None = None + + def add_inst(self, *inst: mips.InstructionNode) -> None: + self.cur_function.instructions.extend(inst) + + def get_address(self, var_name: str): + return self.cur_function.variable_address(var_name) + + def get_method_index(self, name: str): + for i, method in enumerate(self.cil_root.all_methods): + if method == name: + return (i + 2) * 4 + raise ValueError(f"Unexpected method: {name}") + + @visitor.on("node") + def visit(self, node): + pass + + @visitor.when(cil.ProgramNode) + def visit(self, node: cil.ProgramNode): + node.update_method_indexes() + self.cil_root = node + + for i in node.dot_types: + self.visit(i) + for i in node.dot_data: + self.visit(i) + for func in node.dot_code: + if func.name == 'main': + self.visit(func) + for func in node.dot_code: + if func.name != 'main': + self.visit(func) + + return mips.ProgramNode( + list(self.data.values()), + list(self.types.values()), + list(self.functions.values()), + ) + + @visitor.when(cil.TypeNode) + def visit(self, node: cil.TypeNode): + type_ = mips.Type( + label=node.name, + parent=node.parent, + attrs=list(node.attributes), + methods=node.methods, + total_methods=node.total_methods, + index=len(self.types), + ) + + self.types[node.name] = type_ + + @visitor.when(cil.InitNode) + def visit(self, node: cil.InitNode): + dest = self.get_address(node.dest) + self.add_inst( + mips.LWNode(a0, node.type_name), + mips.JALNode('malloc'), + mips.LANode(t0, node.type_name) .with_comm(f"Get pointer to type 
{node.type_name}"), + mips.SWNode(t0, 0, v0) .with_comm(f"Set type pointer as attr"), + mips.SWNode(v0, dest, fp), + ) + + @visitor.when(cil.DataNode) + def visit(self, node: cil.DataNode): + self.data[node.name] = mips.StringNode(node.name, f"{node.value}") + + @visitor.when(cil.FunctionNode) + def visit(self, node: cil.FunctionNode): + params = [x.name for x in node.params] + local_vars = [x.name for x in node.local_vars] + + local_vars_size = len(local_vars) * dw + + self.cur_function = mips.FunctionNode(node.name, params, local_vars) + self.functions[node.name] = self.cur_function + + # Push local vars + push_instructions = ( + mips.push_register_instructions(ra) + + mips.push_register_instructions(fp) + + [mips.ADDINode(fp, sp, 8)] + + [mips.ADDINode(sp, sp, -local_vars_size)] + ) + + self.add_inst( + mips.CommentNode(f""), + *push_instructions, + ) + + for instruction in node.instructions: + self.visit(instruction) + + # Pop local vars + pop_instructions = ( + [mips.ADDINode(sp, sp, local_vars_size)] + + mips.pop_register_instructions(fp) + + mips.pop_register_instructions(ra) + ) + + return_instructions = ( + [mips.LINode(v0, 10), mips.SysCallNode()] + if self.cur_function.name == "main" + else [mips.JRNode(ra)] + ) + + self.add_inst( + *pop_instructions, + *return_instructions, + mips.CommentNode(f""), + ) + + @visitor.when(cil.SetAttrNode) + def visit(self, node: cil.SetAttrNode): + if node.value == 'void': + load_value_inst = mips.LWNode(t0, 'void') + elif isinstance(node.value, int): + load_value_inst = mips.LINode(t0, node.value) + else: + value_address = self.get_address(node.value) + load_value_inst = mips.LWNode(t0, (value_address, fp)) + + instance = self.get_address(node.instance) + + self.add_inst( + mips.CommentNode(f""), + # sum 1 to attr index because at offset 0 is the type pointer + load_value_inst, + mips.LWNode(t1, (instance, fp)), + mips.SWNode(t0, 4 * (node.attr.index + 1), t1) .with_comm(f"Set attr '{node.attr}' of {node.instance} = 
{node.value}"), + mips.CommentNode(f""), + ) + + @visitor.when(cil.GetAttrNode) + def visit(self, node: cil.GetAttrNode): + dest_offset = self.get_address(node.dest) + src_offset = self.get_address(node.src) + class_ = extract_class_name(node.attr) + attr_offset = self.types[class_].get_attr_index(node.attr) * 4 + + self.add_inst( + mips.CommentNode(f""), + mips.LWNode(t0, (src_offset, fp)) .with_comm(f"Get instance: {node.src}"), + mips.LWNode(t0, (attr_offset, t0)) .with_comm(f"Get attribute: {node.attr}"), + mips.SWNode(t0, dest_offset, fp) .with_comm(f"Store attribute in local {node.dest}"), + mips.CommentNode(f""), + ) + + @visitor.when(cil.PrintIntNode) + def visit(self, node: cil.PrintIntNode): + address = self.get_address(node.addr) + + self.add_inst( + mips.CommentNode(f""), + mips.LWNode(t0, (address, fp)), + mips.ADDUNode(a0, t0, 4), + mips.LINode(v0, 1), + mips.LWNode(a0, (0, a0)), + mips.SysCallNode(), + mips.CommentNode(f""), + ) + + @visitor.when(cil.PrintStringNode) + def visit(self, node: cil.PrintStringNode): + address = self.get_address(node.addr) + + self.add_inst( + mips.CommentNode(f""), + mips.LWNode(t0, (address, fp)), + mips.ADDUNode(a0, t0, 4), + mips.LINode(v0, 4), + mips.LWNode(a0, (0, a0)), + mips.SysCallNode(), + mips.CommentNode(f""), + ) + + @visitor.when(cil.ReadIntNode) + def visit(self, node: cil.ReadIntNode): + address = self.get_address(node.dest) + + self.add_inst( + mips.CommentNode(f""), + mips.LINode(v0, 5), + mips.SysCallNode(), + mips.MoveNode(t2, v0) + ) + + self.visit(cil.StaticCallNode("Int__init", node.dest)) + + self.add_inst( + mips.LWNode(t1, (address, fp)), + mips.SWNode(t2, 4, t1), + mips.LWNode(v0, (address, fp)), + mips.CommentNode(f"") + ) + + @visitor.when(cil.DynamicCallNode) + def visit(self, node: cil.DynamicCallNode): + meth_offset = self.get_method_index(node.method) + dest_address = self.get_address(node.dest) + args_space = self.cil_root.get_function( + f"{node.type or node.dtype}_{node.method}" + 
).args_space + + if node.type is None: + obj_address = self.get_address(node.obj) + get_type_inst = ( + mips.LWNode(t0, (obj_address, fp)) .with_comm("Get instance pointer"), + mips.LWNode(t0, (0, t0)) .with_comm("Get type pointer at offset 0"), + ) + else: + get_type_inst = ( + mips.LANode(t0, node.type), + ) + + self.add_inst( + mips.CommentNode(f""), + *get_type_inst, + mips.LWNode(t0, (meth_offset, t0)) .with_comm(f"Get method: {node.method}"), + mips.JALRNode(t0) .with_comm(f"Jump to {node.method}"), + mips.SWNode(v0, dest_address, fp), + mips.ADDINode(sp, sp, args_space) .with_comm("Pop args pushed"), + mips.CommentNode(f""), + ) + + @visitor.when(cil.StaticCallNode) + def visit(self, node: cil.StaticCallNode): + dest = self.get_address(node.dest) + + self.add_inst( + mips.CommentNode(f""), + mips.JALNode(node.function), + mips.SWNode(v0, dest, fp), + mips.CommentNode(f""), + ) + + @visitor.when(cil.ArgNode) + def visit(self, node: cil.ArgNode): + address = self.get_address(node.name) + + self.add_inst( + mips.CommentNode(f""), + mips.LWNode(t0, (address, fp)), + mips.ADDINode(sp, sp, -4), + mips.SWNode(t0, 0, sp), + mips.CommentNode(f""), + ) + + @visitor.when(cil.PlusNode) + def visit(self, node: cil.PlusNode): + """ + assuming left and right operands are ints + """ + left_offset = self.get_address(node.left) + right_offset = self.get_address(node.right) + dest_offset = self.get_address(node.dest) + + self.add_inst( + mips.CommentNode(f""), + mips.LWNode(t0, (left_offset, fp)) .with_comm(f"Load Int {node.left}"), + mips.LWNode(t0, (4, t0)) .with_comm('Load Int_value at offset 4'), + mips.LWNode(t1, (right_offset, fp)) .with_comm(f"Load Int {node.right}"), + mips.LWNode(t1, (4, t1)) .with_comm('Load Int_value at offset 4'), + mips.ADDNode(t2, t0, t1) .with_comm('Add the integer values'), + ) + + self.visit(cil.StaticCallNode('Int__init', node.dest)) + + self.add_inst( + mips.LWNode(t1, (dest_offset, fp)), + mips.SWNode(t2, 4, t1), + mips.CommentNode(f""), + 
) + + @visitor.when(cil.MinusNode) + def visit(self, node: cil.MinusNode): + left_offset = self.get_address(node.left) + right_offset = self.get_address(node.right) + dest_offset = self.get_address(node.dest) + + self.add_inst( + mips.CommentNode(f""), + mips.LWNode(t0, (left_offset, fp)), # load Int_value at offset 4 + mips.LWNode(t0, (4, t0)), + mips.LWNode(t1, (right_offset, fp)), # load Int_value at offset 4 + mips.LWNode(t1, (4, t1)), + mips.SUBNode(t2, t0, t1), # subtract the integer values + ) + + self.visit(cil.StaticCallNode('Int__init', node.dest)) + + self.add_inst( + mips.LWNode(t1, (dest_offset, fp)), + mips.SWNode(t2, 4, t1), + mips.CommentNode(f""), + ) + + @visitor.when(cil.StarNode) + def visit(self, node: cil.StarNode): + left_offset = self.get_address(node.left) + right_offset = self.get_address(node.right) + dest_offset = self.get_address(node.dest) + + self.add_inst( + mips.CommentNode(f""), + mips.LWNode(t0, (left_offset, fp)), # load Int_value at offset 4 + mips.LWNode(t0, (4, t0)), + mips.LWNode(t1, (right_offset, fp)), # load Int_value at offset 4 + mips.LWNode(t1, (4, t1)), + mips.MULTNode(t2, t0, t1), # multiply the integer values + ) + + self.visit(cil.StaticCallNode('Int__init', node.dest)) + + self.add_inst( + mips.LWNode(t1, (dest_offset, fp)), + mips.SWNode(t2, 4, t1), + mips.CommentNode(f""), + ) + + @visitor.when(cil.DivNode) + def visit(self, node: cil.DivNode): + left_offset = self.get_address(node.left) + right_offset = self.get_address(node.right) + dest_offset = self.get_address(node.dest) + + self.add_inst( + mips.CommentNode(f""), + mips.LWNode(t0, (left_offset, fp)), # load Int_value at offset 4 + mips.LWNode(t0, (4, t0)), + mips.LWNode(t1, (right_offset, fp)), # load Int_value at offset 4 + mips.LWNode(t1, (4, t1)), + mips.DIVNode(t2, t0, t1), # divide the integer values + ) + + self.visit(cil.StaticCallNode('Int__init', node.dest)) + + self.add_inst( + mips.LWNode(t1, (dest_offset, fp)), + mips.SWNode(t2, 4, t1), + 
mips.CommentNode(f""), + ) + + @visitor.when(cil.NegationNode) + def visit(self, node: cil.NegationNode): + src_offset = self.get_address(node.src) + dest_offset = self.get_address(node.dest) + + self.add_inst( + mips.CommentNode(f""), + mips.LWNode(t0, (src_offset, fp)), + mips.LWNode(t0, (4, t0)), + mips.XORINode(t1, t0, 1) + ) + + self.visit(cil.StaticCallNode('Bool__init', node.dest)) + + self.add_inst( + mips.LWNode(t0, (dest_offset, fp)), + mips.SWNode(t1, 4, t0), + mips.CommentNode(f""), + ) + + + @visitor.when(cil.LoadNode) + def visit(self, node: cil.LoadNode): + dest_address = self.cur_function.variable_address(node.dest) + + self.add_inst( + mips.CommentNode(f""), + mips.LANode(t0, self.data[node.msg].label), + mips.SWNode(t0, dest_address, fp), + mips.CommentNode(f""), + ) + + @visitor.when(cil.AssignNode) + def visit(self, node: cil.AssignNode): + dest = self.get_address(node.dest) + + if isinstance(node.source, int): + load_inst = mips.LINode(t0, node.source) + elif node.source == "void": + load_inst = mips.LANode(t0, "void") + else: + src = self.get_address(node.source) + load_inst = mips.LWNode(t0, (src, fp)) + + self.add_inst( + mips.CommentNode(f""), + load_inst, + mips.SWNode(t0, dest, fp), + mips.CommentNode(f""), + ) + + @visitor.when(cil.ReturnNode) + def visit(self, node: cil.ReturnNode): + if node.value is not None: + if isinstance(node.value, int): + load_to_v0 = mips.LINode(v0, node.value) + else: + dest_offset = self.get_address(node.value) + load_to_v0 = mips.LWNode(v0, (dest_offset, fp)) + + self.add_inst( + mips.CommentNode(f""), + load_to_v0, + mips.CommentNode(f""), + ) + + @visitor.when(cil.SubstringNode) + def visit(self, node: cil.SubstringNode): + self.add_inst(mips.CommentNode(f"{node.dest}[{node.index}:{node.length}]")) + + self.visit(cil.StaticCallNode('String__init', node.dest)) + + src_address = self.get_address('self') + dest_address = self.get_address(node.dest) + index_address = self.get_address(node.index) + 
@visitor.when(cil.LengthNode)
def visit(self, node: cil.LengthNode):
    """Emit MIPS that stores the length of ``node.src`` in ``node.dest``.

    ``Int__init`` is called for ``node.dest`` first. The word at offset 4
    of the source object is then passed in ``a0`` to the ``length``
    routine, and ``v0`` is written to offset 4 of the destination object.
    """
    self.add_inst(mips.CommentNode(f""))

    self.visit(cil.StaticCallNode("Int__init", node.dest))

    string_slot = self.get_address(node.src)
    result_slot = self.get_address(node.dest)

    call_length = (
        mips.LWNode(a0, (string_slot, fp)),   # pointer to the source object
        mips.LWNode(a0, (4, a0)),             # word at offset 4 -> argument
        mips.JALNode('length'),
    )
    box_result = (
        mips.LWNode(t1, (result_slot, fp)),
        mips.SWNode(v0, 4, t1),
    )
    self.add_inst(*call_length, *box_result)

    self.add_inst(mips.CommentNode(f""))
@visitor.when(cil.TypeNameNode)
def visit(self, node: cil.TypeNameNode):
    """Emit MIPS that builds a String holding the type name of ``node.src``.

    ``String__init`` is called for ``node.dest``. The type pointer is read
    from offset 0 of the source object, advanced by the ``name_offset`` of
    the ``Object`` type entry, and stored at offset 4 of the object
    addressed by ``v0``.
    """
    type_name_offset = self.types['Object'].name_offset
    instance_slot = self.get_address(node.src)

    self.add_inst(mips.CommentNode(f""))

    self.visit(cil.StaticCallNode('String__init', node.dest))

    # NOTE(review): the result is stored through $v0 rather than through a
    # pointer reloaded from node.dest's slot as the sibling visitors do;
    # presumably $v0 still holds the new String here -- confirm.
    load_self = mips.LWNode(t0, (instance_slot, fp)).with_comm('Load pointer to self')
    load_type = mips.LWNode(t0, (0, t0)).with_comm('Load pointer to type of self')
    point_to_name = mips.ADDINode(t0, t0, type_name_offset).with_comm('Point to name of type')
    save_name = mips.SWNode(t0, 4, v0).with_comm('Save name of the type in the new string')

    self.add_inst(
        load_self,
        load_type,
        point_to_name,
        save_name,
        mips.CommentNode(f""),
    )
@visitor.when(cil.EqualNode)
def visit(self, node: cil.EqualNode):
    """Emit MIPS for ``dest = (left = right)``.

    ``Bool__init`` is called for ``node.dest``. The two operand pointers
    are then passed in ``a0``/``a1`` to the ``equal`` routine, and ``v0``
    is stored at offset 4 of the destination object.
    """
    lhs_slot = self.get_address(node.left)
    rhs_slot = self.get_address(node.right)
    out_slot = self.get_address(node.dest)

    self.add_inst(mips.CommentNode(f""))

    self.visit(cil.StaticCallNode("Bool__init", node.dest))

    compare = (
        mips.LWNode(a0, (lhs_slot, fp)),   # left instance pointer
        mips.LWNode(a1, (rhs_slot, fp)),   # right instance pointer
        mips.JALNode("equal"),
    )
    box_result = (
        mips.LWNode(t0, (out_slot, fp)),
        mips.SWNode(v0, 4, t0),
    )
    self.add_inst(*compare, *box_result, mips.CommentNode(f""))
@visitor.when(cil.ReadStringNode)
def visit(self, node: cil.ReadStringNode):
    """Emit MIPS that reads a line of input into a new String in ``node.dest``.

    A 512-byte buffer is requested from ``malloc`` and filled with
    syscall 8. ``String__init`` is then called for ``node.dest`` and the
    buffer address is stored at offset 4 of that object. Finally
    ``remove_eol`` is called on the buffer.
    """
    buffer_size = 512

    self.add_inst(mips.CommentNode(f""))

    dest_slot = self.get_address(node.dest)

    # Allocate the input buffer; keep its address in t2.
    allocate = (
        mips.LINode(a0, buffer_size),
        mips.JALNode('malloc'),
        mips.MoveNode(t2, v0),
    )
    self.add_inst(*allocate)

    # Syscall 8: a0 = buffer, a1 = buffer size.
    read_line = (
        mips.MoveNode(a0, t2),
        mips.LINode(a1, buffer_size),
        mips.LINode(v0, 8),
        mips.SysCallNode(),
    )
    self.add_inst(*read_line)

    # NOTE(review): assumes the emitted call leaves $t2 untouched -- confirm.
    self.visit(cil.StaticCallNode('String__init', node.dest))

    attach_buffer = (
        mips.LWNode(t0, (dest_slot, fp)),
        mips.SWNode(t2, 4, t0),
    )
    self.add_inst(*attach_buffer)

    # Remove eol
    strip_newline = (
        mips.MoveNode(a0, t2),
        mips.JALNode('remove_eol'),
    )
    self.add_inst(*strip_newline)

    self.add_inst(mips.CommentNode(f""))
@visitor.when(mips.ProgramNode)
def visit(self, node: mips.ProgramNode):
    """Render a whole MIPS program as assembly text.

    The output is, in order: the ``.data`` section, the ``void`` label,
    the ``_NoParent`` label followed by the type records, the ``.text``
    section with every function, and the code returned by
    ``load_templates()``. Sections are joined with newlines.
    """
    data_lines = [self.visit(item) for item in node.data]
    data_section = "# data\n.data\n" + "\n".join(data_lines)

    void_section = 'void:\n\t.word Void'

    type_blocks = [self.visit(type_) for type_ in node.types]
    types_section = (
        "\n_NoParent:\t\t# label for Object parent reference\n\t.word\t0\n\n"
        + "\n\n".join(type_blocks)
    )

    function_blocks = [self.visit(func) for func in node.functions]
    text_section = "\n# functions\n.text\n.globl main\n" + "\n".join(function_blocks)

    sections = [
        data_section,
        void_section,
        types_section,
        text_section,
        load_templates(),
    ]
    return "\n".join(sections)
@visitor.when(mips.FunctionNode)
def visit(self, node: mips.FunctionNode):
    """Render a function as its label followed by its instructions.

    The label line is ``<name>:``; every instruction is rendered through
    ``self.visit`` and placed on its own tab-indented line.
    """
    header = f"{node.name}:\n\t"
    body_lines = [self.visit(instruction) for instruction in node.instructions]
    return header + "\n\t".join(body_lines)
def load_templates(templates_dir=None) -> str:
    """Concatenate the hand-written MIPS runtime templates into one string.

    Args:
        templates_dir: Directory containing the ``*.mips`` template files.
            Defaults to the directory of this module, so the result no
            longer depends on the current working directory of the process.

    Returns:
        The text ``"\\n# Templates"`` followed by the contents of each
        template file, in a fixed order, joined with newlines.

    Raises:
        OSError: If a template file cannot be opened.
    """
    # Imported locally because this module has no top-level imports.
    import os

    template_names = [
        "malloc.mips",
        "copy.mips",
        "substr.mips",
        "isvoid.mips",
        "less_than.mips",
        "length.mips",
        "concat.mips",
        "remove_eol.mips",
        "less_equal.mips",
        "equal.mips",
        "conforms.mips"
    ]

    if templates_dir is None:
        # The original path was relative to the current working directory
        # and only resolved when the compiler was launched from ``src/``.
        templates_dir = os.path.dirname(os.path.abspath(__file__))

    code_templates = ["\n# Templates"]

    for tname in template_names:
        with open(os.path.join(templates_dir, tname)) as fd:
            code_templates.append(fd.read())

    return "\n".join(code_templates)
# Template for conforms function:
# Check whether type $a0 conforms to type $a1 by walking the parent chain
# of $a0 until it reaches $a1 (conforms) or the _NoParent label (does not).
# Parameters:
#   a0: address of the first type record
#   a1: address of the second type record
# Returns:
#   v0: 1 if $a0 conforms to $a1, 0 otherwise
# $t0-$t2, $a0, $a1 and $ra are saved on entry and restored on exit.
conforms:
    addiu $sp $sp -24
    sw $t0 0($sp)
    sw $t1 4($sp)
    sw $t2 8($sp)
    sw $a0 12($sp)
    sw $a1 16($sp)
    sw $ra 20($sp)

    li $v0, 0                       # By default $v0 = 0 (does not conform)

    move $t0, $a0                   # $t0 = type currently being examined
    move $t1, $a1                   # $t1 = target type
    la $t2, _NoParent               # End condition: sentinel address

conforms_loop:
    beq $t0, $t1, conforms_equal    # if $t0 == $t1 then conforms_equal
    beq $t0, $t2, conforms_end      # if $t0 == $t2 then conforms_end (return false)

    # $t0 = $t0.parent (the word at offset 4 of the type record)
    addi $t0, $t0, 4                # $t0 = $t0 + 4
    lw $t0, 0($t0)

    j conforms_loop

conforms_equal:
    li $v0, 1                       # return true

conforms_end:
    lw $t0 0($sp)
    lw $t1 4($sp)
    lw $t2 8($sp)
    lw $a0 12($sp)
    lw $a1 16($sp)
    lw $ra 20($sp)
    addiu $sp $sp 24
    jr $ra
# Template for = operator.
# Args:
#   $a0: pointer to left instance
#   $a1: pointer to right instance
# Returns:
#   $v0: 1 if both pointers are identical;
#        1 if the left instance is an Int or Bool and the words at offset 4
#          of both instances are equal;
#        1 if the left instance is a String and both character sequences
#          match byte for byte up to and including the terminating zero;
#        0 in any other case.
# Only the type of the left instance is inspected.
# Writes $t0-$t3 without saving them.
equal:
    beq $a0, $a1, are_equal         # same pointer => equal
    lw $t0, 0($a0)                  # $t0 = type pointer of the left instance
    # check if the type is Int
    la $t1, Int
    beq $t0, $t1, by_value
    # check if the type is Bool
    la $t1, Bool
    beq $t0, $t1, by_value
    # check if the type is String
    la $t1, String
    beq $t0, $t1, compare_str
    j are_not_equal                 # any other type: pointers already differ
by_value:
    lw $t0, 4($a0)                  # value word of the left instance
    lw $t1, 4($a1)                  # value word of the right instance
    beq $t0, $t1, are_equal
are_not_equal:
    li $v0, 0
    j end_equal
are_equal:
    li $v0, 1
end_equal:
    jr $ra
# instructions for string content comparison
compare_str:
    lw $t0, 4($a0)                  # address of the left characters
    lw $t1, 4($a1)                  # address of the right characters
compare_str_loop:
    lb $t2, 0($t0)
    lb $t3, 0($t1)
    bne $t2, $t3, are_not_equal     # first differing byte => not equal
    beq $t2, $zero, are_equal       # both reached the terminating zero
    addi $t1, $t1, 1
    addi $t0, $t0, 1
    j compare_str_loop
# Template for length function:
# Counts the bytes before the first zero byte.
# Arguments
#   a0: Address of start of string
# Returns
#   $v0: size of string (terminating zero not counted)
# $t0 and $t1 are saved on entry and restored on exit.
length:
    addiu $sp $sp -8
    sw $t0 0($sp)
    sw $t1 4($sp)

    move $t0 $a0                # t0 = cursor, starts at the first byte
    move $v0 $zero              # v0 = running count, starts at zero

length_loop:
    lb $t1 0($t0)               # t1 = byte under the cursor
    beq $t1 $zero length_end    # stop at the terminating zero byte
    addi $v0 $v0 1              # Increase count in v0
    addiu $t0 $t0 1             # Increase address pointer
    j length_loop               # examine the next byte

length_end:
    lw $t0 0($sp)
    lw $t1 4($sp)
    addiu $sp $sp 8

    jr $ra
# Removes the end-of-line character from a zero-terminated string, in place.
# The string is cut at its first '\n' (ASCII 10). If there is no newline
# (empty input, or input that filled the buffer) it is left untouched.
# The previous version always zeroed the byte before the terminator, which
# dropped a real character when no newline was present and wrote one byte
# before the buffer for an empty string.
# Arguments
#   a0: string address
# Clobbers: $a0, $t1
remove_eol:
    lb    $t1, 0($a0)               # t1 = current byte
    beqz  $t1, remove_eol_end       # reached the terminator: no newline
    addiu $a0, $a0, 1               # advance the cursor
    addiu $t1, $t1, -10             # t1 == 0 iff the byte was '\n'
    bnez  $t1, remove_eol           # not a newline: keep scanning
    sb    $zero, -1($a0)            # overwrite the newline with the terminator
remove_eol_end:
    jr    $ra
# Template for substring function:
# Copies $a2 bytes starting at offset $a1 of the string at $a0 into a newly
# allocated, zero-terminated buffer.
# Arguments
#   a0: Address of the start of the source string
#   a1: Index (byte offset) of the first character of the substring
#   a2: Size of the substring
# Returns
#   $v0: Address of the start of the new substring
# $t0-$t3, $a0-$a2 and $ra are saved on entry and restored on exit.
# No bounds check is done on the index or the size.
substr:
    # Push register values
    addiu $sp $sp -32
    sw $t0 0($sp)
    sw $t1 4($sp)
    sw $t2 8($sp)
    sw $t3 12($sp)
    sw $a0 16($sp)
    sw $a1 20($sp)
    sw $a2 24($sp)
    sw $ra 28($sp)

    add $t0 $a0 $a1             # t0 = address of the first byte to copy

    # Request size + 1 bytes so the terminating zero always fits, rounded
    # up to a multiple of 4: ((size + 1) + 3) with the low two bits cleared.
    # The previous code requested only `size` bytes when size was already a
    # multiple of 4, leaving no room for the terminator.
    addiu $t1 $a2 4
    srl $t1 $t1 2
    sll $t1 $t1 2

    move $a0 $t1
    jal malloc
    move $t3 $v0                # t3 = write cursor; v0 keeps the result
    move $t1 $zero              # t1 = number of bytes copied so far

substr_copy_loop:
    beq $t1 $a2 substr_end
    lb $t2 0($t0)
    sb $t2 0($t3)
    addiu $t0 $t0 1
    addiu $t3 $t3 1
    addiu $t1 $t1 1
    j substr_copy_loop

substr_end:
    sb $zero 0($t3)             # terminate the new string

    # Pop register values
    lw $t0 0($sp)
    lw $t1 4($sp)
    lw $t2 8($sp)
    lw $t3 12($sp)
    lw $a0 16($sp)
    lw $a1 20($sp)
    lw $a2 24($sp)
    lw $ra 28($sp)
    addiu $sp $sp 32

    jr $ra
def new_label(self, name: str) -> cil.LabelNode:
    """Return a fresh ``cil.LabelNode`` named ``_<name>_<counter>``.

    The visitor-wide ``label_count`` is incremented before it is used, so
    each call yields a different suffix.
    """
    self.label_count += 1
    return cil.LabelNode(f'_{name}_{self.label_count}')
After that every function + # has access to void through that attribute in every class. + # So, pop it to avoid repeated locals. + scope.locals.pop(0) + # build the code functions + for class_ in node.declarations: + tagged_scope = scope.get_tagged_scope(class_.id) + + self.visit(class_, deepcopy(tagged_scope)) + + # build the entry function: + for class_ in node.declarations: + if class_.id == 'Main': + for feature in class_.features: + if isinstance(feature, ast.FuncDeclarationNode) and feature.id == 'main': + self.add_function('main') + instance = self.add_local('main_instance') + self.add_inst(cil.StaticCallNode('Main__init', instance)) + self.add_comment('Calling main') + result = self.add_local('result') + self.add_inst(cil.ArgNode(instance)) + self.add_inst(cil.DynamicCallNode(instance, 'main', result, None, 'Main')) + self.add_inst(cil.ReturnNode(0)) + break + + # add the default functions of COOL + # TODO: add missing instructions + self.code += [ + cil.FunctionNode( + name='Object__init', + params=[], + local_vars=[ + cil.LocalNode('self'), + ], + instructions=[ + cil.InitNode('self', 'Object'), + cil.ReturnNode('self') + ] + ), + cil.FunctionNode( + name='Object_abort', + params=[ + cil.ParamNode('self'), + cil.ParamNode('typename') + ], + local_vars=[], + instructions=[ + cil.AbortNode(), + cil.ReturnNode(), + ] + ), + cil.FunctionNode( + name='Object_type_name', + params=[ + cil.ParamNode('self'), + ], + local_vars=[ + cil.LocalNode('name'), + ], + instructions=[ + cil.TypeNameNode('name', 'self'), + cil.ReturnNode('name'), + ] + ), + cil.FunctionNode( + name='Object_copy', + params=[ + cil.ParamNode('self'), + ], + local_vars=[ + cil.LocalNode('self_copy'), + ], + instructions=[ + cil.AssignNode('self_copy', 'self'), + cil.ReturnNode('self_copy'), + ] + ), + cil.FunctionNode( + name='IO__init', + params=[], + local_vars=[ + cil.LocalNode('self'), + ], + instructions=[ + cil.InitNode('self', 'IO'), + cil.ReturnNode('self'), + ] + ), + cil.FunctionNode( 
+ name='IO_out_string', + params=[ + cil.ParamNode('self'), + cil.ParamNode('str_addr'), + ], + local_vars=[], + instructions=[ + cil.PrintStringNode('str_addr'), + cil.ReturnNode('self'), + ] + ), + cil.FunctionNode( + name='IO_out_int', + params=[ + cil.ParamNode('self'), + cil.ParamNode('int_addr'), + ], + local_vars=[], + instructions=[ + cil.PrintIntNode('int_addr'), + cil.ReturnNode('self'), + ] + ), + cil.FunctionNode( + name='IO_in_string', + params=[ + cil.ParamNode('self'), + ], + local_vars=[ + cil.LocalNode('_in_string'), + ], + instructions=[ + cil.ReadStringNode('_in_string'), + cil.ReturnNode('_in_string'), + ] + ), + cil.FunctionNode( + name='IO_in_int', + params=[ + cil.ParamNode('self'), + ], + local_vars=[ + cil.LocalNode('_in_int'), + ], + instructions=[ + cil.ReadIntNode('_in_int'), + cil.ReturnNode('_in_int'), + ] + ), + cil.FunctionNode( + name='String__init', + params=[], + local_vars=[ + cil.LocalNode('self'), + ], + instructions=[ + cil.InitNode('self', 'String'), + cil.ReturnNode('self'), + ] + ), + cil.FunctionNode( + name='String_length', + params=[ + cil.ParamNode('self'), + ], + local_vars=[ + cil.LocalNode('_length'), + ], + instructions=[ + cil.LengthNode('self', '_length'), + cil.ReturnNode('_length'), + ] + ), + cil.FunctionNode( + name='String_concat', + params=[ + cil.ParamNode('self'), + cil.ParamNode('other_str'), + ], + local_vars=[ + cil.LocalNode('result'), + ], + instructions=[ + cil.ConcatNode('result', 'self', 'other_str'), + cil.ReturnNode('result'), + ] + ), + cil.FunctionNode( + name='String_substr', + params=[ + cil.ParamNode('self'), + cil.ParamNode('index'), + cil.ParamNode('length'), + ], + local_vars=[ + cil.LocalNode('result'), + ], + instructions=[ + cil.SubstringNode('result', 'value', 'index', 'length'), + cil.ReturnNode('result'), + ] + ), + cil.FunctionNode( + name='Bool__init', + params=[], + local_vars=[ + cil.LocalNode('self'), + ], + instructions=[ + cil.InitNode('self', 'Bool'), + 
@visitor.when(ast.ClassDeclarationNode)
def visit(self, node: ast.ClassDeclarationNode, scope: Scope):
    """Build the ``<Class>__init`` function and the methods of a class.

    The init function is appended to the program code and made the current
    function. For every attribute listed on the CIL type, its initializer
    expression is visited and the result is written both to the attribute
    slot of ``self`` and to a local named after the attribute. Each method
    of the class is then visited with its tagged scope, and finally the
    init function is closed with ``RETURN self``.
    """
    self.current_type = node.id
    init = cil.FunctionNode(
        name=f'{node.id}__init',
        params=[],
        local_vars=[
            cil.LocalNode('self'),
        ],
        instructions=[
            cil.InitNode('self', node.id),
        ]
    )
    self.root.dot_code.append(init)
    self.current_init = init

    # Attribute initializers are emitted into the init function.
    self.current_function = self.current_init
    type_node = self.root.get_type(self.current_type)
    # enumerate() yields the position directly instead of searching the
    # list again with .index() on every iteration. The two agree as long
    # as attribute names are unique within the type.
    for attr_index, attr_name in enumerate(type_node.attributes):
        attr = self.add_local(extract_feat_name(attr_name), internal=False)
        attr_expr, attr_scope = type_node.get_attr_node(attr_name)
        attr_value = self.visit(attr_expr, attr_scope)
        attr_at = cil.AttributeAt(attr_name, attr_index)
        self.add_inst(cil.SetAttrNode('self', attr_at, attr_value))
        self.add_inst(cil.AssignNode(attr, attr_value))

    # Visiting a method switches the current function via add_function.
    for feat in node.features:
        if isinstance(feat, ast.FuncDeclarationNode):
            tagged_scope = scope.get_tagged_scope(feat.id)
            self.visit(feat, tagged_scope)

    # Appended directly to `init` because the current function may now be
    # one of the methods visited above.
    init.instructions.append(cil.ReturnNode('self'))
@visitor.when(ast.ParenthesisExpr)
def visit(self, node: ast.ParenthesisExpr, scope: Scope):
    """Generate code for a parenthesised expression.

    Parentheses add no semantics of their own, so the wrapped expression is
    visited and its result is returned. The previous implementation visited
    ``node`` itself, which dispatched straight back to this method and
    recursed without end.
    """
    # NOTE(review): assumes the wrapped expression is stored as `expr`, as
    # on the other expression nodes used in this visitor -- confirm against
    # ast.ParenthesisExpr.
    return self.visit(node.expr, scope)
visit(self, node: ast.BlockNode, scope: Scope):
+        """Lower each expression in order; the block's value is the last one's (None when empty)."""
+        last_expr = None
+        for expr in node.expressions:
+            last_expr = self.visit(expr, scope)
+
+        return last_expr
+
+    @visitor.when(ast.CaseBranchNode)
+    def visit(self, node: ast.CaseBranchNode, scope: Scope):
+        """Lower the branch body and return the name holding its value."""
+        return self.visit(node.expr, scope)
+
+    @visitor.when(ast.CaseNode)
+    def visit(self, node: ast.CaseNode, scope: Scope):
+        """
+        Lower a `case ... of ... esac` expression.
+
+        Emitted layout:
+            (code for the scrutinee)
+            one CONFORMS test + conditional GOTO per branch, ordered by
+                decreasing `context.type_depth` of the branch type
+            GOTO case_match_re                (nothing conformed)
+            per branch: LABEL, bind the branch id to the scrutinee,
+                (code for the branch body), case_ret = result, GOTO end
+            LABEL case_match_re / LABEL expr_void_re  (runtime error stubs)
+            LABEL end
+
+        One child scope per branch is consumed with `scope.children.pop(0)`.
+        Returns the name of the local that holds the value of the case.
+
+        NOTE(review): `expr_void_re` is emitted but no jump targets it, so a
+        void scrutinee is not routed to ExprVoidRuntimeErrorNode here -- confirm
+        whether the backend handles that case elsewhere.
+        """
+        self.add_comment("Case of")
+        ret_exp = self.visit(node.expr, scope)
+        typename = self.add_local('typename')
+        case_ret = self.add_local('case_ret')
+        end_label = self.new_label('end')
+        case_match_re_label = self.new_label('case_match_re')
+        expr_void_re_label = self.new_label('expr_void_re')
+
+        self.add_inst(cil.TypeNameNode(typename, ret_exp))
+
+        def get_depth(x: ast.CaseBranchNode):
+            return self.context.type_depth(self.context.get_type(x.type))
+
+        # Sort cases and scopes
+        sorted_cases: List[Tuple[ast.CaseBranchNode, Scope]] = []
+        for case in node.cases:
+            child_scope = scope.children.pop(0)
+            sorted_cases.append((case, child_scope))
+        sorted_cases.sort(key=lambda x: get_depth(x[0]), reverse=True)
+
+        branch_labels = [self.new_label('case_branch') for _ in sorted_cases]
+
+        # The branch scope is not needed for the dispatch tests; it is ignored
+        # here so the `scope` parameter is not rebound by the loop.
+        for (case, _), label in zip(sorted_cases, branch_labels):
+            self.add_comment(f"Check for case branch {case.type}")
+            cond = self.add_local('case_cond')
+
+            self.add_inst(cil.ConformsNode(cond, ret_exp, case.type))
+            self.add_inst(cil.GotoIfNode(cond, label.name))
+
+        # Does not conform to anyone => Runtime error
+        self.add_inst(cil.GotoNode(case_match_re_label.name))
+
+        for (case, child_scope), label in zip(sorted_cases, branch_labels):
+            self.add_inst(label)
+            idx = self.add_local(case.id, internal=False)
+            self.add_inst(cil.AssignNode(idx, ret_exp))
+            branch_ret = self.visit(case, child_scope)
+            self.add_inst(cil.AssignNode(case_ret, branch_ret))
+            self.add_inst(cil.GotoNode(end_label.name))
+
+        # Handle Runtime Errors
+        self.add_inst(case_match_re_label)
+        self.add_inst(cil.CaseMatchRuntimeErrorNode())
+        self.add_inst(expr_void_re_label)
+        self.add_inst(cil.ExprVoidRuntimeErrorNode())
+
+        # No ReturnNode here: the case is an expression, its value travels in
+        # `case_ret`, and the enclosing function emits its own return. A return
+        # of the scrutinee at this point would end the function early.
+        self.add_inst(end_label)
+        return case_ret
+
+    @visitor.when(ast.AssignNode)
+    def visit(self, node: ast.AssignNode, scope: Scope):
+        """
+        Lower `id <- expr`.
+
+        The value is assigned to the local named `node.id`; when the scope
+        reports the variable as an attribute, the matching slot of `self` is
+        updated as well. Returns the name holding the assigned value.
+        """
+        expr_dest = self.visit(node.expr, scope)
+        self.add_inst(cil.AssignNode(node.id, expr_dest))
+        variable = scope.find_variable(node.id)
+        if variable.is_attr:
+            attr_name = f'{self.current_type}_{node.id}'
+            attr_index = self.root.get_type(self.current_type).attributes.index(attr_name)
+            attr_at = cil.AttributeAt(attr_name, attr_index)
+            self.add_inst(cil.SetAttrNode('self', attr_at, value=expr_dest))
+        return expr_dest
+
+    @visitor.when(ast.ConditionalNode)
+    def visit(self, node: ast.ConditionalNode, scope: Scope):
+        """
+        Lower `if ... then ... else ... fi`. Emitted layout:
+
+        if_dest = (code for if_expr)
+        IF if_dest GOTO then
+        GOTO else
+        LABEL then
+        then_dest = (code for then_expr)
+        cond_res = then_dest
+        GOTO endif
+        LABEL else
+        else_dest = (code for else_expr)
+        cond_res = else_dest
+        LABEL endif
+
+        Returns `cond_res`, the local that receives the selected branch value.
+        """
+        self.add_comment('Conditional if-else')
+
+        then_label = self.new_label('then')
+        else_label = self.new_label('else')
+        endif_label = self.new_label('endif')
+
+        cond_res = self.add_local('cond_res')
+        if_dest = self.visit(node.if_expr, scope)
+        self.add_inst(cil.GotoIfNode(if_dest, then_label.name))
+        self.add_inst(cil.GotoNode(else_label.name))
+        self.add_inst(then_label)
+        then_dest = self.visit(node.then_expr, scope)
+        self.add_inst(cil.AssignNode(cond_res, then_dest))
+        self.add_inst(cil.GotoNode(endif_label.name))
+        self.add_inst(else_label)
+        else_dest = self.visit(node.else_expr, scope)
+        self.add_inst(cil.AssignNode(cond_res, else_dest))
+        self.add_inst(endif_label)
+        return cond_res
+
+    @visitor.when(ast.WhileNode)
+    def visit(self, node: ast.WhileNode, scope: Scope):
+        """
+        Lower `while ... loop ... pool`. Emitted layout:
+
+        LABEL while_cond
+        cond_dest = (code for the condition)
+        IF cond_dest GOTO while_body
+        GOTO end_while
+        LABEL while_body
+        (code for the body; its value is not used, only its side effects)
+        GOTO while_cond
+        LABEL end_while
+
+        The loop itself evaluates to the name 'void'; no call is emitted for it.
+
""" + self.add_comment('While loop') + + cond_label = self.new_label('while_cond') + body_label = self.new_label('while_body') + end_while_label = self.new_label('end_while') + + self.add_inst(cond_label) + cond_dest = self.visit(node.condition, scope) + self.add_inst(cil.GotoIfNode(cond_dest, body_label.name)) + self.add_inst(cil.GotoNode(end_while_label.name)) + self.add_inst(body_label) + self.visit(node.body, scope) + self.add_inst(cil.GotoNode(cond_label.name)) + self.add_inst(end_while_label) + + return 'void' + + @visitor.when(ast.CallNode) + def visit(self, node: ast.CallNode, scope: Scope): + self.add_comment(f'Calling function {node.id}') + # allocate and push the object + if node.obj is None: + obj = ast.VariableNode('self') + else: + obj = node.obj + obj_dest = self.visit(obj, scope) + + # allocate and push the args + for arg in reversed(node.args): + arg_dest = self.visit(arg, scope) + self.add_inst(cil.ArgNode(arg_dest)) + self.add_inst(cil.ArgNode(obj_dest)) + + # call the function + call_res = self.add_local('call_res') + self.add_inst( + cil.DynamicCallNode(obj_dest, node.id, call_res, node.type, node.obj_dyn_type) + ) + + # update attrs refs + for attr, _ in self.context.get_type(self.current_type).all_attributes(): + self.add_inst(cil.GetAttrNode(attr.name, 'self', f'{self.current_type}_{attr.name}')) + + # for local in self.current_function.local_vars: + # if local.is_attr and local.name != 'self': + # print('updating attr', local.name) + # self.add_inst(cil.GetAttrNode(local.name, 'self', f'{self.current_type}_{local.name}')) + + return call_res + + @visitor.when(ast.InstantiateNode) + def visit(self, node: ast.InstantiateNode, scope: Scope): + if node.lex == 'String': + return self.visit(ast.StringNode('""'), scope) + elif node.lex == 'Bool': + return self.visit(ast.BooleanNode('false'), scope) + elif node.lex == 'Int': + return self.visit(ast.IntegerNode('0'), scope) + + self.add_comment(f'Instantiating type {node.lex}') + + # type_node = 
self.root.get_type(node.lex)
+        # attr_values = []
+        # for attr in type_node.attributes:
+        #     attr_expr = type_node.get_attr_node(attr)
+        #     attr_values.append(self.visit(attr_expr, scope))
+        instance = self.add_local(f'inst_of_{node.lex}')
+        self.add_inst(cil.StaticCallNode(f'{node.lex}__init', instance))
+        # for attr, attr_value in zip(type_node.attributes, attr_values):
+        #     attr_index = type_node.attributes.index(attr)
+        #     attr_at = cil.AttributeAt(attr, attr_index)
+        #     self.add_inst(
+        #         cil.SetAttrNode(instance, attr_at, attr_value)
+        #     )
+        return instance
+
+    @visitor.when(ast.StringNode)
+    def visit(self, node: ast.StringNode, scope: Scope):
+        """
+        Build a String object for a literal.
+
+        The raw lexeme is lowered through the `str` visitor, then a fresh
+        instance is created with `String__init` and its `String_value` slot is
+        set. Returns the local holding the instance.
+        """
+        # Keep the generated comment short: long literals are cut to 15 chars.
+        self.add_comment(
+            'Instantiating string: ' +
+            (node.lex if len(node.lex) < 20 else node.lex[:15] + '...')
+        )
+
+        value = self.visit(node.lex, scope)
+        str_instance = self.add_local('str_instance')
+        self.add_inst(cil.StaticCallNode('String__init', str_instance))
+        attr_index = self.root.get_type('String').attributes.index('String_value')
+        attr_at = cil.AttributeAt('String_value', attr_index)
+        self.add_inst(cil.SetAttrNode(str_instance, attr_at, value))
+        return str_instance
+
+    @visitor.when(ast.IntegerNode)
+    def visit(self, node: ast.IntegerNode, scope: Scope):
+        """Build an Int object whose `Int_value` slot holds the literal; returns its local."""
+        value = self.visit(int(node.lex), scope)
+        int_instance = self.add_local('int_instance')
+        self.add_inst(cil.StaticCallNode('Int__init', int_instance))
+        attr_index = self.root.get_type('Int').attributes.index('Int_value')
+        attr_at = cil.AttributeAt('Int_value', attr_index)
+        self.add_inst(cil.SetAttrNode(int_instance, attr_at, value))
+        return int_instance
+
+    @visitor.when(ast.BooleanNode)
+    def visit(self, node: ast.BooleanNode, scope: Scope):
+        """Build a Bool object whose `Bool_value` slot holds 1 (true) or 0 (false); returns its local."""
+        # The lexer accepts any casing after the first letter (e.g. `tRUE`) and
+        # keeps the source text, so the comparison has to ignore case.
+        value = self.visit(str(node.lex).lower() == 'true', scope)
+        bool_instance = self.add_local('bool_instance')
+        self.add_inst(cil.StaticCallNode('Bool__init', bool_instance))
+        attr_index = self.root.get_type('Bool').attributes.index('Bool_value')
+        # Same spelling as the slot declared for the Bool type ('Bool_value').
+        attr_at = cil.AttributeAt('Bool_value', attr_index)
+        self.add_inst(cil.SetAttrNode(bool_instance, attr_at, value))
+        return bool_instance
+
+    @visitor.when(ast.VariableNode)
+    def visit(self, node: ast.VariableNode, _):
+        """A variable lowers to its own name; no instruction is emitted."""
+        return node.lex
+
+    @visitor.when(ast.PlusNode)
+    def visit(self, node: ast.PlusNode, scope: Scope):
+        return self.build_binary_node(cil.PlusNode, node, scope)
+
+    @visitor.when(ast.MinusNode)
+    def visit(self, node: ast.MinusNode, scope: Scope):
+        return self.build_binary_node(cil.MinusNode, node, scope)
+
+    @visitor.when(ast.StarNode)
+    def visit(self, node: ast.StarNode, scope: Scope):
+        return self.build_binary_node(cil.StarNode, node, scope)
+
+    @visitor.when(ast.DivNode)
+    def visit(self, node: ast.DivNode, scope: Scope):
+        return self.build_binary_node(cil.DivNode, node, scope)
+
+    @visitor.when(ast.LessThanNode)
+    def visit(self, node: ast.LessThanNode, scope: Scope):
+        return self.build_binary_node(cil.LessThanNode, node, scope)
+
+    @visitor.when(ast.LessEqualNode)
+    def visit(self, node: ast.LessEqualNode, scope: Scope):
+        return self.build_binary_node(cil.LessEqualNode, node, scope)
+
+    @visitor.when(ast.EqualNode)
+    def visit(self, node: ast.EqualNode, scope: Scope):
+        return self.build_binary_node(cil.EqualNode, node, scope)
+
+    def build_binary_node(self, new_node_cls, node: ast.BinaryNode, scope: Scope):
+        """
+        Shared lowering for binary operators.
+
+        Evaluates the left operand, then the right one, and emits
+        `new_node_cls(dest, left, right)` into a fresh `oper_dest` local, whose
+        name is returned.
+        """
+        left_dest = self.visit(node.left, scope)
+        right_dest = self.visit(node.right, scope)
+        oper_dest = self.add_local('oper_dest')
+        self.add_inst(new_node_cls(oper_dest, left_dest, right_dest))
+        return oper_dest
+
+    @visitor.when(ast.IsVoidNode)
+    def visit(self, node: ast.IsVoidNode, scope: Scope):
+        """Lower `isvoid expr`; returns the local receiving the test result."""
+        expr_dest = self.visit(node.expr, scope)
+        comp_res = self.add_local('comp_res')
+        self.add_inst(cil.IsVoidNode(comp_res, expr_dest))
+        return comp_res
+
+    @visitor.when(ast.NegationNode)
+    def visit(self, node: ast.NegationNode, scope: Scope):
+        """Lower `not expr`; the result local is reserved before the operand is lowered."""
+        neg_res = self.add_local('neg_res')
+        expr_res = self.visit(node.expr, scope)
+        self.add_inst(cil.NegationNode(neg_res, expr_res))
+        return
neg_res + + @visitor.when(ast.ComplementNode) + def visit(self, node: ast.ComplementNode, scope: Scope): + com_res = self.add_local('com_res') + expr_res = self.visit(node.expr, scope) + self.add_inst(cil.ComplementNode(com_res, expr_res)) + return com_res + + @visitor.when(str) + def visit(self, lex: str, _): + str_dest = self.add_local('str_dest') + self.add_inst(cil.LoadNode(str_dest, self.root.get_data_name(lex))) + return str_dest + + @visitor.when(bool) + def visit(self, bool_value: bool, _): + return int(bool_value) + + @visitor.when(int) + def visit(self, value: int, _): + return value diff --git a/src/coolcmp/codegen/cool2cil/types_data_visitor.py b/src/coolcmp/codegen/cool2cil/types_data_visitor.py new file mode 100644 index 000000000..7be822121 --- /dev/null +++ b/src/coolcmp/codegen/cool2cil/types_data_visitor.py @@ -0,0 +1,218 @@ +from __future__ import annotations +from copy import deepcopy + +from coolcmp.utils import visitor, ast, cil +from coolcmp.utils.semantic import Context, Type + + +class DotTypesDataVisitor: + """ + Builds the .TYPES and .DATA sections. 
+ """ + def __init__(self, context: Context): + super().__init__() + self.context = context + self.current_type: Type | None = None + self.root = cil.ProgramNode([], [], []) + self.types = self.root.dot_types + + @visitor.on('node') + def visit(self, node: ast.Node): + pass + + @visitor.when(ast.ProgramNode) + def visit(self, node: ast.ProgramNode): + self.root.set_data('""') + self.root.set_data('"Abort called from class "') + self.root.set_data('"\n"') + self.root.set_data('"RuntimeError: Case statement without a match branch\n"') + self.root.set_data('"RuntimeError: Expression is void\n"') + + # add Object, IO, String, Bool, Int and to types + self.types += [ + cil.TypeNode( + name='Object', + parent='_NoParent', + attrs=[], + methods={ + 'Object__init': 'Object__init', + 'Object_abort': 'Object_abort', + 'Object_type_name': 'Object_type_name', + 'Object_copy': 'Object_copy', + } + ), + cil.TypeNode( + name='IO', + parent='Object', + attrs=[], + methods={ + 'IO__init': 'IO__init', + 'IO_abort': 'Object_abort', + 'IO_type_name': 'Object_type_name', + 'IO_copy': 'Object_copy', + 'IO_out_string': 'IO_out_string', + 'IO_out_int': 'IO_out_int', + 'IO_in_string': 'IO_in_string', + 'IO_in_int': 'IO_in_int', + } + ), + cil.TypeNode( + name='String', + parent='Object', + attrs=[ + 'String_value', + ], + methods={ + 'String__init': 'String__init', + 'String_abort': 'Object_abort', + 'String_type_name': 'Object_type_name', + 'String_copy': 'Object_copy', + 'String_length': 'String_length', + 'String_concat': 'String_concat', + 'String_substr': 'String_substr', + } + ), + cil.TypeNode( + name='Bool', + parent='Object', + attrs=[ + 'Bool_value', + ], + methods={ + 'Bool__init': 'Bool__init', + 'Bool_abort': 'Object_abort', + 'Bool_type_name': 'Object_type_name', + 'Bool_copy': 'Object_copy', + } + ), + cil.TypeNode( + name='Int', + parent='Object', + attrs=[ + 'Int_value', + ], + methods={ + 'Int__init': 'Int__init', + 'Int_abort': 'Object_abort', + 'Int_type_name': 
'Object_type_name',
+                    'Int_copy': 'Object_copy',
+                }
+            ),
+            cil.TypeNode(
+                name='Void',
+                parent='Object',
+                attrs=[],
+                methods={
+                    # Keyed `<Type>__init` like every other built-in type above.
+                    'Void__init': 'Void__init',
+                    'Void_abort': 'Object_abort',
+                    'Void_type_name': 'Object_type_name',
+                    'Void_copy': 'Object_copy',
+                }
+            ),
+        ]
+
+        for class_ in node.declarations:
+            self.visit(class_)
+
+        return self.root
+
+    @visitor.when(ast.ClassDeclarationNode)
+    def visit(self, node: ast.ClassDeclarationNode):
+        """
+        Register the .TYPES entry of a user-defined class.
+
+        Attributes are stored as `<Type>_<attr>`, each with its declaration
+        node and a deep copy of its scope. Methods map `<Type>_<method>` to
+        `<Owner>_<method>`, where the owner is the second item yielded by
+        `all_methods()` (presumably the class providing the implementation --
+        confirm against `Type.all_methods`). The class features are then
+        traversed so that string literals reach the .DATA section.
+        """
+        type_ = self.context.get_type(node.id)
+        type_attributes: list[str] = []
+        type_methods: dict[str, str] = {}
+        # The TypeNode keeps references to both containers, which are filled below.
+        type_node = cil.TypeNode(
+            name=type_.name,
+            parent=type_.parent.name,
+            attrs=type_attributes,
+            methods=type_methods
+        )
+
+        for attr, _ in type_.all_attributes():
+            type_attributes.append(f'{type_.name}_{attr.name}')
+            type_node.add_attr_node(f'{node.id}_{attr.name}', attr.node, deepcopy(attr.scope))
+
+        for meth, owner in type_.all_methods():
+            # if owner.name in ('Object', 'IO', 'String', 'String', 'Bool', 'Int', ):
+            #     func_target = meth.name
+            # else:
+            type_methods[f'{node.id}_{meth.name}'] = f'{owner.name}_{meth.name}'
+
+        self.types.append(type_node)
+
+        for feature in node.features:
+            self.visit(feature)
+
+    @visitor.when(ast.AttrDeclarationNode)
+    def visit(self, node: ast.AttrDeclarationNode):
+        # Only traverses: the initializer may contain string literals.
+        self.visit(node.expr)
+
+    @visitor.when(ast.FuncDeclarationNode)
+    def visit(self, node: ast.FuncDeclarationNode):
+        self.visit(node.body)
+
+    @visitor.when(ast.LetDeclarationNode)
+    def visit(self, node: ast.LetDeclarationNode):
+        self.visit(node.expr)
+
+    @visitor.when(ast.ParenthesisExpr)
+    def visit(self, node: ast.ParenthesisExpr):
+        self.visit(node.expr)
+
+    @visitor.when(ast.BlockNode)
+    def visit(self, node: ast.BlockNode):
+        for expr in node.expressions:
+            self.visit(expr)
+
+    @visitor.when(ast.LetNode)
+    def visit(self, node: ast.LetNode):
+        for declaration in node.declarations:
+            self.visit(declaration)
+
+        self.visit(node.expr)
+
+    @visitor.when(ast.CaseBranchNode)
+    def visit(self, node:
ast.CaseBranchNode): + self.visit(node.expr) + + @visitor.when(ast.CaseNode) + def visit(self, node: ast.CaseNode): + self.visit(node.expr) + for case in node.cases: + self.visit(case) + + @visitor.when(ast.AssignNode) + def visit(self, node: ast.AssignNode): + self.visit(node.expr) + + @visitor.when(ast.ConditionalNode) + def visit(self, node: ast.ConditionalNode): + self.visit(node.if_expr) + self.visit(node.then_expr) + self.visit(node.else_expr) + + @visitor.when(ast.WhileNode) + def visit(self, node: ast.WhileNode): + self.visit(node.condition) + self.visit(node.body) + + @visitor.when(ast.CallNode) + def visit(self, node: ast.CallNode): + if node.obj is not None: + self.visit(node.obj) + + for arg in node.args: + self.visit(arg) + + @visitor.when(ast.BinaryNode) + def visit(self, node: ast.BinaryNode): + self.visit(node.left) + self.visit(node.right) + + @visitor.when(ast.UnaryNode) + def visit(self, node: ast.UnaryNode): + self.visit(node.expr) + + @visitor.when(ast.StringNode) + def visit(self, node: ast.StringNode): + self.root.set_data(node.lex) diff --git a/src/coolcmp/errors.py b/src/coolcmp/errors.py new file mode 100644 index 000000000..6368bd7e3 --- /dev/null +++ b/src/coolcmp/errors.py @@ -0,0 +1,35 @@ +""" +Compilation errors. +""" + +# lexical analysis +LEX_ERROR = '(%s, %s) - LexicographicError: Unexpected symbol "%s".' +UNT_STR = '(%s, %s) - LexicographicError: Unterminated string.' +EOF_STR = '(%s, %s) - LexicographicError: EOF in string.' +NULL_STR = '(%s, %s) - LexicographicError: String contains null character.' +EOF_COMM = '(%s, %s) - LexicographicError: EOF in comment.' + +# parsing +SYN_ERROR = '(%s, %s) - SyntacticError: Syntax error at or near "%s".' +SYN_EOF = '(0, 0) - SyntacticError: ERROR at or near EOF.' # empty program + +# semantic analysis +CANNOT_INHERIT = '%s - SemanticError: Type "%s" cannot be inherited.' +CYCLIC_INHERITANCE = '%s - SemanticError: Cyclic inheritance involving "%s".' 
+TYPE_ALREADY_DEFINED = '%s - SemanticError: Type "%s" already defined.' +ATTRIBUTE_DEFINED_IN_PARENT = '%s - SemanticError: Attribute "%s" is already defined in parent.' +ATTRIBUTE_ALREADY_DEFINED = '%s - SemanticError: Attribute "%s" already defined in "%s".' +METHOD_ALREADY_DEFINED = '%s - SemanticError: Method "%s" already defined in "%s".' +WRONG_SIGNATURE = '%s - SemanticError: Method "%s" already defined in "%s" with a different signature.' +LOCAL_ALREADY_DEFINED = '%s - SemanticError: Variable "%s" is already defined in method "%s".' +SELF_TYPE_INVALID_PARAM_TYPE = '%s - SemanticError: SELF_TYPE cannot be a static type for a parameter.' +SELF_INVALID_ID = '%s - SemanticError: Cannot define "self" as attribute of a class or an identifier.' +SELF_IS_READONLY = '%s - SemanticError: Variable "self" is read-only.' +CASE_DUPLICATED_BRANCH = '%s - SemanticError: Duplicate branch "%s" in case statement.' +UNDEFINED_METHOD = '%s - AttributeError: Method "%s" is not defined in "%s" or inherited.' +UNDEFINED_TYPE = '%s - TypeError: Type "%s" is not defined.' +INCOMPATIBLE_TYPES = '%s - TypeError: Cannot convert "%s" into "%s".' +INVALID_ANCESTOR = '%s - TypeError: Class "%s" has not class "%s" as ancestor.' +INVALID_BINARY_OPERATOR = '%s - TypeError: Operation "%s" is not defined between "%s" and "%s".' +INVALID_UNARY_OPERATOR = '%s - TypeError: Operation "%s" is not defined for "%s".' +VARIABLE_NOT_DEFINED = '%s - NameError: Variable "%s" is not defined in "%s".' 
diff --git a/src/coolcmp/lexing_parsing/__init__.py b/src/coolcmp/lexing_parsing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/coolcmp/lexing_parsing/lexer.py b/src/coolcmp/lexing_parsing/lexer.py new file mode 100644 index 000000000..b4a02dd9e --- /dev/null +++ b/src/coolcmp/lexing_parsing/lexer.py @@ -0,0 +1,220 @@ +from ply import lex +from ply.lex import TOKEN + + +import coolcmp.errors as err +from coolcmp.utils import find_column + +states = ( + ('STR', 'exclusive'), + ('COMM', 'exclusive') +) + +reserved = { + 'class': 'CLASS', + 'inherits': 'INHERITS', + 'if': 'IF', + 'then': 'THEN', + 'else': 'ELSE', + 'fi': 'FI', + 'while': 'WHILE', + 'loop': 'LOOP', + 'pool': 'POOL', + 'case': 'CASE', + 'of': 'OF', + 'esac': 'ESAC', + 'let': 'LET', + 'new': 'NEW', + 'isvoid': 'ISVOID', + 'not': 'NOT', + 'in': 'IN', +} + +tokens = [ + # Identifiers + 'TYPE', 'ID', + + # Built-in types + 'INT', 'STRING', 'BOOL', + + # Special Notation + 'PLUS', 'MINUS', 'STAR', 'DIV', 'LESS', 'LEQ', 'EQ', 'COMP', + 'COLON', 'SEMI', 'COMMA', 'DOT', 'ASSIGN', 'ARROW', 'AT', + 'OPAR', 'CPAR', 'OCUR', 'CCUR' + +] + list(reserved.values()) + +# Tokens defined as strings go after defined in functions. +# This order is not relevant, they are ordered by length (longer first). +t_PLUS = r'\+' +t_MINUS = r'\-' +t_STAR = r'\*' +t_DIV = r'\/' +t_LESS = r'\<' +t_LEQ = r'\<\=' +t_EQ = r'\=' +t_COMP = r'\~' +t_COLON = r'\:' +t_SEMI = r'\;' +t_COMMA = r'\,' +t_DOT = r'\.' 
+t_ASSIGN = r'\<\-' +t_ARROW = r'\=\>' +t_AT = r'\@' +t_OPAR = r'\(' +t_CPAR = r'\)' +t_OCUR = r'\{' +t_CCUR = r'\}' + +t_ignore = ' \t\f\r' +t_ignore_comment = r'\-\-[^\n]*' + + +# ######################### +# ##### INITIAL state ##### +# ######################### + +# match types +@TOKEN(r'[A-Z]\w*') +def t_TYPE(t: lex.LexToken) -> lex.LexToken: + if t.value[0] in ('t', 'f') and t.value.lower() in ('true', 'false'): + t.type = 'BOOL' + else: + t.type = reserved.get(t.value.lower(), 'TYPE') + + return t + + +# match ids +@TOKEN(r'[a-z]\w*') +def t_ID(t: lex.LexToken) -> lex.LexToken: + if t.value[0] in ('t', 'f') and t.value.lower() in ('true', 'false'): + t.type = 'BOOL' + else: + t.type = reserved.get(t.value.lower(), 'ID') + + return t + + +# match integers +@TOKEN(r'\d+') +def t_INT(t: lex.LexToken) -> lex.LexToken: + t.value = int(t.value) + + return t + + +# also defined for COMM state, newline tracker +@TOKEN(r'\n+') +def t_INITIAL_COMM_newline(t: lex.LexToken): + t.lexer.lineno += len(t.value) + + +# in case of error +def t_error(t: lex.LexToken): + t.lexer.skip(1) + + line, col = t.lexer.lineno, find_column(t.lexer.lexdata, t.lexpos) + errors.append(err.LEX_ERROR % (line, col, t.value[0])) + + +# ##################### +# ##### STR state ##### +# ##################### + +t_STR_ignore = '' + + +@TOKEN(r'"') +def t_begin_STR(t: lex.LexToken): + t.lexer.string_start = t.lexer.lexpos - 1 + t.lexer.push_state('STR') + + +@TOKEN(r'"') +def t_STR_end(t: lex.LexToken) -> lex.LexToken: + t.value = t.lexer.lexdata[t.lexer.string_start: t.lexer.lexpos] + t.type = 'STRING' + t.lexer.pop_state() + + return t + + +@TOKEN(r'\n+') +def t_STR_newline(t: lex.LexToken): + line, col = t.lexer.lineno, find_column(t.lexer.lexdata, t.lexpos) + errors.append(err.UNT_STR % (line, col)) + + t.lexer.lineno += len(t.value) + t.lexer.pop_state() + + +@TOKEN(r'\\(.|\n)') +def t_STR_escaped(t: lex.LexToken): + lookahead = t.value[1] + + if lookahead == '\n': + t.lexer.lineno += 1 + + 
+@TOKEN(r'\x00')
+def t_STR_null(t: lex.LexToken):
+    # The NUL matched by this rule is already consumed when the function runs,
+    # so no skip() here: skipping would drop the character that follows it
+    # (possibly the closing quote of the string).
+    line, col = t.lexer.lineno, find_column(t.lexer.lexdata, t.lexpos)
+    errors.append(err.NULL_STR % (line, col))
+
+
+# plain run of string characters: nothing to record, the STRING token is
+# sliced from the input when the closing quote is found
+@TOKEN(r'[^"\n\\\x00]+')
+def t_STR_char(t: lex.LexToken):
+    pass
+
+
+# no STR rule matched at the current position: step over one character
+# and report it with the EOF-in-string message
+def t_STR_error(t: lex.LexToken):
+    t.lexer.skip(1)
+    line, col = t.lexer.lineno, find_column(t.lexer.lexdata, t.lexpos)
+    errors.append(err.EOF_STR % (line, col))
+
+
+# input ended while still inside a string
+def t_STR_eof(t: lex.LexToken):
+    line, col = t.lexer.lineno, find_column(t.lexer.lexdata, t.lexpos)
+    errors.append(err.EOF_STR % (line, col))
+
+
+# ######################
+# ##### COMM state #####
+# ######################
+
+t_COMM_ignore = ''
+
+
+# opening "(*" seen in the INITIAL state: enter COMM with nesting level 1
+@TOKEN(r'\(\*')
+def t_begin_COMM(t: lex.LexToken):
+    t.lexer.comment_start = t.lexer.lexpos
+    t.lexer.level = 1
+    t.lexer.push_state('COMM')
+
+
+# nested "(*" inside a comment
+@TOKEN(r'\(\*')
+def t_COMM_new(t: lex.LexToken):
+    t.lexer.level += 1
+
+
+# "*)" closes one nesting level; the state is left when the outermost closes
+@TOKEN(r'\*\)')
+def t_COMM_end(t: lex.LexToken):
+    t.lexer.level -= 1
+
+    if t.lexer.level == 0:
+        t.lexer.pop_state()
+
+
+# any other character inside a comment is dropped silently
+def t_COMM_error(token):
+    token.lexer.skip(1)
+
+
+# input ended while still inside a comment
+def t_COMM_eof(t: lex.LexToken):
+    line, col = t.lexer.lineno, find_column(t.lexer.lexdata, t.lexpos)
+    errors.append(err.EOF_COMM % (line, col))
+
+
+errors = []
+lexer = lex.lex()
diff --git a/src/coolcmp/lexing_parsing/parser.py b/src/coolcmp/lexing_parsing/parser.py
new file mode 100644
index 000000000..d0a5daaa3
--- /dev/null
+++ b/src/coolcmp/lexing_parsing/parser.py
@@ -0,0 +1,403 @@
+import ply.yacc as yacc
+
+from coolcmp.utils import ast
+from coolcmp import errors as err
+from coolcmp.lexing_parsing import lexer
+from coolcmp.utils import find_column
+
+
+tokens = lexer.tokens
+
+# yacc precedence table: entries are listed from lowest to highest precedence
+precedence = (
+    ('right', 'ASSIGN'),
+    ('right', 'NOT'),
+    ('nonassoc', 'LEQ', 'LESS', 'EQ'),
+    ('left', 'PLUS', 'MINUS'),
+    ('left', 'STAR', 'DIV'),
+    ('right', 'ISVOID'),
+    ('right', 'COMP'),
+    ('left', 'AT'),
+    ('left', 'DOT')
+)
+
+
+def p_program(p):
+    """
+    program : class_list
+    """
+    p[0] =
ast.ProgramNode(p[1]) + + +def p_class_list(p): + """ + class_list : class_def SEMI class_list + | class_def SEMI + """ + if len(p) == 4: + p[0] = [p[1]] + p[3] + else: + p[0] = [p[1]] + + +def p_class_def(p): + """ + class_def : CLASS TYPE INHERITS TYPE OCUR feature_list CCUR + | CLASS TYPE OCUR feature_list CCUR + """ + if len(p) == 8: + p[0] = ast.ClassDeclarationNode(p[2], p[6], p[4]) + p[0].parent_pos = (p.lineno(4), find_column(p.lexer.lexdata, p.lexpos(4))) + else: + p[0] = ast.ClassDeclarationNode(p[2], p[4]) + p[0].set_pos(p.lineno(2), find_column(p.lexer.lexdata, p.lexpos(2))) + + +def p_feature_list(p): + """ + feature_list : attr_def SEMI feature_list + | func_def SEMI feature_list + | empty + """ + if len(p) == 4: + p[0] = [p[1]] + p[3] + else: + p[0] = [] + + +def p_attr_def(p): + """ + attr_def : ID COLON TYPE ASSIGN expr + | ID COLON TYPE + """ + if len(p) == 6: + p[0] = ast.AttrDeclarationNode(p[1], p[3], p[5]) + p[0].expr_pos = p[5].pos + else: + p[0] = ast.AttrDeclarationNode(p[1], p[3]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + p[0].type_pos = (p.lineno(3), find_column(p.lexer.lexdata, p.lexpos(3))) + + +def p_func_def(p): + """ + func_def : ID OPAR param_list CPAR COLON TYPE OCUR expr CCUR + | ID OPAR CPAR COLON TYPE OCUR expr CCUR + """ + if len(p) == 10: + p[0] = ast.FuncDeclarationNode(p[1], p[3], p[6], p[8]) + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + p[0].type_pos = (p.lineno(6), find_column(p.lexer.lexdata, p.lexpos(6))) + else: + p[0] = ast.FuncDeclarationNode(p[1], [], p[5], p[7]) + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + p[0].type_pos = (p.lineno(5), find_column(p.lexer.lexdata, p.lexpos(5))) + + +def p_param_list(p): + """ + param_list : ID COLON TYPE COMMA param_list + | ID COLON TYPE + """ + param = ast.ParamNode(p[1], p[3]) + param.set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + param.type_pos = (p.lineno(3), 
find_column(p.lexer.lexdata, p.lexpos(3))) + if len(p) == 6: + p[0] = [param] + p[5] + elif len(p) == 4: + p[0] = [param] + + +# expr productions + +def p_expr_assign(p): + """ + expr : ID ASSIGN expr + """ + p[0] = ast.AssignNode(p[1], p[3]) + + p[0].set_pos(p.lineno(2), find_column(p.lexer.lexdata, p.lexpos(2))) + + +def p_expr_list(p): + """ + expr_list : expr COMMA expr_list_not_empty + | expr + """ + if len(p) == 4: + p[0] = [p[1]] + p[3] + else: + p[0] = [p[1]] + + +def p_expr_list_empty(p): + """ + expr_list : empty + """ + p[0] = [] + + +def p_expr_list_not_empty(p): + """ + expr_list_not_empty : expr COMMA expr_list_not_empty + | expr + """ + if len(p) == 4: + p[0] = [p[1]] + p[3] + else: + p[0] = [p[1]] + + +def p_expr_func_call(p): + """ + expr : ID OPAR expr_list CPAR + | expr DOT ID OPAR expr_list CPAR + | expr AT TYPE DOT ID OPAR expr_list CPAR + """ + if len(p) == 5: + p[0] = ast.CallNode(p[1], p[3]) + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + elif len(p) == 7: + p[0] = ast.CallNode(p[3], p[5], p[1]) + p[0].set_pos(p.lineno(3), find_column(p.lexer.lexdata, p.lexpos(3))) + else: + p[0] = ast.CallNode(p[5], p[7], p[1], p[3]) + p[0].set_pos(p.lineno(5), find_column(p.lexer.lexdata, p.lexpos(5))) + p[0].parent_pos = (p.lineno(3), find_column(p.lexer.lexdata, p.lexpos(3))) + + +def p_expr_if(p): + """ + expr : IF expr THEN expr ELSE expr FI + """ + p[0] = ast.ConditionalNode(p[2], p[4], p[6]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_expr_while(p): + """ + expr : WHILE expr LOOP expr POOL + """ + p[0] = ast.WhileNode(p[2], p[4]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_block(p): + """ + block : expr SEMI block + | expr SEMI + """ + if len(p) == 4: + p[0] = [p[1]] + p[3] + else: + p[0] = [p[1]] + + +def p_expr_block(p): + """ + expr : OCUR block CCUR + """ + p[0] = ast.BlockNode(p[2]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, 
p.lexpos(1))) + + +def p_decl_list(p): + """ + decl_list : ID COLON TYPE ASSIGN expr COMMA decl_list + | ID COLON TYPE COMMA decl_list + | ID COLON TYPE ASSIGN expr + | ID COLON TYPE + """ + if len(p) > 4 and p[4] == '<-': + declaration = ast.LetDeclarationNode(p[1], p[3], p[5]) + declaration.type_pos = (p.lineno(3), find_column(p.lexer.lexdata, p.lexpos(3))) + declaration.expr_pos = p[5].pos + declaration.set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + if len(p) == 8: + p[0] = [declaration] + p[7] + else: + p[0] = [declaration] + else: + declaration = ast.LetDeclarationNode(p[1], p[3]) + declaration.type_pos = (p.lineno(3), find_column(p.lexer.lexdata, p.lexpos(3))) + declaration.set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + if len(p) == 6: + p[0] = [declaration] + p[5] + else: + p[0] = [declaration] + + # if len(p) == 8: + # p[0] = [ast.LetDeclarationNode(p[1], p[3], p[5])] + p[7] + # elif len(p) == 6: + # if p[4] == ',': + # p[0] = [ast.LetDeclarationNode(p[1], p[3])] + p[5] + # else: + # p[0] = [ast.LetDeclarationNode(p[1], p[3], p[5])] + # else: + # p[0] = [ast.LetDeclarationNode(p[1], p[3])] + + +def p_expr_let(p): + """ + expr : LET decl_list IN expr + """ + p[0] = ast.LetNode(p[2], p[4]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_case_list(p): + """ + case_list : ID COLON TYPE ARROW expr SEMI case_list + | ID COLON TYPE ARROW expr SEMI + """ + branch = ast.CaseBranchNode(p[1], p[3], p[5]) + branch.set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + branch.type_pos = (p.lineno(3), find_column(p.lexer.lexdata, p.lexpos(3))) + + if len(p) == 8: + p[0] = [branch] + p[7] + else: + p[0] = [branch] + + +def p_expr_case(p): + """ + expr : CASE expr OF case_list ESAC + """ + p[0] = ast.CaseNode(p[2], p[4]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_expr_new(p): + """ + expr : NEW TYPE + """ + p[0] = ast.InstantiateNode(p[2]) + + 
p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_expr_isvoid(p): + """ + expr : ISVOID expr + """ + p[0] = ast.IsVoidNode(p[2]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_expr_binary_op(p): + """ + expr : expr PLUS expr + | expr MINUS expr + | expr STAR expr + | expr DIV expr + | expr LESS expr + | expr LEQ expr + | expr EQ expr + """ + if p[2] == '+': + p[0] = ast.PlusNode(p[1], p[2], p[3]) + elif p[2] == '-': + p[0] = ast.MinusNode(p[1], p[2], p[3]) + elif p[2] == '/': + p[0] = ast.DivNode(p[1], p[2], p[3]) + elif p[2] == '*': + p[0] = ast.StarNode(p[1], p[2], p[3]) + elif p[2] == '<': + p[0] = ast.LessThanNode(p[1], p[2], p[3]) + elif p[2] == '<=': + p[0] = ast.LessEqualNode(p[1], p[2], p[3]) + else: + p[0] = ast.EqualNode(p[1], p[2], p[3]) + + p[0].set_pos(p.lineno(2), find_column(p.lexer.lexdata, p.lexpos(2))) + + +def p_expr_comp(p): + """ + expr : COMP expr + """ + p[0] = ast.ComplementNode(p[2]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_expr_not(p): + """ + expr : NOT expr + """ + p[0] = ast.NegationNode(p[2]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_expr_pars(p): + """ + expr : OPAR expr CPAR + """ + p[0] = p[2] + + +def p_expr_id(p): + """ + expr : ID + """ + p[0] = ast.VariableNode(p[1]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_expr_int(p): + """ + expr : INT + """ + p[0] = ast.IntegerNode(p[1]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_expr_string(p): + """ + expr : STRING + """ + p[0] = ast.StringNode(p[1]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +def p_expr_bool(p): + """ + expr : BOOL + """ + p[0] = ast.BooleanNode(p[1]) + + p[0].set_pos(p.lineno(1), find_column(p.lexer.lexdata, p.lexpos(1))) + + +# empty productions + +def p_empty(p): + """ + empty : + """ + pass + + +# error 
handling + +def p_error(p): + if p: + line, col = p.lineno, find_column(p.lexer.lexdata, p.lexpos) + errors.append(err.SYN_ERROR % (line, col, p.value)) + else: + errors.append(err.SYN_EOF) + + +errors = [] +parser = yacc.yacc() diff --git a/src/coolcmp/lexing_parsing/parsetab.py b/src/coolcmp/lexing_parsing/parsetab.py new file mode 100644 index 000000000..d30f40548 --- /dev/null +++ b/src/coolcmp/lexing_parsing/parsetab.py @@ -0,0 +1,83 @@ + +# parsetab.py +# This file is automatically generated. Do not edit. +# pylint: disable=W,C,R +_tabversion = '3.10' + +_lr_method = 'LALR' + +_lr_signature = 'rightASSIGNrightNOTnonassocLEQLESSEQleftPLUSMINUSleftSTARDIVrightISVOIDrightCOMPleftATleftDOTARROW ASSIGN AT BOOL CASE CCUR CLASS COLON COMMA COMP CPAR DIV DOT ELSE EQ ESAC FI ID IF IN INHERITS INT ISVOID LEQ LESS LET LOOP MINUS NEW NOT OCUR OF OPAR PLUS POOL SEMI STAR STRING THEN TYPE WHILE\n program : class_list\n \n class_list : class_def SEMI class_list\n | class_def SEMI\n \n class_def : CLASS TYPE INHERITS TYPE OCUR feature_list CCUR\n | CLASS TYPE OCUR feature_list CCUR\n \n feature_list : attr_def SEMI feature_list\n | func_def SEMI feature_list\n | empty\n \n attr_def : ID COLON TYPE ASSIGN expr\n | ID COLON TYPE\n \n func_def : ID OPAR param_list CPAR COLON TYPE OCUR expr CCUR\n | ID OPAR CPAR COLON TYPE OCUR expr CCUR\n \n param_list : ID COLON TYPE COMMA param_list\n | ID COLON TYPE\n \n expr : ID ASSIGN expr\n \n expr_list : expr COMMA expr_list_not_empty\n | expr\n \n expr_list : empty\n \n expr_list_not_empty : expr COMMA expr_list_not_empty\n | expr\n \n expr : ID OPAR expr_list CPAR\n | expr DOT ID OPAR expr_list CPAR\n | expr AT TYPE DOT ID OPAR expr_list CPAR\n \n expr : IF expr THEN expr ELSE expr FI\n \n expr : WHILE expr LOOP expr POOL\n \n block : expr SEMI block\n | expr SEMI\n \n expr : OCUR block CCUR\n \n decl_list : ID COLON TYPE ASSIGN expr COMMA decl_list\n | ID COLON TYPE COMMA decl_list\n | ID COLON TYPE ASSIGN expr\n | ID COLON TYPE\n \n 
expr : LET decl_list IN expr\n \n case_list : ID COLON TYPE ARROW expr SEMI case_list\n | ID COLON TYPE ARROW expr SEMI\n \n expr : CASE expr OF case_list ESAC\n \n expr : NEW TYPE\n \n expr : ISVOID expr\n \n expr : expr PLUS expr\n | expr MINUS expr\n | expr STAR expr\n | expr DIV expr\n | expr LESS expr\n | expr LEQ expr\n | expr EQ expr\n \n expr : COMP expr\n \n expr : NOT expr\n \n expr : OPAR expr CPAR\n \n expr : ID\n \n expr : INT\n \n expr : STRING\n \n expr : BOOL\n \n empty :\n ' + +_lr_action_items = {'CLASS':([0,5,],[4,4,]),'$end':([1,2,5,7,],[0,-1,-3,-2,]),'SEMI':([3,12,13,17,25,29,34,35,46,47,48,67,71,72,73,74,78,84,85,86,87,88,89,90,91,94,102,109,114,120,123,125,127,135,138,140,],[5,18,19,-5,-10,-4,-49,-9,-50,-51,-52,95,-37,-38,-46,-47,-15,-39,-40,-41,-42,-43,-44,-45,-48,-28,-21,-33,-12,-25,-36,-11,-22,-24,-23,141,]),'TYPE':([4,8,20,31,33,42,50,55,97,124,],[6,10,25,49,51,71,76,83,110,132,]),'INHERITS':([6,],[8,]),'OCUR':([6,10,30,36,37,38,39,41,43,44,45,51,52,53,56,57,58,59,60,61,62,76,77,92,93,95,96,100,103,104,119,121,126,128,137,],[9,16,39,39,39,39,39,39,39,39,39,77,39,39,39,39,39,39,39,39,39,100,39,39,39,39,39,39,39,39,39,39,39,39,39,]),'ID':([9,16,18,19,21,30,36,37,38,39,40,41,43,44,45,52,53,54,56,57,58,59,60,61,62,75,77,92,93,95,96,98,100,103,104,105,119,121,122,126,128,136,137,141,],[15,15,15,15,26,34,34,34,34,34,69,34,34,34,34,34,34,82,34,34,34,34,34,34,34,26,34,34,34,34,34,112,34,34,34,118,34,34,69,34,34,69,34,112,]),'CCUR':([9,11,14,16,18,19,22,23,24,34,46,47,48,66,71,72,73,74,78,84,85,86,87,88,89,90,91,94,95,101,102,108,109,113,120,123,127,135,138,],[-53,17,-8,-53,-53,-53,29,-6,-7,-49,-50,-51,-52,94,-37,-38,-46,-47,-15,-39,-40,-41,-42,-43,-44,-45,-48,-28,-27,114,-21,-26,-33,125,-25,-36,-22,-24,-23,]),'COLON':([15,26,28,32,69,112,],[20,31,33,50,97,124,]),'OPAR':([15,30,34,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,82,92,93,95,96,100,103,104,118,119,121,126,128,137,],[21,36,53,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36
,104,36,36,36,36,36,36,36,128,36,36,36,36,36,]),'CPAR':([21,27,34,46,47,48,49,53,63,71,72,73,74,78,79,80,81,84,85,86,87,88,89,90,91,94,99,102,104,109,115,116,117,120,123,127,128,133,134,135,138,],[28,32,-49,-50,-51,-52,-14,-53,91,-37,-38,-46,-47,-15,102,-17,-18,-39,-40,-41,-42,-43,-44,-45,-48,-28,-13,-21,-53,-33,-20,-16,127,-25,-36,-22,-53,-19,138,-24,-23,]),'ASSIGN':([25,34,110,],[30,52,121,]),'IF':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,]),'WHILE':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,]),'LET':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,40,]),'CASE':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,]),'NEW':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,]),'ISVOID':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,]),'COMP':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,]),'NOT':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[45,45,45,45,45,45,45,45,45,4
5,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,]),'INT':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,]),'STRING':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,]),'BOOL':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,]),'DOT':([34,35,46,47,48,63,64,65,67,70,71,72,73,74,78,80,83,84,85,86,87,88,89,90,91,94,101,102,106,107,109,113,115,120,123,127,129,130,135,138,140,],[-49,54,-50,-51,-52,54,54,54,54,54,-37,54,54,54,54,54,105,54,54,54,54,54,54,54,-48,-28,54,-21,54,54,54,54,54,-25,-36,-22,54,54,-24,-23,54,]),'AT':([34,35,46,47,48,63,64,65,67,70,71,72,73,74,78,80,84,85,86,87,88,89,90,91,94,101,102,106,107,109,113,115,120,123,127,129,130,135,138,140,],[-49,55,-50,-51,-52,55,55,55,55,55,-37,55,55,55,55,55,55,55,55,55,55,55,55,-48,-28,55,-21,55,55,55,55,55,-25,-36,-22,55,55,-24,-23,55,]),'PLUS':([34,35,46,47,48,63,64,65,67,70,71,72,73,74,78,80,84,85,86,87,88,89,90,91,94,101,102,106,107,109,113,115,120,123,127,129,130,135,138,140,],[-49,56,-50,-51,-52,56,56,56,56,56,-37,-38,-46,56,56,56,-39,-40,-41,-42,56,56,56,-48,-28,56,-21,56,56,56,56,56,-25,-36,-22,56,56,-24,-23,56,]),'MINUS':([34,35,46,47,48,63,64,65,67,70,71,72,73,74,78,80,84,85,86,87,88,89,90,91,94,101,102,106,107,109,113,115,120,123,127,129,130,135,138,140,],[-49,57,-50,-51,-52,57,57,57,57,57,-37,-38,-46,57,57,57,-39,-40,-41,-42,57,57,57,-48,-28,57,-21,57,57,57,57,57,-25,-36,-22,57,57,-24,-23,57,]),'STAR':([34,35,46,47,48,63,64,65,67,70,71,72,73,74,78,80,84,85,86,87,88,89,90,91,94,101,102,106,107,109,113,115,120,123,127,129,130,135,138,140,],[-49,58,-
50,-51,-52,58,58,58,58,58,-37,-38,-46,58,58,58,58,58,-41,-42,58,58,58,-48,-28,58,-21,58,58,58,58,58,-25,-36,-22,58,58,-24,-23,58,]),'DIV':([34,35,46,47,48,63,64,65,67,70,71,72,73,74,78,80,84,85,86,87,88,89,90,91,94,101,102,106,107,109,113,115,120,123,127,129,130,135,138,140,],[-49,59,-50,-51,-52,59,59,59,59,59,-37,-38,-46,59,59,59,59,59,-41,-42,59,59,59,-48,-28,59,-21,59,59,59,59,59,-25,-36,-22,59,59,-24,-23,59,]),'LESS':([34,35,46,47,48,63,64,65,67,70,71,72,73,74,78,80,84,85,86,87,88,89,90,91,94,101,102,106,107,109,113,115,120,123,127,129,130,135,138,140,],[-49,60,-50,-51,-52,60,60,60,60,60,-37,-38,-46,60,60,60,-39,-40,-41,-42,None,None,None,-48,-28,60,-21,60,60,60,60,60,-25,-36,-22,60,60,-24,-23,60,]),'LEQ':([34,35,46,47,48,63,64,65,67,70,71,72,73,74,78,80,84,85,86,87,88,89,90,91,94,101,102,106,107,109,113,115,120,123,127,129,130,135,138,140,],[-49,61,-50,-51,-52,61,61,61,61,61,-37,-38,-46,61,61,61,-39,-40,-41,-42,None,None,None,-48,-28,61,-21,61,61,61,61,61,-25,-36,-22,61,61,-24,-23,61,]),'EQ':([34,35,46,47,48,63,64,65,67,70,71,72,73,74,78,80,84,85,86,87,88,89,90,91,94,101,102,106,107,109,113,115,120,123,127,129,130,135,138,140,],[-49,62,-50,-51,-52,62,62,62,62,62,-37,-38,-46,62,62,62,-39,-40,-41,-42,None,None,None,-48,-28,62,-21,62,62,62,62,62,-25,-36,-22,62,62,-24,-23,62,]),'THEN':([34,46,47,48,64,71,72,73,74,78,84,85,86,87,88,89,90,91,94,102,109,120,123,127,135,138,],[-49,-50,-51,-52,92,-37,-38,-46,-47,-15,-39,-40,-41,-42,-43,-44,-45,-48,-28,-21,-33,-25,-36,-22,-24,-23,]),'LOOP':([34,46,47,48,65,71,72,73,74,78,84,85,86,87,88,89,90,91,94,102,109,120,123,127,135,138,],[-49,-50,-51,-52,93,-37,-38,-46,-47,-15,-39,-40,-41,-42,-43,-44,-45,-48,-28,-21,-33,-25,-36,-22,-24,-23,]),'OF':([34,46,47,48,70,71,72,73,74,78,84,85,86,87,88,89,90,91,94,102,109,120,123,127,135,138,],[-49,-50,-51,-52,98,-37,-38,-46,-47,-15,-39,-40,-41,-42,-43,-44,-45,-48,-28,-21,-33,-25,-36,-22,-24,-23,]),'COMMA':([34,46,47,48,49,71,72,73,74,78,80,84,85,86,87,88,89,90,91,94,102,109,110,115,120,123
,127,130,135,138,],[-49,-50,-51,-52,75,-37,-38,-46,-47,-15,103,-39,-40,-41,-42,-43,-44,-45,-48,-28,-21,-33,122,126,-25,-36,-22,136,-24,-23,]),'ELSE':([34,46,47,48,71,72,73,74,78,84,85,86,87,88,89,90,91,94,102,106,109,120,123,127,135,138,],[-49,-50,-51,-52,-37,-38,-46,-47,-15,-39,-40,-41,-42,-43,-44,-45,-48,-28,-21,119,-33,-25,-36,-22,-24,-23,]),'POOL':([34,46,47,48,71,72,73,74,78,84,85,86,87,88,89,90,91,94,102,107,109,120,123,127,135,138,],[-49,-50,-51,-52,-37,-38,-46,-47,-15,-39,-40,-41,-42,-43,-44,-45,-48,-28,-21,120,-33,-25,-36,-22,-24,-23,]),'FI':([34,46,47,48,71,72,73,74,78,84,85,86,87,88,89,90,91,94,102,109,120,123,127,129,135,138,],[-49,-50,-51,-52,-37,-38,-46,-47,-15,-39,-40,-41,-42,-43,-44,-45,-48,-28,-21,-33,-25,-36,-22,135,-24,-23,]),'IN':([34,46,47,48,68,71,72,73,74,78,84,85,86,87,88,89,90,91,94,102,109,110,120,123,127,130,131,135,138,139,],[-49,-50,-51,-52,96,-37,-38,-46,-47,-15,-39,-40,-41,-42,-43,-44,-45,-48,-28,-21,-33,-32,-25,-36,-22,-31,-30,-24,-23,-29,]),'ESAC':([111,141,142,],[123,-35,-34,]),'ARROW':([132,],[137,]),} + +_lr_action = {} +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = {} + _lr_action[_x][_k] = _y +del _lr_action_items + +_lr_goto_items = {'program':([0,],[1,]),'class_list':([0,5,],[2,7,]),'class_def':([0,5,],[3,3,]),'feature_list':([9,16,18,19,],[11,22,23,24,]),'attr_def':([9,16,18,19,],[12,12,12,12,]),'func_def':([9,16,18,19,],[13,13,13,13,]),'empty':([9,16,18,19,53,104,128,],[14,14,14,14,81,81,81,]),'param_list':([21,75,],[27,99,]),'expr':([30,36,37,38,39,41,43,44,45,52,53,56,57,58,59,60,61,62,77,92,93,95,96,100,103,104,119,121,126,128,137,],[35,63,64,65,67,70,72,73,74,78,80,84,85,86,87,88,89,90,101,106,107,67,109,113,115,80,129,130,115,80,140,]),'block':([39,95,],[66,108,]),'decl_list':([40,122,136,],[68,131,139,]),'expr_list':([53,104,128,],[79,117,134,]),'case_list':([98,141,],[111,142,]),'expr_list_not_empty':([103,126,],[116,133,]),} + +_lr_goto = {} +for 
_k, _v in _lr_goto_items.items(): + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y +del _lr_goto_items +_lr_productions = [ + ("S' -> program","S'",1,None,None,None), + ('program -> class_list','program',1,'p_program','parser.py',26), + ('class_list -> class_def SEMI class_list','class_list',3,'p_class_list','parser.py',33), + ('class_list -> class_def SEMI','class_list',2,'p_class_list','parser.py',34), + ('class_def -> CLASS TYPE INHERITS TYPE OCUR feature_list CCUR','class_def',7,'p_class_def','parser.py',44), + ('class_def -> CLASS TYPE OCUR feature_list CCUR','class_def',5,'p_class_def','parser.py',45), + ('feature_list -> attr_def SEMI feature_list','feature_list',3,'p_feature_list','parser.py',57), + ('feature_list -> func_def SEMI feature_list','feature_list',3,'p_feature_list','parser.py',58), + ('feature_list -> empty','feature_list',1,'p_feature_list','parser.py',59), + ('attr_def -> ID COLON TYPE ASSIGN expr','attr_def',5,'p_attr_def','parser.py',69), + ('attr_def -> ID COLON TYPE','attr_def',3,'p_attr_def','parser.py',70), + ('func_def -> ID OPAR param_list CPAR COLON TYPE OCUR expr CCUR','func_def',9,'p_func_def','parser.py',84), + ('func_def -> ID OPAR CPAR COLON TYPE OCUR expr CCUR','func_def',8,'p_func_def','parser.py',85), + ('param_list -> ID COLON TYPE COMMA param_list','param_list',5,'p_param_list','parser.py',99), + ('param_list -> ID COLON TYPE','param_list',3,'p_param_list','parser.py',100), + ('expr -> ID ASSIGN expr','expr',3,'p_expr_assign','parser.py',115), + ('expr_list -> expr COMMA expr_list_not_empty','expr_list',3,'p_expr_list','parser.py',124), + ('expr_list -> expr','expr_list',1,'p_expr_list','parser.py',125), + ('expr_list -> empty','expr_list',1,'p_expr_list_empty','parser.py',135), + ('expr_list_not_empty -> expr COMMA expr_list_not_empty','expr_list_not_empty',3,'p_expr_list_not_empty','parser.py',142), + ('expr_list_not_empty -> 
expr','expr_list_not_empty',1,'p_expr_list_not_empty','parser.py',143), + ('expr -> ID OPAR expr_list CPAR','expr',4,'p_expr_func_call','parser.py',153), + ('expr -> expr DOT ID OPAR expr_list CPAR','expr',6,'p_expr_func_call','parser.py',154), + ('expr -> expr AT TYPE DOT ID OPAR expr_list CPAR','expr',8,'p_expr_func_call','parser.py',155), + ('expr -> IF expr THEN expr ELSE expr FI','expr',7,'p_expr_if','parser.py',171), + ('expr -> WHILE expr LOOP expr POOL','expr',5,'p_expr_while','parser.py',180), + ('block -> expr SEMI block','block',3,'p_block','parser.py',189), + ('block -> expr SEMI','block',2,'p_block','parser.py',190), + ('expr -> OCUR block CCUR','expr',3,'p_expr_block','parser.py',200), + ('decl_list -> ID COLON TYPE ASSIGN expr COMMA decl_list','decl_list',7,'p_decl_list','parser.py',209), + ('decl_list -> ID COLON TYPE COMMA decl_list','decl_list',5,'p_decl_list','parser.py',210), + ('decl_list -> ID COLON TYPE ASSIGN expr','decl_list',5,'p_decl_list','parser.py',211), + ('decl_list -> ID COLON TYPE','decl_list',3,'p_decl_list','parser.py',212), + ('expr -> LET decl_list IN expr','expr',4,'p_expr_let','parser.py',245), + ('case_list -> ID COLON TYPE ARROW expr SEMI case_list','case_list',7,'p_case_list','parser.py',254), + ('case_list -> ID COLON TYPE ARROW expr SEMI','case_list',6,'p_case_list','parser.py',255), + ('expr -> CASE expr OF case_list ESAC','expr',5,'p_expr_case','parser.py',269), + ('expr -> NEW TYPE','expr',2,'p_expr_new','parser.py',278), + ('expr -> ISVOID expr','expr',2,'p_expr_isvoid','parser.py',287), + ('expr -> expr PLUS expr','expr',3,'p_expr_binary_op','parser.py',296), + ('expr -> expr MINUS expr','expr',3,'p_expr_binary_op','parser.py',297), + ('expr -> expr STAR expr','expr',3,'p_expr_binary_op','parser.py',298), + ('expr -> expr DIV expr','expr',3,'p_expr_binary_op','parser.py',299), + ('expr -> expr LESS expr','expr',3,'p_expr_binary_op','parser.py',300), + ('expr -> expr LEQ 
def check_semantics(ast) -> tuple[list[str], Context, Scope]:
    """
    Run the semantic passes over ``ast`` in order: TypeCollector, TypeBuilder,
    TypeConsistence and finally TypeChecker.

    Each pass receives the context and error list exposed by the previous one.

    :param ast: root node of the program to analyse.
    :return: ``(errors, context, scope)`` where ``errors`` and ``context`` are read from the
             type checker and ``scope`` is whatever the checker's visit of ``ast`` returns.
    """
    first_pass = TypeCollector()
    first_pass.visit(ast)
    context, errors = first_pass.context, first_pass.errors

    # The two middle passes share the same (context, errors) constructor shape.
    for pass_cls in (TypeBuilder, TypeConsistence):
        current = pass_cls(context, errors)
        current.visit(ast)
        context, errors = current.context, current.errors

    type_checker = TypeChecker(context, errors)
    root_scope = type_checker.visit(ast)

    return type_checker.errors, type_checker.context, root_scope
class TypeBuilder:
    """
    Fills the types already registered in the context with their parent, methods and attributes.

    Whenever a referenced type cannot be resolved, ``ErrorType`` is used in its place and a
    message is appended to ``errors``.
    """

    def __init__(self, context: Context, errors: list[str]):
        self.errors = errors
        self.context = context
        # Type of the class whose features are currently being visited.
        self.current_type: Type | None = None

    @visitor.on('node')
    def visit(self, node):
        pass

    @visitor.when(ProgramNode)
    def visit(self, node: ProgramNode):
        """Visit every class declaration of the program, in declaration order."""
        for class_node in node.declarations:
            self.visit(class_node)

    @visitor.when(ClassDeclarationNode)
    def visit(self, node: ClassDeclarationNode):
        """Set the parent of the class named ``node.id`` and then visit its features."""
        self.current_type = self.context.get_type(node.id)
        parent_name = node.parent

        if parent_name is None:
            # No explicit parent: inherit from Object; a failure here is deliberately ignored.
            try:
                self.current_type.set_parent(self.context.get_type('Object'))
            except SemanticError:
                pass
        elif parent_name in ('SELF_TYPE', 'String', 'Int', 'Bool', node.id):
            # These names (and the class itself) are rejected as parents.
            self.current_type.set_parent(ErrorType())
            self.errors.append(err.CANNOT_INHERIT % (node.parent_pos, parent_name))
        else:
            try:
                parent_type = self.context.get_type(parent_name)
            except SemanticError:
                # The parent type is not defined.
                parent_type = ErrorType()
                self.errors.append(err.UNDEFINED_TYPE % (node.parent_pos, parent_name))

            try:
                self.current_type.set_parent(parent_type)
            except SemanticError:
                # This class already has a parent.
                # NOTE(review): CANNOT_INHERIT is formatted with three values here but with two
                # in the branch above; one of the two call sites presumably does not match the
                # format string - confirm against coolcmp.errors.
                self.errors.append(err.CANNOT_INHERIT % (node.pos, node.id, parent_name))

        for feature in node.features:
            self.visit(feature)

    @visitor.when(FuncDeclarationNode)
    def visit(self, node: FuncDeclarationNode):
        """Resolve the parameter and return types of a method and define it on the current type."""
        names, types = [], []
        for param in node.params:
            names.append(param.id)
            try:
                resolved = self.context.get_type(param.type)
            except SemanticError:
                resolved = ErrorType()
                self.errors.append(err.UNDEFINED_TYPE % (param.type_pos, param.type))
            types.append(resolved)

        try:
            return_type = self.context.get_type(node.return_type)
        except SemanticError:
            self.errors.append(err.UNDEFINED_TYPE % (node.pos, node.return_type))
            return_type = ErrorType()

        owner = self.current_type
        try:
            owner.define_method(node.id, names, types, return_type)
        except SemanticError:
            self.errors.append(err.METHOD_ALREADY_DEFINED % (node.pos, node.id, owner.name))

    @visitor.when(AttrDeclarationNode)
    def visit(self, node: AttrDeclarationNode):
        """
        Resolve the attribute type, give the node a default initializer when it has none,
        and define the attribute on the current type.
        """
        try:
            attr_type = self.context.get_type(node.type)
        except SemanticError:
            attr_type = ErrorType()
            self.errors.append(err.UNDEFINED_TYPE % (node.pos, node.type))

        if node.expr is None:
            # Default initializers, tried in this order; anything else defaults to `void`.
            defaults = (
                ('Int', IntegerNode, '0'),
                ('String', StringNode, '""'),
                ('Bool', BooleanNode, 'false'),
            )
            for type_name, node_cls, lexeme in defaults:
                if attr_type == self.context.get_type(type_name):
                    node.expr = node_cls(lexeme)
                    break
            else:
                node.expr = VariableNode('void')

        owner = self.current_type
        try:
            owner.define_attribute(node.id, attr_type, node.expr or VariableNode('void'), owner.name)
        except SemanticError:
            self.errors.append(err.ATTRIBUTE_ALREADY_DEFINED % (node.pos, node.id, owner.name))
class TypeChecker:
    """
    Type-checks the expressions of every class and builds the scope tree while doing so.

    Errors are appended to the ``errors`` list received in the constructor; expression visits
    return the static type computed for the expression (``ErrorType`` when it cannot be typed).
    """

    def __init__(self, context, errors):
        self.context: Context = context
        # Type of the class currently being visited.
        self.current_type: Type | None = None
        # Method currently being visited; still None while no method has been entered.
        self.current_method: Method | None = None
        self.errors = errors

    def _current_context_name(self) -> str:
        """Name used in 'variable not defined' messages: the current method, else the current class."""
        if self.current_method is not None:
            return self.current_method.name
        return self.current_type.name

    @visitor.on('node')
    def collect_features(self, node):
        pass

    @visitor.when(AttrDeclarationNode)
    def collect_features(self, node: AttrDeclarationNode, scope: Scope):
        """Declare the attribute in the class scope before any feature body is checked."""
        try:
            attr_type = self.context.get_type(node.type) if node.type != 'SELF_TYPE' else self.current_type
        except SemanticError:
            attr_type = ErrorType()
            self.errors.append(err.UNDEFINED_TYPE % (node.type_pos, node.type))
        scope.define_variable(node.id, attr_type, is_attr=True)

    @visitor.on('node')
    def visit(self, node, scope):
        pass

    @visitor.when(ProgramNode)
    def visit(self, node: ProgramNode, scope: Scope = None):
        """
        Create the root ('Object') scope and visit every class inside a child of its parent's scope.

        The ``scope`` argument is ignored; a fresh root scope is always created and returned.
        """
        scope = Scope('Object')
        scope.define_variable('void', VoidType())

        for attr in self.context.get_type('Object').attributes:
            scope.define_variable(attr.name, attr.type, is_attr=True)

        pending = [(class_node.id, class_node) for class_node in node.declarations]
        scopes = {'Object': scope, 'IO': scope.create_child('IO')}

        # Number of consecutive deferrals since a class was last visited.
        stalled = 0
        while pending:
            actual = pending.pop(0)
            class_name, class_node = actual
            type_ = self.context.get_type(class_name)

            if type_.parent.name != '':
                # Only the parent lookup decides whether the class must wait. A KeyError raised
                # while checking the class body must not be mistaken for "parent not visited yet".
                parent_scope = scopes.get(type_.parent.name)
                if parent_scope is None:
                    if stalled <= len(pending):
                        pending.append(actual)
                        stalled += 1
                        continue
                    # Every pending class has been deferred in a row, so this parent will never
                    # get a scope; hang the class from the root scope instead of looping forever.
                    parent_scope = scope
            else:
                parent_scope = scope

            stalled = 0
            scopes[type_.name] = parent_scope.create_child(type_.name)
            self.visit(class_node, scopes[type_.name])

        return scope

    @visitor.when(ClassDeclarationNode)
    def visit(self, node: ClassDeclarationNode, scope: Scope):
        """Declare ``self`` and all attributes, then check attributes first and methods afterwards."""
        self.current_type = self.context.get_type(node.id)
        scope.define_variable('self', self.current_type)

        for feature in node.features:
            self.collect_features(feature, scope)

        # Attribute initializers are checked in the class scope itself.
        for feature in node.features:
            if isinstance(feature, AttrDeclarationNode):
                self.visit(feature, scope)
        # Each method body gets its own child scope.
        for feature in node.features:
            if isinstance(feature, FuncDeclarationNode):
                self.visit(feature, scope.create_child(feature.id))

    @visitor.when(AttrDeclarationNode)
    def visit(self, node: AttrDeclarationNode, scope: Scope):
        """Check an attribute: no redefinition of an inherited one, valid name, conforming initializer."""
        # Check attribute override
        try:
            attr = self.current_type.parent.get_attribute(node.id, self.current_type.name)
            self.errors.append(err.ATTRIBUTE_DEFINED_IN_PARENT % (node.pos, attr.name))
        except SemanticError:
            pass

        if node.id == 'self':
            self.errors.append(err.SELF_INVALID_ID % (node.pos, ))

        try:
            attr_type = self.context.get_type(node.type) if node.type != 'SELF_TYPE' else self.current_type
        except SemanticError:
            attr_type = ErrorType()
            self.errors.append(err.UNDEFINED_TYPE % (node.type_pos, node.type))

        if node.expr is not None:
            expr_type = self.visit(node.expr, scope)
            if not expr_type.conforms_to(attr_type):
                self.errors.append(err.INCOMPATIBLE_TYPES % (node.expr_pos, expr_type.name, attr_type.name))

        # Remember the scope in which the attribute initializer was checked.
        self.current_type.get_attribute(node.id).scope = scope

    @visitor.when(FuncDeclarationNode)
    def visit(self, node: FuncDeclarationNode, scope: Scope):
        """Check a method: override signature, parameters, and body type against the return type."""
        self.current_method = self.current_type.get_method(node.id)

        # Check method override
        try:
            method, method_owner = self.current_type.parent.get_method(node.id, get_owner=True)
            if method != self.current_method:
                self.errors.append(err.WRONG_SIGNATURE % (node.pos, node.id, method_owner.name))
        except SemanticError:
            pass

        scope.define_variable('self', self.current_type, is_param=True)

        for param_node in node.params:
            self.visit(param_node, scope)

        try:
            ret_type = self.context.get_type(node.return_type) if node.return_type != 'SELF_TYPE' else self.current_type
        except SemanticError:
            # This error is already logged by the type builder, so it is not reported again.
            ret_type = ErrorType()

        expr_type = self.visit(node.body, scope)
        if not expr_type.conforms_to(ret_type):
            self.errors.append(err.INCOMPATIBLE_TYPES % (node.pos, expr_type.name, ret_type.name))

    @visitor.when(ParamNode)
    def visit(self, node: ParamNode, scope: Scope):
        """Declare a formal parameter in the method scope, rejecting duplicates and SELF_TYPE."""
        if not scope.is_local(node.id):
            if node.type == 'SELF_TYPE':
                type_ = ErrorType()
                self.errors.append(err.SELF_TYPE_INVALID_PARAM_TYPE % (node.type_pos, ))
            else:
                try:
                    type_ = self.context.get_type(node.type)
                except SemanticError:
                    # This error is already logged by the type builder, so it is not reported again.
                    type_ = ErrorType()
            scope.define_variable(node.id, type_, is_param=True)
        else:
            self.errors.append(err.LOCAL_ALREADY_DEFINED % (node.pos, node.id, self.current_method.name))

    @visitor.when(BlockNode)
    def visit(self, node: BlockNode, scope: Scope):
        """Check every expression of the block; the block has the type of the last one."""
        ret_type = ErrorType()
        for expr in node.expressions:
            ret_type = self.visit(expr, scope)
        return ret_type

    @visitor.when(LetNode)
    def visit(self, node: LetNode, scope: Scope):
        """Check the declarations and the body of a ``let`` inside one new child scope."""
        child_scope = scope.create_child(tag='_let_node')
        for declaration in node.declarations:
            self.visit(declaration, child_scope)

        return self.visit(node.expr, child_scope)

    @visitor.when(LetDeclarationNode)
    def visit(self, node: LetDeclarationNode, scope: Scope):
        """Declare one ``let`` variable and check that its initializer (if any) conforms to its type."""
        try:
            type_ = self.context.get_type(node.type) if node.type != 'SELF_TYPE' else self.current_type
        except SemanticError:
            self.errors.append(err.UNDEFINED_TYPE % (node.type_pos, node.type))
            type_ = ErrorType()

        if node.id == 'self':
            self.errors.append(err.SELF_INVALID_ID % (node.pos, ))
        else:
            # NOTE(review): the variable is defined before its own initializer is checked, so the
            # initializer can see the new binding - confirm this is intended before reordering,
            # since later phases may depend on the order in which the scope is populated.
            scope.define_variable(node.id, type_)

        expr_type: Type = self.visit(node.expr, scope) if node.expr is not None else None
        if expr_type is not None and not expr_type.conforms_to(type_):
            self.errors.append(err.INCOMPATIBLE_TYPES % (node.expr_pos, expr_type.name, type_.name))

        return type_

    @visitor.when(CaseNode)
    def visit(self, node: CaseNode, scope: Scope):
        """Check a ``case``: report each duplicated branch type once; the result is the join of all branches."""
        self.visit(node.expr, scope)

        seen_types = set()
        reported_types = set()
        for case_node in node.cases:
            type_ = case_node.type
            if type_ not in seen_types:
                seen_types.add(type_)
            elif type_ not in reported_types:
                self.errors.append(err.CASE_DUPLICATED_BRANCH % (case_node.type_pos, type_))
                reported_types.add(type_)

        types = [self.visit(case, scope) for case in node.cases]

        ret_type = types[0]
        for type_ in types[1:]:
            ret_type = ret_type.join(type_)

        return ret_type

    @visitor.when(CaseBranchNode)
    def visit(self, node: CaseBranchNode, scope: Scope):
        """Check one branch of a ``case`` in its own child scope and return the type of its expression."""
        child_scope = scope.create_child('_case_branch')
        try:
            id_type = self.context.get_type(node.type)
        except SemanticError:
            self.errors.append(err.UNDEFINED_TYPE % (node.type_pos, node.type))
            id_type = ErrorType()

        if node.id == 'self':
            self.errors.append(err.SELF_INVALID_ID % (node.pos, ))
        else:
            child_scope.define_variable(node.id, id_type)

        return self.visit(node.expr, child_scope)

    @visitor.when(AssignNode)
    def visit(self, node: AssignNode, scope: Scope):
        """Check an assignment; its type is the type of the assigned expression."""
        if node.id == 'self':
            self.errors.append(err.SELF_IS_READONLY % (node.pos, ))

        var = scope.find_variable(node.id)
        expr_type = self.visit(node.expr, scope)
        if var is None:
            # An assignment can appear in an attribute initializer, where no method is active.
            self.errors.append(err.VARIABLE_NOT_DEFINED % (node.pos, node.id, self._current_context_name()))
        elif not expr_type.conforms_to(var.type):
            # The position is the first value of this message everywhere else in this class.
            self.errors.append(err.INCOMPATIBLE_TYPES % (node.pos, expr_type.name, var.type.name))
        return expr_type

    @visitor.when(ConditionalNode)
    def visit(self, node: ConditionalNode, scope: Scope):
        """Check an ``if``: the condition must be Bool; the result is the join of both branches."""
        if_type = self.visit(node.if_expr, scope)
        then_type = self.visit(node.then_expr, scope)
        else_type = self.visit(node.else_expr, scope)

        if if_type != self.context.get_type('Bool'):
            self.errors.append(err.INCOMPATIBLE_TYPES % (node.pos, if_type.name, 'Bool'))

        return then_type.join(else_type)

    @visitor.when(WhileNode)
    def visit(self, node: WhileNode, scope: Scope):
        """Check a ``while``: the condition must be Bool; the loop itself has type Object."""
        cond_type = self.visit(node.condition, scope)
        if cond_type != self.context.get_type('Bool'):
            self.errors.append(err.INCOMPATIBLE_TYPES % (node.pos, cond_type.name, 'Bool'))

        self.visit(node.body, scope)

        return self.context.get_type('Object')

    @visitor.when(CallNode)
    def visit(self, node: CallNode, scope: Scope):
        """
        Check a method call (implicit-self, dynamic, or static ``obj@Type.method(...)``).

        Returns the declared return type of the method, the receiver's static type when the
        method returns SELF_TYPE, or ``ErrorType`` when the method cannot be found.
        """
        if node.obj is None:
            obj_type = self.current_type
        else:
            obj_type = self.visit(node.obj, scope)

        # Annotate the node with the type that owns the method, when it can be resolved.
        try:
            _, owner = obj_type.get_method(node.id, get_owner=True)
            node.update_obj_dynamic_type(owner.name)
        except SemanticError:
            pass

        if node.type is not None:
            try:
                anc_type = self.context.get_type(node.type)
            except SemanticError:
                anc_type = ErrorType()
                self.errors.append(err.UNDEFINED_TYPE % (node.parent_pos, node.type))
            if not obj_type.conforms_to(anc_type):
                self.errors.append(err.INVALID_ANCESTOR % (node.pos, obj_type.name, anc_type.name))
        else:
            anc_type = obj_type

        try:
            method = anc_type.get_method(node.id)
        except SemanticError:
            self.errors.append(err.UNDEFINED_METHOD % (node.pos, node.id, anc_type.name))
            # The arguments are still checked so their own errors get reported.
            for arg in node.args:
                self.visit(arg, scope)
            return ErrorType()

        if len(node.args) != len(method.param_names):
            self.errors.append(err.WRONG_SIGNATURE % (node.pos, method.name, obj_type.name))
        else:
            for arg, param_type in zip(node.args, method.param_types):
                arg_type = self.visit(arg, scope)
                if not arg_type.conforms_to(param_type):
                    self.errors.append(err.INCOMPATIBLE_TYPES % (arg.pos, arg_type.name, param_type.name))

        # A SELF_TYPE result takes the static type of the receiver expression, not the ancestor
        # named in a static dispatch; for the other call forms both are the same type.
        return method.return_type if method.return_type.name != 'SELF_TYPE' else obj_type

    @visitor.when(VariableNode)
    def visit(self, node: VariableNode, scope: Scope):
        """Return the type of a variable, or ``ErrorType`` (plus an error) when it is not in scope."""
        var = scope.find_variable(node.lex)
        if var is None:
            self.errors.append(err.VARIABLE_NOT_DEFINED % (node.pos, node.lex, self._current_context_name()))
            return ErrorType()
        return var.type

    @visitor.when(InstantiateNode)
    def visit(self, node: InstantiateNode, _: Scope):
        """Return the instantiated type; ``new SELF_TYPE`` yields the current class type."""
        try:
            return self.context.get_type(node.lex) if node.lex != 'SELF_TYPE' else self.current_type
        except SemanticError:
            self.errors.append(err.UNDEFINED_TYPE % (node.pos, node.lex))
            return ErrorType()

    @visitor.when(IntegerNode)
    def visit(self, _: IntegerNode, __: Scope):
        return self.context.get_type('Int')

    @visitor.when(StringNode)
    def visit(self, _: StringNode, __: Scope):
        return self.context.get_type('String')

    @visitor.when(BooleanNode)
    def visit(self, _: BooleanNode, __: Scope):
        return self.context.get_type('Bool')

    @visitor.when(PlusNode)
    def visit(self, node: PlusNode, scope: Scope):
        ret_type = self.context.get_type('Int')
        return self._check_binary_node(node, scope, '+', ret_type)

    @visitor.when(MinusNode)
    def visit(self, node: MinusNode, scope: Scope):
        ret_type = self.context.get_type('Int')
        return self._check_binary_node(node, scope, '-', ret_type)

    @visitor.when(StarNode)
    def visit(self, node: StarNode, scope: Scope):
        ret_type = self.context.get_type('Int')
        return self._check_binary_node(node, scope, '*', ret_type)

    @visitor.when(DivNode)
    def visit(self, node: DivNode, scope: Scope):
        ret_type = self.context.get_type('Int')
        return self._check_binary_node(node, scope, '/', ret_type)

    @visitor.when(LessThanNode)
    def visit(self, node: LessThanNode, scope: Scope):
        ret_type = self.context.get_type('Bool')
        return self._check_binary_node(node, scope, '<', ret_type)

    @visitor.when(LessEqualNode)
    def visit(self, node: LessEqualNode, scope: Scope):
        ret_type = self.context.get_type('Bool')
        return self._check_binary_node(node, scope, '<=', ret_type)

    @visitor.when(EqualNode)
    def visit(self, node: EqualNode, scope: Scope):
        """Check ``=``: if either side is Int, String or Bool, both sides must have the same type."""
        left_type = self.visit(node.left, scope)
        right_type = self.visit(node.right, scope)
        basic_types = ('Int', 'String', 'Bool')

        if left_type.name in basic_types or right_type.name in basic_types:
            if left_type.name != right_type.name:
                self.errors.append(err.INCOMPATIBLE_TYPES % (node.pos, left_type.name, right_type.name))

        return self.context.get_type('Bool')

    @visitor.when(IsVoidNode)
    def visit(self, node: IsVoidNode, scope: Scope):
        self.visit(node.expr, scope)
        return self.context.get_type('Bool')

    @visitor.when(NegationNode)
    def visit(self, node: NegationNode, scope: Scope):
        return self._check_unary_node(node, scope, 'not', self.context.get_type('Bool'))

    @visitor.when(ComplementNode)
    def visit(self, node: ComplementNode, scope: Scope):
        return self._check_unary_node(node, scope, '~', self.context.get_type('Int'))

    def _check_binary_node(self, node: BinaryNode, scope: Scope, oper: str, ret_type: Type):
        """
        Check a binary operator whose two operands must both be Int.

        :return: ``ret_type`` when both operands are Int, otherwise ``ErrorType`` after logging an error.
        """
        int_type = self.context.get_type('Int')
        left_type = self.visit(node.left, scope)
        right_type = self.visit(node.right, scope)
        if left_type == right_type == int_type:
            return ret_type

        self.errors.append(err.INVALID_BINARY_OPERATOR % (node.pos, oper, left_type.name, right_type.name))
        return ErrorType()

    def _check_unary_node(self, node: UnaryNode, scope: Scope, oper: str, expected_type: Type):
        """
        Check a unary operator whose operand must have ``expected_type``.

        :return: the operand type when it matches, otherwise ``ErrorType`` after logging an error.
        """
        type_ = self.visit(node.expr, scope)
        if type_ == expected_type:
            return type_

        self.errors.append(err.INVALID_UNARY_OPERATOR % (node.pos, oper, type_.name))
        return ErrorType()
class TypeCollector(object):
    """
    Visitor that builds the type ``Context`` of a program.

    Visiting the program node creates a fresh context, registers the
    predefined types together with their parents, attributes and methods,
    and then creates one type per class declaration. Problems are appended
    to ``errors`` as formatted strings.
    """

    def __init__(self):
        self.context: Context | None = None
        self.errors: list[str] = []

    @visitor.on('node')
    def visit(self, node):
        pass

    @visitor.when(ast.ProgramNode)
    def visit(self, node: ast.ProgramNode):
        ctx = self.context = Context()

        # Predefined types. The empty name maps to the error type.
        ctx.types[''] = ErrorType()
        void = VoidType()
        ctx.types['Void'] = void
        self_ = ctx.create_type('SELF_TYPE')
        object_ = ctx.create_type('Object')
        io = ctx.create_type('IO')
        string = ctx.create_type('String')
        int_ = IntType()
        ctx.types['Int'] = int_
        bool_ = ctx.create_type('Bool')

        # Every predefined type listed here inherits directly from Object.
        for builtin in (void, io, string, int_, bool_):
            builtin.set_parent(object_)

        # Predefined attributes.
        object_.define_attribute('void', void, ast.VariableNode('void'))

        # Predefined methods: (owner, name, parameter names, parameter types, return type).
        builtin_methods = (
            (object_, 'abort', [], [], object_),
            (object_, 'type_name', [], [], string),
            (object_, 'copy', [], [], self_),
            (io, 'out_string', ['x'], [string], self_),
            (io, 'out_int', ['x'], [int_], self_),
            (io, 'in_string', [], [], string),
            (io, 'in_int', [], [], int_),
            (string, 'length', [], [], int_),
            (string, 'concat', ['s'], [string], string),
            (string, 'substr', ['i', 'l'], [int_, int_], string),
        )
        for owner, method_name, param_names, param_types, return_type in builtin_methods:
            owner.define_method(method_name, param_names, param_types, return_type)

        # User-declared classes, in declaration order.
        for class_decl in node.declarations:
            self.visit(class_decl)

    @visitor.when(ast.ClassDeclarationNode)
    def visit(self, node: ast.ClassDeclarationNode):
        # create_type raises SemanticError here; it is reported as a redefinition.
        try:
            self.context.create_type(node.id)
        except SemanticError:
            self.errors.append(err.TYPE_ALREADY_DEFINED % (node.pos, node.id))
class TypeConsistence:
    """
    Checks for cyclic inheritance.

    Each class declaration is checked by asking its type for its ancestors;
    when that raises ``SemanticError`` a ``CYCLIC_INHERITANCE`` error is
    recorded and the type is re-parented to an ``ErrorType``.
    """

    def __init__(self, context: Context, errors: list[str]):
        self.context = context
        self.errors = errors

    @visitor.on('node')
    def visit(self, node):
        pass

    @visitor.when(ProgramNode)
    def visit(self, node: ProgramNode):
        # Declarations are processed last-to-first.
        # TODO: reversed to match tests
        remaining = list(node.declarations)
        while remaining:
            self.visit(remaining.pop())

    @visitor.when(ClassDeclarationNode)
    def visit(self, node: ClassDeclarationNode):
        checked_type = self.context.get_type(node.id)
        try:
            checked_type.get_ancestors(node.id)
            return
        except SemanticError:
            pass

        # Report the cycle at the parent's position, then replace the parent.
        self.errors.append(err.CYCLIC_INHERITANCE % (node.parent_pos, node.parent))
        checked_type.parent = None
        checked_type.set_parent(ErrorType())
class Node:
    """
    Base class of the COOL AST nodes.

    Stores a source position as ``line`` and ``col``, both initialised to 0.
    """

    def __init__(self):
        self.line = self.col = 0

    def set_pos(self, line, col) -> None:
        """Record the position of this node."""
        self.line, self.col = line, col

    @property
    def pos(self) -> tuple[int, int]:
        """The stored position as a ``(line, col)`` pair."""
        return (self.line, self.col)
class CallNode(ExpressionNode):
    """
    AST node for a method call.

    :param idx: identifier of the called method.
    :param args: argument expressions.
    :param obj: expression the method is called on, or ``None``.
    :param typex: explicit type name attached to the call, or ``None``
        (presumably the ``@Type`` of a static dispatch; TODO confirm against the parser).
    """

    def __init__(self,
                 idx: str,
                 args: list[ExpressionNode],
                 obj: ExpressionNode = None,
                 typex: str = None):
        super().__init__()

        self.id = idx
        self.obj = obj
        self.args = args
        self.type = typex

        # Filled in later through update_obj_dynamic_type().
        self.obj_dyn_type: str | None = None
        self.parent_pos = (-1, -1)

    def update_obj_dynamic_type(self, type_name: str):
        """Set ``obj_dyn_type``: the explicit ``type`` wins over ``type_name`` when present."""
        self.obj_dyn_type = type_name if self.type is None else self.type
+class UnaryNode(ExpressionNode): + def __init__(self, expr: ExpressionNode): + super().__init__() + + self.expr = expr + + +class VariableNode(AtomicNode): + pass + + +class InstantiateNode(AtomicNode): + pass + + +class IntegerNode(AtomicNode): + pass + + +class StringNode(AtomicNode): + pass + + +class BooleanNode(AtomicNode): + pass + + +class PlusNode(BinaryNode): + pass + + +class MinusNode(BinaryNode): + pass + + +class StarNode(BinaryNode): + pass + + +class DivNode(BinaryNode): + pass + + +class LessThanNode(BinaryNode): + pass + + +class LessEqualNode(BinaryNode): + pass + + +class EqualNode(BinaryNode): + pass + + +class IsVoidNode(UnaryNode): + pass + + +class NegationNode(UnaryNode): + pass + + +class ComplementNode(UnaryNode): + pass diff --git a/src/coolcmp/utils/ast_formatter.py b/src/coolcmp/utils/ast_formatter.py new file mode 100644 index 000000000..05b1fac33 --- /dev/null +++ b/src/coolcmp/utils/ast_formatter.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +from . import visitor +from .ast import ProgramNode, ClassDeclarationNode, AttrDeclarationNode, FuncDeclarationNode, LetNode, \ + AssignNode, BlockNode, ConditionalNode, WhileNode, CaseNode, CallNode, BinaryNode, AtomicNode, InstantiateNode, \ + UnaryNode, VariableNode + + +class ASTFormatter(object): + + def __init__(self): + self.current_type: str | None = None + + @visitor.on('node') + def visit(self, node, tabs): + pass + + @visitor.when(ProgramNode) + def visit(self, node: ProgramNode, tabs: int = 0): + ans = ' ' * tabs + f'\\__ProgramNode [ ... ]' + statements = '\n'.join(self.visit(child, tabs + 1) for child in node.declarations) + return f'{ans}\n{statements}' + + @visitor.when(ClassDeclarationNode) + def visit(self, node: ClassDeclarationNode, tabs: int = 0): + self.current_type = node.id + + parent = '' if node.parent is None else f": {node.parent}" + ans = ' ' * tabs + f'\\__ClassDeclarationNode: class {node.id} {parent} {{ ... 
}}' + features = '\n'.join(self.visit(child, tabs + 1) for child in node.features) + return f'{ans}\n{features}' + + @visitor.when(AttrDeclarationNode) + def visit(self, node: AttrDeclarationNode, tabs: int = 0): + ans = ' ' * tabs + f'\\__AttrDeclarationNode: {node.id} : {node.type}' + expr = f'\n{self.visit(node.expr, tabs + 1)}' if node.expr is not None else '' + return f'{ans}{expr}' + + @visitor.when(FuncDeclarationNode) + def visit(self, node: FuncDeclarationNode, tabs: int = 0): + params = ', '.join(f'{param.id}: {param.type}' for param in node.params) + ans = ' ' * tabs + f'\\__FuncDeclarationNode: {node.id}({params}) : {node.return_type} -> ' + body = self.visit(node.body, tabs + 1) + return f'{ans}\n{body}' + + @visitor.when(LetNode) + def visit(self, node: LetNode, tabs: int = 0): + declarations = [] + for declaration_node in node.declarations: + _id = declaration_node.id + _type = declaration_node.type + _expr = declaration_node.expr + if _expr is not None: + declarations.append( + ' ' * tabs + + f'\\__VarDeclarationNode: {_id}: {_type} <-\n{self.visit(_expr, tabs + 1)}' + ) + else: + declarations.append(' ' * tabs + + f'\\__VarDeclarationNode: {_id} : {_type}') + declarations = '\n'.join(declarations) + ans = ' ' * tabs + f'\\__LetNode: let' + expr = self.visit(node.expr, tabs + 2) + return f'{ans}\n {declarations}\n' + ' ' * (tabs + 1) + 'in\n' + f'{expr}' + + @visitor.when(AssignNode) + def visit(self, node: AssignNode, tabs: int = 0): + ans = ' ' * tabs + f'\\__AssignNode: {node.id} <- ' + expr = self.visit(node.expr, tabs + 1) + return f'{ans}\n{expr}' + + @visitor.when(BlockNode) + def visit(self, node: BlockNode, tabs: int = 0): + ans = ' ' * tabs + f'\\__BlockNode:' + body = '\n'.join( + self.visit(child, tabs + 1) for child in node.expressions) + return f'{ans}\n{body}' + + @visitor.when(ConditionalNode) + def visit(self, node: ConditionalNode, tabs: int = 0): + ifx = self.visit(node.if_expr, tabs + 2) + then = self.visit(node.then_expr, tabs + 
2) + elsex = self.visit(node.else_expr, tabs + 2) + + return '\n'.join([ + ' ' * tabs + + f'\\__ConditionalNode: if then else fi', + ' ' * (tabs + 1) + f'\\__if \n{ifx}', + ' ' * (tabs + 1) + f'\\__then \n{then}', + ' ' * (tabs + 1) + f'\\__else \n{elsex}', + ]) + + @visitor.when(WhileNode) + def visit(self, node: WhileNode, tabs: int = 0): + condition = self.visit(node.condition, tabs + 2) + body = self.visit(node.body, tabs + 2) + + return '\n'.join([ + ' ' * tabs + f'\\__WhileNode: while loop pool', + ' ' * (tabs + 1) + f'\\__while \n{condition}', + ' ' * (tabs + 1) + f'\\__loop \n{body}', + ]) + + @visitor.when(CaseNode) + def visit(self, node: CaseNode, tabs: int = 0): + cases = [] + for case in node.cases: + _id = case.id + _type = case.type + _expr = case.expr + expr = self.visit(_expr, tabs + 3) + cases.append(' ' * tabs + + f'\\__CaseNode: {_id} : {_type} =>\n{expr}') + expr = self.visit(node.expr, tabs + 2) + cases = '\n'.join(cases) + + return '\n'.join([ + ' ' * tabs + + f'\\__CaseNode: case of [ ... 
] esac', + ' ' * (tabs + 1) + f'\\__case \n{expr} of', + ]) + '\n' + cases + + @visitor.when(CallNode) + def visit(self, node: CallNode, tabs: int = 0): + obj = self.visit(node.obj or VariableNode('self'), tabs + 1) + ' ' + ans = ' ' * tabs + f'\\__CallNode: .{node.id}(, ..., )' + args = '\n'.join(self.visit(arg, tabs + 1) for arg in node.args) + return f'{ans}\n{obj}\n{args}' + + @visitor.when(BinaryNode) + def visit(self, node: BinaryNode, tabs: int = 0): + ans = ' ' * tabs + f'\\__ {node.__class__.__name__} ' + left = self.visit(node.left, tabs + 1) + right = self.visit(node.right, tabs + 1) + return f'{ans}\n{left}\n{right}' + + @visitor.when(AtomicNode) + def visit(self, node: AtomicNode, tabs: int = 0): + return ' ' * tabs + f'\\__{node.__class__.__name__}: {node.lex}' + + @visitor.when(InstantiateNode) + def visit(self, node: InstantiateNode, tabs: int = 0): + return ' ' * tabs + f'\\__InstantiateNode: new {node.lex}()' + + @visitor.when(UnaryNode) + def visit(self, node: UnaryNode, tabs: int = 0): + ans = ' ' * tabs + f'\\__{node.__class__.__name__}: ' + expr = self.visit(node.expr, tabs + 1) + return f'{ans}\n{expr}' diff --git a/src/coolcmp/utils/cil.py b/src/coolcmp/utils/cil.py new file mode 100644 index 000000000..5b620b5d0 --- /dev/null +++ b/src/coolcmp/utils/cil.py @@ -0,0 +1,411 @@ +from __future__ import annotations + +from coolcmp.utils import ast +from coolcmp.utils import extract_feat_name +from coolcmp.utils.semantic import Scope + + +class Node: + pass + + +class ProgramNode(Node): + def __init__( + self, + dot_types: list[TypeNode], + dot_data: list[DataNode], + dot_code: list[FunctionNode], + ): + self.dot_types = dot_types + self.dot_data = dot_data + self.dot_code = dot_code + self.all_methods: list[str] = [] + + def get_type(self, name: str) -> TypeNode: + for type_ in self.dot_types: + if type_.name == name: + return type_ + + def get_function(self, name: str) -> FunctionNode: + for func in self.dot_code: + if func.name == name: + 
class AttributeAt:
    """
    An attribute name paired with an index.

    Instances compare equal by ``name`` only (``index`` is ignored) and are
    hashable consistently with that equality.

    :param name: attribute name; also the result of ``str()``.
    :param index: index associated with the attribute, ``-1`` by default.
    """

    def __init__(self, name: str, index: int = -1):
        self.name = name
        self.index = index

    def __eq__(self, other: object):
        # Any object exposing ``name`` is compared by name, as before.
        # Objects without it (e.g. plain strings met during list.index())
        # used to raise AttributeError; defer to Python's default instead.
        if not hasattr(other, 'name'):
            return NotImplemented
        return self.name == other.name

    def __hash__(self):
        # Defining __eq__ alone disables hashing; keep it in step with __eq__.
        return hash(self.name)

    def __str__(self):
        return self.name
self.total_methods: int | None = None + self.init_locals: list[LocalNode] = [] + + # Add the expression node of the attributes, so when is created an instance + # get quick access to the instructions of the attribute initialization. + def add_attr_node(self, attr: str, node: ast.ExpressionNode | int | str, scope: Scope): + self.attr_expr_nodes[attr] = (node, scope) + + def get_attr_node(self, attr: str) -> tuple[ast.ExpressionNode | int | str, Scope]: + return self.attr_expr_nodes.get(attr) + + def add_local(self, local: LocalNode): + return self.init_locals.append(local) + + +class DataNode(Node): + def __init__(self, vname: str, value: str): + self.name = vname + self.value = value + + +class FunctionNode(Node): + def __init__( + self, + name: str, + params: list[ParamNode], + local_vars: list[LocalNode], + instructions: list[InstructionNode], + ): + self.name = name + self.params = params + self.local_vars = local_vars + self.instructions = instructions + + @property + def args_space(self): + """ + Returns the size of the space used by this function args in the stack. 
+ """ + return len(self.params) * 4 + + +class ParamNode(Node): + def __init__(self, name: str): + self.name = name + + +class LocalNode(Node): + def __init__(self, name: str): + self.name = name + + +class InstructionNode(Node): + pass + + +class CommentNode(InstructionNode): + def __init__(self, text: str): + self.text = text + + +class AssignNode(InstructionNode): + def __init__(self, dest: str, source: str): + self.dest = dest + self.source = source + + +class ArithmeticNode(InstructionNode): + def __init__(self, dest: str, left: str, right: str): + self.dest = dest + self.left = left + self.right = right + + +class PlusNode(ArithmeticNode): + pass + + +class MinusNode(ArithmeticNode): + pass + + +class StarNode(ArithmeticNode): + pass + + +class DivNode(ArithmeticNode): + pass + + +class ConformsNode(ArithmeticNode): + pass + + +class GetAttrNode(InstructionNode): + def __init__(self, dest: str, src: str, attr: str): + self.dest = dest + self.src = src + self.attr = attr + + +class SetAttrNode(InstructionNode): + def __init__(self, instance: str, attr: AttributeAt, value: str): + self.instance = instance + self.attr = attr + self.value = value + + +class GetIndexNode(InstructionNode): + pass + + +class SetIndexNode(InstructionNode): + pass + + +class AllocateNode(InstructionNode): + def __init__(self, type_: str, dest: str): + self.type = type_ + self.dest = dest + + +class ArrayNode(InstructionNode): + pass + + +class TypeOfNode(InstructionNode): + def __init__(self, obj, dest): + self.type = None + self.obj = obj + self.dest = dest + + +class LabelNode(InstructionNode): + def __init__(self, name: str): + self.name = name + + +class GotoNode(InstructionNode): + def __init__(self, label: str): + self.label = label + + +class GotoIfNode(InstructionNode): + def __init__(self, condition: str, label: str): + self.condition = condition + self.label = label + + +class StaticCallNode(InstructionNode): + def __init__(self, function: str, dest: str): + self.function = 
function + self.dest = dest + + +class DynamicCallNode(InstructionNode): + def __init__(self, obj: str, method: str, dest: str, type_: str | None, dtype: str): + self.obj = obj + self.method = method + self.dest = dest + self.type = type_ + self.dtype = dtype + + +class ArgNode(InstructionNode): + def __init__(self, name: str): + self.name = name + + +class ReturnNode(InstructionNode): + def __init__(self, value=None): + self.value = value + + +class LoadNode(InstructionNode): + def __init__(self, dest: str, msg: str): + self.dest = dest + self.msg = msg + + +class LengthNode(InstructionNode): + def __init__(self, src: str, dest: str): + self.src = src + self.dest = dest + + +class ConcatNode(InstructionNode): + def __init__(self, dest: str, str1: str, str2: str): + self.dest = dest + self.str1 = str1 + self.str2 = str2 + + +class PrefixNode(InstructionNode): + pass + + +class SubstringNode(InstructionNode): + def __init__(self, dest: str, src: str, index: str, length: str): + self.dest = dest + self.src = src + self.index = index + self.length = length + + +class ToStrNode(InstructionNode): + def __init__(self, dest, value): + self.dest = dest + self.value = value + + +class ReadNode(InstructionNode): + def __init__(self, dest): + self.dest = dest + + +class ReadIntNode(ReadNode): + pass + + +class ReadStringNode(ReadNode): + pass + + +class PrintNode(InstructionNode): + def __init__(self, addr: str): + self.addr = addr + + +class PrintIntNode(PrintNode): + pass + + +class PrintStringNode(PrintNode): + pass + + +class NegationNode(InstructionNode): + def __init__(self, dest: str, src: str): + self.dest = dest + self.src = src + + +class ComplementNode(InstructionNode): + def __init__(self, dest: str, src: str): + self.dest = dest + self.src = src + + +class CompareNode(InstructionNode): + def __init__(self, dest: str, left: str, right: str): + self.dest = dest + self.left = left + self.right = right + + +class LessThanNode(CompareNode): + pass + + +class 
class RuntimeErrorNode(InstructionNode):
    """Base class of the CIL instructions that represent a runtime error."""
    pass


class CaseMatchRuntimeErrorNode(RuntimeErrorNode):
    """
    CIL instruction rendered by the CIL formatter as ``CASE_MATCH_RUNTIME_ERROR``.

    Derives from ``RuntimeErrorNode`` rather than the builtin ``RuntimeError``
    exception, so it is a real CIL ``InstructionNode`` like every other
    instruction in this module.
    """
    pass


class ExprVoidRuntimeErrorNode(RuntimeErrorNode):
    """
    CIL instruction rendered by the CIL formatter as ``EXPR_VOID_RUNTIME_ERROR``.

    Derives from ``RuntimeErrorNode`` rather than the builtin ``RuntimeError``
    exception, so it is a real CIL ``InstructionNode`` like every other
    instruction in this module.
    """
    pass
'.join(self.visit(x) for x in node.params) + local_vars = '\n '.join(self.visit(x) for x in node.local_vars) + instructions = '\n '.join(self.visit(x) for x in node.instructions) + + return ( + f'function {node.name} {{' + + (f'\n {params}\n' if params else '') + + (f'\n {local_vars}\n' if local_vars else '') + + (f'\n {instructions}\n' if instructions else '') + '}' + ) + + @visitor.when(cil.ParamNode) + def visit(self, node: cil.ParamNode): + return f'PARAM {node.name}' + + @visitor.when(cil.LocalNode) + def visit(self, node: cil.LocalNode): + return f'LOCAL {node.name}' + + @visitor.when(cil.AssignNode) + def visit(self, node: cil.AssignNode): + return f'{node.dest} = {node.source}' + + @visitor.when(cil.PlusNode) + def visit(self, node: cil.PlusNode): + return f'{node.dest} = {node.left} + {node.right}' + + @visitor.when(cil.MinusNode) + def visit(self, node: cil.MinusNode): + return f'{node.dest} = {node.left} - {node.right}' + + @visitor.when(cil.StarNode) + def visit(self, node: cil.StarNode): + return f'{node.dest} = {node.left} * {node.right}' + + @visitor.when(cil.DivNode) + def visit(self, node: cil.DivNode): + return f'{node.dest} = {node.left} / {node.right}' + + @visitor.when(cil.GetAttrNode) + def visit(self, node: cil.GetAttrNode): + return f'{node.dest} = GETATTR {node.src} {node.attr}' + + @visitor.when(cil.SetAttrNode) + def visit(self, node: cil.SetAttrNode): + return f'SETATTR {node.instance} {node.attr} {node.value}' + + @visitor.when(cil.AllocateNode) + def visit(self, node: cil.AllocateNode): + return f'{node.dest} = ALLOCATE {node.type}' + + @visitor.when(cil.TypeOfNode) + def visit(self, node: cil.TypeOfNode): + return f'{node.dest} = TYPEOF {node.obj}' + + @visitor.when(cil.LabelNode) + def visit(self, node: cil.LabelNode): + return f'LABEL {node.name}' + + @visitor.when(cil.GotoNode) + def visit(self, node: cil.GotoNode): + return f'GOTO {node.label}' + + @visitor.when(cil.GotoIfNode) + def visit(self, node: cil.GotoIfNode): + return 
@visitor.when(cil.CompareNode)
def visit(self, node: cil.CompareNode):
    """
    Render a comparison instruction as ``dest = left <op> right``.

    The operator follows the concrete node class: ``<=`` for
    ``LessEqualNode``, ``<`` for ``LessThanNode`` and ``==`` for anything
    else (``EqualNode`` or a bare ``CompareNode``). Previously every
    comparison was printed with ``==``.
    """
    if isinstance(node, cil.LessEqualNode):
        oper = '<='
    elif isinstance(node, cil.LessThanNode):
        oper = '<'
    else:
        oper = '=='
    return f'{node.dest} = {node.left} {oper} {node.right}'
class Type:
    """
    Description of a type in the MIPS program model.

    Holds the type's label, parent name, attribute names, method table,
    total method count and index, plus helpers derived from them.
    """

    def __init__(self,
                 label: str,
                 parent: str | None,
                 attrs: list[str],
                 methods: dict[cil.MethodAt, str],
                 total_methods: int,
                 index: int):
        self.label, self.parent = label, parent
        self.attrs, self.methods = attrs, methods
        self.total_methods, self.index = total_methods, index

    def get_attr_index(self, name: str) -> int:
        """1-based position of ``name`` in ``attrs``; ``ValueError`` if absent."""
        position = self.attrs.index(name)
        return position + 1

    @property
    def name_offset(self) -> int:
        """``(total_methods + 2) * 4``."""
        slots = self.total_methods + 2
        return slots * 4

    def length(self) -> int:
        """Number of attributes."""
        return len(self.attrs)

    def __str__(self):
        return "{}-{}-{}-{}".format(self.label, self.attrs, self.methods, self.index)
class InstructionNode(Node):
    """
    Base class of the MIPS instruction nodes.

    Carries an optional comment; ``comment`` returns it formatted for
    appending after the instruction text, or an empty string when unset.
    """

    def __init__(self):
        self._comment: str = ''

    def with_comm(self, comment: str) -> InstructionNode:
        """Store ``comment`` and return this node, so calls can be chained."""
        self.comment = comment
        return self

    @property
    def comment(self):
        if not self._comment:
            return ""
        return f"\t\t\t# {self._comment}"

    @comment.setter
    def comment(self, comment: str):
        self._comment = comment
class LWNode(InstructionNode):
    """
    load word | lw $1, 100($2) | $1 = Memory[$2 + 100]
    Copy from memory to register.

    ``src`` is either an ``(offset, base)`` tuple, rendered as
    ``offset(base)``, or a single operand rendered as-is.
    """
    def __init__(self, dest: Register, src: tuple[int, Memory] | str):
        super().__init__()
        self.dest = dest
        self.src = src

    def __str__(self):
        source = self.src
        if isinstance(source, tuple):
            operand = f"{source[0]}({source[1]})"
        else:
            operand = f"{source}"
        return f"lw {self.dest}, {operand}"
class ADDINode(InstructionNode):
    """
    add immediate | addi $1, $2, 100 | $1 = $2 + 100
    "Immediate" means a constant number.
    """

    def __init__(self, dest: Register, src: Register | int, isrc: Register | int):
        super().__init__()
        self.dest, self.src, self.isrc = dest, src, isrc

    def __str__(self):
        # Operands are emitted in the order: destination, source, immediate.
        return "addi {}, {}, {}".format(self.dest, self.src, self.isrc)
class JRNode(InstructionNode):
    """
    jump register | jr $1 | go to address stored in $1
    For switch, procedure return.
    """

    def __init__(self, dest: Register):
        super().__init__()
        self.dest = dest

    def __str__(self):
        target = str(self.dest)
        return "jr " + target
class SLLNode(InstructionNode):
    """
    shift left logical by a constant number of bits
    sll $1, $2, 10 -> $1 = $2<<10
    """

    def __init__(self, dest: Register, src: Register, bits: int):
        super().__init__()
        self.dest, self.src, self.bits = dest, src, bits

    def __str__(self):
        # Rendered as: sll <dest>, <src>, <bits>
        return "sll {}, {}, {}".format(self.dest, self.src, self.bits)
class Register:
    """A register identified by name; its string form is the name prefixed with `$`."""

    def __init__(self, name: str):
        self.name = name

    def __str__(self):
        return "${}".format(self.name)


# Step size in bytes (4, i.e. one 32-bit MIPS word). The identifier reads
# "double word" for historical reasons; the name is kept because other
# modules refer to it.
dw = 4

# Temporaries
t0, t1, t2 = Register("t0"), Register("t1"), Register("t2")

# Arguments
a0, a1, a2 = Register("a0"), Register("a1"), Register("a2")

# Constant zero
zero = Register("zero")

# Return value
v0 = Register("v0")

# Frame pointer and stack pointer
fp = Register("fp")
sp = Register("sp")

# Return address
ra = Register("ra")

# Saved registers
s0, s1, s2 = Register("s0"), Register("s1"), Register("s2")
class Method:
    """
    Signature of a method: its name, parameter names, parameter types and
    return type.

    Two methods compare equal when their name, return type and parameter
    type list are equal; parameter *names* do not take part in the
    comparison. Because `__eq__` is defined without `__hash__`, instances
    are not hashable.
    """

    def __init__(self, name: str, param_names: list[str], params_types: list[Type], return_type: Type):
        self.name = name
        self.param_names = param_names
        self.param_types = params_types
        self.return_type = return_type

    def __str__(self) -> str:
        # Each type is rendered through its `name` attribute.
        params = ', '.join(f'{n}: {t.name}' for n, t in zip(self.param_names, self.param_types))
        return f'[method] {self.name}({params}): {self.return_type.name};'

    def __eq__(self, other) -> bool:
        # Comparing against an unrelated object (None, a string, ...) used to
        # raise AttributeError on `other.name`. Returning NotImplemented lets
        # Python fall back to its default handling, so `==` yields False and
        # `!=` yields True.
        if not isinstance(other, Method):
            return NotImplemented
        return other.name == self.name and \
            other.return_type == self.return_type and \
            other.param_types == self.param_types
def all_attributes(self, clean: bool = True) -> dict[str, tuple[Attribute, Type]] | tuple[Attribute, Type]:
    """
    Gather the attributes of this type together with those of its ancestors.

    The parent's mapping is built first, then this type's own attributes are
    written on top of it under their names. A name that already exists
    therefore keeps its position but now points at this type's entry.

    :param clean: when true, return only the `(attribute, owner)` values;
        when false, return the whole name -> `(attribute, owner)` mapping.
    """
    if self.parent is None:
        merged = OrderedDict()
    else:
        merged = self.parent.all_attributes(False)

    merged.update((attr.name, (attr, self)) for attr in self.attributes)

    if clean:
        return merged.values()
    return merged
def join(self, type_: Type) -> Type:
    """
    Find the closest type shared by this type and `type_`.

    Walks from `type_` up its parent chain and returns the first type that
    also appears among this type's ancestors (this type included). Returns
    None when the chain ends without a match.
    """
    lineage = self.get_ancestors()
    candidate = type_
    while candidate is not None and candidate not in lineage:
        candidate = candidate.parent
    return candidate
class VariableInfo:
    """
    Record describing one variable: its name, its type, and two flags
    telling whether it is an attribute and/or a parameter.
    """

    def __init__(self, name: str, vtype: Type, is_attr: bool = False, is_param: bool = False):
        self.name, self.type = name, vtype
        self.is_attr, self.is_param = is_attr, is_param

    def __str__(self) -> str:
        # The type is rendered through its own string conversion.
        return '{}: {}'.format(self.name, self.type)
def find_column(input_text: str, token_lexpos: int) -> int:
    """
    Compute the 1-based column of the token that starts at `token_lexpos`.

    The column is counted from the character after the last newline before
    the token. Every tab between the line start and the token counts as
    4 columns (the character itself plus 3 extra).
    """
    line_start = input_text.rfind('\n', 0, token_lexpos) + 1
    tabs = input_text.count('\t', line_start, token_lexpos)
    return (token_lexpos - line_start) + tabs * 3 + 1


def extract_feat_name(name: str) -> str:
    """
    Return the feature part of a `Class_feature` name, i.e. everything after
    the first underscore. A name without an underscore is returned unchanged.

    Example:
        Object_type_name -> type_name
        IO_out_string -> out_string
    """
    _, sep, feature = name.partition('_')
    return feature if sep else name


def extract_class_name(name: str) -> str:
    """
    Return the class part of a `Class_feature` name, i.e. everything before
    the first underscore.

    A name without an underscore has no class prefix, so the empty string is
    returned. (Slicing with the -1 that `str.find` returns would instead
    silently drop the last character of the name.)

    Example:
        Object_type_name -> Object
        IO_out_string -> IO
        main -> ""
    """
    prefix, sep, _ = name.partition('_')
    return prefix if sep else ''


def split_class_and_feat(name: str) -> tuple[str, str]:
    """
    Split a full feature name into `(class name, feature name)` at the first
    underscore. A name without an underscore yields `("", name)`.

    Example:
        Object_type_name -> (Object, type_name)
        IO_out_string -> (IO, out_string)
    """
    return extract_class_name(name), extract_feat_name(name)
def on(param_name):
    """
    Decorator factory that starts a dispatch group.

    The decorated function is replaced by a `Dispatcher` that selects a
    target according to the class of the argument named `param_name`. The
    decorated function is only inspected for its parameter list; its body is
    never called.
    """
    def f(fn):
        return Dispatcher(param_name, fn)
    return f


def when(param_type):
    """
    Decorator factory that registers the decorated function as the target
    for `param_type` in an existing dispatch group.

    The group is found by looking up the function's own name in the local
    namespace of the scope where the decorator is applied. It must therefore
    be used in the same scope (e.g. the same class body) as the matching
    `@on` definition, on a function of the same name.
    """
    def f(fn):
        frame = inspect.currentframe().f_back
        bound = frame.f_locals[fn.__name__]
        # After the first @when the name is bound to the wrapper below, which
        # carries the dispatcher as an attribute.
        dispatcher = bound if isinstance(bound, Dispatcher) else bound.dispatcher
        dispatcher.add_target(param_type, fn)

        def ff(*args, **kw):
            return dispatcher(*args, **kw)
        ff.dispatcher = dispatcher
        return ff
    return f


class Dispatcher(object):
    """Callable that forwards a call to the target registered for the class of one argument."""

    def __init__(self, param_name, fn):
        # Position of the dispatch argument in the decorated function's signature.
        self.param_index = self.__argspec(fn).args.index(param_name)
        self.param_name = param_name
        self.targets = {}

    def __call__(self, *args, **kw):
        # The dispatch argument may be given positionally or by keyword.
        if self.param_index < len(args):
            subject = args[self.param_index]
        else:
            subject = kw[self.param_name]
        typ = subject.__class__

        exact = self.targets.get(typ)
        if exact is not None:
            return exact(*args, **kw)

        # No exact match: call every target registered for a superclass of
        # `typ`. A single result is returned bare; otherwise the list of
        # results (possibly empty) is returned.
        results = [target(*args, **kw)
                   for key, target in self.targets.items()
                   if issubclass(typ, key)]
        if len(results) == 1:
            return results.pop()
        return results

    def add_target(self, typ, target):
        """Register `target` as the function to call for arguments of class `typ`."""
        self.targets[typ] = target

    @staticmethod
    def __argspec(fn):
        # getfullargspec understands annotated signatures.
        return inspect.getfullargspec(fn)
def main():
    """
    Build MIPS for the `print_int` CIL fixture, print the generated text,
    and write it to `./code.mips`.
    """
    mips_ast = build_mips(print_int, None, None)
    mips_str = MIPSFormatter().visit(mips_ast)

    # Header line followed by the generated program.
    print("Mips code:", mips_str, sep="\n")

    with open("./code.mips", "w") as out_file:
        out_file.write(mips_str)
cil.PrintStringNode("msg"), + cil.PrintIntNode("a"), + cil.ReturnNode(0), + ], + ) + ], +) + +print_int = cil.ProgramNode( + dot_types=[], + dot_data=[cil.DataNode("the_sum_is", '"The sum is\n"')], + dot_code=[ + cil.FunctionNode( + name="main", + params=[], + local_vars=[ + cil.LocalNode("msg"), + cil.LocalNode("x"), + cil.LocalNode("y"), + cil.LocalNode("sum"), + ], + instructions=[ + cil.LoadNode("msg", "the_sum_is"), + cil.AssignNode("x", 10 + 12 + 10), + cil.AssignNode("y", "x"), + cil.PlusNode("sum", "x", "y"), + cil.PrintStringNode("msg"), + cil.PrintIntNode("sum"), + cil.ReturnNode(0), + ], + ) + ], +) diff --git a/tests/codegen/print-cool.cl b/tests/codegen/print-cool.cl index 76194e966..6510dfa5c 100644 --- a/tests/codegen/print-cool.cl +++ b/tests/codegen/print-cool.cl @@ -6,4 +6,4 @@ class Main inherits IO { out_string("\n"); } }; -}; +}; \ No newline at end of file diff --git a/tests/codes/concat.cl b/tests/codes/concat.cl new file mode 100644 index 000000000..33cf44464 --- /dev/null +++ b/tests/codes/concat.cl @@ -0,0 +1,5 @@ +class Main inherits IO { + main() : IO { + self.out_string("1212121212121212121212121212121212121212121212".concat("343434343434343434343434343434343434343434343434343434343434v").concat("5656565656565656565656565656565656")) + }; +}; \ No newline at end of file diff --git a/tests/codes/conditional.cl b/tests/codes/conditional.cl new file mode 100644 index 000000000..e806cc2be --- /dev/null +++ b/tests/codes/conditional.cl @@ -0,0 +1,10 @@ +--The predicate must have static type Bool. 
+ +class A { }; +class B inherits A { }; +class C inherits B { }; + +class Main inherits IO { + main(): IO { out_string("Hello World!")}; + a: A <- if let x: Bool <- true in x then new B else new C fi; +}; \ No newline at end of file diff --git a/tests/codes/hello.cl b/tests/codes/hello.cl new file mode 100644 index 000000000..ee99123a5 --- /dev/null +++ b/tests/codes/hello.cl @@ -0,0 +1,49 @@ +class Main inherits IO { + msg : String <- "Hello World!\n"; + + sum(a: Int, b: Int) : Int { + a + b + }; + + main() : IO { + self.out_int(1) + }; +}; + +(* + .TYPES + type Main { + attribute Main_msg ; + method Main_main: f1 ; + } + + .DATA + s1 = "Hello World!\n"; + + .CODE + function entry { + LOCAL lmsg ; + LOCAL instance ; + LOCAL result ; + + lmsg = LOAD s1 ; + instance = ALLOCATE Main ; + SETATTR instance Main_msg lmsg ; + + ARG instance ; + result = VCALL Main Main_main ; + + RETURN 0 ; + } + + function f1 { + PARAM self ; + + LOCAL lmsg ; + + lmsg = GETATTR self Main_msg ; + PRINT lmsg ; + + RETURN self ; + } +*) \ No newline at end of file diff --git a/tests/codes/int_input_sum.cl b/tests/codes/int_input_sum.cl new file mode 100644 index 000000000..e253d6303 --- /dev/null +++ b/tests/codes/int_input_sum.cl @@ -0,0 +1,17 @@ +class Main inherits IO { + msg : String <- "Hello World!\n"; + a : Int; + b : Int; + + main() : IO { + { + self.out_string("Enter the first integer: "); + a <- self.in_int(); + self.out_string("Enter the second integer: "); + b <- self.in_int(); + self.out_string("The sum is: "); + self.out_int(a + b); + self.out_string("\n"); + } + }; +}; \ No newline at end of file diff --git a/tests/codes/length.cl b/tests/codes/length.cl new file mode 100644 index 000000000..258d83eec --- /dev/null +++ b/tests/codes/length.cl @@ -0,0 +1,9 @@ +class Main inherits IO { + msg : String <- "Hello world"; + + main() : IO { + let y : String <- msg.substr(2, 2) in { + self.out_int(y.substr(0, 1).length()); + } + }; +}; diff --git a/tests/codes/let.cl 
b/tests/codes/let.cl new file mode 100644 index 000000000..4924f8089 --- /dev/null +++ b/tests/codes/let.cl @@ -0,0 +1,13 @@ +--The type of an initialization expression must conform to the declared type of the identifier. + +class A { }; +class B inherits A { }; +class C inherits B { }; + +class Main inherits IO { + test: B <- let a: Bool, a: Int <- 5, a: String, b: C <- new C in b; + + main(): IO { out_string("Hello World!")}; + + get_test(): B { test }; +}; \ No newline at end of file diff --git a/tests/codes/loop.cl b/tests/codes/loop.cl new file mode 100644 index 000000000..3e3c53a5c --- /dev/null +++ b/tests/codes/loop.cl @@ -0,0 +1,8 @@ +--The predicate must have static type Bool. + +class Main inherits IO { + main(): IO { out_string("Hello World!")}; + + i: Int <- 1; + test: Object <- while true loop i <- i + 1 pool; +}; \ No newline at end of file diff --git a/tests/codes/silly.cl b/tests/codes/silly.cl new file mode 100644 index 000000000..cfb747cd4 --- /dev/null +++ b/tests/codes/silly.cl @@ -0,0 +1,17 @@ +class Main { + x : Sally <- (new Sally).copy(); + main() : Sally { + x + }; +}; + +class Silly { + a: Int <- 10; + copy() : SELF_TYPE { + self + }; +}; + +class Sally inherits Silly { + y : String <- "Hi from Silly"; +}; diff --git a/tests/codes/substring.cl b/tests/codes/substring.cl new file mode 100644 index 000000000..80039ba03 --- /dev/null +++ b/tests/codes/substring.cl @@ -0,0 +1,9 @@ +class Main inherits IO { + msg : String <- "Hello world!"; + a : Int <- 2; + b : Int <- 5; + + main() : IO { + self.out_string(msg.substr(a, b)) + }; +}; \ No newline at end of file