diff --git a/.gitignore b/.gitignore
index 3f5196b..e4f0843 100644
--- a/.gitignore
+++ b/.gitignore
@@ -142,4 +142,10 @@ config_local.py
 data/
 
 # testes
-/meus_testes/*
\ No newline at end of file
+/meus_testes/*
+
+# reports testes spider
+/reports/
+
+# local virtualenv: machine-specific (absolute paths), recreate with `python -m venv`
+/projeto-raia/
diff --git a/projeto-raia/bin/Activate.ps1 b/projeto-raia/bin/Activate.ps1
new file mode 100644
index 0000000..b49d77b
--- /dev/null
+++ b/projeto-raia/bin/Activate.ps1
@@ -0,0 +1,247 @@
+<#
+.Synopsis
+Activate a Python virtual environment for the current PowerShell session.
+
+.Description
+Pushes the python executable for a virtual environment to the front of the
+$Env:PATH environment variable and sets the prompt to signify that you are
+in a Python virtual environment. Makes use of the command line switches as
+well as the `pyvenv.cfg` file values present in the virtual environment.
+
+.Parameter VenvDir
+Path to the directory that contains the virtual environment to activate. The
+default value for this is the parent of the directory that the Activate.ps1
+script is located within.
+
+.Parameter Prompt
+The prompt prefix to display when this virtual environment is activated. By
+default, this prompt is the name of the virtual environment folder (VenvDir)
+surrounded by parentheses and followed by a single space (i.e. '(.venv) ').
+
+.Example
+Activate.ps1
+Activates the Python virtual environment that contains the Activate.ps1 script.
+
+.Example
+Activate.ps1 -Verbose
+Activates the Python virtual environment that contains the Activate.ps1 script,
+and shows extra information about the activation as it executes.
+
+.Example
+Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
+Activates the Python virtual environment located in the specified location.
+
+.Example
+Activate.ps1 -Prompt "MyPython"
+Activates the Python virtual environment that contains the Activate.ps1 script,
+and prefixes the current prompt with the specified string (surrounded in
+parentheses) while the virtual environment is active.
+
+.Notes
+On Windows, it may be required to enable this Activate.ps1 script by setting the
+execution policy for the user. You can do this by issuing the following PowerShell
+command:
+
+PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
+
+For more information on Execution Policies: 
+https://go.microsoft.com/fwlink/?LinkID=135170
+
+#>
+Param(
+    [Parameter(Mandatory = $false)]
+    [String]
+    $VenvDir,
+    [Parameter(Mandatory = $false)]
+    [String]
+    $Prompt
+)
+
+<# Function declarations --------------------------------------------------- #>
+
+<#
+.Synopsis
+Undo the changes the Activate script made to the shell session: restore the
+saved prompt, PYTHONHOME and PATH, and remove the VIRTUAL_ENV and
+VIRTUAL_ENV_PROMPT environment variables.
+
+.Parameter NonDestructive
+If present, do not remove this function from the global namespace for the
+session.
+
+#>
+function global:deactivate ([switch]$NonDestructive) {
+    # Each saved value is restored only if its backup exists; the backup is then removed.
+
+    # The prior prompt:
+    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
+        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
+        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
+    }
+
+    # The prior PYTHONHOME:
+    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
+        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
+        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
+    }
+
+    # The prior PATH:
+    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
+        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
+        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
+    }
+
+    # Just remove the VIRTUAL_ENV altogether:
+    if (Test-Path -Path Env:VIRTUAL_ENV) {
+        Remove-Item -Path env:VIRTUAL_ENV
+    }
+
+    # Just remove VIRTUAL_ENV_PROMPT altogether.
+    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
+        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
+    }
+
+    # Remove _PYTHON_VENV_PROMPT_PREFIX; -Force because the Activate script creates it ReadOnly:
+    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
+        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
+    }
+
+    # Remove this function itself unless -NonDestructive was given:
+    if (-not $NonDestructive) {
+        Remove-Item -Path function:deactivate
+    }
+}
+
+<#
+.Description
+Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
+given folder, and returns them in a map.
+
+For each line in the pyvenv.cfg file, if that line can be parsed into exactly
+two strings separated by `=` (with any amount of whitespace surrounding the =)
+then it is considered a `key = value` line. The left hand string is the key,
+the right hand is the value.
+
+If the value starts with a `'` or a `"` and is at least two characters long,
+the first and last characters are stripped from the value before being captured.
+
+.Parameter ConfigDir
+Path to the directory that contains the `pyvenv.cfg` file.
+#>
+function Get-PyVenvConfig(
+    [String]
+    $ConfigDir
+) {
+    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
+
+    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
+    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
+
+    # An empty map will be returned if no config file is found.
+    $pyvenvConfig = @{ }
+
+    if ($pyvenvConfigPath) {
+
+        Write-Verbose "File exists, parse `key = value` lines"
+        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
+
+        $pyvenvConfigContent | ForEach-Object {
+            $keyval = $PSItem -split "\s*=\s*", 2
+            if ($keyval[0] -and $keyval[1]) {
+                $val = $keyval[1]
+
+                # Strip surrounding quotes; the length check keeps Substring from throwing on a lone quote character.
+                if ($val.Length -ge 2 -and "'""".Contains($val.Substring(0, 1))) {
+                    $val = $val.Substring(1, $val.Length - 2)
+                }
+
+                $pyvenvConfig[$keyval[0]] = $val
+                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
+            }
+        }
+    }
+    return $pyvenvConfig
+}
+
+
+<# Begin Activate script --------------------------------------------------- #>
+
+# Determine the containing directory of this script
+$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
+$VenvExecDir = Get-Item -Path $VenvExecPath
+
+Write-Verbose "Activation script is located in path: '$VenvExecPath'"
+Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)'"
+Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)'"
+
+# Set values required in priority: CmdLine, ConfigFile, Default
+# First, get the location of the virtual environment, it might not be
+# VenvExecDir if specified on the command line.
+if ($VenvDir) {
+    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
+}
+else {
+    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
+    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
+    Write-Verbose "VenvDir=$VenvDir"
+}
+
+# Next, read the `pyvenv.cfg` file to determine any required value such
+# as `prompt`.
+$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
+
+# Next, set the prompt from the command line, or the config file, or
+# just use the name of the virtual environment folder.
+if ($Prompt) {
+    Write-Verbose "Prompt specified as argument, using '$Prompt'"
+}
+else {
+    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
+    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
+        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
+        $Prompt = $pyvenvCfg['prompt'];
+    }
+    else {
+        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
+        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
+        $Prompt = Split-Path -Path $venvDir -Leaf
+    }
+}
+
+Write-Verbose "Prompt = '$Prompt'"
+Write-Verbose "VenvDir='$VenvDir'"
+
+# Deactivate any currently active virtual environment, but leave the
+# deactivate function in place.
+deactivate -nondestructive
+
+# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
+# that there is an activated venv.
+$env:VIRTUAL_ENV = $VenvDir
+
+if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
+
+    Write-Verbose "Setting prompt to '$Prompt'"
+
+    # Set the prompt to include the env name
+    # Make sure _OLD_VIRTUAL_PROMPT is global: declare a stub, then copy the current prompt over it
+    function global:_OLD_VIRTUAL_PROMPT { "" }
+    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
+    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
+
+    function global:prompt {
+        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
+        _OLD_VIRTUAL_PROMPT
+    }
+    $env:VIRTUAL_ENV_PROMPT = $Prompt
+}
+
+# Clear PYTHONHOME, keeping a copy so deactivate can restore it
+if (Test-Path -Path Env:PYTHONHOME) {
+    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
+    Remove-Item -Path Env:PYTHONHOME
+}
+
+# Save the current PATH, then put the directory containing this script first
+Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
+$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
diff --git a/projeto-raia/bin/activate b/projeto-raia/bin/activate
new file mode 100644
index 0000000..503c906
--- /dev/null
+++ b/projeto-raia/bin/activate
@@ -0,0 +1,70 @@
+# This file must be used with "source bin/activate" *from bash*
+# You cannot run it directly
+
+deactivate () {
+    # Restore PATH and PYTHONHOME from the copies saved at activation, if any
+    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
+        PATH="${_OLD_VIRTUAL_PATH:-}"
+        export PATH
+        unset _OLD_VIRTUAL_PATH
+    fi
+    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
+        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
+        export PYTHONHOME
+        unset _OLD_VIRTUAL_PYTHONHOME
+    fi
+
+    # Call hash to forget past commands. Without forgetting
+    # past commands the $PATH changes we made may not be respected
+    hash -r 2> /dev/null
+
+    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
+        PS1="${_OLD_VIRTUAL_PS1:-}"
+        export PS1
+        unset _OLD_VIRTUAL_PS1
+    fi
+
+    unset VIRTUAL_ENV
+    unset VIRTUAL_ENV_PROMPT
+    if [ ! "${1:-}" = "nondestructive" ] ; then
+        # Self destruct: remove this function unless called with "nondestructive"
+        unset -f deactivate
+    fi
+}
+
+# Clear any previously activated environment, but keep the deactivate function
+deactivate nondestructive
+
+# on Windows, a path can contain colons and backslashes and has to be converted:
+if [ "${OSTYPE:-}" = "cygwin" ] || [ "${OSTYPE:-}" = "msys" ] ; then
+    # transform D:\path\to\venv to /d/path/to/venv on MSYS
+    # and to /cygdrive/d/path/to/venv on Cygwin
+    export VIRTUAL_ENV=$(cygpath /home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia)
+else
+    # use the path as-is (NOTE(review): this absolute path is hard-coded, so the script only works at this location)
+    export VIRTUAL_ENV=/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia
+fi
+
+_OLD_VIRTUAL_PATH="$PATH"  # saved so deactivate can restore it
+PATH="$VIRTUAL_ENV/"bin":$PATH"
+export PATH
+
+# unset PYTHONHOME if set
+# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
+# could use `if (set -u; : $PYTHONHOME) ;` in bash
+if [ -n "${PYTHONHOME:-}" ] ; then
+    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
+    unset PYTHONHOME
+fi
+
+# Prefix the prompt with the environment name unless VIRTUAL_ENV_DISABLE_PROMPT is set
+if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
+    _OLD_VIRTUAL_PS1="${PS1:-}"
+    PS1='(projeto-raia) '"${PS1:-}"
+    export PS1
+    VIRTUAL_ENV_PROMPT='(projeto-raia) '
+    export VIRTUAL_ENV_PROMPT
+fi
+
+# Call hash to forget past commands. Without forgetting
+# past commands the $PATH changes we made may not be respected
+hash -r 2> /dev/null
diff --git a/projeto-raia/bin/activate.csh b/projeto-raia/bin/activate.csh
new file mode 100644
index 0000000..62b36bd
--- /dev/null
+++ b/projeto-raia/bin/activate.csh
@@ -0,0 +1,27 @@
+# This file must be used with "source bin/activate.csh" *from csh*.
+# You cannot run it directly.
+
+# Created by Davide Di Blasi <davidedb@gmail.com>.
+# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
+# deactivate: restore PATH and prompt if saved copies exist, unset VIRTUAL_ENV*, and unalias itself unless called with "nondestructive".
+alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
+
+# Clear any previously activated environment, but keep the deactivate alias.
+deactivate nondestructive
+
+setenv VIRTUAL_ENV /home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia
+
+set _OLD_VIRTUAL_PATH="$PATH"
+setenv PATH "$VIRTUAL_ENV/"bin":$PATH"
+
+# Save the current prompt; it is only changed below when VIRTUAL_ENV_DISABLE_PROMPT is unset.
+set _OLD_VIRTUAL_PROMPT="$prompt"
+
+if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
+    set prompt = '(projeto-raia) '"$prompt"
+    setenv VIRTUAL_ENV_PROMPT '(projeto-raia) '
+endif
+
+alias pydoc python -m pydoc
+# Recompute the command hash table so the modified PATH takes effect.
+rehash
diff --git a/projeto-raia/bin/activate.fish b/projeto-raia/bin/activate.fish
new file mode 100644
index 0000000..803ed0d
--- /dev/null
+++ b/projeto-raia/bin/activate.fish
@@ -0,0 +1,69 @@
+# This file must be used with "source <venv>/bin/activate.fish" *from fish*
+# (https://fishshell.com/). You cannot run it directly.
+
+function deactivate  -d "Exit virtual environment and return to normal shell environment"
+    # Restore PATH and PYTHONHOME from the copies saved at activation, if present
+    if test -n "$_OLD_VIRTUAL_PATH"
+        set -gx PATH $_OLD_VIRTUAL_PATH
+        set -e _OLD_VIRTUAL_PATH
+    end
+    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
+        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
+        set -e _OLD_VIRTUAL_PYTHONHOME
+    end
+
+    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
+        set -e _OLD_FISH_PROMPT_OVERRIDE
+        # prevents error when using nested fish instances (Issue #93858)
+        if functions -q _old_fish_prompt
+            functions -e fish_prompt
+            functions -c _old_fish_prompt fish_prompt
+            functions -e _old_fish_prompt
+        end
+    end
+
+    set -e VIRTUAL_ENV
+    set -e VIRTUAL_ENV_PROMPT
+    if test "$argv[1]" != "nondestructive"
+        # Self-destruct: erase this function unless called with "nondestructive"
+        functions -e deactivate
+    end
+end
+
+# Clear any previously activated environment, but keep the deactivate function.
+deactivate nondestructive
+
+set -gx VIRTUAL_ENV /home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia
+# Save PATH so deactivate can restore it, then put the venv's bin directory first.
+set -gx _OLD_VIRTUAL_PATH $PATH
+set -gx PATH "$VIRTUAL_ENV/"bin $PATH
+
+# Unset PYTHONHOME if set.
+if set -q PYTHONHOME
+    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
+    set -e PYTHONHOME
+end
+
+if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
+    # fish uses a function instead of an env var to generate the prompt.
+
+    # Save the current fish_prompt function as the function _old_fish_prompt.
+    functions -c fish_prompt _old_fish_prompt
+
+    # With the original prompt function renamed, we can override with our own.
+    function fish_prompt
+        # Save the return status of the last command.
+        set -l old_status $status
+
+        # Output the venv prompt; color taken from the blue of the Python logo.
+        printf "%s%s%s" (set_color 4B8BBE) '(projeto-raia) ' (set_color normal)
+
+        # Restore the return status of the previous command.
+        echo "exit $old_status" | .
+        # Output the original/"old" prompt.
+        _old_fish_prompt
+    end
+
+    # Marker that deactivate checks before restoring the original fish_prompt.
+    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
+    set -gx VIRTUAL_ENV_PROMPT '(projeto-raia) '
+end
diff --git a/projeto-raia/bin/distro b/projeto-raia/bin/distro
new file mode 100755
index 0000000..9dda94e
--- /dev/null
+++ b/projeto-raia/bin/distro
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from distro.distro import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(main())  # run distro's main() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/dotenv b/projeto-raia/bin/dotenv
new file mode 100755
index 0000000..3c3640a
--- /dev/null
+++ b/projeto-raia/bin/dotenv
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from dotenv.__main__ import cli
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(cli())  # run dotenv's cli() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/filetype b/projeto-raia/bin/filetype
new file mode 100755
index 0000000..6baebd3
--- /dev/null
+++ b/projeto-raia/bin/filetype
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from filetype.__main__ import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(main())  # run filetype's main() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/httpx b/projeto-raia/bin/httpx
new file mode 100755
index 0000000..1a4903a
--- /dev/null
+++ b/projeto-raia/bin/httpx
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from httpx import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(main())  # run httpx's main() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/jsondiff b/projeto-raia/bin/jsondiff
new file mode 100755
index 0000000..967b5c0
--- /dev/null
+++ b/projeto-raia/bin/jsondiff
@@ -0,0 +1,41 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function
+
+import sys
+import json
+import jsonpatch
+import argparse
+
+
+parser = argparse.ArgumentParser(description='Diff two JSON files')  # module-level parser; parsed inside diff_files()
+parser.add_argument('FILE1', type=argparse.FileType('r'))  # FileType('r'): argparse opens the file for reading
+parser.add_argument('FILE2', type=argparse.FileType('r'))
+parser.add_argument('--indent', type=int, default=None,
+                    help='Indent output by n spaces')
+parser.add_argument('-u', '--preserve-unicode', action='store_true',
+                    help='Output Unicode character as-is without using Code Point')
+parser.add_argument('-v', '--version', action='version',
+                    version='%(prog)s ' + jsonpatch.__version__)
+
+
+def main():  # CLI entry point for jsondiff
+    try:
+        diff_files()
+    except KeyboardInterrupt:
+        sys.exit(1)  # on Ctrl-C, exit with status 1 instead of showing a traceback
+
+
+def diff_files():
+    """ Diff the two JSON files given on the command line; print the patch and exit with status 1 if it is non-empty """
+    args = parser.parse_args()
+    doc1 = json.load(args.FILE1)
+    doc2 = json.load(args.FILE2)
+    patch = jsonpatch.make_patch(doc1, doc2)
+    if patch.patch:  # nothing is printed and no exit is forced when patch.patch is falsy
+        print(json.dumps(patch.patch, indent=args.indent, ensure_ascii=not(args.preserve_unicode)))
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/projeto-raia/bin/jsonpatch b/projeto-raia/bin/jsonpatch
new file mode 100755
index 0000000..baaf531
--- /dev/null
+++ b/projeto-raia/bin/jsonpatch
@@ -0,0 +1,107 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+
+import sys
+import os.path
+import json
+import jsonpatch
+import tempfile
+import argparse
+
+
+parser = argparse.ArgumentParser(  # module-level parser; parsed inside patch_files()
+    description='Apply a JSON patch on a JSON file')
+parser.add_argument('ORIGINAL', type=argparse.FileType('r'),
+                    help='Original file')
+parser.add_argument('PATCH', type=argparse.FileType('r'),
+                    nargs='?', default=sys.stdin,
+                    help='Patch file (read from stdin if omitted)')
+parser.add_argument('--indent', type=int, default=None,
+                    help='Indent output by n spaces')
+parser.add_argument('-b', '--backup', action='store_true',
+                    help='Back up ORIGINAL if modifying in-place')
+parser.add_argument('-i', '--in-place', action='store_true',
+                    help='Modify ORIGINAL in-place instead of to stdout')
+parser.add_argument('-v', '--version', action='version',
+                    version='%(prog)s ' + jsonpatch.__version__)
+parser.add_argument('-u', '--preserve-unicode', action='store_true',
+                    help='Output Unicode character as-is without using Code Point')
+
+def main():  # CLI entry point for jsonpatch
+    try:
+        patch_files()
+    except KeyboardInterrupt:
+        sys.exit(1)  # on Ctrl-C, exit with status 1 instead of showing a traceback
+
+
+def patch_files():
+    """ Apply a JSON patch to a JSON file, writing the result to stdout or back to the file in place """
+    args = parser.parse_args()
+    doc = json.load(args.ORIGINAL)
+    patch = json.load(args.PATCH)
+    result = jsonpatch.apply_patch(doc, patch)
+
+    if args.in_place:
+        dirname = os.path.abspath(os.path.dirname(args.ORIGINAL.name))
+
+        try:
+            # Attempt to replace the file atomically.  We do this by
+            # creating a temporary file in the same directory as the
+            # original file so we can atomically move the new file over
+            # the original later.  (This is done in the same directory
+            # because atomic renames do not work across mount points.)
+
+            fd, pathname = tempfile.mkstemp(dir=dirname)
+            fp = os.fdopen(fd, 'w')
+            atomic = True
+
+        except OSError:
+            # We failed to create the temporary file for an atomic
+            # replace, so fall back to non-atomic mode by backing up
+            # the original (if desired) and writing a new file.
+
+            if args.backup:
+                os.rename(args.ORIGINAL.name, args.ORIGINAL.name + '.orig')
+            fp = open(args.ORIGINAL.name, 'w')
+            atomic = False
+
+    else:
+        # Since we're not replacing the original file in-place, write
+        # the modified JSON to stdout instead.
+
+        fp = sys.stdout
+
+    # By this point we have some sort of file object we can write the
+    # modified JSON to.
+
+    json.dump(result, fp, indent=args.indent, ensure_ascii=not(args.preserve_unicode))
+    fp.write('\n')
+
+    if args.in_place:
+        # Close the new file.  If we aren't replacing atomically, this
+        # is our last step, since everything else is already in place.
+
+        fp.close()
+
+        if atomic:
+            try:
+                # Complete the atomic replace by linking the original
+                # to a backup (if desired), fixing up the permissions
+                # on the temporary file, and moving it into place.
+
+                if args.backup:
+                    os.link(args.ORIGINAL.name, args.ORIGINAL.name + '.orig')
+                os.chmod(pathname, os.stat(args.ORIGINAL.name).st_mode)
+                os.rename(pathname, args.ORIGINAL.name)
+
+            except OSError:
+                # In the event we could not actually do the atomic
+                # replace, unlink the original to move it out of the
+                # way and finally move the temporary file into place.
+
+                os.unlink(args.ORIGINAL.name)
+                os.rename(pathname, args.ORIGINAL.name)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projeto-raia/bin/jsonpointer b/projeto-raia/bin/jsonpointer
new file mode 100755
index 0000000..a8a7614
--- /dev/null
+++ b/projeto-raia/bin/jsonpointer
@@ -0,0 +1,66 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+
+
+import argparse
+import json
+import sys
+
+import jsonpointer
+
+parser = argparse.ArgumentParser(  # module-level parser; parsed inside resolve_files()
+    description='Resolve a JSON pointer on JSON files')
+
+# Accept the pointer either as a positional argument or from a file (exactly one is required)
+ptr_group = parser.add_mutually_exclusive_group(required=True)
+
+ptr_group.add_argument('-f', '--pointer-file', type=argparse.FileType('r'),
+                       nargs='?',
+                       help='File containing a JSON pointer expression')
+
+ptr_group.add_argument('POINTER', type=str, nargs='?',
+                       help='A JSON pointer expression')
+
+parser.add_argument('FILE', type=argparse.FileType('r'), nargs='+',
+                    help='Files for which the pointer should be resolved')
+parser.add_argument('--indent', type=int, default=None,
+                    help='Indent output by n spaces')
+parser.add_argument('-v', '--version', action='version',
+                    version='%(prog)s ' + jsonpointer.__version__)
+
+
+def main():  # CLI entry point for jsonpointer
+    try:
+        resolve_files()
+    except KeyboardInterrupt:
+        sys.exit(1)  # on Ctrl-C, exit with status 1 instead of showing a traceback
+
+
+def parse_pointer(args):
+    """Return the pointer from POINTER or --pointer-file; print usage and exit(1) if neither is given."""
+    if args.POINTER is not None:  # "" is a valid pointer (the whole document), so test for None, not truthiness
+        ptr = args.POINTER
+    elif args.pointer_file:
+        ptr = args.pointer_file.read().strip()
+    else:
+        parser.print_usage()
+        sys.exit(1)
+    return ptr
+
+
+def resolve_files():
+    """ Resolve a JSON pointer on each input file, printing the result or an error message on stderr """
+    args = parser.parse_args()
+
+    ptr = parse_pointer(args)
+
+    for f in args.FILE:
+        doc = json.load(f)
+        try:
+            result = jsonpointer.resolve_pointer(doc, ptr)
+            print(json.dumps(result, indent=args.indent))
+        except jsonpointer.JsonPointerException as e:
+            print('Could not resolve pointer: %s' % str(e), file=sys.stderr)  # report and continue with the next file
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projeto-raia/bin/normalizer b/projeto-raia/bin/normalizer
new file mode 100755
index 0000000..e8e795d
--- /dev/null
+++ b/projeto-raia/bin/normalizer
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from charset_normalizer.cli import cli_detect
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(cli_detect())  # run charset_normalizer's cli_detect() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/pip b/projeto-raia/bin/pip
new file mode 100755
index 0000000..80760a2
--- /dev/null
+++ b/projeto-raia/bin/pip
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pip._internal.cli.main import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(main())  # run pip's main() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/pip3 b/projeto-raia/bin/pip3
new file mode 100755
index 0000000..80760a2
--- /dev/null
+++ b/projeto-raia/bin/pip3
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pip._internal.cli.main import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(main())  # run pip's main() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/pip3.12 b/projeto-raia/bin/pip3.12
new file mode 100755
index 0000000..80760a2
--- /dev/null
+++ b/projeto-raia/bin/pip3.12
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pip._internal.cli.main import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(main())  # run pip's main() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/py.test b/projeto-raia/bin/py.test
new file mode 100755
index 0000000..9943710
--- /dev/null
+++ b/projeto-raia/bin/py.test
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pytest import console_main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(console_main())  # run pytest's console_main() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/pygmentize b/projeto-raia/bin/pygmentize
new file mode 100755
index 0000000..559002d
--- /dev/null
+++ b/projeto-raia/bin/pygmentize
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pygments.cmdline import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(main())  # run pygments' command-line main() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/pytest b/projeto-raia/bin/pytest
new file mode 100755
index 0000000..9943710
--- /dev/null
+++ b/projeto-raia/bin/pytest
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pytest import console_main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(console_main())  # run pytest's console_main() and pass its return value to sys.exit
diff --git a/projeto-raia/bin/python b/projeto-raia/bin/python
new file mode 120000
index 0000000..b8a0adb
--- /dev/null
+++ b/projeto-raia/bin/python
@@ -0,0 +1 @@
+python3
\ No newline at end of file
diff --git a/projeto-raia/bin/python3 b/projeto-raia/bin/python3
new file mode 120000
index 0000000..ae65fda
--- /dev/null
+++ b/projeto-raia/bin/python3
@@ -0,0 +1 @@
+/usr/bin/python3
\ No newline at end of file
diff --git a/projeto-raia/bin/python3.12 b/projeto-raia/bin/python3.12
new file mode 120000
index 0000000..b8a0adb
--- /dev/null
+++ b/projeto-raia/bin/python3.12
@@ -0,0 +1 @@
+python3
\ No newline at end of file
diff --git a/projeto-raia/bin/websockets b/projeto-raia/bin/websockets
new file mode 100755
index 0000000..e9a8e4a
--- /dev/null
+++ b/projeto-raia/bin/websockets
@@ -0,0 +1,8 @@
+#!/home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from websockets.cli import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])  # drop a trailing "-script.pyw" or ".exe" from the program name
+    sys.exit(main())  # run websockets' CLI main() and pass its return value to sys.exit
diff --git a/projeto-raia/lib64 b/projeto-raia/lib64
new file mode 120000
index 0000000..7951405
--- /dev/null
+++ b/projeto-raia/lib64
@@ -0,0 +1 @@
+lib
\ No newline at end of file
diff --git a/projeto-raia/pyvenv.cfg b/projeto-raia/pyvenv.cfg
new file mode 100644
index 0000000..ff5363e
--- /dev/null
+++ b/projeto-raia/pyvenv.cfg
@@ -0,0 +1,5 @@
+home = /usr/bin
+include-system-site-packages = false
+version = 3.12.3
+executable = /usr/bin/python3.12
+command = /usr/bin/python3 -m venv /home/jonasmelo/ProjectsAndStudies/TextToInsight/projeto-raia
diff --git a/requirements.txt b/requirements.txt
index 4052cae..76b8fa5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,5 @@ langchain-google-genai>=2.0.0
 langchain-openai>=0.1.0
 pytest>=9.0.2
 pytest-recording>=0.13.0
-pytest-timeout>=2.3.0
\ No newline at end of file
+pytest-timeout>=2.3.0
+numpy>=1.26.0
diff --git a/scripts/test_spider_eval.py b/scripts/test_spider_eval.py
new file mode 100644
index 0000000..e8415f2
--- /dev/null
+++ b/scripts/test_spider_eval.py
@@ -0,0 +1,497 @@
+#!/usr/bin/env python3
+"""
+Script de Avaliação do Agente contra Spider Dataset.
+
+Testa o agente Text-to-Insight contra perguntas reais do Spider dataset,
+usando a classe InsightEngine do pacote text_to_insight.
+
+Uso:
+    python scripts/test_spider_eval.py --sample-size 10 --seed 42
+    python scripts/test_spider_eval.py --db-filter concert_singer --output reports/eval.csv
+"""
+
+import argparse
+import os
+import sys
+import time
+from datetime import datetime
+from pathlib import Path
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from dotenv import load_dotenv
+
+# Importar InsightEngine do pacote text_to_insight
+from text_to_insight import InsightEngine
+
+from src.spider.csv_reporter import CSVReporter
+from src.spider.data_loader import (
+    filter_by_db_id,
+    get_unique_db_ids,
+    load_spider_dev_examples,
+    sample_examples,
+)
+from src.spider.metrics import (
+    build_comparison_row,
+    results_exact_match,
+    results_f1_score,
+    sql_similarity_score,
+)
+from src.spider.query_executor import SpiderQueryExecutor
+
+load_dotenv()
+
+
+def _celula_md(valor) -> str:
+    """Return *valor* as text that is safe inside a Markdown table cell."""
+    # A raw "|" would open a new column and a newline would end the table row.
+    return str(valor).replace("|", "\\|").replace("\n", " ")
+
+
+def _tabela_resultado_md(linhas: list[dict], limite: int = 20) -> list[str]:
+    """Return Markdown lines for a table with the first *limite* rows of *linhas*."""
+    amostra = linhas[:limite]
+    if not amostra:
+        return ["*(vazio)*"]
+    # Column order comes from the first row; keys missing in later rows render empty.
+    colunas = list(amostra[0].keys())
+    saida = [
+        "| " + " | ".join(_celula_md(c) for c in colunas) + " |",
+        "| " + " | ".join(["---"] * len(colunas)) + " |",
+    ]
+    for linha in amostra:
+        saida.append("| " + " | ".join(_celula_md(linha.get(c, "")) for c in colunas) + " |")
+    if len(linhas) > limite:
+        saida.append(f"*... e mais {len(linhas) - limite} linhas*")
+    return saida
+
+
+def _gerar_relatorio_md(
+    report_path: str,
+    summary: dict,
+    f1_medio: float,
+    exact_match_rate: float,
+    all_rows: list[dict],
+    mismatches: list[dict],
+    model: str,
+    sample_size: int,
+    seed: int,
+    data_dir: str,
+) -> None:
+    """Write a Markdown evaluation report to *report_path*.
+
+    Sections: run configuration, aggregate metrics, one table row per question
+    and, for every mismatch, both SQL queries plus the first 20 rows of each
+    result set. Missing parent directories of *report_path* are created.
+
+    Args:
+        report_path: Destination file; overwritten if it already exists.
+        summary: Aggregates; keys read: total_perguntas, total_tentativas,
+            perguntas_aprovadas, taxa_aprovacao, taxa_1a_tentativa,
+            tentativas_media, similarity_media and tempo_medio_ms.
+        f1_medio: Mean F1 score shown in the summary table.
+        exact_match_rate: Exact match rate (0-1) shown in the summary table.
+        all_rows: One dict per evaluated question, using the CSV column names.
+        mismatches: Per-mismatch dicts with id, db_id, pergunta, f1, precision,
+            recall, query_ouro, query_agente, resultado_ouro, resultado_agente.
+        model, sample_size, seed, data_dir: Run settings echoed in the configuration table.
+    """
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    Path(report_path).parent.mkdir(parents=True, exist_ok=True)
+
+    lines = [
+        "# Spider Evaluation Report",
+        "",
+        f"**Gerado em:** {timestamp}",
+        "",
+        # --- Run configuration ---
+        "## Configuração",
+        "",
+        "| Parâmetro | Valor |",
+        "|-----------|-------|",
+        f"| Modelo | `{model}` |",
+        f"| Sample size | {sample_size} |",
+        f"| Seed | {seed} |",
+        f"| Data dir | `{data_dir}` |",
+        "",
+        # --- Aggregate metrics ---
+        "## Resumo",
+        "",
+        "| Métrica | Valor |",
+        "|---------|-------|",
+        f"| Total de perguntas | {summary['total_perguntas']} |",
+        f"| Total de tentativas | {summary['total_tentativas']} |",
+        f"| Perguntas aprovadas (crítico) | {summary['perguntas_aprovadas']} |",
+        f"| Taxa de aprovação | {summary['taxa_aprovacao']:.1%} |",
+        f"| Taxa de sucesso na 1ª tentativa | {summary['taxa_1a_tentativa']:.1%} |",
+        f"| Tentativas médias por pergunta | {summary['tentativas_media']:.2f} |",
+        f"| Similarity score médio (SQL) | {summary['similarity_media']:.4f} |",
+        f"| F1 score médio (resultados) | {f1_medio:.4f} |",
+        f"| Exact match rate | {exact_match_rate:.1%} |",
+        f"| Mismatches | {len(mismatches)}/{len(all_rows)} |",
+        f"| Tempo médio por tentativa | {summary['tempo_medio_ms']:.0f} ms |",
+        "",
+        # --- One table row per evaluated question ---
+        "## Resultados por Pergunta",
+        "",
+        "| # | DB | Pergunta | Match | F1 | Similarity | Veredito |",
+        "|---|-----|----------|-------|----|------------|----------|",
+    ]
+    for r in all_rows:
+        pergunta = str(r['pergunta_usuario'])
+        # Add the ellipsis only when the question was actually truncated.
+        pergunta_curta = pergunta if len(pergunta) <= 50 else pergunta[:50] + "..."
+        match = r['resultado_exato_match']
+        # True -> matched, False -> mismatched, anything else -> not compared.
+        match_icon = "✅" if match is True else ("❌" if match is False else "⚠️")
+        lines.append(
+            f"| {r['id_exemplo']} "
+            f"| {_celula_md(r['db_id'])} "
+            f"| {_celula_md(pergunta_curta)} "
+            f"| {match_icon} "
+            f"| {float(r.get('resultado_f1') or 0.0):.2f} "
+            f"| {float(r['similarity_score_sql'] or 0.0):.2f} "
+            f"| {r['veredito_critico']} |"
+        )
+
+    # --- Mismatch details ---
+    lines += ["", "## Detalhes dos Mismatches", ""]
+    if mismatches:
+        lines.append(f"Total: **{len(mismatches)}** perguntas não obtiveram exact match.")
+    else:
+        lines.append("🎉 **Nenhum mismatch!** Todos os resultados foram exact match.")
+    lines.append("")
+
+    for i, m in enumerate(mismatches, 1):
+        lines += [
+            f"### Mismatch {i} — Pergunta #{m['id']} (`{m['db_id']}`)",
+            "",
+            f"**Pergunta:** {m['pergunta']}",
+            "",
+            f"**F1:** {m['f1']:.4f} | **Precision:** {m['precision']:.4f} | **Recall:** {m['recall']:.4f}",
+            "",
+            "**Query Ouro (Spider):**", "```sql", m['query_ouro'], "```", "",
+            "**Query Agente:**", "```sql", m['query_agente'], "```", "",
+            "**Resultado Ouro** (primeiras 20 linhas):", "",
+            *_tabela_resultado_md(m['resultado_ouro']), "",
+            "**Resultado Agente** (primeiras 20 linhas):", "",
+            *_tabela_resultado_md(m['resultado_agente']), "",
+            "---", "",
+        ]
+
+    with open(report_path, "w", encoding="utf-8") as f:
+        f.write("\n".join(lines))
+
+
+def main():
+    """Parse CLI options, run the agent on sampled Spider examples and write the CSV and Markdown reports."""
+    parser = argparse.ArgumentParser(
+        description="Avaliar agente Text-to-Insight contra Spider dataset"
+    )
+    parser.add_argument(
+        "--sample-size",
+        type=int,
+        default=10,
+        help="Quantas perguntas testar (default: 10)",
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        default=42,
+        help="Seed para reproducibilidade (default: 42)",
+    )
+    parser.add_argument(
+        "--db-filter",
+        type=str,
+        help="Filtrar por banco específico (ex: concert_singer)",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        help="Caminho para salvar CSV (default: reports/spider_eval_TIMESTAMP.csv)",
+    )
+    parser.add_argument(
+        "--max-attempts",  # NOTE(review): parsed, but args.max_attempts is never read in main()
+        type=int,
+        default=3,
+        help="Máximo de tentativas por pergunta (default: 3)",
+    )
+    parser.add_argument(
+        "--data-dir",
+        type=str,
+        default="data/spider_data/spider_data",
+        help="Diretório com dados do Spider",
+    )
+
+    # Lets a single question be tested by matching part of its text
+    parser.add_argument(
+        "--question-filter",
+        type=str,
+        help="Filtrar por um trecho específico da pergunta em inglês",
+    )
+
+    args = parser.parse_args()
+
+    # Select the model and make sure the matching API key is set
+    model = "gpt-4o-mini"
+    # A model name without "gpt" (e.g. "gemini-2.5-flash") makes the line below read GOOGLE_API_KEY.
+
+    api_key = os.getenv("OPENAI_API_KEY") if "gpt" in model.lower() else os.getenv("GOOGLE_API_KEY")
+    if not api_key:
+        print("❌ Erro: Chave API não encontrada em .env")
+        sys.exit(1)
+
+    # 1. Load the dataset
+    print(f"\n📂 Carregando exemplos do Spider de {args.data_dir}...")
+    try:
+        exemplos = load_spider_dev_examples(args.data_dir)
+        print(f"✓ Carregados {len(exemplos)} exemplos")
+    except FileNotFoundError as e:
+        print(f"❌ {e}")
+        sys.exit(1)
+
+    # 2. Apply the optional filters
+    if args.db_filter:
+        exemplos = filter_by_db_id(exemplos, args.db_filter)
+        print(f"✓ Filtrados por db_id={args.db_filter}: {len(exemplos)} exemplos")
+
+    # Optional case-insensitive substring filter on the question text
+    if args.question_filter:
+        exemplos = [
+            ex for ex in exemplos
+            if args.question_filter.lower() in ex.get("question", "").lower()
+        ]
+        print(f"✓ Filtrados pela pergunta contendo '{args.question_filter}': {len(exemplos)} exemplos")
+
+    # 3. Sample the examples
+    exemplos = sample_examples(exemplos, sample_size=args.sample_size, seed=args.seed)
+    print(
+        f"✓ Selecionados {len(exemplos)} exemplos (seed={args.seed}, "
+        f"bancos únicos: {len(get_unique_db_ids(exemplos))})"
+    )
+
+    # 4. Initialise the components
+    print("\n🔧 Inicializando componentes...")
+
+    executor = SpiderQueryExecutor(database_dir=str(Path(args.data_dir) / "database"))
+    print("✓ Query executor inicializado")
+
+    # 5. Prepare the CSV output
+    if args.output:
+        csv_path = args.output
+    else:
+        csv_path = f"reports/{CSVReporter.generate_timestamped_filename('spider_eval')}"
+
+    reporter = CSVReporter(csv_path)
+    print(f"✓ CSV reporter inicializado: {csv_path}")
+
+    # 6. Evaluation loop
+    print(f"\n🚀 Iniciando avaliação com {len(exemplos)} perguntas...\n")
+    print("=" * 100)
+
+    all_rows = []
+    mismatches = []  # Details of the questions whose results did not match exactly
+    ex_id = 1
+
+    # One InsightEngine per db_id, reused across questions on the same database
+    engine_cache: dict[str, InsightEngine] = {}
+
+    for idx, ex in enumerate(exemplos, 1):
+        pergunta = ex.get("question", "")
+        query_ouro = ex.get("query", "")
+        db_id = ex.get("db_id", "")
+
+        print(f"\n[{idx}/{len(exemplos)}] Pergunta: {pergunta[:60]}...")
+        print(f"     DB: {db_id} | Query Ouro: {query_ouro[:50]}...")
+
+        # Run the gold query to obtain the expected result
+        print("     → Executando query ouro...")
+        resultado_ouro = executor.execute_query(db_id, query_ouro)
+
+        if not resultado_ouro["success"]:
+            print(f"     ⚠️  Erro na query ouro: {resultado_ouro['error']}")
+            continue  # Skip this example (no row is written and ex_id is not advanced)
+
+        print(f"     ✓ Query ouro retornou {resultado_ouro['row_count']} linhas")
+
+        # Get or create the InsightEngine for this db_id
+        db_path = str(executor.get_db_path(db_id))
+        if db_id not in engine_cache:
+            try:
+                engine_cache[db_id] = InsightEngine(
+                    api_key=api_key,
+                    model=model,
+                    db_path=db_path,
+                    hitl=False,
+                    show_output=False,
+                )
+                print(f"     ✓ InsightEngine inicializado para db={db_id}")
+            except Exception as e:
+                print(f"     ❌ Erro ao inicializar InsightEngine: {e}")
+                continue
+
+        engine = engine_cache[db_id]
+
+        # Invoke the agent through InsightEngine.run()
+        print("     → Invocando agente via InsightEngine...")
+        inicio_agente = time.time()
+
+        try:
+            resultado = engine.run(
+                thread_id=f"spider_test_{ex_id}",
+                query=pergunta,
+            )
+        except Exception as e:
+            print(f"     ⚠️  Erro ao processar pergunta: {str(e)}")
+            continue
+
+        tempo_total = (time.time() - inicio_agente) * 1000
+
+        # Extract the fields of interest from the agent result
+        query_agente = resultado.get("sql_gerada", "")
+        veredito = resultado.get("status", "")
+        feedback_estado = resultado.get("feedback_critico", "")
+        erro_exec = resultado.get("erro_execucao", "")
+        tentativas = resultado.get("tentativas_loop", 1)
+
+        # Map the status to a verdict and pick a default feedback text when none was returned
+        if veredito == "aprovado":
+            veredito_critico = "aprovado"
+            feedback_critico = feedback_estado if feedback_estado else "Aprovado"
+        elif veredito == "reprovado":
+            veredito_critico = "reprovado"
+            feedback_critico = feedback_estado if feedback_estado else "Reprovado pelo crítico"
+        else:
+            veredito_critico = "erro"
+            feedback_critico = feedback_estado if feedback_estado else "Erro na avaliação"
+
+        # Compare the results only when the agent produced a query without an execution error
+        resultado_exato_match = None
+        similarity_score = 0.0
+        f1_scores = {"f1": 0.0, "precision": 0.0, "recall": 0.0}
+
+        if query_agente and not erro_exec:
+            resultado_agente = executor.execute_query(db_id, query_agente)
+            if resultado_agente["success"]:
+                # Print the first 50 rows returned by each query
+                print(f"Resultado Ouro: {resultado_ouro['results'][:50]}")
+                print(f"Resultado Text-to-Insight: {resultado_agente['results'][:50]}")
+
+                resultado_exato_match = results_exact_match(
+                    resultado_ouro["results"],
+                    resultado_agente["results"],
+                )
+                similarity_score = sql_similarity_score(query_ouro, query_agente)
+                f1_scores = results_f1_score(
+                    resultado_ouro["results"],
+                    resultado_agente["results"],
+                )
+                print(
+                    f"       Resultado final ({tentativas} tentativa(s)): "
+                    f"similarity={similarity_score:.2f}, "
+                    f"match={resultado_exato_match}, "
+                    f"F1={f1_scores['f1']:.2f}, "
+                    f"veredito={veredito_critico}"
+                )
+                # Keep the details of every mismatch for the Markdown report
+                if not resultado_exato_match:
+                    mismatches.append({
+                        "id": ex_id,
+                        "db_id": db_id,
+                        "pergunta": pergunta,
+                        "query_ouro": query_ouro,
+                        "query_agente": query_agente,
+                        "resultado_ouro": resultado_ouro["results"],
+                        "resultado_agente": resultado_agente["results"],
+                        "f1": f1_scores["f1"],
+                        "precision": f1_scores["precision"],
+                        "recall": f1_scores["recall"],
+                    })
+            else:
+                erro_exec = resultado_agente["error"]
+        else:
+            print(
+                f"       Resultado final ({tentativas} tentativa(s)): "
+                "sem query gerada ou com erro de execução"
+            )
+
+        # Build the CSV row
+        row = build_comparison_row(
+            id_exemplo=ex_id,
+            tentativa_numero=tentativas,
+            db_id=db_id,
+            pergunta=pergunta,
+            query_ouro=query_ouro,
+            query_agente=query_agente,
+            tempo_agente_ms=tempo_total,
+            veredito_critico=veredito_critico,
+            feedback_critico=feedback_critico,
+            erro_execucao=erro_exec,
+            resultado_exato_match=resultado_exato_match,
+            similarity_score=similarity_score,
+            resultado_f1=f1_scores["f1"],
+            resultado_precision=f1_scores["precision"],
+            resultado_recall=f1_scores["recall"],
+        )
+
+        reporter.append_row(row)
+        all_rows.append(row)
+
+        if veredito_critico == "aprovado":
+            print(f"     ✅ APROVADO após {tentativas} tentativa(s)")
+        else:
+            print(f"     ❌ NÃO APROVADO após {tentativas} tentativa(s)")
+
+        ex_id += 1
+        time.sleep(1)  # Pause between questions
+
+    # 7. Print the summary
+    print("\n" + "=" * 100)
+    print("📊 RESUMO FINAL")
+    print("=" * 100)
+
+    if all_rows:
+        summary = reporter.generate_summary(all_rows)
+        # Mean F1 over every evaluated question; a missing or zero score counts as 0.0
+        f1_values = [float(r.get("resultado_f1") or 0.0) for r in all_rows]
+        f1_medio = sum(f1_values) / len(f1_values) if f1_values else 0.0
+        # Exact match rate, also over every evaluated question
+        match_values = [r.get("resultado_exato_match") for r in all_rows]
+        exact_matches = sum(1 for v in match_values if v is True)
+        exact_match_rate = exact_matches / len(all_rows) if all_rows else 0.0
+
+        print(f"Total de perguntas: {summary['total_perguntas']}")
+        print(f"Total de tentativas: {summary['total_tentativas']}")
+        print(f"Perguntas aprovadas: {summary['perguntas_aprovadas']}")
+        print(f"Taxa de aprovação: {summary['taxa_aprovacao']:.1%}")
+        print(f"Taxa de sucesso na 1ª tentativa: {summary['taxa_1a_tentativa']:.1%}")
+        print(f"Tentativas médias por pergunta: {summary['tentativas_media']:.2f}")
+        print(f"Similarity score médio: {summary['similarity_media']:.4f}")
+        print(f"F1 score médio (resultados): {f1_medio:.4f}")
+        print(f"Exact match rate: {exact_match_rate:.1%}")
+        print(f"Mismatches: {len(mismatches)}/{len(all_rows)}")
+        print(f"Tempo médio por tentativa: {summary['tempo_medio_ms']:.2f} ms")
+        print(f"\n✅ CSV salvo em: {csv_path}")
+
+        # 8. Write the Markdown report next to the CSV; derived from the file stem so it can never equal csv_path
+        report_path = str(Path(csv_path).with_name(f"{Path(csv_path).stem}_report.md"))
+        _gerar_relatorio_md(
+            report_path=report_path,
+            summary=summary,
+            f1_medio=f1_medio,
+            exact_match_rate=exact_match_rate,
+            all_rows=all_rows,
+            mismatches=mismatches,
+            model=model,
+            sample_size=args.sample_size,
+            seed=args.seed,
+            data_dir=args.data_dir,
+        )
+        print(f"✅ Relatório salvo em: {report_path}")
+    else:
+        print("❌ Nenhum resultado para salvar")
+
+
+if __name__ == "__main__":  # run the evaluation only when this file is executed as a script
+    main()
diff --git a/src/spider/BENCHMARK.md b/src/spider/BENCHMARK.md
new file mode 100644
index 0000000..6bcfbc9
--- /dev/null
+++ b/src/spider/BENCHMARK.md
@@ -0,0 +1,173 @@
+# Spider Benchmark - Documentação Técnica
+
+## Visão Geral
+
+Módulo de avaliação automatizada do agente Text-to-Insight contra o **Spider Dataset** (1.034 perguntas em SQL, 20 bancos diferentes). Rastreia cada tentativa individualmente gerando métricas de qualidade.
+
+## Arquitetura
+
+```
+scripts/test_spider_eval.py (Orquestrador)
+    ├── data_loader.py       (Carrega dev.json)
+    ├── query_executor.py    (Executa SQL)
+    ├── metrics.py           (Calcula similarity/match)
+    └── csv_reporter.py      (Salva results e resumo)
+```
+
+## Módulos
+
+### `src/spider/data_loader.py`
+Gerencia dataset Spider (1.034 exemplos JSON).
+
+**Funções principais:**
+- `load_spider_dev_examples(data_dir)` → Lê dev.json, retorna lista de dicts {question, query, db_id}
+- `sample_examples(examples, sample_size, seed)` → Amostra reproducível com seed
+- `filter_by_db_id(examples, db_id)` → Filtra as perguntas de um único banco (ex: concert_singer)
+- `get_unique_db_ids(examples)` → Retorna 20 db_ids únicos
+
+---
+
+### `src/spider/query_executor.py`
+Executa queries SQL contra bancos SQLite do Spider.
+
+**Classe: `SpiderQueryExecutor`**
+- `execute_query(db_id, sql)` → Executa query, retorna {success, results, row_count, error, time_ms}
+- `get_db_path(db_id)` → Resolve caminho `/data/spider_data/spider_data/database/{db_id}/{db_id}.sqlite`
+- Usa SQLite em modo **read-only** (`?mode=ro&uri=true`)
+
+**Por que isolado:** Abstrai detalhes de banco de dados, facilita testar com outro driver se necessário.
+
+---
+
+### `src/spider/metrics.py`
+Compara queries geradas vs. queries ouro (baseline).
+
+**Funções principais:**
+- `sql_similarity_score(sql1, sql2)` → Valor 0-1 usando difflib.SequenceMatcher (normaliza UPPER/whitespace/comments)
+- `results_exact_match(results1, results2)` → bool, compara linhas executadas normalizando tipos (NULL → None)
+- `normalize_sql(sql)` → Transforma para comparação (rm whitespace, comments, UPPER)
+- `build_comparison_row(id_exemplo, tentativa_numero, ...)` → Monta dict com 15 colunas para CSV
+
+**Por que isolado:** Reutilizável em testes/análises extras, lógica de comparação centralizada.
+
+---
+
+### `src/spider/csv_reporter.py`
+Gerencia saída CSV e estatísticas agregadas.
+
+**Classe: `CSVReporter`**
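+- `__init__(filepath)` → Cria CSV com 15 headers (id_exemplo, tentativa_numero, db_id, pergunta_usuario, query_ouro_spider, query_agente_tentativa, tempo_agente_ms, veredito_critico, feedback_critico_recebido, erro_execucao, resultado_exato_match, similarity_score_sql, resultado_f1, resultado_precision, resultado_recall)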
+- `append_row(row_dict)` → Adiciona 1 linha por tentativa
+- `generate_summary(rows)` → Retorna dict {total_perguntas, taxa_aprovacao, similarity_media, tentativas_media, ...}
+- `generate_timestamped_filename(prefix)` → Retorna `spider_eval_2026-04-11_15-30-42.csv`
+
+**Por que isolado:** Padrão CSV fixo, resumo automático, reutilizável em análises.
+
+---
+
+### `scripts/test_spider_eval.py`
+**O maestro do pipeline.** Orquestra todo o benchmark.
+
+**Fluxo:**
+1. **Parse args** → sample-size, seed, db-filter, output, max-attempts, data-dir, question-filter
+2. **Load dados** → data_loader carrega e filtra exemplos
+3. **Para cada pergunta:**
+   - Executar query ouro (baseline via query_executor)
+   - Invocar grafo LangGraph com estado inicial
+   - **Rastrear stream()** do grafo acumulando estado (`full_estado.update()`)
+   - Quando nó crítico retorna:
+     - Extrair sql_gerada, veredito, feedback do full_estado
+     - Executar query_agente, calcular similarity/match (via metrics)
+     - Salvar linha no CSV (via csv_reporter)
+     - Se aprovado: next pergunta; se reprovado & tentativas < max: retry automático
+4. **Gerar resumo** → reporter calcula estatísticas finais
+
+**Responsabilidade única:** Não calcula metrics, não executa SQL, não salva CSV. Coordena os módulos.
+
+**Configuração do grafo:**
+```python
+grafo.stream(estado_inicial, config={"recursion_limit": 30})
+```
+- `recursion_limit=30`: Permite planejador iterar até ~3 vezes sem erro de recursão
+
+**State accumulation pattern:**
+```python
+full_estado = estado_inicial.copy()
+for output in grafo.stream(...):
+    for node_name, mudancas in output.items():
+        full_estado.update(mudancas)  # Acumula deltas em estado completo
+```
+Necessário porque `stream()` retorna deltas por nó, não estado total.
+
+---
+
+## Uso
+
+```bash
+# Teste básico: 10 perguntas
+python scripts/test_spider_eval.py
+
+# Parametrizado
+python scripts/test_spider_eval.py \
+  --sample-size 50 \
+  --seed 42 \
+  --db-filter concert_singer \
+  --output reports/eval.csv \
+  --max-attempts 3
+```
+
+## Saída
+
+**CSV:**
+- 1 linha = 1 tentativa (mesma pergunta pode ter 1-3 linhas)
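+- 15 colunas: id_exemplo, tentativa_numero, db_id, pergunta_usuario, query_ouro_spider, query_agente_tentativa, tempo_agente_ms, veredito_critico, feedback_critico_recebido, erro_execucao, resultado_exato_match, similarity_score_sql, resultado_f1, resultado_precision, resultado_recall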
+
+**Resumo:**
+- Total de perguntas avaliadas
+- Taxa de aprovação (% respostas corretas)
+- Taxa de sucesso 1ª tentativa (agente acerta de primeira?)
+- Tentativas médias por pergunta
+- Similarity score médio
+- Tempo médio por tentativa
+
+## Exemplo de Output
+
+```
+Total de perguntas: 50
+Total de tentativas: 63
+Perguntas aprovadas: 45
+Taxa de aprovação: 90.0%
+Taxa de sucesso na 1ª tentativa: 72.0%
+Tentativas médias: 1.26
+Similarity score médio: 0.953
+Tempo médio: 12345 ms
+✅ CSV salvo em: reports/spider_eval_2026-04-11_15-30-42.csv
+```
+
+## Estrutura do Estado (EstadoTextToInsight)
+
+Usado por todos os módulos, definido em `src/state.py`:
+```python
+{
+    "pergunta_usuario": str,
+    "db_path": str,
+    "contexto_schema": str,
+    "sql_gerada": str,
+    "linhas_resultado_preview": list,
+    "total_linhas_resultado": int,
+    "erro_execucao": str,
+    "feedback_critico": str,
+    "status": str,  # "aprovado", "reprovado", "erro"
+    "tentativas_loop": int,
+}
+```
+
+## Fluxo de Debug
+
+| Erro | Provável causa | Debug |
+|------|----------------|-------|
+| `FileNotFoundError: dev.json` | Dataset não baixado | `wget` Spider dataset em data/spider_data/spider_data/ |
+| `sqlite3.OperationalError: database is locked` | Mode não read-only | Verificar `query_executor.py`, deve ter `?mode=ro&uri=true` |
+| `RecursionLimitError: Recursion limit of 30` | Grafo entrando em loop infinito | Aumentar `recursion_limit` ou debugar nó que não para |
+| CSV vazio | Estado não acumulando | Verificar `full_estado.update()` no script (state accumulation pattern) |
+
diff --git a/src/spider/__init__.py b/src/spider/__init__.py
new file mode 100644
index 0000000..e9c82c2
--- /dev/null
+++ b/src/spider/__init__.py
@@ -0,0 +1,9 @@
+"""
+Módulo Spider: Integração com dataset Spider para avaliação de queries SQL.
+
+Submódulos:
+- data_loader: Carregar exemplos de dev.json
+- query_executor: Executar queries em bancos SQLite do spider
+- metrics: Comparar queries (similarity score, resultado exato)
+- csv_reporter: Salvar métricas em CSV por tentativa
+"""
diff --git a/src/spider/csv_reporter.py b/src/spider/csv_reporter.py
new file mode 100644
index 0000000..249a687
--- /dev/null
+++ b/src/spider/csv_reporter.py
@@ -0,0 +1,152 @@
+"""
+Reporter de CSV para resultados de avaliação Spider.
+
+Fornece:
+- Inicializar CSV com header
+- Salvar linhas de tentativas
+- Gerar resumo final
+"""
+
+import csv
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+
+class CSVReporter:
+    """Write evaluation rows to a CSV file and aggregate them into summary statistics."""
+
+    # Column order of the CSV file; append_row() requires every one of these keys.
+    HEADERS = [
+        "id_exemplo",
+        "tentativa_numero",
+        "db_id",
+        "pergunta_usuario",
+        "query_ouro_spider",
+        "query_agente_tentativa",
+        "tempo_agente_ms",
+        "veredito_critico",
+        "feedback_critico_recebido",
+        "erro_execucao",
+        "resultado_exato_match",
+        "similarity_score_sql",
+        "resultado_f1",
+        "resultado_precision",
+        "resultado_recall",
+    ]
+
+    def __init__(self, filepath: str | Path):
+        """
+        Create (or truncate) the CSV file and write its header row.
+
+        Args:
+            filepath: Path of the CSV file; missing parent directories are created.
+        """
+        self.filepath = Path(filepath)
+        self.filepath.parent.mkdir(parents=True, exist_ok=True)
+
+        # Mode "w" discards any previous content, so the file starts with only the header.
+        with open(self.filepath, "w", newline="", encoding="utf-8") as f:
+            writer = csv.DictWriter(f, fieldnames=self.HEADERS)
+            writer.writeheader()
+
+    def append_row(self, row: dict[str, Any]) -> None:
+        """
+        Append one row to the CSV file.
+
+        Args:
+            row: Dict holding every key listed in HEADERS.
+
+        Raises:
+            ValueError: If a HEADERS key is missing from *row*. csv.DictWriter
+                raises the same error type for keys that are not in HEADERS.
+        """
+        missing = set(self.HEADERS) - set(row.keys())
+        if missing:
+            raise ValueError(f"Chaves obrigatórias faltando: {missing}")
+
+        # The file is reopened in append mode for each row and closed before returning.
+        with open(self.filepath, "a", newline="", encoding="utf-8") as f:
+            writer = csv.DictWriter(f, fieldnames=self.HEADERS)
+            writer.writerow(row)
+
+    def generate_summary(self, rows: list[dict[str, Any]]) -> dict[str, Any]:
+        """
+        Aggregate evaluation rows into summary statistics.
+
+        Rows are grouped by id_exemplo and the last row of each group decides
+        whether the question counts as approved. The number of attempts of a
+        question is the larger of its row count and the tentativa_numero of its
+        last row, so both layouts work: one row per attempt, or a single row
+        per question that carries the total number of attempts.
+
+        Args:
+            rows: Row dicts in the HEADERS layout.
+
+        Returns:
+            Dict with total_perguntas, total_tentativas, perguntas_aprovadas,
+            taxa_aprovacao, taxa_1a_tentativa, tentativas_media,
+            similarity_media and tempo_medio_ms (all zero for empty input).
+        """
+        # Group the rows by question, keeping their original order.
+        by_exemplo: dict[Any, list[dict[str, Any]]] = {}
+        for row in rows:
+            by_exemplo.setdefault(row["id_exemplo"], []).append(row)
+
+        total_perguntas = len(by_exemplo)
+        perguntas_aprovadas = 0
+        perguntas_1a_tentativa = 0
+        total_tentativas = 0
+        similarities = []
+        tempos = []
+
+        for tentativas in by_exemplo.values():
+            ultima = tentativas[-1]
+            # tentativa_numero may be missing or non-numeric; then only the row count is used.
+            try:
+                numero_ultima = int(ultima.get("tentativa_numero") or 0)
+            except (TypeError, ValueError):
+                numero_ultima = 0
+            n_tentativas = max(len(tentativas), numero_ultima)
+            total_tentativas += n_tentativas
+
+            if ultima["veredito_critico"] == "aprovado":
+                perguntas_aprovadas += 1
+                if n_tentativas == 1:
+                    perguntas_1a_tentativa += 1
+
+            # Falsy values (None, "", 0, 0.0) are left out of both averages.
+            for tent in tentativas:
+                if tent["similarity_score_sql"]:
+                    similarities.append(float(tent["similarity_score_sql"]))
+                if tent["tempo_agente_ms"]:
+                    tempos.append(float(tent["tempo_agente_ms"]))
+
+        return {
+            "total_perguntas": total_perguntas,
+            "total_tentativas": total_tentativas,
+            "perguntas_aprovadas": perguntas_aprovadas,
+            "taxa_aprovacao": (
+                perguntas_aprovadas / total_perguntas if total_perguntas > 0 else 0.0
+            ),
+            "taxa_1a_tentativa": (
+                perguntas_1a_tentativa / total_perguntas if total_perguntas > 0 else 0.0
+            ),
+            "tentativas_media": total_tentativas / total_perguntas if total_perguntas > 0 else 0.0,
+            "similarity_media": sum(similarities) / len(similarities) if similarities else 0.0,
+            "tempo_medio_ms": sum(tempos) / len(tempos) if tempos else 0.0,
+        }
+
+    @staticmethod
+    def generate_timestamped_filename(prefix: str = "spider_eval") -> str:
+        """
+        Build a CSV file name that embeds the current local date and time.
+
+        Args:
+            prefix: Text placed before the timestamp.
+
+        Returns:
+            A name such as spider_eval_2025-04-06_14-30-45.csv
+        """
+        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        return f"{prefix}_{timestamp}.csv"
diff --git a/src/spider/data_loader.py b/src/spider/data_loader.py
new file mode 100644
index 0000000..8c5297e
--- /dev/null
+++ b/src/spider/data_loader.py
@@ -0,0 +1,96 @@
+"""
+Carregador de dados do Spider dataset.
+
+Fornece funcionalidades para:
+- Carregar exemplos de dev.json (pergunta, query_ouro, db_id)
+- Fazer sampling reprodutível com seed
+- Filtrar por banco de dados específico
+"""
+
+import json
+import random
+from pathlib import Path
+from typing import Any
+
+
+def load_spider_dev_examples(data_dir: str = "data/spider_data/spider_data") -> list[dict[str, Any]]:
+    """
+    Load the examples stored in the Spider dataset's dev.json.
+
+    Args:
+        data_dir: Directory that contains dev.json.
+
+    Returns:
+        The parsed JSON content (expected: a list of dicts with db_id, question, query).
+
+    Raises:
+        FileNotFoundError: If dev.json does not exist under data_dir.
+        json.JSONDecodeError: If the file is not valid JSON.
+    """
+    dev_path = Path(data_dir) / "dev.json"
+
+    if not dev_path.exists():
+        raise FileNotFoundError(
+            f"dev.json não encontrado em {dev_path}. "
+            f"Certifique-se que está em data/spider_data/spider_data/"
+        )
+
+    # Decode explicitly as UTF-8 instead of relying on the platform's default encoding.
+    with dev_path.open("r", encoding="utf-8") as f:
+        examples = json.load(f)
+    return examples
+
+
+def sample_examples(
+    examples: list[dict[str, Any]],
+    sample_size: int | None = None,
+    seed: int | None = None,
+) -> list[dict[str, Any]]:
+    """
+    Draw a random sample of the examples, reproducible when a seed is given.
+
+    Args:
+        examples: Candidate examples.
+        sample_size: Number of examples to draw; None (or >= len(examples)) returns them all.
+        seed: Seed for a private generator, so the shared `random` module state is left untouched.
+
+    Returns:
+        The sampled examples, or the input list itself when no sampling is needed.
+    """
+    if sample_size is None or sample_size >= len(examples):
+        return examples
+
+    # A dedicated Random(seed) yields the same draw as random.seed(seed) followed by
+    # random.sample(), but without reseeding the process-wide generator as a side effect.
+    rng = random.Random(seed) if seed is not None else random
+    return rng.sample(examples, k=sample_size)
+
+
+def filter_by_db_id(
+    examples: list[dict[str, Any]],
+    db_id: str,
+) -> list[dict[str, Any]]:
+    """
+    Keep only the examples that belong to one database.
+
+    Args:
+        examples: Examples to filter; each one is looked up by its "db_id" key.
+        db_id: Database identifier to keep (e.g. concert_singer).
+
+    Returns:
+        New list with the matching examples, in their original order.
+    """
+    return list(filter(lambda example: example["db_id"] == db_id, examples))
+
+
+def get_unique_db_ids(examples: list[dict[str, Any]]) -> list[str]:
+    """
+    List the distinct database ids referenced by the examples.
+
+    Args:
+        examples: Examples to inspect; each one is looked up by its "db_id" key.
+
+    Returns:
+        Distinct db_ids in ascending order.
+    """
+    return sorted({example["db_id"] for example in examples})
diff --git a/src/spider/metrics.py b/src/spider/metrics.py
new file mode 100644
index 0000000..5fd0ad9
--- /dev/null
+++ b/src/spider/metrics.py
@@ -0,0 +1,272 @@
+"""
+Métricas para comparação de queries SQL.
+
+Fornece:
+- Similarity score entre duas queries (difflib-based)
+- Comparação de resultados (exato match)
+- F1 score de resultados (row-level precision/recall)
+- Normalização de SQL para comparação
+"""
+
+import difflib
+import re
+from collections import Counter
+from typing import Any
+
+import numpy as np
+
+
+def normalize_sql(sql: str) -> str:
+    """
+    Normalize SQL text so that two queries can be compared more robustly.
+
+    - Strips line (--) and block (/* */) comments
+    - Converts to upper case
+    - Collapses every whitespace run into a single space
+    - Drops trailing semicolons
+
+    Args:
+        sql: SQL text to normalize.
+
+    Returns:
+        The normalized SQL.
+    """
+    # Line comments: from "--" to the end of each line.
+    sql = re.sub(r"--.*$", "", sql, flags=re.MULTILINE)
+
+    # Block comments, possibly spanning several lines.
+    sql = re.sub(r"/\*.*?\*/", "", sql, flags=re.DOTALL)
+
+    # Collapse whitespace first so any kind of trailing whitespace (including the
+    # carriage return of CRLF input) is gone before trailing semicolons are stripped.
+    sql = re.sub(r"\s+", " ", sql).strip()
+
+    # Trailing semicolons, plus the spaces that may separate or precede them.
+    sql = sql.rstrip("; ")
+
+    # Upper-case everything; note that this also affects string literals.
+    sql = sql.upper()
+    return sql
+
+
+def sql_similarity_score(sql1: str, sql2: str) -> float:
+    """
+    Score how alike two SQL strings are, using difflib.SequenceMatcher.
+
+    Both queries are passed through normalize_sql before being compared.
+
+    Args:
+        sql1: First query.
+        sql2: Second query.
+
+    Returns:
+        Score from 0 to 1 (1 = identical).
+    """
+    left, right = normalize_sql(sql1), normalize_sql(sql2)
+
+    if not (left and right):
+        # At least one side is empty after normalization: two empty queries count
+        # as identical, an empty one against a non-empty one as fully different.
+        return 1.0 if left == right else 0.0
+
+    # Character-level comparison of the normalized texts; None as the first
+    # argument means no custom junk predicate is supplied.
+    comparison = difflib.SequenceMatcher(None, left, right)
+    return comparison.ratio()
+
+
+# def results_exact_match(
+#     results_gold: list[dict[str, Any]],
+#     results_agent: list[dict[str, Any]],
+# ) -> bool:
+#     """
+#     Compara se dois conjuntos de resultados são exatamente iguais.
+
+#     Compara:
+#     - Número de linhas
+#     - Valores de cada linha (insensível a ordem das colunas)
+
+#     Args:
+#         results_gold: Resultados da query ouro
+#         results_agent: Resultados da query do agente
+
+#     Returns:
+#         True se resultados são iguais
+#     """
+#     if len(results_gold) != len(results_agent):
+#         return False
+
+#     # Converter dicts para conjuntos de tuplas para comparação
+#     # (para serem agnósticos à ordem das colunas)
+#     def result_set(results: list[dict[str, Any]]) -> set:
+#         converted = []
+#         for row in results:
+#             # Converter valores para strings para lidar com tipos diferentes
+#             items = []
+#             for k in sorted(row.keys()):
+#                 # Normalizar None/NULL
+#                 v = row[k]
+#                 if v is None:
+#                     v = "NULL"
+#                 items.append((k, str(v)))
+#             converted.append(tuple(items))
+#         return set(converted)
+
+#     return result_set(results_gold) == result_set(results_agent)
+
+def results_exact_match(
+    results_gold: list[dict[str, Any]],
+    results_agent: list[dict[str, Any]],
+) -> bool:
+    """
+    Tell whether two result sets hold the same rows, looking ONLY at the values.
+
+    Column names and row order are ignored; column order within a row is not.
+
+    Args:
+        results_gold: Rows produced by the gold query.
+        results_agent: Rows produced by the agent's query.
+
+    Returns:
+        True when both result sets contain the same rows.
+    """
+    def _sorted_value_matrix(rows: list[dict[str, Any]]) -> np.ndarray:
+        # Keep the values only (keys dropped), stringified, with None shown as "NULL".
+        # Note that str() keeps 0 and 0.0 distinct ("0" vs "0.0").
+        cells = np.array(
+            [["NULL" if value is None else str(value) for value in row.values()] for row in rows]
+        )
+        # Without ORDER BY the row order is arbitrary, so rows are sorted before comparing.
+        # np.lexsort treats its LAST key as the primary one, hence the reversed transpose:
+        # the first column becomes the primary sort key.
+        return cells[np.lexsort(cells.T[::-1])]
+
+    # Different row counts can never match.
+    if len(results_gold) != len(results_agent):
+        return False
+
+    # Same length and gold is empty: both are empty.
+    if not results_gold:
+        return True
+
+    gold_matrix = _sorted_value_matrix(results_gold)
+    agent_matrix = _sorted_value_matrix(results_agent)
+
+    # array_equal checks shape and content; bool() turns np.bool_ into a plain bool.
+    return bool(np.array_equal(gold_matrix, agent_matrix))
+
+
+def results_f1_score(
+    results_gold: list[dict[str, Any]],
+    results_agent: list[dict[str, Any]],
+) -> dict[str, float]:
+    """
+    Compute row-level precision, recall and F1 between gold and agent results.
+
+    Every row is reduced to a canonical tuple (its values as strings, sorted) and
+    the rows of each side are counted as a multiset (Counter). This gives partial
+    credit when the agent returns only some of the expected rows.
+
+    - Precision: share of the agent's rows that also occur in gold.
+    - Recall:    share of the gold rows that the agent returned.
+    - F1:        harmonic mean of precision and recall.
+
+    Args:
+        results_gold: Rows produced by the gold query.
+        results_agent: Rows produced by the agent's query.
+
+    Returns:
+        Dict with keys precision, recall and f1 (floats from 0 to 1).
+    """
+    def _canonical_bag(rows: list[dict[str, Any]]) -> Counter:
+        """Count rows by canonical form: stringified values (None as NULL), sorted."""
+        return Counter(
+            tuple(sorted("NULL" if value is None else str(value) for value in row.values()))
+            for row in rows
+        )
+
+    # Empty-input cases are answered directly.
+    if not results_gold:
+        if not results_agent:
+            # Nothing expected, nothing returned: perfect match.
+            return {"precision": 1.0, "recall": 1.0, "f1": 1.0}
+        return {"precision": 0.0, "recall": 1.0, "f1": 0.0}
+    if not results_agent:
+        return {"precision": 1.0, "recall": 0.0, "f1": 0.0}
+
+    gold_bag = _canonical_bag(results_gold)
+    agent_bag = _canonical_bag(results_agent)
+
+    # Multiset intersection keeps min(gold count, agent count) per distinct row.
+    matched = sum((gold_bag & agent_bag).values())
+    agent_total = sum(agent_bag.values())
+    gold_total = sum(gold_bag.values())
+
+    precision = matched / agent_total if agent_total > 0 else 0.0
+    recall = matched / gold_total if gold_total > 0 else 0.0
+
+    # Guard the harmonic mean against a zero denominator.
+    denominator = precision + recall
+    f1 = 0.0 if denominator == 0 else 2 * (precision * recall) / denominator
+
+    return {"precision": round(precision, 4), "recall": round(recall, 4), "f1": round(f1, 4)}
+
+
+def build_comparison_row(
+    id_exemplo: int,
+    tentativa_numero: int,
+    db_id: str,
+    pergunta: str,
+    query_ouro: str,
+    query_agente: str,
+    tempo_agente_ms: float,
+    veredito_critico: str,
+    feedback_critico: str,
+    erro_execucao: str,
+    resultado_exato_match: bool | None,
+    similarity_score: float,
+    resultado_f1: float = 0.0,
+    resultado_precision: float = 0.0,
+    resultado_recall: float = 0.0,
+) -> dict[str, Any]:
+    """
+    Assemble one row of the evaluation CSV.
+
+    Args:
+        id_exemplo: Sequential id of the question.
+        tentativa_numero: Attempt number (1, 2, 3...).
+        db_id: Database identifier.
+        pergunta: Question in natural language.
+        query_ouro: Reference (gold) query from Spider.
+        query_agente: Query generated by the agent in THIS attempt.
+        tempo_agente_ms: Elapsed time in ms (stored rounded to 2 decimals).
+        veredito_critico: "aprovado" / "reprovado" / "erro".
+        feedback_critico: Feedback received (or "Aprovado" when approved).
+        erro_execucao: Error message (empty when OK).
+        resultado_exato_match: True/False for an exact result match; None is stored as "".
+        similarity_score: Score from 0 to 1 (stored rounded to 4 decimals).
+        resultado_f1: Row-level F1 score (0-1).
+        resultado_precision: Row-level precision (0-1).
+        resultado_recall: Row-level recall (0-1).
+
+    Returns:
+        Dict with 15 keys for the CSV.
+    """
+    return dict(
+        id_exemplo=id_exemplo,
+        tentativa_numero=tentativa_numero,
+        db_id=db_id,
+        pergunta_usuario=pergunta,
+        query_ouro_spider=query_ouro,
+        query_agente_tentativa=query_agente,
+        tempo_agente_ms=round(tempo_agente_ms, 2),
+        veredito_critico=veredito_critico,
+        feedback_critico_recebido=feedback_critico,
+        erro_execucao=erro_execucao,
+        resultado_exato_match="" if resultado_exato_match is None else resultado_exato_match,
+        similarity_score_sql=round(similarity_score, 4),
+        resultado_f1=resultado_f1,
+        resultado_precision=resultado_precision,
+        resultado_recall=resultado_recall,
+    )
+
diff --git a/src/spider/query_executor.py b/src/spider/query_executor.py
new file mode 100644
index 0000000..c03142c
--- /dev/null
+++ b/src/spider/query_executor.py
@@ -0,0 +1,125 @@
+"""
+Executor de queries contra bancos SQLite do Spider dataset.
+
+Fornece funcionalidades para:
+- Conectar dinamicamente a bancos por db_id
+- Executar queries em modo read-only
+- Capturar resultados e erros
+"""
+
+import sqlite3
+import time
+from pathlib import Path
+from typing import Any
+
+
+class SpiderQueryExecutor:
+    """
+    Runs SQL against the SQLite databases of the Spider dataset.
+
+    Databases are located by db_id and opened read-only; execution failures are
+    reported in the returned dict instead of being raised.
+    """
+
+    def __init__(self, database_dir: str = "data/spider_data/spider_data/database"):
+        """
+        Initialize the executor.
+
+        Args:
+            database_dir: Directory holding one sub-folder per database.
+        """
+        self.database_dir = Path(database_dir)
+
+    def get_db_path(self, db_id: str) -> Path:
+        """
+        Return the path of the SQLite file of one database.
+
+        Args:
+            db_id: Database identifier (e.g. concert_singer).
+
+        Returns:
+            Path shaped like <database_dir>/<db_id>/<db_id>.sqlite.
+
+        Raises:
+            FileNotFoundError: If that file does not exist.
+        """
+        db_path = self.database_dir / db_id / f"{db_id}.sqlite"
+        if not db_path.exists():
+            raise FileNotFoundError(f"Banco não encontrado: {db_path}")
+        return db_path
+
+    @staticmethod
+    def _failure(label: str, exc: Exception, start_time: float) -> dict[str, Any]:
+        """
+        Build the dict returned when a query could not be executed.
+
+        Args:
+            label: Error category placed before the exception text.
+            exc: Exception that interrupted the execution.
+            start_time: time.time() value taken when the execution started.
+        """
+        return {
+            "success": False,
+            "results": [],
+            "row_count": 0,
+            "error": f"{label}: {str(exc)}",
+            "time_ms": (time.time() - start_time) * 1000,
+        }
+
+    def execute_query(
+        self,
+        db_id: str,
+        sql: str,
+        timeout: int = 30,
+    ) -> dict[str, Any]:
+        """
+        Execute a query against one database opened in read-only mode.
+
+        Exceptions are not propagated: every failure comes back as a dict with success=False.
+
+        Args:
+            db_id: Database identifier.
+            sql: SQL to execute.
+            timeout: Seconds to wait on a locked database (sqlite3.connect timeout);
+                it does not limit how long the query itself may run.
+
+        Returns:
+            Dict with keys:
+            - success: bool
+            - results: list[dict], one per fetched row (empty on error)
+            - row_count: int, number of fetched rows
+            - error: str, empty on success
+            - time_ms: float, elapsed time
+        """
+        start_time = time.time()
+
+        try:
+            db_path = self.get_db_path(db_id)
+
+            # "mode=ro" asks SQLite to open the file read-only.
+            connection_string = f"file:{db_path}?mode=ro&uri=true"
+            conn = sqlite3.connect(connection_string, timeout=timeout, uri=True)
+            try:
+                conn.row_factory = sqlite3.Row  # rows convertible to dicts
+                cursor = conn.cursor()
+                cursor.execute(sql)
+                # NOTE(review): dict(row) keeps one entry per column name, so result
+                # columns sharing a name collapse into one; confirm that is acceptable.
+                results = [dict(row) for row in cursor.fetchall()]
+            finally:
+                # Always release the connection, including when the query fails.
+                conn.close()
+        # Each failure category keeps its own message prefix.
+        except sqlite3.Error as e:
+            return self._failure("SQLite error", e, start_time)
+        except FileNotFoundError as e:
+            return self._failure("Database not found", e, start_time)
+        except Exception as e:
+            return self._failure("Unexpected error", e, start_time)
+
+        return {
+            "success": True,
+            "results": results,
+            "row_count": len(results),
+            "error": "",
+            "time_ms": (time.time() - start_time) * 1000,
+        }
diff --git a/text_to_insight/graph.py b/text_to_insight/graph.py
index 43e4552..74d0ccf 100644
--- a/text_to_insight/graph.py
+++ b/text_to_insight/graph.py
@@ -81,11 +81,18 @@ def _construir_grafo_text_to_insight(self, hitl: bool) -> StateGraph:
             }
         )
 
+        MAX_TENTATIVAS_CRITICO = 3
+
         def roteador_critico(estado: EstadoTextToInsight) -> str:
             status = estado.get("status", "")
-            # Se aprovado, enviar para nó de resposta; senão retornar ao planejador
+            tentativas = estado.get("tentativas_loop", 0)
+            # If approved, route to the answer node
             if status == "aprovado":
                 return "resposta"
+            # Once the attempt limit is reached, finish even though the result was not approved
+            if tentativas >= MAX_TENTATIVAS_CRITICO:
+                print(f"[ROTEADOR_CRITICO] Limite de {MAX_TENTATIVAS_CRITICO} tentativas atingido → resposta (forçado)")
+                return "resposta"
             return "planejador"
 
         construtor_grafo.add_conditional_edges(
@@ -111,3 +118,20 @@ def _compilar_grafo(self, hitl: bool) -> "CompiledStateGraph":
 
     def app(self):
         return self.grafo_text_to_insight
+    def invoke(self, estado: EstadoTextToInsight):  # delegates to self.grafo_text_to_insight.invoke()
+        return self.grafo_text_to_insight.invoke(estado)
+
+    def stream(self, estado: EstadoTextToInsight, config: dict = None):
+        """
+        Run the graph in streaming mode.
+
+        Args:
+            estado: Initial state.
+            config: Options forwarded to the graph's stream() (e.g. recursion_limit); None means {}.
+
+        Returns:
+            Whatever self.grafo_text_to_insight.stream() returns (presumably one output per node).
+        """
+        if config is None:
+            config = {}
+        return self.grafo_text_to_insight.stream(estado, config)
diff --git a/text_to_insight/nodes/code_agent/code_agent.py b/text_to_insight/nodes/code_agent/code_agent.py
index e0557ba..a2d5d0c 100644
--- a/text_to_insight/nodes/code_agent/code_agent.py
+++ b/text_to_insight/nodes/code_agent/code_agent.py
@@ -32,8 +32,8 @@
 === CONVERSA PRÉVIA (CONTEXTO ADICIONAL) ===
 {conversa_previa}
 
-=== FEEDBACK CRÍTICO (SE HOUVER) ===
-{feedback_section}
+=== HISTÓRICO DE TENTATIVAS ANTERIORES ===
+{historico_tentativas_section}
 
 Responda APENAS com a consulta SQL, sem markdown, sem explicação."""
 
@@ -48,6 +48,24 @@ def _extrair_sql(resposta: str) -> str:
     return resposta.strip()
 
 
+def _formatar_historico_tentativas(historico: list[dict]) -> str:
+    """Render the previous attempts as text to be embedded in the prompt."""
+    if not historico:
+        return "Nenhuma tentativa anterior."
+
+    blocos = []
+    for numero, tentativa in enumerate(historico, 1):
+        # Header and SQL are always present; error and feedback only when non-empty.
+        linhas = [f"--- Tentativa {numero} ---", f"SQL gerada:\n{tentativa.get('sql', '(vazia)')}"]
+        if tentativa.get("erro"):
+            linhas.append(f"Erro de execução: {tentativa['erro']}")
+        if tentativa.get("feedback"):
+            linhas.append(f"Feedback do crítico: {tentativa['feedback']}")
+        blocos.append("\n".join(linhas) + "\n")
+
+    return "\n".join(blocos) + "\nNÃO repita os mesmos erros. Gere uma SQL diferente e corrigida."
+
+
 def nos_nodo_agente_codigo(estado: EstadoTextToInsight, llm: ChatGoogleGenerativeAI) -> dict:
     """
     Nó Agente de Código: usa Gemini para gerar SQL a partir da pergunta + schema.
@@ -55,26 +73,20 @@ def nos_nodo_agente_codigo(estado: EstadoTextToInsight, llm: ChatGoogleGenerativ
     pergunta = estado.get("pergunta_usuario", "")
     conversa_previa = estado.get("historico_conversa", "")
     schema = estado.get("contexto_schema", "")
-    feedback = estado.get("feedback_critico", "")
+    historico = estado.get("historico_tentativas", [])
     tentativas = estado.get("tentativas_loop", 0)
 
     print(f"[AGENTE_CODIGO] Gerando SQL (tentativa {tentativas + 1})...")
 
-    feedback_section = ""
-    if feedback:
-        feedback_section = f"""=== FEEDBACK DO CRÍTICO (corrija os problemas apontados) ===
-        {feedback}
-
-        === SQL ANTERIOR (que foi reprovada) ===
-        {estado.get('sql_gerada', '')}"""
+    historico_section = _formatar_historico_tentativas(historico)
 
     prompt = PROMPT_TEMPLATE.format(
         schema=schema,
         pergunta=pergunta,
         conversa_previa=conversa_previa if conversa_previa else "Nenhuma",
-        feedback_section=feedback_section,
+        historico_tentativas_section=historico_section,
     )
-
+    
     resposta = llm.invoke(prompt)
     sql = _extrair_sql(resposta.content)
 
@@ -91,3 +103,4 @@ def nos_nodo_agente_codigo(estado: EstadoTextToInsight, llm: ChatGoogleGenerativ
         "tokens_output": out_tokens,
         "tokens_total": total_tokens,
     }
+
diff --git a/text_to_insight/nodes/critic.py b/text_to_insight/nodes/critic.py
index 04e53c7..2831a26 100644
--- a/text_to_insight/nodes/critic.py
+++ b/text_to_insight/nodes/critic.py
@@ -33,16 +33,64 @@
 === ERROS (se houver) ===
 {erro}
 
+=== TENTATIVAS ANTERIORES ===
+{historico_tentativas_section}
+
 Avalie:
 1. A SQL responde à pergunta do usuário?
 2. Os resultados fazem sentido?
 3. Há algum erro lógico ou de interpretação?
+4. Se houve tentativas anteriores, verifique se os mesmos problemas persistem.
+
+Ao avaliar, priorize utilidade prática e correção semântica da resposta,
+não perfeição formal.
+
+Diferenças de formato, representação ou precisão que não alterem
+substancialmente a resposta NÃO devem causar reprovação.
+
+Exemplos de casos que normalmente devem ser APROVADOS:
+- Ano médio retornado como float em vez de inteiro/data
+- Pequenas diferenças de arredondamento
+- Colunas extras irrelevantes
+- Nomes/aliases diferentes
+- Resultado parcialmente correto mas ainda útil
+- Agregações corretas com precisão numérica diferente da esperada
+
+REPROVE apenas quando houver falha material, por exemplo:
+- A query responde outra pergunta
+- O dado necessário para responder não está presentes
+- Filtros importantes estão errados ou ausentes
+- JOIN incorreto altera significativamente os resultados
+- Métrica errada (SUM vs AVG, COUNT vs COUNT DISTINCT, etc.)
+- Resultado vazio inesperado
+- Erro SQL ou inconsistência lógica grave
+
+Considere o custo de retentativas. Em caso de dúvida entre APROVADO
+e REPROVADO, prefira APROVADO se a resposta ainda for útil para o usuário. Leve em consideração que ainda tem um agente depois de você que irá interpretar o resultado da query e criar uma resposta em linguagem natural.
 
 Responda no formato:
 VEREDITO: APROVADO ou REPROVADO
 FEEDBACK: <sua avaliação em 1-3 frases>"""
 
 
+def _formatar_historico_para_critico(historico: list[dict]) -> str:
+    """Render the previous attempts as text for the critic's prompt."""
+    if not historico:
+        return "Nenhuma tentativa anterior (esta é a primeira)."
+
+    blocos = []
+    for numero, tentativa in enumerate(historico, 1):
+        # Header and SQL are always present; error and feedback only when non-empty.
+        linhas = [f"--- Tentativa {numero} ---", f"SQL: {tentativa.get('sql', '(vazia)')}"]
+        if tentativa.get("erro"):
+            linhas.append(f"Erro: {tentativa['erro']}")
+        if tentativa.get("feedback"):
+            linhas.append(f"Feedback: {tentativa['feedback']}")
+        blocos.append("\n".join(linhas) + "\n")
+
+    return "\n".join(blocos)
+
+
 def nos_nodo_critico(estado: EstadoTextToInsight, llm: ChatGoogleGenerativeAI) -> dict:
     """
     Nó Crítico: usa Gemini para avaliar qualidade do resultado.
@@ -55,6 +103,7 @@ def nos_nodo_critico(estado: EstadoTextToInsight, llm: ChatGoogleGenerativeAI) -
     conversa_previa = estado.get("historico_conversa", "")
     erro = estado.get("erro_execucao", "")
     status_exec = estado.get("status", "")
+    historico = estado.get("historico_tentativas", [])
 
     print("[CRITICO] Avaliando resultado...")
 
@@ -65,10 +114,13 @@ def nos_nodo_critico(estado: EstadoTextToInsight, llm: ChatGoogleGenerativeAI) -
         return {
             "feedback_critico": feedback,
             "status": "reprovado",
+            # Registrar tentativa com erro no histórico
+            "historico_tentativas": [{"sql": sql, "erro": erro, "feedback": feedback}],
         }
 
     # Formata preview para o prompt
     preview_str = str(preview[:10]) if preview else "Nenhum resultado"
+    historico_section = _formatar_historico_para_critico(historico)
 
     prompt = PROMPT_CRITIC.format(
         pergunta=pergunta,
@@ -78,6 +130,7 @@ def nos_nodo_critico(estado: EstadoTextToInsight, llm: ChatGoogleGenerativeAI) -
         total_linhas=total,
         preview=preview_str,
         erro=erro if erro else "Nenhum",
+        historico_tentativas_section=historico_section,
     )
 
     resposta = llm.invoke(prompt)
@@ -108,4 +161,7 @@ def nos_nodo_critico(estado: EstadoTextToInsight, llm: ChatGoogleGenerativeAI) -
         "tokens_input": in_tokens,
         "tokens_output": out_tokens,
         "tokens_total": total_tokens,
+        # Registrar esta tentativa no histórico (acumula via operator.add)
+        "historico_tentativas": [{"sql": sql, "feedback": feedback}],
     }
+
diff --git a/text_to_insight/runtime.py b/text_to_insight/runtime.py
index c1a7114..f8fe9b2 100644
--- a/text_to_insight/runtime.py
+++ b/text_to_insight/runtime.py
@@ -23,6 +23,7 @@ def construir_estado_inicial(pergunta: str, db_path: str) -> dict[str, Any]:
         "tentativas_loop": 0,
         "db_path": db_path,
         "espera_humana": False,
+        "historico_tentativas": [],
     }
 
 
diff --git a/text_to_insight/state.py b/text_to_insight/state.py
index 9d32550..fed878b 100644
--- a/text_to_insight/state.py
+++ b/text_to_insight/state.py
@@ -64,6 +64,7 @@ class EstadoTextToInsight(EstadoEntrada, total = False):
     historico_conversa: list[tuple[str, str]]
     tentativas_loop: int
     resposta_natural: str
+    historico_tentativas: Annotated[list[dict[str, str]], operator.add]
 
     # Campos exclusivos para métricas. Possibilita a soma automática dos tokens utilizados
     # por cada chamada do Gemini nos vários diferentes nós.