Skip to content
Closed
116 changes: 116 additions & 0 deletions lnt/server/db/migrations/upgrade_18_to_19.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""Add an ordinal column to each test suite's Order table.

On Postgres, also drops the PreviousOrder and NextOrder linked-list columns.
On SQLite, those columns are left in place because SQLite cannot drop columns
that are referenced by foreign key constraints.

The ordinal column stores the position of each order in the total ordering,
determined by sorting order field values via convert_revision().
"""

import sqlalchemy
from sqlalchemy import Column, Integer, text

from lnt.server.db.migrations.util import introspect_table
from lnt.server.db.util import add_column
from lnt.server.ui.util import convert_revision


def upgrade(engine):
is_sqlite = engine.dialect.name == 'sqlite'

# Find all test suites so we can migrate each Order table.
test_suite_table = introspect_table(engine, "TestSuite")
order_fields_table = introspect_table(engine, "TestSuiteOrderFields")

session = sqlalchemy.orm.sessionmaker(engine)()

test_suites = session.execute(
sqlalchemy.select([test_suite_table.c.DBKeyName])
).fetchall()

for (db_key_name,) in test_suites:
order_table_name = f'{db_key_name}_Order'

# 1. Add ordinal column (nullable initially so we can backfill).
ordinal_col = Column("ordinal", Integer)
add_column(engine, order_table_name, ordinal_col)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm getting an error that the ordinal column already exists when running this on a clean database, are you able to recreate this?

webserver-1  | [SQL: ALTER TABLE "NT_Order" ADD COLUMN ordinal INTEGER]
webserver-1  | (Background on this error at: http://sqlalche.me/e/13/f405)
webserver-1  | [2026-04-07 16:03:19 +0000] [11] [INFO] Worker exiting (pid: 11)
webserver-1  | (psycopg2.errors.DuplicateColumn) column "ordinal" of relation "NT_Order" already exists
webserver-1  | 
webserver-1  | [SQL: ALTER TABLE "NT_Order" ADD COLUMN ordinal INTEGER]
webserver-1  | (Background on this error at: http://sqlalche.me/e/13/f405)
webserver-1  | [2026-04-07 16:03:19 +0000] [12] [INFO] Worker exiting (pid: 12)
webserver-1  | [2026-04-07 16:03:19 +0000] [9] [ERROR] Exception in worker process
webserver-1  | Traceback (most recent call last):
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1276, in _execute_context
webserver-1  |     self.dialect.do_execute(
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 608, in do_execute
webserver-1  |     cursor.execute(statement, parameters)
webserver-1  | psycopg2.errors.DuplicateColumn: column "ordinal" of relation "NT_Order" already exists
webserver-1  | 
webserver-1  | 
webserver-1  | The above exception was the direct cause of the following exception:
webserver-1  | 
webserver-1  | Traceback (most recent call last):
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/gunicorn/arbiter.py", line 713, in spawn_worker
webserver-1  |     worker.init_process()
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/gunicorn/workers/base.py", line 136, in init_process
webserver-1  |     self.load_wsgi()
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/gunicorn/workers/base.py", line 148, in load_wsgi
webserver-1  |     self.wsgi = self.app.wsgi()
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/gunicorn/app/base.py", line 66, in wsgi
webserver-1  |     self.callable = self.load()
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/gunicorn/app/wsgiapp.py", line 57, in load
webserver-1  |     return self.load_wsgiapp()
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/gunicorn/app/wsgiapp.py", line 47, in load_wsgiapp
webserver-1  |     return util.import_app(self.app_uri)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/gunicorn/util.py", line 377, in import_app
webserver-1  |     mod = importlib.import_module(module)
webserver-1  |   File "/usr/local/lib/python3.10/importlib/__init__.py", line 126, in import_module
webserver-1  |     return _bootstrap._gcd_import(name[level:], package, level)
webserver-1  |   File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
webserver-1  |   File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
webserver-1  |   File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
webserver-1  |   File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
webserver-1  |   File "<frozen importlib._bootstrap_external>", line 883, in exec_module
webserver-1  |   File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
webserver-1  |   File "/var/lib/lnt/instance/lnt_wsgi.py", line 3, in <module>
webserver-1  |     application = lnt.server.ui.app.App.create_standalone('/var/lib/lnt/instance/lnt.cfg')
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/lnt/server/ui/app.py", line 194, in create_standalone
webserver-1  |     instance = lnt.server.instance.Instance.frompath(config_path)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/lnt/server/instance.py", line 61, in frompath
webserver-1  |     return Instance(config_path, config, tmpdir)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/lnt/server/instance.py", line 69, in __init__
webserver-1  |     self.databases[name] = self.config.get_database(name)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/lnt/server/config.py", line 194, in get_database
webserver-1  |     return lnt.server.db.v4db.V4DB(db_entry.path, self,
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/lnt/server/db/v4db.py", line 74, in __init__
webserver-1  |     lnt.server.db.migrate.update(self.engine)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/lnt/server/db/migrate.py", line 201, in update
webserver-1  |     any_changed |= update_schema(engine, versions,
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/lnt/server/db/migrate.py", line 176, in update_schema
webserver-1  |     upgrade_method(engine)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/lnt/server/db/migrations/upgrade_18_to_19.py", line 37, in upgrade
webserver-1  |     add_column(engine, order_table_name, ordinal_col)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/lnt/server/db/util.py", line 38, in add_column
webserver-1  |     statement.execute(bind=connectable)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/sql/ddl.py", line 100, in execute
webserver-1  |     return bind.execute(self.against(target))
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 2235, in execute
webserver-1  |     return connection.execute(statement, *multiparams, **params)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1011, in execute
webserver-1  |     return meth(self, multiparams, params)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/sql/ddl.py", line 72, in _execute_on_connection
webserver-1  |     return connection._execute_ddl(self, multiparams, params)
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1068, in _execute_ddl
webserver-1  |     ret = self._execute_context(
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1316, in _execute_context
webserver-1  |     self._handle_dbapi_exception(
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1510, in _handle_dbapi_exception
webserver-1  |     util.raise_(
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/util/compat.py", line 182, in raise_
webserver-1  |     raise exception
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 1276, in _execute_context
webserver-1  |     self.dialect.do_execute(
webserver-1  |   File "/root/.local/lib/python3.10/site-packages/sqlalchemy/engine/default.py", line 608, in do_execute
webserver-1  |     cursor.execute(statement, parameters)
webserver-1  | sqlalchemy.exc.ProgrammingError: (psycopg2.errors.DuplicateColumn) column "ordinal" of relation "NT_Order" already exists

I don't see anything immediately wrong with the migration code. This is with creating a brand new database with 2cba11d, then rebuilding and recreating the container with 8b86fa8.

I will try to triple check later that I'm not doing something wrong on my end

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, this issue seems real. I don't know why it didn't hit us before.

The problem seems to be that multiple gunicorn workers are attempting to perform the migration concurrently (there's nothing to prevent them all from trying to upgrade the DB when they start), and the ordinal column gets created multiple times.

I don't know why none of the other migrations have hit the same problem -- perhaps they would have but it's been so long since older DB versions are not around that it's not an issue in practice. Either way, I'll make a separate patch to fix this.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#211 should fix this


# 2. Load all orders with their order field values, sort them,
# and assign ordinals.
order_table = introspect_table(engine, order_table_name)

# Find the order field names for this test suite, sorted by their
# ordinal (the field ordinal, not our new column).
ts_id_query = sqlalchemy.select(
[test_suite_table.c.ID]
).where(test_suite_table.c.DBKeyName == db_key_name)
ts_id = session.execute(ts_id_query).scalar()

field_rows = session.execute(
sqlalchemy.select([order_fields_table.c.Name])
.where(order_fields_table.c.TestSuiteID == ts_id)
.order_by(order_fields_table.c.Ordinal)
).fetchall()
field_names = [row[0] for row in field_rows]

# Load all orders.
orders = session.execute(
sqlalchemy.select([order_table])
).fetchall()

# Build a sort key for each order using convert_revision on each
# order field.
cache = {}

def sort_key(row):
return tuple(
convert_revision(
getattr(row, fname) or '', cache=cache
)
for fname in field_names
)

sorted_orders = sorted(orders, key=sort_key)

# Assign ordinals.
for i, row in enumerate(sorted_orders):
session.execute(
order_table.update()
.where(order_table.c.ID == row.ID)
.values(ordinal=i)
)

session.commit()

# 3. On Postgres: drop PreviousOrder and NextOrder columns, set ordinal
# NOT NULL, and add a deferred unique constraint.
# On SQLite: skip all of this. SQLite cannot drop columns that are
# referenced by foreign key constraints, and doesn't support ALTER
# COLUMN SET NOT NULL or DEFERRABLE constraints. The orphaned columns
# are harmless (SQLAlchemy ignores unmapped columns), and application
# logic enforces ordinal uniqueness.
if not is_sqlite:
with engine.begin() as conn:
for col in ["NextOrder", "PreviousOrder"]:
conn.execute(text(
f'ALTER TABLE "{order_table_name}" '
f'DROP COLUMN "{col}"'
))
conn.execute(text(
f'ALTER TABLE "{order_table_name}" '
f'ALTER COLUMN "ordinal" SET NOT NULL'
))
conn.execute(text(
f'ALTER TABLE "{order_table_name}" '
f'ADD CONSTRAINT "{order_table_name}_ordinal_unique" '
f'UNIQUE ("ordinal") DEFERRABLE INITIALLY DEFERRED'
))

# 4. Create an index on the ordinal column for query performance.
new_order_table = introspect_table(engine, order_table_name)
idx_name = f"ix_{db_key_name}_Order_ordinal"
idx = sqlalchemy.Index(idx_name, new_order_table.c.ordinal)
idx.create(engine)

session.close()
4 changes: 2 additions & 2 deletions lnt/server/db/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def get_all_orders_for_machine(session, ts, machine):
return session.query(ts.Order) \
.join(ts.Run) \
.filter(ts.Run.machine_id == machine) \
.order_by(asc(ts.Order.llvm_project_revision)) \
.order_by(asc(ts.Order.ordinal)) \
.all()


Expand All @@ -98,7 +98,7 @@ def get_last_order_for_machine(session, ts, machine):
return session.query(ts.Order) \
.join(ts.Run) \
.filter(ts.Run.machine_id == machine) \
.order_by(desc(ts.Order.llvm_project_revision)) \
.order_by(desc(ts.Order.ordinal)) \
.first()


Expand Down
Loading
Loading