-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathquery.py
More file actions
132 lines (123 loc) · 4.64 KB
/
query.py
File metadata and controls
132 lines (123 loc) · 4.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from attrs import define
from typing import ClassVar
@define
class QueryContainer:
"""Container for SQL queries used in performance testing"""
# Query 1: Complex JOIN with aggregation (tests query processing)
COMPLEX_JOIN_AGGREGATION: ClassVar[str] = """
SELECT
p.ProductID,
p.Name AS ProductName,
pc.Name AS Category,
psc.Name AS Subcategory,
COUNT(sod.SalesOrderDetailID) AS TotalOrders,
SUM(sod.OrderQty) AS TotalQuantity,
SUM(sod.LineTotal) AS TotalRevenue,
AVG(sod.UnitPrice) AS AvgPrice
FROM Sales.SalesOrderDetail sod
INNER JOIN Production.Product p ON sod.ProductID = p.ProductID
INNER JOIN Production.ProductSubcategory psc ON p.ProductSubcategoryID = psc.ProductSubcategoryID
INNER JOIN Production.ProductCategory pc ON psc.ProductCategoryID = pc.ProductCategoryID
GROUP BY p.ProductID, p.Name, pc.Name, psc.Name
HAVING SUM(sod.LineTotal) > 10000
ORDER BY TotalRevenue DESC;
"""
# Query 2: Large dataset retrieval (tests data transfer speed)
LARGE_DATASET: ClassVar[str] = """
SELECT
soh.SalesOrderID,
soh.OrderDate,
soh.DueDate,
soh.ShipDate,
soh.Status,
soh.SubTotal,
soh.TaxAmt,
soh.Freight,
soh.TotalDue,
c.CustomerID,
p.FirstName,
p.LastName,
a.AddressLine1,
a.City,
sp.Name AS StateProvince,
cr.Name AS Country
FROM Sales.SalesOrderHeader soh
INNER JOIN Sales.Customer c ON soh.CustomerID = c.CustomerID
INNER JOIN Person.Person p ON c.PersonID = p.BusinessEntityID
INNER JOIN Person.BusinessEntityAddress bea ON p.BusinessEntityID = bea.BusinessEntityID
INNER JOIN Person.Address a ON bea.AddressID = a.AddressID
INNER JOIN Person.StateProvince sp ON a.StateProvinceID = sp.StateProvinceID
INNER JOIN Person.CountryRegion cr ON sp.CountryRegionCode = cr.CountryRegionCode
WHERE soh.OrderDate >= '2013-01-01';
"""
# Query 3: synthetic 1.2M rows result set
VERY_LARGE_DATASET: ClassVar[str] = """
SELECT
sod.SalesOrderID,
sod.SalesOrderDetailID,
sod.ProductID,
sod.OrderQty,
sod.UnitPrice,
sod.LineTotal,
p.Name AS ProductName,
p.ProductNumber,
p.Color,
p.ListPrice,
n1.number AS RowMultiplier1
FROM Sales.SalesOrderDetail sod
CROSS JOIN (SELECT TOP 10 ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS number
FROM Sales.SalesOrderDetail) n1
INNER JOIN Production.Product p ON sod.ProductID = p.ProductID;
"""
# Query 4: CTE
SUBQUERY_WITH_CTE: ClassVar[str] = """
WITH SalesSummary AS (
SELECT
soh.SalesPersonID,
YEAR(soh.OrderDate) AS OrderYear,
SUM(soh.TotalDue) AS YearlyTotal
FROM Sales.SalesOrderHeader soh
WHERE soh.SalesPersonID IS NOT NULL
GROUP BY soh.SalesPersonID, YEAR(soh.OrderDate)
),
RankedSales AS (
SELECT
SalesPersonID,
OrderYear,
YearlyTotal,
RANK() OVER (PARTITION BY OrderYear ORDER BY YearlyTotal DESC) AS SalesRank
FROM SalesSummary
)
SELECT
rs.SalesPersonID,
p.FirstName,
p.LastName,
rs.OrderYear,
rs.YearlyTotal,
rs.SalesRank
FROM RankedSales rs
INNER JOIN Person.Person p ON rs.SalesPersonID = p.BusinessEntityID
WHERE rs.SalesRank <= 10
ORDER BY rs.OrderYear DESC, rs.SalesRank;
"""
@classmethod
def get_query(cls, key: str | None = None) -> dict[str, str]:
"""Retrieves a query or all queries.
Args:
key (str | None, optional): query name. Defaults to None.
Raises:
KeyError: error if the query does not exist.
Returns:
str | dict[str, str]: query definition or dictionary of all queries.
"""
queries = {
'Complex JOIN Aggregation': cls.COMPLEX_JOIN_AGGREGATION,
'Large Dataset': cls.LARGE_DATASET,
'Very Large Dataset': cls.VERY_LARGE_DATASET,
'Subquery with CTE': cls.SUBQUERY_WITH_CTE
}
if key is None:
return queries
if key not in queries:
raise KeyError(f"Query '{key}' not found. Available keys: {list(queries.keys())}")
return {key: queries[key]} # must return a dict either way