Tuesday, January 27, 2015

Sử dụng các aggregates theo partition .

-- QUICK SYNTAX: RANK and DENSE RANK products with OVER PARTITION BY Color
SELECT ProductNumber, Color, ProductSubcategoryID,
  Ranking =            RANK() OVER (PARTITION BY Color ORDER BY ProductSubcategoryID),
  DenseRanking = DENSE_RANK() OVER (PARTITION BY Color ORDER BY ProductSubcategoryID)
FROM AdventureWorks2008.Production.Product
WHERE Color is not null AND ProductSubcategoryID is not null
ORDER BY Color, Ranking, ProductNumber
/*
ProductNumber     Color ProductSubcategoryID    Ranking     DenseRanking
...
VE-C304-S         Blue  25                      23          3
HL-U509-B         Blue  31                      26          4
PA-T100           Grey  35                      1           1
SB-M891-L         Multi 18                      1           1
SB-M891-M         Multi 18                      1           1
SB-M891-S         Multi 18                      1           1
CA-1098           Multi 19                      4           2
LJ-0192-L         Multi 21                      5           3
LJ-0192-M         Multi 21                      5           3
LJ-0192-S         Multi 21                      5           3
LJ-0192-X         Multi 21                      5           3
BK-R50R-44        Red   2                       1           1
BK-R50R-48        Red   2                       1           1
BK-R50R-52        Red   2                       1           1
....
*/
------------
-- Finding last order date with GROUP BY
SELECT CustomerID, LastOrder=MAX(OrderDate), OrderCount=COUNT(*)
FROM AdventureWorks2008.Sales.SalesOrderHeader
GROUP BY CustomerID
ORDER BY OrderCount DESC;

-- Finding last order date with OVER PARTITION BY
;WITH CTE AS (SELECT CustomerID, OrderDate,
       OrderCount = COUNT(*)  OVER ( PARTITION BY CustomerID),
       RN = ROW_NUMBER() OVER ( PARTITION BY CustomerID
                         ORDER BY OrderDate DESC)
FROM AdventureWorks2008.Sales.SalesOrderHeader)
SELECT * FROM CTE WHERE RN = 1
ORDER BY OrderCount DESC
------------
-- SQL over partiton by - QUICK SYNTAX - row_number over partition by - OVER clause
-- Find TOP selling salesstaff by city -- RANK() is used for ranking salesstaff

USE AdventureWorks2008;
 
WITH cteTopSalesStaffByCity
     AS (SELECT p.FirstName + ' ' + p.LastName                   AS SalesStaff,
                RANK()
                  OVER(PARTITION BY City ORDER BY SalesYTD DESC) AS RankNo  
                ,
                '$' + convert(VARCHAR,sp.SalesYTD,1)             AS SalesYTD,
                a.City,
                a.PostalCode
         FROM   Sales.SalesPerson sp
                INNER JOIN Person.Person p
                  ON sp.BusinessEntityID = p.BusinessEntityID
                INNER JOIN Person.Address a
                  ON a.AddressID = p.BusinessEntityID
         WHERE  TerritoryID IS NOT NULL)
SELECT   *
FROM     cteTopSalesStaffByCity
WHERE    RankNo = 1
ORDER BY City
GO
/*
SalesStaff        RankNo SalesYTD         City       PostalCode
Linda Mitchell    1     $5,200,475.23     Issaquah    98027
Jae Pak           1     $5,015,682.38     Renton      98055
*/

------------
-- Calculate aggregates over partition by PurchaseOrderID
------------
USE AdventureWorks;
GO
-- SQL Server over partition by - using partition by with aggregate functions
-- SQL Server windowing functions - partition by sql server - t sql partition by
SELECT PO = PurchaseOrderID,
       ProductID,
       OrderQty,
       SUM(OrderQty)
         OVER(PARTITION BY PurchaseOrderID ) AS 'TOTAL',
       AVG(OrderQty)
         OVER(PARTITION BY PurchaseOrderID ) AS 'Avg',
       COUNT(OrderQty)
         OVER(PARTITION BY PurchaseOrderID ) AS 'Count',
       MIN(OrderQty)
         OVER(PARTITION BY PurchaseOrderID ) AS 'Min',
       MAX(OrderQty)
         OVER(PARTITION BY PurchaseOrderID ) AS 'Max'
FROM     Purchasing.PurchaseOrderDetail
WHERE    PurchaseOrderID BETWEEN 10 AND 20
ORDER BY PurchaseOrderID,
         ProductID;
GO
/* Partial results
 
PO    ProductID   OrderQty    TOTAL Avg   Count Min   Max
10    320         3           66    22    3     3     60
10    321         3           66    22    3     3     60
10    322         60          66    22    3     3     60
11    438         3           12    3     4     3     3
11    439         3           12    3     4     3     3
11    440         3           12    3     4     3     3
11    441         3           12    3     4     3     3
*/
------------
-- SQL generate sequence number for partitioned data
------------
-- SQL Server row_number - using partition by with row_number()
-- SQL over partition by order by - t sql ranking functions
-- SQL Server COUNT aggregate function over partition by
-- MSSQL windowing functions
USE AdventureWorks;
 
SELECT   PO = PurchaseOrderID,
         VendorID,
         ShipDate = convert(VARCHAR,ShipDate,111),
         DailyPOCount = COUNT(*)
                          OVER(PARTITION BY convert(VARCHAR,ShipDate,111) ),
         SeqNo = Row_Number()
                   OVER(PARTITION BY convert(VARCHAR,ShipDate,111)
                     ORDER BY PurchaseOrderID)
FROM     Purchasing.PurchaseOrderHeader
WHERE    YEAR(OrderDate) = 2003
         AND MONTH(OrderDate) = 5
ORDER BY PurchaseOrderID
 
GO
/* Partial results
 
PO    VendorID    ShipDate    DailyPOCount      SeqNo
297   3           2003/05/16  4                 1
298   54          2003/05/16  4                 2
299   104         2003/05/16  4                 3
300   25          2003/05/16  4                 4
301   90          2003/05/23  4                 1
302   33          2003/05/23  4                 2
303   41          2003/05/23  4                 3
304   55          2003/05/23  4                 4
305   70          2003/06/04  16                1
306   94          2003/06/04  16                2
307   15          2003/06/04  16                3
308   62          2003/06/04  16                4
309   98          2003/06/04  16                5
310   72          2003/06/04  16                6
*/
------------
-- SQL ranking functions over SalesOrderID by OrderQty
------------
-- SQL Server row_number - using the OVER clause with ranking functions
-- SQL over partition by SalesOrderID - sql server windowing functions
SELECT   sod.SalesOrderID                                             AS SO,
         OrderQty,
         ROW_NUMBER()
           OVER(PARTITION BY sod.SalesOrderID ORDER BY OrderQty DESC) AS [SeqNo],
         RANK()
           OVER(PARTITION BY sod.SalesOrderID ORDER BY OrderQty DESC) AS [Rank],
         DENSE_RANK()
           OVER(PARTITION BY sod.SalesOrderID ORDER BY OrderQty DESC) AS [DenseRank]
FROM     AdventureWorks.Sales.SalesOrderDetail sod
         INNER JOIN AdventureWorks.Sales.SalesOrderHeader soh
           ON soh.SalesOrderID = sod.SalesOrderID
WHERE    OrderDate = '2004-02-01'
         AND OrderQty > 1
ORDER BY SalesOrderID,
         SeqNo
GO
/* Partial results
  
SO    OrderQty    SeqNo Rank  DenseRank
63293 6           1     1     1
63293 4           2     2     2
63293 3           3     3     3
63293 3           4     3     3
63293 3           5     3     3
63293 3           6     3     3
63293 2           7     7     4
*/

------------
-- SQL calculate minimum and maximum salaries by department
------------
-- SQL over partition by - aggregate functions: MIN, MAX
USE AdventureWorks;
 
WITH cteLastRaiseDate(EmployeeID,LastChangeDate)
     AS (SELECT   EmployeeID,
                  MAX(RateChangeDate)
         FROM     HumanResources.EmployeePayHistory
         GROUP BY EmployeeID),
     ctePayRate(EmployeeID,Rate,Department)
     AS (SELECT eph.EmployeeID,
                eph.Rate,
                d.Name
         FROM   HumanResources.EmployeePayHistory eph
                INNER JOIN cteLastRaiseDate lrd
                  ON eph.EmployeeID = lrd.EmployeeID
                     AND eph.RateChangeDate = lrd.LastChangeDate
                INNER JOIN HumanResources.EmployeeDepartmentHistory edh
                  ON eph.EmployeeID = edh.EmployeeID
                INNER JOIN HumanResources.Department d
                  ON edh.DepartmentID = d.DepartmentID
         WHERE  edh.EndDate IS NULL)
SELECT DISTINCT Department,
                MaxSalary = MAX(Rate)
                              OVER(PARTITION BY Department ),
                MinSalary = MIN(Rate)
                              OVER(PARTITION BY Department )
FROM   ctePayRate
ORDER BY Department
GO
/* Results 
Department                          MaxSalary   MinSalary
Document Control                    17.7885     10.25
Engineering                         63.4615     32.6923
Executive                           125.50      60.0962
Facilities and Maintenance          24.0385     9.25
Finance                             43.2692     13.4615
Human Resources                     27.1394     13.9423
Information Services                50.4808     27.4038
Marketing                           37.50       13.4615
Production                          84.1346     9.50
Production Control                  24.5192     16.00
Purchasing                          30.00       12.75
Quality Assurance                   28.8462     10.5769
Research and Development            50.4808     40.8654
Sales                               72.1154     23.0769
Shipping and Receiving              19.2308     9.00
Tool Design                         29.8462     25.00
*/
 
 
------------
-- SQL calculate aggregates by SalesOrderID
------------
-- SQL over partition by
USE AdventureWorks;
GO
SELECT   DISTINCT d.SalesOrderID,
         [Total Quantity] = SUM(OrderQty)
                              OVER(PARTITION BY d.SalesOrderID ),
         [Average Quantity] = convert(VARCHAR,convert(MONEY,AVG(1.0 * OrderQty)
                                 OVER(PARTITION BY d.SalesOrderID ),
                                                      1)),
         [Total Order Count] = COUNT(OrderQty)
                                 OVER(PARTITION BY d.SalesOrderID ),
         [Minimum Order Count] = MIN(OrderQty)
                                   OVER(PARTITION BY d.SalesOrderID ),
         [Maximum Order Count] = MAX(OrderQty)
                                   OVER(PARTITION BY d.SalesOrderID ),
         [Average Amount] = convert(VARCHAR,convert(MONEY,AVG(LineTotal)
                             OVER(PARTITION BY d.SalesOrderID ),
                                                    1)),
         [Total Amount] = convert(VARCHAR,convert(MONEY,SUM(LineTotal)
                             OVER(PARTITION BY d.SalesOrderID ),
                                                  1))
FROM     Sales.SalesOrderDetail d
         INNER JOIN Sales.SalesOrderHeader h
           ON h.SalesOrderID = d.SalesOrderID
         INNER JOIN Production.Product p
           ON d.ProductID = p.ProductID
WHERE    CustomerID = 100
ORDER BY SalesOrderID
GO
/* Partial results
 
SalesOrderID      Total Quantity    Average Quantity  Total Order Count
51818             81                2.61              31
57188             68                2.19              31
63290             68                2.96              23
69560             79                2.63              30
*/
------------
-- Percent on base calculation
-- First & Second COUNT(*) refer to GROUP BY - Third COUNT(*) refers to OVER()
------------
SELECT   Color,
         COUNT(*)                          AS ColorFrequency,
         ((COUNT(*)*100.0) / (SUM(COUNT(*))
                              OVER())) AS PctColor
FROM     Production.Product
GROUP BY Color
ORDER BY Color
/* Color                ColorFrequency    PctColor
      NULL              248               49.206349206349
      Black             93                18.452380952380
      Blue              26                5.158730158730
      Grey              1                 0.198412698412
      Multi             8                 1.587301587301
      Red               38                7.539682539682
      Silver            43                8.531746031746
      Silver/Black      7                 1.388888888888
      White             4                 0.793650793650
      Yellow            36                7.142857142857  */
------------

------------
-- SQL calculate daily average traffic batch size
------------
-- SQL over partition by dates
USE tempdb
GO
 
CREATE TABLE TollgateVehicleTraffic (
  DateOfTollCollection DATETIME    NOT NULL,
  Tolls                INT    NOT NULL);
GO
 
-- Populate table with daily batches
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 10,1260);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 9,2160);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 8,2265);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 7,1625);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 6,1550);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 5,1590);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 4,1200);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 4,2200);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 3,2150);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 2,2265);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 3,1465);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 1,1550);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 10,160);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 10,260);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 9,260);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 8,265);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 7,125);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 6,150);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 5,160);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 5,260);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 4,265);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 4,125);
INSERT INTO TollgateVehicleTraffic VALUES     (getdate() - 4,150);
 
GO
 
SELECT *
FROM   TollgateVehicleTraffic
ORDER BY DateOfTollCollection;
GO
/* Partial results
 
DateOfTollCollection          Tolls
2009-02-11 19:53:55.873       1260
2009-02-11 19:53:55.890       160
2009-02-11 19:53:55.890       260
2009-02-12 19:53:55.873       2160
2009-02-12 19:53:55.890       260
2009-02-13 19:53:55.890       265
2009-02-13 19:53:55.890       2265
*/
 
SELECT   DateOfTollCollection = convert(CHAR(10),DateOfTollCollection,112),
         Tolls,
         FLOOR(AVG(Tolls)
                 OVER()) AS AvgVehicleTrafficBatchSize
FROM     TollgateVehicleTraffic
ORDER BY convert(CHAR(10),DateOfTollCollection,112);
GO
/* Partial results
 
DateOfTollCollection    Tolls AvgVehicleTrafficBatchSize
20090211    1260  1020
20090211    160   1020
20090211    260   1020
*/
-- SQL over partition by - AVG aggregate function
SELECT   DISTINCT DateOfTollCollection = convert(CHAR(10),DateOfTollCollection,112),
                  FLOOR(AVG(Tolls)
            OVER(PARTITION BY convert(CHAR(10),DateOfTollCollection,112) ))
            AS AvgDailyVehicleTrafficBatchSize
FROM     TollgateVehicleTraffic
ORDER BY convert(CHAR(10),DateOfTollCollection,112);
 
GO
/* Results
 
DateOfTollCollection    AvgDailyVehicleTrafficBatchSize
20090211                560
20090212                1210
20090213                1265
20090214                875
20090215                850
20090216                670
20090217                788
20090218                1807
20090219                2265
20090220                1550
*/
 
-- Cleanup
DROP TABLE TollgateVehicleTraffic
------------ 

Source : sqlusa.com

No comments:

Post a Comment