开篇介绍
在 CUBE 设计过程中有一个非常重要的点就是定义维度与度量值组关系,维度的创建一般在前,而度量值组一般来源于一个事实表。当维度和度量值组在 CUBE 中定义完成之后,下一个最重要的动作就是定义两者之间的关系。在前面几篇文章中也已经看到了如何将度量值组和维度通过哪些维度属性进行关联的操作,但是那些关联通常都是 Regular 类型的,属于直接的普通的关联。
但是实际上除了 Regular 之外,还有 Fact, Referenced, Many to Many 和 Data Mining(数据挖掘类不在此讨论)。
今天在这篇文章中讲解 Referenced 和 Fact 两种关系的定义。
测试代码
USE BIWORK_SSIS
GO
SET NOCOUNT ON
IF OBJECT_ID('FactInternetSalesReason','U') IS NOT NULL
DROP TABLE FactInternetSalesReason
IF OBJECT_ID('FactInternetSales','U') IS NOT NULL
DROP TABLE FactInternetSales
IF OBJECT_ID('FactResellerSales','U') IS NOT NULL
DROP TABLE FactResellerSales
IF OBJECT_ID('DimEmployee','U') IS NOT NULL
DROP TABLE DimEmployee
IF OBJECT_ID('DimDate','U') IS NOT NULL
DROP TABLE DimDate
IF OBJECT_ID('DimProduct','U') IS NOT NULL
DROP TABLE DimProduct
IF OBJECT_ID('DimSalesReason','U') IS NOT NULL
DROP TABLE DimSalesReason
IF OBJECT_ID('DimProductSubcategory','U') IS NOT NULL
DROP TABLE DimProductSubcategory
IF OBJECT_ID('DimProductCategory','U') IS NOT NULL
DROP TABLE DimProductCategory
IF OBJECT_ID('DimReseller','U') IS NOT NULL
DROP TABLE DimReseller
IF OBJECT_ID('DimCustomer','U') IS NOT NULL
DROP TABLE DimCustomer
IF OBJECT_ID('DimGeography','U') IS NOT NULL
DROP TABLE DimGeography
GO
CREATE TABLE DimDate
(
DateKey INT PRIMARY KEY,
ShortDateName NVARCHAR(12) NOT NULL,
FullDateName NVARCHAR(20) NOT NULL,
DayNumberOfWeek TINYINT NOT NULL,
DayNameOfWeek NVARCHAR(10) NOT NULL,
DayNumberOfMonth TINYINT NOT NULL,
DayNumberOfYear SMALLINT NOT NULL,
WeekNumberOfYear TINYINT NOT NULL,
IsWeekend NVARCHAR(7) NOT NULL,
IsLeapYear BIT NOT NULL,
MonthKey INT NOT NULL,
MonthNumberOfYear TINYINT NOT NULL,
MonthNameOfYear NVARCHAR(10) NOT NULL,
MonthNameWithYear NVARCHAR(20) NOT NULL,
CalendarQuarterKey INT NOT NULL,
CalendarQuarterNumber TINYINT NOT NULL,
CalendarQuarterNameWithYear NVARCHAR(20) NOT NULL,
CalendarSemesterNumber TINYINT NOT NULL,
CalendarYearKey SMALLINT NOT NULL,
CalendarYearName NVARCHAR(20) NOT NULL,
FiscalQuarterKey INT,
FiscalQuarterNumber TINYINT NOT NULL,
FiscalQuarterName NVARCHAR(20),
FiscalSemester TINYINT NOT NULL,
FiscalYearKey SMALLINT NOT NULL,
FiscalYearName NVARCHAR(20),
)
DECLARE @StartDate DATETIME
DECLARE @EndDate DATETIME
SELECT @StartDate = '2005-01-01',
@EndDate = '2013-12-31'
WHILE (@StartDate <= @EndDate)
BEGIN
INSERT INTO DimDate
(
DateKey,
ShortDateName,
FullDateName,
DayNumberOfWeek,
DayNameOfWeek,
DayNumberOfMonth,
DayNumberOfYear,
WeekNumberOfYear,
IsWeekend,
IsLeapYear,
MonthKey,
MonthNumberOfYear,
MonthNameOfYear,
MonthNameWithYear,
CalendarQuarterKey,
CalendarQuarterNumber,
CalendarQuarterNameWithYear,
CalendarSemesterNumber,
CalendarYearKey,
CalendarYearName,
FiscalQuarterNumber,
FiscalSemester,
FiscalYearKey
)
SELECT CAST(CONVERT(VARCHAR(8),@StartDate,112) AS INT) AS 'DateKey',
CONVERT(VARCHAR(20), @StartDate,106) AS 'ShortDateName',
CONVERT(VARCHAR(2),DATENAME(DD,@StartDate))
+ ' '
+ DATENAME(MM,@StartDate)
+ ' '
+ CONVERT(CHAR(4), DATEPART(YY,@StartDate)) AS 'FullDateName', -- 1 July 2005
DATEPART(DW,@StartDate) AS 'DayNumberOfWeek',
DATENAME(DW,@StartDate) AS 'DayNameOfWeek',
DATENAME(DD,@StartDate) AS 'DayNumberOfMonth',
DATENAME(DY,@StartDate) AS 'DayNumberOfYear',
DATEPART(WW,@StartDate) AS 'WeekNumberOfYear',
CASE WHEN DATEPART(DW,@StartDate) IN (1,7)
THEN 'Weekend'
ELSE 'Weekday'
END AS 'IsWeekend',
CASE WHEN ((YEAR(@StartDate) % 4 = 0) AND (YEAR(@StartDate) % 100 != 0 OR YEAR(@StartDate) % 400 = 0))
THEN 1
ELSE 0
END AS 'IsLeapYear',
DATEPART(YY,@StartDate) * 100 + DATEPART(MM,@StartDate) AS 'MonthKey', -- 200507
DATEPART(MM,@StartDate) AS 'MonthNumberOfYear',
DATENAME(MM,@StartDate) AS 'MonthNameOfYear',
DATENAME(MM,@StartDate) + ' ' + CONVERT(CHAR(4),DATEPART(YY,@StartDate)) AS 'MonthNameWithYear', -- July 2005
DATEPART(YY,@StartDate) * 100 + DATEPART(QQ,@StartDate) AS 'CalendarQuarterKey', -- 200503
DATEPART(QQ,@StartDate) AS 'CalendarQuarterNumber',
'CY ' + CONVERT(CHAR(4),DATEPART(YY,@StartDate))
+ ' Qtr '
+ CONVERT(CHAR(1), DATEPART(QQ,@StartDate)) AS 'CalendarQuarterNameWithYear', -- CY 2005 Qtr 3
CASE WHEN DATEPART(MM,@StartDate) BETWEEN 1 AND 6
THEN 1
ELSE 2
END AS 'CalendarSemester',
DATEPART(YY,@StartDate) AS 'CalendarYearKey',
'CY ' + CONVERT(CHAR(4),DATEPART(YY,@StartDate)) AS 'CalendarYearName', -- CY 2005
CASE WHEN DATEPART(MM,@StartDate) BETWEEN 1 AND 6
THEN DATEPART(QQ,@StartDate) + 2
ELSE DATEPART(QQ,@StartDate) - 2
END AS 'FiscalQuarter',
CASE WHEN DATEPART(MM,@StartDate) BETWEEN 1 AND 6
THEN 2
ELSE 1
END AS 'FiscalSemester',
CASE WHEN DATEPART(MM,@StartDate) BETWEEN 1 AND 6
THEN DATEPART(YY,@StartDate)
ELSE DATEPART(YY,@StartDate) + 1
END AS 'FiscalYear'
UPDATE DimDate
SET FiscalQuarterKey = FiscalYearKey * 100 + FiscalQuarterNumber, -- 200601
FiscalYearName = 'FY ' + CONVERT(CHAR(4), FiscalYearKey), -- FY 2006
FiscalQuarterName = 'FY ' + CONVERT(Char(4), FiscalYearKey) + ' Qtr ' + CONVERT(CHAR(1), FiscalQuarterNumber) -- FY 2006 Qtr 1
WHERE DateKey = CONVERT(INT,CONVERT(VARCHAR(8),@StartDate,112))
SET @StartDate = @StartDate + 1
END
SELECT EmployeeKey,
ParentEmployeeKey,
EmployeeNationalIDAlternateKey,
CASE WHEN ISNULL(MiddleName,'') = '' THEN FirstName +' '+ LastName
ELSE FirstName +' '+ MiddleName +' '+LastName
END AS FullName,
Title
INTO DimEmployee
FROM AdventureWorksDW2012.dbo.DimEmployee
SELECT SalesReasonKey,
SalesReasonAlternateKey,
SalesReasonName,
SalesReasonReasonType
INTO DimSalesReason
FROM AdventureWorksDW2012.dbo.DimSalesReason
SELECT SalesOrderNumber,
SalesOrderLineNumber,
SalesReasonKey
INTO FactInternetSalesReason
FROM AdventureWorksDW2012.dbo.FactInternetSalesReason
SELECT ProductKey,
ProductAlternateKey,
ProductSubcategoryKey,
EnglishProductName,
StandardCost,
Color,
SafetyStockLevel,
ListPrice,
Class,
Size,
StartDate,
EndDate,
[Status],
ProductAlternateKey + ' (' + CONVERT (Char(10), StartDate, 120) + ')' AS ProductID
INTO DimProduct
FROM AdventureWorksDW2012.dbo.DimProduct
SELECT ProductSubcategoryKey,
ProductSubcategoryAlternateKey,
EnglishProductSubcategoryName,
ProductCategoryKey
INTO DimProductSubcategory
FROM AdventureWorksDW2012.dbo.DimProductSubcategory
SELECT ProductCategoryKey,
ProductCategoryAlternateKey,
EnglishProductCategoryName
INTO DimProductCategory
FROM AdventureWorksDW2012.dbo.DimProductCategory
SELECT ProductKey,
OrderDateKey,
EmployeeKey,
ResellerKey,
SalesOrderLineNumber,
SalesOrderNumber,
UnitPrice,
ProductStandardCost,
SalesAmount,
CarrierTrackingNumber,
CustomerPONumber,
SalesOrderNumber+'_'+CONVERT(VARCHAR(10),SalesOrderLineNumber) AS 'SalesOrderLineID'
INTO FactResellerSales
FROM AdventureWorksDW2012.dbo.FactResellerSales
SELECT ProductKey,
OrderDateKey,
DueDateKey,
ShipDateKey,
CustomerKey,
SalesOrderNumber,
SalesOrderLineNumber,
OrderQuantity,
UnitPrice,
SalesAmount
INTO FactInternetSales
FROM AdventureWorksDW2012.dbo.FactInternetSales
SELECT GeographyKey,
City,
StateProvinceCode,
StateProvinceName,
CountryRegionCode,
EnglishCountryRegionName
INTO DimGeography
FROM AdventureWorksDW2012.dbo.DimGeography
SELECT CustomerKey,
GeographyKey,
CustomerAlternateKey,
Title,
CASE WHEN ISNULL(MiddleName,'') = '' THEN FirstName +' '+ LastName
ELSE FirstName +' '+ MiddleName +' '+LastName
END AS CustomerName
INTO DimCustomer
FROM AdventureWorksDW2012.dbo.DimCustomer
SELECT ResellerKey,
GeographyKey,
ResellerAlternateKey,
ResellerName
INTO DimReseller
FROM AdventureWorksDW2012.dbo.DimReseller
------------------------------------------------------------------------------
-- Add Primary Key Constraint
------------------------------------------------------------------------------
ALTER TABLE DimEmployee
ADD CONSTRAINT PK_EmployeeKey PRIMARY KEY CLUSTERED (EmployeeKey)
ALTER TABLE DimSalesReason
ADD CONSTRAINT PK_SalesReasonKey PRIMARY KEY CLUSTERED (SalesReasonKey)
ALTER TABLE DimProduct
ADD CONSTRAINT PK_ProductKey PRIMARY KEY CLUSTERED(ProductKey)
ALTER TABLE DimProductSubcategory
ADD CONSTRAINT PK_SubcategoryKey PRIMARY KEY CLUSTERED(ProductSubcategoryKey)
ALTER TABLE DimProductCategory
ADD CONSTRAINT PK_CategoryKey PRIMARY KEY CLUSTERED(ProductCategoryKey)
ALTER TABLE DimGeography
ADD CONSTRAINT PK_GeographyKey PRIMARY KEY CLUSTERED(GeographyKey)
ALTER TABLE DimReseller
ADD CONSTRAINT PK_ResellerKey PRIMARY KEY CLUSTERED(ResellerKey)
ALTER TABLE DimCustomer
ADD CONSTRAINT PK_CustomerKey PRIMARY KEY CLUSTERED(CustomerKey)
ALTER TABLE FactInternetSales
ADD CONSTRAINT PK_InternetSales PRIMARY KEY CLUSTERED(SalesOrderNumber,SalesOrderLineNumber)
ALTER TABLE FactResellerSales
ADD CONSTRAINT PK_ResellerSales PRIMARY KEY CLUSTERED(SalesOrderNumber,SalesOrderLineNumber)
ALTER TABLE FactInternetSalesReason
ADD CONSTRAINT PK_InternetSalesReason PRIMARY KEY CLUSTERED(SalesOrderNumber,SalesOrderLineNumber,SalesReasonKey)
------------------------------------------------------------------------------
-- Add Primary Key Constraint
------------------------------------------------------------------------------
ALTER TABLE DimCustomer
ADD CONSTRAINT FK_Customer_GeographyKey FOREIGN KEY(GeographyKey) REFERENCES DimGeography(GeographyKey)
ALTER TABLE DimReseller
ADD CONSTRAINT FK_Reseller_GeographyKey FOREIGN KEY(GeographyKey) REFERENCES DimGeography(GeographyKey)
ALTER TABLE DimEmployee
ADD CONSTRAINT FK_Employee_ParentEmployeeKey FOREIGN KEY(ParentEmployeeKey) REFERENCES DimEmployee(EmployeeKey)
ALTER TABLE FactResellerSales
ADD CONSTRAINT FK_Reseller_EmployeeKey FOREIGN KEY(EmployeeKey) REFERENCES DimEmployee(EmployeeKey)
ALTER TABLE FactResellerSales
ADD CONSTRAINT FK_Reseller_ProductKey FOREIGN KEY(ProductKey) REFERENCES DimProduct(ProductKey)
ALTER TABLE FactResellerSales
ADD CONSTRAINT FK_Reseller_OrderDateKey FOREIGN KEY(OrderDateKey) REFERENCES DimDate(DateKey)
ALTER TABLE FactResellerSales
ADD CONSTRAINT FK_Reseller_ResellerKey FOREIGN KEY(ResellerKey) REFERENCES DimReseller(ResellerKey)
ALTER TABLE FactInternetSales
ADD CONSTRAINT FK_Internet_ProductKey FOREIGN KEY(ProductKey) REFERENCES DimProduct(ProductKey)
ALTER TABLE FactInternetSales
ADD CONSTRAINT FK_Internet_OrderDateKey FOREIGN KEY(OrderDateKey) REFERENCES DimDate(DateKey)
ALTER TABLE FactInternetSales
ADD CONSTRAINT FK_Internet_ShipDateKey FOREIGN KEY(ShipDateKey) REFERENCES DimDate(DateKey)
ALTER TABLE FactInternetSales
ADD CONSTRAINT FK_Internet_DueDateKey FOREIGN KEY(DueDateKey) REFERENCES DimDate(DateKey)
ALTER TABLE FactInternetSales
ADD CONSTRAINT FK_Internet_CustomerKey FOREIGN KEY(CustomerKey) REFERENCES DimCustomer(CustomerKey)
ALTER TABLE DimProduct
ADD CONSTRAINT FK_Product_SubcatetoryKey FOREIGN KEY (ProductSubcategoryKey) REFERENCES DimProductSubcategory (ProductSubcategoryKey)
ALTER TABLE DimProductSubcategory
ADD CONSTRAINT FK_Subcategory_CatetoryKey FOREIGN KEY (ProductCategoryKey) REFERENCES DimProductCategory (ProductCategoryKey)
ALTER TABLE FactInternetSalesReason
ADD CONSTRAINT FK_InternetSalesReason_Order FOREIGN KEY (SalesOrderNumber,SalesOrderLineNumber)
REFERENCES FactInternetSales (SalesOrderNumber,SalesOrderLineNumber)
ALTER TABLE FactInternetSalesReason
ADD CONSTRAINT FK_InternetSalesReason_OrderReason FOREIGN KEY (SalesReasonKey) REFERENCES DimSalesReason (SalesReasonKey)
SELECT * FROM DimEmployee
SELECT * FROM DimDate
SELECT * FROM DimProduct
SELECT * FROM DimProductSubcategory
SELECT * FROM DimProductCategory
SELECT * FROM DimCustomer
SELECT * FROM DimReseller
SELECT * FROM DimGeography
SELECT * FROM DimSalesReason
SELECT * FROM FactResellerSales
SELECT * FROM FactInternetSales
SELECT * FROM FactInternetSalesReason
SELECT TOP 20
ProductKey, -- 产品 KEY
SalesOrderLineID, -- 订单明细编号
SalesOrderNumber, -- 订单编号
SalesOrderLineNumber, -- 订单详细编号
UnitPrice, -- 单位价格
ProductStandardCost, -- 标准成本
SalesAmount, -- 销售额
CarrierTrackingNumber, -- 承运人跟踪号,
CustomerPONumber -- 采购订单号
FROM FactResellerSales
案例实现
新建 SSIS 项目并创建数据源和数据源视图,在前面的几篇博客中已经讲到了 -
- SSAS 系列 - 基于雪花模型的维度设计 - 主要已经处理了 DimProduct, DimProductSubcategory, DimProductCategory 这几张表。
- SSAS 系列 - 自定义的日期维度设计 - 主要处理 DimDate 表。
- SSAS系列 - 关于父子维度的设计 - 主要处理 DimEmployee 表。
- SSAS 系列 - 实现Cube 以及角色扮演维度,度量值格式化和计算成员的创建 - 基于以上这些维度的度量值,角色扮演维度等概念。
在这个数据源视图中,新加入了 DimCustomer, DimReseller, DimGeography , DimSalesReason, FactInternetSalesReason 这几个维度表。
那么现在假设我需要直接通过 Geography 地理纬度直接聚合访问 Reseller Sales 和 Internet Sales 度量值应该如何实现?
其实这个我们完全可以借鉴 SSAS 系列 - 基于雪花模型的维度设计 这篇文章中的有关 Product Category, Product Subcategory, Product 这种层次结构的设计而应用到 DimGeography 和 DimCustomer 或者 DimGeography 和 DimReseller 的关系中,一样可以实现通过 Geography 直接聚合访问度量值。
但是今天我们将换一种方式来实现,直接通过定义维度和度量值关联的 Referenced 关系来实现。
先快速创建好 Geography, Customer 和 Reseller 这几个维度创建好,只有先创建好了维度才能定义与度量值组之间的关系。
Geography 层次结构。
Customer 维度非常简单,只包含了一个非键属性 Geography Key, Reseller 维度也是一样创建的。
在多维数据集 Cube 的维度设计面板中选择添加新的维度 - Customer, Geography 和 Reseller。
根据数据源视图可以直接看到 FactInternetSales 和 FactResellerSales 是分别引用了 DimCustomer 的 CustomerKey 和 DimReseller 的 ResellerKey。所以当添加完这几个维度的时候,在维度用法中可以看到 Customer 维度和Reseller 维度是已经自动关联上了度量值组,并且它们之间的关系就是最简单的 Regular 关系。
可以观察数据源视图 FactInternetSales 是通过 DimCustomer 间接的关联到 DimGeography 上的,因此我们可以说:
度量值组 Internet Sales "引用了" Geography 维度,Internet Sales 的 "引用维度 Reference Dimension" 是 Geography。
是通过 “中间维度” Customer 维度引用的。
需要引用到"引用维度上的属性" Reference dimension attribute 是 GeographyKey。
通过"中间维度的属性" Intermediate dimension attribute 是 GeographyKey。
编辑 Geography 和 Internet Sales 交叉的区域,定义它们之间的关联关系,在这里选择 Referenced 引用关系。
然后对照下面的操作完成关联,其实下面的这些关联动作就是实现了上述关联描述。
Materialize 具体化 - 默认选项。它是指我们这种引用关系一旦被具体化,那么维度成员和事实数据之间的关联关系将成员多维数据集物理存储结构中的一部分。效率上是提高了,但是处理的时间可能会增多。 如果不选择具体化,那么每次查询的时候多维数据集需要根据中间维度关联到引用维度然后再计算聚合值,效率上会变得很低。因为此时,多维数据集的物理存储结构中将只会保存事实数据和中间维度的关联关系。
下图同样建立了 Reseller Sales 与 Reseller 维度之间的引用关联关系。
记住引用关联关系的图标,以后打开已有项目时通过图标就知道维度和度量值组是什么关联方式了。
部署并浏览数据,直接通过 Geography 维度来浏览 Internet Sales Amount 和 Reseller Sales Amount 度量值的数据。