-
Notifications
You must be signed in to change notification settings - Fork 0
SQLGraphExplorer‐7
论文中给出的公式包括如下关系代数运算符:
还缺少:
example:
-- sch(A)=A1,A2,A3, sch(C)=C1,C2
SELECT
A.A1 AS C1,
A.A2 AS C2
FROM A假设结果为
推导过程:
-- 结果对应语句
SELECT A1,A2,A3 FROM A WHERE A1=c1, A2=c2有三个目的:
- 简化布尔代数:(B=b AND B=b) -> (B=b)
- 理论需要:上次说的模糊之处,selection 的运算需要将过滤条件标准化成 sop
- 冗余过滤:详见下一小节
由于多表选择的原因,反向选择的结果存在冗余的 WHERE 过滤条件,详见test-case-1的结果:
-- A1
SELECT
A11,
A12,
A13
FROM
A1
WHERE
A1.A11 = gh
AND A1.A12 = xm
AND A1.A13 = gj
AND LENGTH(NVL(A1.A11,'')) > 0
AND A1.A14 <= '#{ETL_DT}'
AND A1.A15 > '#{ETL_DT}'
AND A1.B11 = B.B01
AND B.B02 <= '#{ETL_DT}'
AND B.B03 > '#{ETL_DT}'上面可见:明明是针对 A1 表的选择,在 WHERE 过滤条件中却出现了不相干的表 B,不妨称其为冗余问题
没有找到解决此问题的相关论文,但是有个“拍脑门”的明显的解决方法:直接将所有“存在无关表”的最小布尔表达式设置为 TRUE:
-- A1
SELECT
A11,
A12,
A13
FROM
A1
WHERE
A1.A11 = gh
AND A1.A12 = xm
AND A1.A13 = gj
AND LENGTH(NVL(A1.A11,'')) > 0
AND A1.A14 <= '#{ETL_DT}'
AND A1.A15 > '#{ETL_DT}'
AND TRUE
AND TRUE
AND TRUE然后使用上一小节的简化公式进行消除即可:
-- A1
SELECT
A11,
A12,
A13
FROM
A1
WHERE
A1.A11 = gh
AND A1.A12 = xm
AND A1.A13 = gj
AND LENGTH(NVL(A1.A11,'')) > 0
AND A1.A14 <= '#{ETL_DT}'
AND A1.A15 > '#{ETL_DT}'-- sch(A1)=A11,A12,A13,A14,A15,A16,B11
-- sch(A2)=A21,A22,A23,A24,A25,A26,B21
-- sch(B)= B01,B02,B03,B04
-- sch(C)=GH,XM,GJ
SELECT
A1.A11 AS GH, /*工号*/
A1.A12 AS XM, /*姓名*/
A1.A13 AS GJ /*国籍*/
FROM
A1 /*在职人员基本信息*/
JOIN B /*机构基本情况*/
ON A1.B11 = B.B01
AND B.B02 <= '#{ETL_DT}'
AND B.B03 > '#{ETL_DT}'
WHERE
LENGTH (NVL (A1.A11, '')) > 0 /*员工编号有值的数据*/
AND A1.A14 <= '#{ETL_DT}'
AND A1.A15 > '#{ETL_DT}'
UNION ALL --从减员人员基本信息表中获取除了状态为'非正常在岗'的人员信息
SELECT
A2.A21 AS GH, /*工号*/
A2.A22 AS XM, /*姓名*/
A2.A23 AS GJ /*国籍*/
FROM
A2
JOIN B /*机构基本情况*/
ON A2.B21 = B.B01
AND B.B02 <= '#{ETL_DT}'
AND B.B03 > '#{ETL_DT}'
WHERE
LENGTH (NVL (A2.A21, '')) > 0 /*员工编号有值的数据*/
AND A2.A24 <= '#{ETL_DT}'
AND A2.A25 > '#{ETL_DT}'
AND A2.A26 <> '0101';-- selection1=
LENGTH(NVL(A1.A11,'')) > 0
AND A1.A14 <= '#{ETL_DT}'
AND A1.A15 > '#{ETL_DT}'
-- selection2=
A1.B11 = B.B01
AND B.B02 <= '#{ETL_DT}'
AND B.B03 > '#{ETL_DT}'
-- selection3=
LENGTH(NVL(A2.A21,'')) > 0
AND A2.A24 <= '#{ETL_DT}'
AND A2.A25 > '#{ETL_DT}'
AND A2.A26 <> '0101';
-- selection4=
A2.B21 = B.B01
AND B.B02 <= '#{ETL_DT}'
AND B.B03 > '#{ETL_DT}'假设执行结果是

推导过程如下:
$$ \begin{align} & \widetilde{E}(\mathcal{P}) \ & =\widetilde{E_1}(\mathcal{P})\diamond \widetilde{E_2}(\mathcal{P}) \ & E_1=\Pi_{A11,A12,A13}(\sigma_{selection1}(\sigma_{selection2}(A1\bowtie B))) \ & E_2=\Pi_{A21,A22,A23}(\sigma_{selection3}(\sigma_{selection4}(A2\bowtie B))) \ \
& =\widetilde{E_5}([tuple_3, \top \wedge selection1])\diamond \widetilde{E_6}([tuple_4,\top \wedge selection3]) \ & E_5=\sigma_{selection2}(A1\bowtie B) \ & E_6=\sigma_{selection4}(A2\bowtie B) \ \
& =\widetilde{E_7}([tuple_3,\top \wedge selection1 \wedge selection2]) \diamond \widetilde{E_8}([tuple_4,\top \wedge selection3 \wedge condtion4]) \ & E_7=A1\bowtie B \ & E_8=A2\bowtie B \ \
& ={\widetilde{A1}(Proj(\mathcal{P}_1, sch(A1))) \odot \widetilde{B}(Proj(\mathcal{P}_1, sch(B)))} \ & \diamond {\widetilde{A2}(Proj(\mathcal{P}_2, sch(A2))) \odot \widetilde{B}(Proj(\mathcal{P}_2, sch(B)))} \ & \mathcal{P}_1=[tuple_3,selection1 \wedge selection2] \ & \mathcal{P}_2=[tuple_4,selection3 \wedge condtion4] \ \
& ={\widetilde{A1}([(gh,xm,gj,A14,A15,A16,B11),selection1 \wedge selection2]) \odot \widetilde{B}([(B01,B02,B03,B04),selection1 \wedge selection2])} \ & \diamond {\widetilde{A2}([(gh,xm,gj,A24,A25,A26,B21),selection3 \wedge selection4]) \odot \widetilde{B}([(B01,B02,B03,B04),selection3 \wedge selection4])} \ \
& ={[(gh,xm,gj,A14,A15,A16,B11),selection1 \wedge selection2], \ & [(B01,B02,B03,B04),selection1 \wedge selection2], \ & [(gh,xm,gj,A24,A25,A26,B21),selection3 \wedge selection4], \ & [(B01,B02,B03,B04),selection3 \wedge selection4]} \ \end{align} $$
-- A1
SELECT
A11,
A12,
A13
FROM
A1
WHERE
A1.A11 = gh
AND A1.A12 = xm
AND A1.A13 = gj
AND LENGTH(NVL(A1.A11,'')) > 0
AND A1.A14 <= '#{ETL_DT}'
AND A1.A15 > '#{ETL_DT}'
AND A1.B11 = B.B01
AND B.B02 <= '#{ETL_DT}'
AND B.B03 > '#{ETL_DT}'
-- A2
SELECT
A21,
A22,
A23
FROM
A2
WHERE
A2.A21 = gh
AND A2.A22 = xm
AND A2.A23 = gj
AND LENGTH(NVL(A2.A21,'')) > 0
AND A2.A24 <= '#{ETL_DT}'
AND A2.A25 > '#{ETL_DT}'
AND A2.A26 <> '0101';
AND A2.B21 = B.B01
AND B.B02 <= '#{ETL_DT}'
AND B.B03 > '#{ETL_DT}'