ChkDB |
Open Source Rule-Based Data Checking |
|
Home Contact Download Releases Open Source License Project Contributing The PL/SQL Using ChkDB Installation Read-Me Rule-Sets ChkDB Tables E-R Diagram
Tiny Sample External Links Data Quality and Data Profiling (a glossary) |
Sample Rule-SetThis is the sample_ruleset.sql file that comes with a ChkDB distribution (without the license section). To create your own rule-set, you create a file similar to the one below. It should have a name ending in _ruleset.sql)
-- ==============================================================
-- ChkDB - Sample Rule-Set SAMP_HR_RULES
-- for data domain: CHKDB_SAMP_HR
--
-- Define the SAMP_HR_RULES rule-set to check data in the
-- CHKDB_SAMP_HR data domain (which is an abstract view into
-- a sample Human Resources database).
--
-- This set of rules is intended to illustrate (and test)
-- each type of check; it is NOT an the set of all rules
-- that might logically be applied to the sample database.
--
-- This rule-set includes rules to find:
-- - bad data
-- - business situations that require action
--
-- Before this script is run:
-- - the ChkDB tables must exist - see chkdb_tables.sql
-- - the CHKDB_SAMP_HR domain must have been setup
-- see: sample_domain.sql
--
-- Note: In a pre-defined check, the column-type-code,
-- COLUMN_TYPE_CNDO, does not have to be specified
-- if the user running the checks is the owner of
-- the table being checked. In several rules, below,
-- COLUMN_TYPE_CNDO is not specified or set to null.
--
-- Note: In rules in which a where-clause is specified,
-- the table being checked has an alias of t
-- and all references to columns in this table
-- must use this alias.
--
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-- Define the SAMP_HR_RULES rule-set.
delete CHKDB_RULESET
where DATA_DOMAIN = 'CHKDB_SAMP_HR'
and RULESET_ID = 'SAMP_HR_RULES';
delete CHKDB_RULE
where DATA_DOMAIN = 'CHKDB_SAMP_HR'
and RULESET_ID = 'SAMP_HR_RULES';
insert into CHKDB_RULESET (DATA_DOMAIN, RULESET_ID, DESCRIPTION)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'Sample rules for tables in the sample HR database');
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-- Define the Rules in the SAMP_HR_RULES rule-set.
-- = = = = = = = = = = = = = = = =
-- Rules about the CHKDB_SAMP_HR_DEPT table/view...
-- foreign key check
insert into CHKDB_RULE -- manager valid?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
FOREIGN_KEY_LIST, REF_DATA_DOMAIN,
REF_TABLE_NAME, REF_KEY_LIST,
FK_NOT_ORPHAN_YN, FK_NOT_NULL_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_DEPT', 'MANAGER_BAD', 3,
'Manager is invalid or not set',
'MANAGER', 'CHKDB_SAMP_HR',
'CHKDB_SAMP_HR_EMP', 'EMP_ID',
'Y', 'Y');
-- specified where-clause
insert into CHKDB_RULE -- manager in this dept?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
CHK_WHERE_CLS,
ROW_OUT_DESC, ROW_OUT_SPEC)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_DEPT', 'MNGR_DEPT_BAD', 2,
'Manager is not in this department',
'exists (select 99 from CHKDB_SAMP_HR_EMP e ' ||
'where e.EMP_ID = t.MANAGER' ||
' and e.dept_id <> t.dept_id)',
'MANAGER','MANAGER');
-- In the line above, the second 'MANAGER' is what will be
-- selected from the table to show the bad data, while the
-- first 'MANAGER' is the title for this output column.
-- predefined check
insert into CHKDB_RULE -- floor valid?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO,
NOT_NULL_YN, MIN_VALUE, MAX_VALUE, MAX_LENGTH)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_DEPT', 'FLOOR_BAD', 3,
'Floor is invalid or not set',
'FLOOR', 'C', 'Y', '1', '8', 1);
-- predefined check
insert into CHKDB_RULE -- floor not a number?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO, IS_A_NUMBER_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_DEPT', 'FLOOR_NOT_NUM', 3,
'Floor is not a number',
'FLOOR', 'C', 'Y');
-- foreign key check (checking parent for children)
insert into CHKDB_RULE -- any employees?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
FOREIGN_KEY_LIST, REF_DATA_DOMAIN,
REF_TABLE_NAME, REF_KEY_LIST,
FK_NOT_ORPHAN_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_DEPT', 'DEPT_EMPTY', 1,
'Department has no employees',
'DEPT_ID', 'CHKDB_SAMP_HR',
'CHKDB_SAMP_HR_EMP', 'DEPT_ID',
'Y');
-- predefined check [Business situation, not data error]
insert into CHKDB_RULE -- budget estimate off?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO,
COLUMN_2_NAME, COL_MINUS_COL2_MIN, COL_MINUS_COL2_MAX)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_DEPT', 'BUDGET_EST_OFF', 1,
'Budget estimate too far from budgeted amount',
'BUDGET_EST', 'N',
'BUDGET_AMT', -20000.00, 10000.00);
-- predefined check [Business situation, not data error]
insert into CHKDB_RULE -- not all of budget approved?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO,
COLUMN_2_NAME, COL_NOT_NE_COL2_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_DEPT', 'BUDGET_AMT_APPRV_NEQ', 1,
'Budget amount different than approved budget',
'BUDGET_AMT', 'N', 'BUDGET_APPRV', 'Y');
-- Note that this rule is set to be NOT ACTIVE.
-- predefined check [Business situation, not data error]
insert into CHKDB_RULE -- not all of budget approved?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO,
MAX_VALUE, ACTIVE_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_DEPT', 'BUDGET_AMT_HIGH', 1,
'Budget amount greater than $125,000',
'BUDGET_AMT', 'N', '125000.00', 'N');
-- = = = = = = = = = = = = = = = =
-- Rules about the CHKDB_SAMP_HR_GROUP table/view...
-- foreign key check (of primary key component)
insert into CHKDB_RULE -- department valid?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
FOREIGN_KEY_LIST, REF_DATA_DOMAIN,
REF_TABLE_NAME, REF_KEY_LIST,
FK_NOT_ORPHAN_YN, FK_NOT_NULL_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_GROUP', 'DEPT_BAD', 4,
'Department is invalid or not set',
'DEPT_ID', 'CHKDB_SAMP_HR',
'CHKDB_SAMP_HR_DEPT', 'DEPT_ID',
'Y', 'Y');
-- predefined check (WITHOUT SPECIFYING COLUMN_TYPE_CNDO)
-- (For predefined checks, the column-type does not have
-- to be specified if the user that runs the checks is
-- the owner of the table being checked.)
insert into CHKDB_RULE -- active_yn valid?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME,
NOT_NULL_YN, IN_LIST)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_GROUP', 'ACTIVE_YN_BAD', 4,
'Active_YN must be Y or N',
'ACTIVE_YN', 'Y', 'Y,N');
-- predefined check (THAT DOES SPECIFY COLUMN_TYPE_CNDO)
-- (in case the block of code above doesn't work)
-- insert into CHKDB_RULE -- active_yn valid?
-- (DATA_DOMAIN, RULESET_ID,
-- TABLE_NAME, RULE_ID, RULE_CLASS,
-- DESCRIPTION,
-- COLUMN_NAME, COLUMN_TYPE_CNDO,
-- NOT_NULL_YN, IN_LIST)
-- values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
-- 'CHKDB_SAMP_HR_GROUP', 'ACTIVE_YN_BAD', 4,
-- 'Active_YN must be Y or N',
-- 'ACTIVE_YN', 'C', 'Y', 'Y,N');
-- = = = = = = = = = = = = = = = =
-- Rules about the CHKDB_SAMP_HR_EMP table/view...
-- foreign key check
insert into CHKDB_RULE -- DEPT_ID,GROUP_ID exists?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
FOREIGN_KEY_LIST, REF_DATA_DOMAIN,
REF_TABLE_NAME, REF_KEY_LIST,
FK_NOT_ORPHAN_YN, FK_NOT_PARTIAL_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'DEPT_GROUP_BAD', 4,
'Dept_id|Group_id is invalid or Dept_id is not set',
'DEPT_ID,GROUP_ID', 'CHKDB_SAMP_HR',
'CHKDB_SAMP_HR_GROUP', 'DEPT_ID,GROUP_ID',
'Y', 'Y');
-- predefined check
insert into CHKDB_RULE -- position null?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO, NOT_NULL_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'POSITION_NULL', 3,
'Position is not set',
'POSITION', 'C', 'Y');
-- predefined check
insert into CHKDB_RULE -- position too long?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO, MAX_LENGTH)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'POSITION_LONG', 3,
'Position is too long',
'POSITION', 'C', 20);
-- predefined check
insert into CHKDB_RULE -- position illegal char?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO, NO_ILLEGAL_CHARS)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'POSITION_BADCHAR', 3,
'Position contains illegal character (,)',
'POSITION', 'C', ',');
-- predefined check
insert into CHKDB_RULE -- hire-date valid?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO,
NOT_NULL_YN, MIN_VALUE, NOT_AFTER_TODAY_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'HIRE_DT_BAD', 3,
'Hire-date is invalid or not set',
'HIRE_DT', 'D', 'Y', '1997-01-01', 'Y');
-- predefined check (note: not not-null, uses other column)
insert into CHKDB_RULE -- last-review valid?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO,
NOT_AFTER_TODAY_YN, COLUMN_2_NAME, COL_NOT_LT_COL2_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'LAST_REVIEW_BAD', 2,
'Date of last review is invalid or before hire',
'LAST_REVIEW_DT', 'D', 'Y', 'HIRE_DT', 'Y');
-- predefined check (note: uses COL_NOT_EQ_COL2_YN)
-- NOTE: COLUMN_TYPE_CNDO is set to null (rather than 'D')
insert into CHKDB_RULE -- last-review = hire_date?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO,
NOT_AFTER_TODAY_YN, COLUMN_2_NAME, COL_NOT_EQ_COL2_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'REVIEW_DT_ISSUE', 2,
'Date of last review is hire date or in future',
'LAST_REVIEW_DT', null, 'Y', 'HIRE_DT', 'Y');
-- predefined check
-- NOTE: COLUMN_TYPE_CNDO is set to null (rather than 'N')
insert into CHKDB_RULE -- vacn_days_tot valid?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO,
NOT_NULL_YN, MIN_VALUE, MAX_VALUE)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'VACN_DAYS_TOT_BAD', 3,
'Vacn_days_tot is invalid or not set',
'VACN_DAYS_TOT', null, 'Y', 10, 35);
-- predefined check [Business situation; not data error]
insert into CHKDB_RULE -- probation over?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO, NOT_BEFORE_TODAY_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'PROBATION_OVER', 1,
'Probation period is over',
'PROBATION_END_DT', 'D', 'Y');
-- predefined check
insert into CHKDB_RULE -- too many vacation days?
(DATA_DOMAIN, RULESET_ID,
TABLE_NAME, RULE_ID, RULE_CLASS,
DESCRIPTION,
COLUMN_NAME, COLUMN_TYPE_CNDO,
COLUMN_2_NAME, COL_NOT_GT_COL2_YN)
values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES',
'CHKDB_SAMP_HR_EMP', 'VACN_DAYS_USED_HIGH', 1,
'Vacn_days_used is too high',
'VACN_DAYS_USED', 'N', 'VACN_DAYS_TOT', 'Y');
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
|