ChkDB

Open Source Rule-Based Data Checking
Home
Contact
Download
Releases
Open Source
License
Project
Contributing
The PL/SQL
Using ChkDB
Installation
Read-Me
Rule-Sets
ChkDB Tables
E-R Diagram

Tiny Sample
Database
Data Domain
Implementation
Rule-Set
Do Checks
Queries
Results


External Links
Data Quality and
Data Profiling
(a glossary)

Sample Rule-Set

This is the sample_ruleset.sql file that comes with a ChkDB distribution (without the license section).

To create your own rule-set, you create a file similar to the one below. It should have a name ending in _ruleset.sql)

-- ==============================================================
--  ChkDB  -  Sample Rule-Set  SAMP_HR_RULES
--              for data domain: CHKDB_SAMP_HR
--
--  Define the SAMP_HR_RULES rule-set to check data in the
--  CHKDB_SAMP_HR data domain (which is an abstract view into 
--  a sample Human Resources database).
--
--  This set of rules is intended to illustrate (and test)
--  each type of check; it is NOT an the set of all rules
--  that might logically be applied to the sample database.
--
--  This rule-set includes rules to find:
--     - bad data
--     - business situations that require action
--
--  Before this script is run:
--    - the ChkDB tables must exist - see chkdb_tables.sql
--    - the CHKDB_SAMP_HR domain must have been setup
--            see: sample_domain.sql
--
--  Note:  In a pre-defined check, the column-type-code,
--         COLUMN_TYPE_CNDO, does not have to be specified
--         if the user running the checks is the owner of
--         the table being checked.  In several rules, below,
--         COLUMN_TYPE_CNDO is not specified or set to null.
--
--  Note:  In rules in which a where-clause is specified,
--         the table being checked has an alias of t
--         and all references to columns in this table
--         must use this alias.
--
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
--  Define the SAMP_HR_RULES rule-set.

delete CHKDB_RULESET 
  where DATA_DOMAIN = 'CHKDB_SAMP_HR'
    and RULESET_ID  = 'SAMP_HR_RULES';

delete CHKDB_RULE 
  where DATA_DOMAIN = 'CHKDB_SAMP_HR'
    and RULESET_ID  = 'SAMP_HR_RULES';

insert into CHKDB_RULESET (DATA_DOMAIN, RULESET_ID, DESCRIPTION)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'Sample rules for tables in the sample HR database');

-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
--  Define the Rules in the SAMP_HR_RULES rule-set.

--  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
--  Rules about the CHKDB_SAMP_HR_DEPT table/view...

-- foreign key check
insert into CHKDB_RULE            -- manager valid?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     FOREIGN_KEY_LIST, REF_DATA_DOMAIN, 
     REF_TABLE_NAME, REF_KEY_LIST,
     FK_NOT_ORPHAN_YN, FK_NOT_NULL_YN)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_DEPT', 'MANAGER_BAD', 3,
          'Manager is invalid or not set', 
          'MANAGER', 'CHKDB_SAMP_HR',
          'CHKDB_SAMP_HR_EMP', 'EMP_ID',
          'Y', 'Y');

-- specified where-clause
insert into CHKDB_RULE            -- manager in this dept?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     CHK_WHERE_CLS, 
     ROW_OUT_DESC, ROW_OUT_SPEC) 
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_DEPT', 'MNGR_DEPT_BAD', 2,
          'Manager is not in this department', 
          'exists (select 99 from CHKDB_SAMP_HR_EMP e ' ||
                    'where e.EMP_ID = t.MANAGER'     ||
                    '  and e.dept_id <> t.dept_id)',
          'MANAGER','MANAGER');
-- In the line above, the second 'MANAGER' is what will be
-- selected from the table to show the bad data, while the
-- first 'MANAGER' is the title for this output column.

-- predefined check
insert into CHKDB_RULE            -- floor valid?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO,
       NOT_NULL_YN, MIN_VALUE, MAX_VALUE, MAX_LENGTH)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_DEPT', 'FLOOR_BAD', 3,
          'Floor is invalid or not set', 
          'FLOOR', 'C', 'Y', '1', '8', 1);

-- predefined check
insert into CHKDB_RULE            -- floor not a number?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO, IS_A_NUMBER_YN)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_DEPT', 'FLOOR_NOT_NUM', 3,
          'Floor is not a number', 
          'FLOOR', 'C', 'Y');

-- foreign key check (checking parent for children)
insert into CHKDB_RULE            -- any employees?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     FOREIGN_KEY_LIST, REF_DATA_DOMAIN, 
     REF_TABLE_NAME, REF_KEY_LIST,
     FK_NOT_ORPHAN_YN)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_DEPT', 'DEPT_EMPTY', 1,
          'Department has no employees', 
          'DEPT_ID', 'CHKDB_SAMP_HR',
          'CHKDB_SAMP_HR_EMP', 'DEPT_ID',
          'Y');

-- predefined check [Business situation, not data error]
insert into CHKDB_RULE            --  budget estimate off?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO,
     COLUMN_2_NAME, COL_MINUS_COL2_MIN, COL_MINUS_COL2_MAX) 
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_DEPT', 'BUDGET_EST_OFF', 1,
          'Budget estimate too far from budgeted amount', 
          'BUDGET_EST', 'N', 
          'BUDGET_AMT', -20000.00, 10000.00);

-- predefined check [Business situation, not data error]
insert into CHKDB_RULE         --  not all of budget approved?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO,
     COLUMN_2_NAME, COL_NOT_NE_COL2_YN) 
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_DEPT', 'BUDGET_AMT_APPRV_NEQ', 1,
          'Budget amount different than approved budget', 
          'BUDGET_AMT', 'N', 'BUDGET_APPRV', 'Y');

-- Note that this rule is set to be NOT ACTIVE.
-- predefined check [Business situation, not data error]
insert into CHKDB_RULE         --  not all of budget approved?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO,
     MAX_VALUE, ACTIVE_YN) 
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_DEPT', 'BUDGET_AMT_HIGH', 1,
          'Budget amount greater than $125,000', 
          'BUDGET_AMT', 'N', '125000.00', 'N');

--  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
--  Rules about the CHKDB_SAMP_HR_GROUP table/view...

-- foreign key check (of primary key component)
insert into CHKDB_RULE            -- department valid?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     FOREIGN_KEY_LIST, REF_DATA_DOMAIN, 
     REF_TABLE_NAME, REF_KEY_LIST,
     FK_NOT_ORPHAN_YN, FK_NOT_NULL_YN)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_GROUP', 'DEPT_BAD', 4,
          'Department is invalid or not set', 
          'DEPT_ID', 'CHKDB_SAMP_HR',
          'CHKDB_SAMP_HR_DEPT', 'DEPT_ID',
          'Y', 'Y');

-- predefined check  (WITHOUT SPECIFYING COLUMN_TYPE_CNDO)
--   (For predefined checks, the column-type does not have
--    to be specified if the user that runs the checks is
--    the owner of the table being checked.)
insert into CHKDB_RULE            -- active_yn valid?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME,
       NOT_NULL_YN, IN_LIST)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_GROUP', 'ACTIVE_YN_BAD', 4,
          'Active_YN must be Y or N', 
          'ACTIVE_YN', 'Y', 'Y,N');

-- predefined check  (THAT DOES SPECIFY COLUMN_TYPE_CNDO)
--  (in case the block of code above doesn't work)
-- insert into CHKDB_RULE            -- active_yn valid?
--     (DATA_DOMAIN, RULESET_ID, 
--      TABLE_NAME, RULE_ID, RULE_CLASS,
--      DESCRIPTION, 
--      COLUMN_NAME, COLUMN_TYPE_CNDO,
--        NOT_NULL_YN, IN_LIST)
--   values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
--           'CHKDB_SAMP_HR_GROUP', 'ACTIVE_YN_BAD', 4,
--           'Active_YN must be Y or N', 
--           'ACTIVE_YN', 'C', 'Y', 'Y,N');

--  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
--  Rules about the CHKDB_SAMP_HR_EMP table/view...

-- foreign key check
insert into CHKDB_RULE        -- DEPT_ID,GROUP_ID exists?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     FOREIGN_KEY_LIST, REF_DATA_DOMAIN, 
     REF_TABLE_NAME, REF_KEY_LIST,
     FK_NOT_ORPHAN_YN, FK_NOT_PARTIAL_YN)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'DEPT_GROUP_BAD', 4,
          'Dept_id|Group_id is invalid or Dept_id is not set', 
          'DEPT_ID,GROUP_ID', 'CHKDB_SAMP_HR',
          'CHKDB_SAMP_HR_GROUP', 'DEPT_ID,GROUP_ID',
          'Y', 'Y');

-- predefined check
insert into CHKDB_RULE            -- position null?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO, NOT_NULL_YN)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'POSITION_NULL', 3,
          'Position is not set', 
          'POSITION', 'C', 'Y');

-- predefined check
insert into CHKDB_RULE            -- position too long?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO, MAX_LENGTH)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'POSITION_LONG', 3,
          'Position is too long', 
          'POSITION', 'C', 20);

-- predefined check
insert into CHKDB_RULE            -- position illegal char?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO, NO_ILLEGAL_CHARS)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'POSITION_BADCHAR', 3,
          'Position contains illegal character (,)', 
          'POSITION', 'C', ',');

-- predefined check
insert into CHKDB_RULE            -- hire-date valid?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO,
       NOT_NULL_YN, MIN_VALUE, NOT_AFTER_TODAY_YN)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'HIRE_DT_BAD', 3,
          'Hire-date is invalid or not set', 
          'HIRE_DT', 'D', 'Y', '1997-01-01', 'Y');

-- predefined check (note: not not-null, uses other column)
insert into CHKDB_RULE            -- last-review valid?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO,
     NOT_AFTER_TODAY_YN, COLUMN_2_NAME, COL_NOT_LT_COL2_YN) 
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'LAST_REVIEW_BAD', 2,
          'Date of last review is invalid or before hire', 
          'LAST_REVIEW_DT', 'D', 'Y', 'HIRE_DT', 'Y');

-- predefined check (note: uses COL_NOT_EQ_COL2_YN)
--  NOTE: COLUMN_TYPE_CNDO is set to null (rather than 'D')
insert into CHKDB_RULE            -- last-review = hire_date?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO,
     NOT_AFTER_TODAY_YN, COLUMN_2_NAME, COL_NOT_EQ_COL2_YN) 
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'REVIEW_DT_ISSUE', 2,
          'Date of last review is hire date or in future', 
          'LAST_REVIEW_DT', null, 'Y', 'HIRE_DT', 'Y');

-- predefined check
--  NOTE: COLUMN_TYPE_CNDO is set to null (rather than 'N')
insert into CHKDB_RULE            -- vacn_days_tot valid?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO,
       NOT_NULL_YN, MIN_VALUE, MAX_VALUE)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'VACN_DAYS_TOT_BAD', 3,
          'Vacn_days_tot is invalid or not set', 
          'VACN_DAYS_TOT', null, 'Y', 10, 35);

-- predefined check  [Business situation; not data error]
insert into CHKDB_RULE            -- probation over?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO, NOT_BEFORE_TODAY_YN)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'PROBATION_OVER', 1,
          'Probation period is over', 
          'PROBATION_END_DT', 'D', 'Y');

-- predefined check
insert into CHKDB_RULE            -- too many vacation days?
    (DATA_DOMAIN, RULESET_ID, 
     TABLE_NAME, RULE_ID, RULE_CLASS,
     DESCRIPTION, 
     COLUMN_NAME, COLUMN_TYPE_CNDO,
       COLUMN_2_NAME, COL_NOT_GT_COL2_YN)
  values ('CHKDB_SAMP_HR', 'SAMP_HR_RULES', 
          'CHKDB_SAMP_HR_EMP', 'VACN_DAYS_USED_HIGH', 1,
          'Vacn_days_used is too high', 
          'VACN_DAYS_USED', 'N', 'VACN_DAYS_TOT', 'Y');

-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =