ChkDB

Open Source Rule-Based Data Checking
Home
Contact
Download
Releases
Open Source
License
Project
Contributing
The PL/SQL
Using ChkDB
Installation
Read-Me
Rule-Sets
ChkDB Tables
E-R Diagram

Tiny Sample
Database
Data Domain
Implementation
Rule-Set
Do Checks
Queries
Results


External Links
Data Quality and
Data Profiling
(a glossary)

The ChkDB Tables

This is the script to create the ChkDB tables (without the license section)...


-- =================================================================
--  Create the tables for ChkDB rule-based data checking.
--
--                 Overview
--  Table Name             Primary Key Columns
--  -----------------      --------------------------------------
--  CHKDB_DOMAIN           DATA_DOMAIN
--  CHKDB_TABLE            DATA_DOMAIN, TABLE_NAME
--                            COLUMN_SEQ_NUM
--  CHKDB_RULESET          DATA_DOMAIN, RULESET_ID
--  CHKDB_RULE             DATA_DOMAIN, RULESET_ID, TABLE_NAME, 
--                            RULE_ID
--  CHKDB_RESULT           DATA_DOMAIN, RULESET_ID, TABLE_NAME, 
--                            RULE_ID, SELECTED_ROWID
--  CHKDB_RULE_SQL         DATA_DOMAIN, RULESET_ID, TABLE_NAME, 
--                            RULE_ID
--  CHKDB_RULE_ERR         DATA_DOMAIN, RULESET_ID, TABLE_NAME, 
--                            RULE_ID
--
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
--  CHKDB_DOMAIN 
--  one row is: a set of tables and/or views against which
--     sets of data-checking rules can be defined.

create table CHKDB_DOMAIN (
  DATA_DOMAIN          VARCHAR2(20),
  DESCRIPTION          VARCHAR2(60),
  OWNER                VARCHAR2(40),
  ACTIVE_YN            CHAR(1)         DEFAULT 'Y' NOT NULL,
  constraint CHKDB_DOMAIN_PK
    primary key (DATA_DOMAIN)
);

-- OWNER can be used for the name of the person responsible
--   for the domain being in the database.


-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
--  CHKDB_TABLE  
--  one row is: a table or view in a data domain, either:
--     - a table/view to be checked 
--     - a table/view used to setup or use ChkDB
--
--  This table is used to describe:
--    - views into your database (for checking)
--    - tables populated from your database (for checking)
--    - tables to help define/populate ChkDB views/tables
--    - tables to be dropped when ChkDB is removed
--
--  This table should not describe tables in your corporate
--  database.
--
--  As a courtesy to the DBA responsible for the database,
--  the names of all tables and views created for ChkDB
--  checking should begin with 'CHKDB_'.

create table CHKDB_TABLE (
  DATA_DOMAIN          VARCHAR2(20),
  TABLE_NAME           VARCHAR2(30),
  DESCRIPTION          VARCHAR2(60),
  ACTIVE_YN            CHAR(1)         DEFAULT 'Y' NOT NULL,
  CHECKABLE_YN         CHAR(1)         DEFAULT 'Y' NOT NULL,
  TABLE_VIEW_TV        CHAR(1)         DEFAULT 'T' NOT NULL,
  DROP_ON_REMOVE_YN    CHAR(1)         DEFAULT 'Y' NOT NULL,
  PRIMARY_KEY_LIST     VARCHAR2(600),
  TABLE_GROUP          VARCHAR2(20),
  TABLE_SUBGROUP       VARCHAR2(20),
  constraint CHKDB_TABLE_PK
    primary key (DATA_DOMAIN, TABLE_NAME)
);

--  If CHECKABLE_YN = 'Y' then PRIMARY_KEY_LIST must
--  contain a list like 'COL1,COL2,COL3' (which may
--  contain spaces).
--
--  If DROP_ON_REMOVE_YN = 'Y', the table/view will be 
--  dropped if the domain or all of ChkDB is removed.


-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
--  CHKDB_RULESET
--  one row is: a set of rules for a data domain.

create table CHKDB_RULESET (
  DATA_DOMAIN          VARCHAR2(20),
  RULESET_ID           VARCHAR2(20),
  DESCRIPTION          VARCHAR2(60),
  OWNER                VARCHAR2(40),
  ACTIVE_YN            CHAR(1)         DEFAULT 'Y' NOT NULL,
  constraint CHKDB_RULESET_PK
    primary key (DATA_DOMAIN, RULESET_ID)
);

-- OWNER can be used for the name of the person responsible
--   for the ruleset being in the database.


-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
--  CHKDB_RULE  
--  one row is: a rule (in a rule set) for a table (in 
--     a data domain).

create table CHKDB_RULE (
    -- primary key...
  DATA_DOMAIN          VARCHAR2(20),
  RULESET_ID           VARCHAR2(20),
  TABLE_NAME           VARCHAR2(30),
  RULE_ID              VARCHAR2(20),
    -- about the rule...
  ACTIVE_YN            CHAR(1)         DEFAULT 'Y' NOT NULL,
  DESCRIPTION          VARCHAR2(60),
  RULE_CLASS           NUMBER(5)       DEFAULT 0  NOT NULL,
    -- how the rule selects rows...
        -- Way 1:  specify where-clause...
  CHK_WHERE_CLS        VARCHAR2(2000),
  ROW_OUT_DESC         VARCHAR2(1000),
  ROW_OUT_SPEC         VARCHAR2(1000),
        -- Way 2:  various predefined checks...
  COLUMN_NAME          VARCHAR2(30),
  COLUMN_TYPE_CNDO     CHAR(1),
  NOT_NULL_YN          CHAR(1),
  MIN_VALUE            VARCHAR2(20),
  MAX_VALUE            VARCHAR2(20),
  MAX_LENGTH           NUMBER(4),
  IS_A_NUMBER_YN       CHAR(1),
  NOT_BEFORE_TODAY_YN  CHAR(1),
  NOT_AFTER_TODAY_YN   CHAR(1),
  IN_LIST              VARCHAR2(1000),
  NO_ILLEGAL_CHARS     VARCHAR2(200),
  COLUMN_2_NAME        VARCHAR2(30),
  COL_NOT_EQ_COL2_YN   CHAR(1),
  COL_NOT_NE_COL2_YN   CHAR(1),
  COL_NOT_LT_COL2_YN   CHAR(1),
  COL_NOT_GT_COL2_YN   CHAR(1),
  COL_MINUS_COL2_MIN   NUMBER(10),
  COL_MINUS_COL2_MAX   NUMBER(10),
        -- Way 3: foreign key check...
  FOREIGN_KEY_LIST     VARCHAR2(600),
  REF_DATA_DOMAIN      VARCHAR2(20),
  REF_TABLE_NAME       VARCHAR2(30),
  REF_KEY_LIST         VARCHAR2(600),
  FK_NOT_ORPHAN_YN     CHAR(1),
  FK_NOT_NULL_YN       CHAR(1),
  FK_NOT_PARTIAL_YN    CHAR(1),
  constraint CHKDB_RULE_PK
    primary key (DATA_DOMAIN, RULESET_ID, TABLE_NAME, RULE_ID)
);

--  ACTIVE_YN - if 'N', rule will not be applied
--
--  DESCRIPTION describes the condition this rule is
--  designed to identify. ex: "Department Manager is not set"
--
--  RULE_CLASS can be used to classify, group or order rules.
--  Its intent is to group/order rows selected by rules - 
--  maybe like this:
--     0:  Aspect of Interest (default)
--     1:  Somewhat Undesirable
--     2:  Undesirable
--     3:  Very Undesirable
--     4:  Extremely Undesirable
--     5:  Not Acceptable
--
--  = = = = = = = = = = = = = = = = = =
--      How The Rule Selects Rows
--
--  A rule must use one (and only one) of the ways below. 
--
--    =  =  =  =  =  =  =  =  =  =
--    Way 1:  specify where-clause
--
--    This way is generally only used if the condition
--    of the rule can not be specified any other way.
--
--    CHK_WHERE_CLS must be set and these must be null:
--      COLUMN_NAME, FOREIGN_KEY_LIST
--
--    CHK_WHERE_CLS contains a where clause to select
--    rows from the table (which will have an alias of t).
--    Ex 1.  'MGR_EMP_ID is null'
--    Ex 2.  'NUM_SHIPPED > NUM_REQUESTED'
--    Ex 3.  'AA is not null and not exists 
--              (select ''x'' from REFTAB r 
--                 where r.BB = t.AA)'
--
--    Note: In an SQL string constant, a single quote is 
--      written as a pair of single quotes. 
--
--    ROW_OUT_SPEC contains an SQL string expression that 
--    will be selected by the rule (presumably to show the
--    data that caused the rule to select the row).
--    Examples: 'EMP_ID'   or   'to_char(hire_date)'
--
--    ROW_OUT_DESC is a description or title for the data
--    selected by ROW_OUT_SPEC.
--
--    =  =  =  =  =  =  =  =  =  =
--    Way 2:  predefined checks
--
--    This is the easy way of specifying a rule; it is
--    generally used if possible.
--
--    COLUMN_NAME must be set, and these must be null:
--      CHK_WHERE_CLS, FOREIGN_KEY_LIST
--
--    COLUMN_TYPE_CNDO must be set if the table is not
--      owned by the user of the chkdb software; 
--      otherwise it is optional - if it is not specified,
--      it will be determined from the system tables.
--
--    COLUMN_TYPE_CNDO indicates the general data type:
--        C means CHAR, VARCHAR, VARCHAR2, NCHAR etc.
--        N means NUMBER, NUMERIC, INTEGER, FLOAT, etc.
--        D means DATE, TIMESTAMP
--        O means Other
--
--    MIN_VALUE and MAX_VALUE may contain character strings,
--    numbers or dates in the form YYYY-MM-DD .
--
--    If a second column in the table is specified in
--    COLUMN_2_NAME, this column must have the same general
--    datatype as the column named in COLUMN_NAME.
--
--    IN_LIST may contain a list of acceptable values
--    separated by commas.  DO NOT enclose the list in
--    parentheses.  Spaces are removed before checking.
--    This check cannot be used when an acceptable value
--    contains a comma or meaningful spaces.
--    Ex:  'Y,N'   or  'PRIMARY,SECONDARY,TERTIARY'
--         '1,2,3' or  '1.00, 3.14'
--    IN_LIST does NOT support date columns.
--
--    NO_ILLEGAL_CHARS may contain characters that a
--    (character) value should not contain.
--    Ex:  '[]{}()"''`'
--    A single quote is specified as two single quotes.
--
--    =  =  =  =  =  =  =  =  =  =
--    Way 3: foreign key checks
--
--    This way is for checking a foreign key - a set of 
--    columns that, for a row in the table being checked,
--    identify one (or more) other row(s) in some table.
--
--    To use this way... FOREIGN_KEY_LIST must be set and
--    these must be null: CHK_WHERE_CLS, COLUMN_NAME
--
--    FOREIGN_KEY_LIST identifies a key in the table being
--    checked.  
--
--    REF_DATA_DOMAIN, REF_TABLE_NAME and REF_KEY_LIST
--    identify the key that the foreign key should match.
--    They are used only if FK_NOT_ORPHAN_YN = 'Y'.
--
--    A rule can check that...
--      - a code exists in the code-table
--      - a code in a code-table is used in a table
--    or, more generally...
--      - a child-table row has a row in the parent table
--      - a parent-table row has a row in a child table
--          
--    At least one of these flags must be set to 'Y':
--
--      FK_NOT_ORPHAN_YN - the rule will select rows where
--      no column in the FOREIGN_KEY_LIST key is null 
--      and there is no row in REF_TABLE_NAME where
--      the columns in REF_KEY_LIST match the columns
--      in FOREIGN_KEY_LIST.
--        - REF_KEY_LIST must be set
--        - if REF_TABLE_NAME is null, default is TABLE_NAME
--        - if REF_DATA_DOMAIN is null, default is DATA_DOMAIN
--
--      FK_NOT_NULL_YN - the rule will select rows where
--      all columns in the foreign are null.
--
--      FK_NOT_PARTIAL_YN - the rule will select rows where
--        - the last column in the key is not null
--        - some other column in the key is null
--
--
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
--  CHKDB_RESULT
--  one row is: an instance of a row in a table (in a data
--     domain) being selected by a rule (in a rule set).

create table CHKDB_RESULT (
  DATA_DOMAIN          VARCHAR2(20),
  RULESET_ID           VARCHAR2(20),
  TABLE_NAME           VARCHAR2(30),
  RULE_ID              VARCHAR2(20),
  SELECTED_ROWID       ROWID,
  ROW_PRIMARY_KEY      VARCHAR2(600),
  RULE_CLASS           NUMBER(5),
  ROW_OUT_DATA         VARCHAR2(1000),
  ROW_OUT_DESC         VARCHAR2(1000),
  CHECK_DATE           DATE,
  constraint CHKDB_RESULT_PK
    primary key (DATA_DOMAIN, RULESET_ID, TABLE_NAME, 
                 RULE_ID, SELECTED_ROWID)
);

--  If, in the rule's CHK_VALUE_SPEC is null, 
--  then SELECTED_VALUES will contain null.

--  ROW_OUT_DATA may contain data from the row indicating
--  why it was selected by the rule.  ROW_OUT_DESC describes
--  the contents of ROW_OUT_DATA.

--  Question for those who are interested:
--    Should CHECK_DATE be part of the primary key?
--    Or, should software that is going to insert rows have
--    to first delete rows for the table or rule-set?

-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
--  CHKDB_RULE_SQL
--  one row is: the sql generated for a rule when it was
--  last executed.

create table CHKDB_RULE_SQL (
  DATA_DOMAIN          VARCHAR2(20),
  RULESET_ID           VARCHAR2(20),
  TABLE_NAME           VARCHAR2(30),
  RULE_ID              VARCHAR2(20),
  RULE_SQL_DATE        DATE,
  RULE_SQL             VARCHAR2(3000),
  constraint CHKDB_RULE_SQL_PK
    primary key (DATA_DOMAIN, RULESET_ID, TABLE_NAME, RULE_ID)
);

-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
--  CHKDB_RULE_ERR
--  one row is: an instance of an attempt to apply a rule
--     but the rule was mal-formed and the SQL statement
--     resulted in an error.

create table CHKDB_RULE_ERR (
  DATA_DOMAIN          VARCHAR2(20),
  RULESET_ID           VARCHAR2(20),
  TABLE_NAME           VARCHAR2(30),
  RULE_ID              VARCHAR2(20),
  RULE_ERR_DATE        DATE,
  ERR_NUMBER           NUMBER,
  ERR_MESSAGE          VARCHAR2(100),
  constraint CHKDB_RULE_ERR_PK
    primary key (DATA_DOMAIN, RULESET_ID, TABLE_NAME, 
                 RULE_ID)
);


-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =