ChkDB |
Open Source Rule-Based Data Checking |
|
Home Contact Download Releases Open Source License Project Contributing The PL/SQL Using ChkDB Installation Read-Me Rule-Sets ChkDB Tables E-R Diagram
Tiny Sample External Links Data Quality and Data Profiling (a glossary) |
The ChkDB TablesThis is the script to create the ChkDB tables (without the license section)...
-- =================================================================
-- Create the tables for ChkDB rule-based data checking.
--
-- Overview
-- Table Name Primary Key Columns
-- ----------------- --------------------------------------
-- CHKDB_DOMAIN DATA_DOMAIN
-- CHKDB_TABLE DATA_DOMAIN, TABLE_NAME
-- COLUMN_SEQ_NUM
-- CHKDB_RULESET DATA_DOMAIN, RULESET_ID
-- CHKDB_RULE DATA_DOMAIN, RULESET_ID, TABLE_NAME,
-- RULE_ID
-- CHKDB_RESULT DATA_DOMAIN, RULESET_ID, TABLE_NAME,
-- RULE_ID, SELECTED_ROWID
-- CHKDB_RULE_SQL DATA_DOMAIN, RULESET_ID, TABLE_NAME,
-- RULE_ID
-- CHKDB_RULE_ERR DATA_DOMAIN, RULESET_ID, TABLE_NAME,
-- RULE_ID
--
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-- CHKDB_DOMAIN
-- one row is: a set of tables and/or views against which
-- sets of data-checking rules can be defined.
create table CHKDB_DOMAIN (
DATA_DOMAIN VARCHAR2(20),
DESCRIPTION VARCHAR2(60),
OWNER VARCHAR2(40),
ACTIVE_YN CHAR(1) DEFAULT 'Y' NOT NULL,
constraint CHKDB_DOMAIN_PK
primary key (DATA_DOMAIN)
);
-- OWNER can be used for the name of the person responsible
-- for the domain being in the database.
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-- CHKDB_TABLE
-- one row is: a table or view in a data domain, either:
-- - a table/view to be checked
-- - a table/view used to setup or use ChkDB
--
-- This table is used to describe:
-- - views into your database (for checking)
-- - tables populated from your database (for checking)
-- - tables to help define/populate ChkDB views/tables
-- - tables to be dropped when ChkDB is removed
--
-- This table should not describe tables in your corporate
-- database.
--
-- As a courtesy to the DBA responsible for the database,
-- the names of all tables and views created for ChkDB
-- checking should begin with 'CHKDB_'.
create table CHKDB_TABLE (
DATA_DOMAIN VARCHAR2(20),
TABLE_NAME VARCHAR2(30),
DESCRIPTION VARCHAR2(60),
ACTIVE_YN CHAR(1) DEFAULT 'Y' NOT NULL,
CHECKABLE_YN CHAR(1) DEFAULT 'Y' NOT NULL,
TABLE_VIEW_TV CHAR(1) DEFAULT 'T' NOT NULL,
DROP_ON_REMOVE_YN CHAR(1) DEFAULT 'Y' NOT NULL,
PRIMARY_KEY_LIST VARCHAR2(600),
TABLE_GROUP VARCHAR2(20),
TABLE_SUBGROUP VARCHAR2(20),
constraint CHKDB_TABLE_PK
primary key (DATA_DOMAIN, TABLE_NAME)
);
-- If CHECKABLE_YN = 'Y' then PRIMARY_KEY_LIST must
-- contain a list like 'COL1,COL2,COL3' (which may
-- contain spaces).
--
-- If DROP_ON_REMOVE_YN = 'Y', the table/view will be
-- dropped if the domain or all of ChkDB is removed.
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-- CHKDB_RULESET
-- one row is: a set of rules for a data domain.
create table CHKDB_RULESET (
DATA_DOMAIN VARCHAR2(20),
RULESET_ID VARCHAR2(20),
DESCRIPTION VARCHAR2(60),
OWNER VARCHAR2(40),
ACTIVE_YN CHAR(1) DEFAULT 'Y' NOT NULL,
constraint CHKDB_RULESET_PK
primary key (DATA_DOMAIN, RULESET_ID)
);
-- OWNER can be used for the name of the person responsible
-- for the ruleset being in the database.
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-- CHKDB_RULE
-- one row is: a rule (in a rule set) for a table (in
-- a data domain).
create table CHKDB_RULE (
-- primary key...
DATA_DOMAIN VARCHAR2(20),
RULESET_ID VARCHAR2(20),
TABLE_NAME VARCHAR2(30),
RULE_ID VARCHAR2(20),
-- about the rule...
ACTIVE_YN CHAR(1) DEFAULT 'Y' NOT NULL,
DESCRIPTION VARCHAR2(60),
RULE_CLASS NUMBER(5) DEFAULT 0 NOT NULL,
-- how the rule selects rows...
-- Way 1: specify where-clause...
CHK_WHERE_CLS VARCHAR2(2000),
ROW_OUT_DESC VARCHAR2(1000),
ROW_OUT_SPEC VARCHAR2(1000),
-- Way 2: various predefined checks...
COLUMN_NAME VARCHAR2(30),
COLUMN_TYPE_CNDO CHAR(1),
NOT_NULL_YN CHAR(1),
MIN_VALUE VARCHAR2(20),
MAX_VALUE VARCHAR2(20),
MAX_LENGTH NUMBER(4),
IS_A_NUMBER_YN CHAR(1),
NOT_BEFORE_TODAY_YN CHAR(1),
NOT_AFTER_TODAY_YN CHAR(1),
IN_LIST VARCHAR2(1000),
NO_ILLEGAL_CHARS VARCHAR2(200),
COLUMN_2_NAME VARCHAR2(30),
COL_NOT_EQ_COL2_YN CHAR(1),
COL_NOT_NE_COL2_YN CHAR(1),
COL_NOT_LT_COL2_YN CHAR(1),
COL_NOT_GT_COL2_YN CHAR(1),
COL_MINUS_COL2_MIN NUMBER(10),
COL_MINUS_COL2_MAX NUMBER(10),
-- Way 3: foreign key check...
FOREIGN_KEY_LIST VARCHAR2(600),
REF_DATA_DOMAIN VARCHAR2(20),
REF_TABLE_NAME VARCHAR2(30),
REF_KEY_LIST VARCHAR2(600),
FK_NOT_ORPHAN_YN CHAR(1),
FK_NOT_NULL_YN CHAR(1),
FK_NOT_PARTIAL_YN CHAR(1),
constraint CHKDB_RULE_PK
primary key (DATA_DOMAIN, RULESET_ID, TABLE_NAME, RULE_ID)
);
-- ACTIVE_YN - if 'N', rule will not be applied
--
-- DESCRIPTION describes the condition this rule is
-- designed to identify. ex: "Department Manager is not set"
--
-- RULE_CLASS can be used to classify, group or order rules.
-- Its intent is to group/order rows selected by rules -
-- maybe like this:
-- 0: Aspect of Interest (default)
-- 1: Somewhat Undesirable
-- 2: Undesirable
-- 3: Very Undesirable
-- 4: Extremely Undesirable
-- 5: Not Acceptable
--
-- = = = = = = = = = = = = = = = = = =
-- How The Rule Selects Rows
--
-- A rule must use one (and only one) of the ways below.
--
-- = = = = = = = = = =
-- Way 1: specify where-clause
--
-- This way is generally only used if the condition
-- of the rule can not be specified any other way.
--
-- CHK_WHERE_CLS must be set and these must be null:
-- COLUMN_NAME, FOREIGN_KEY_LIST
--
-- CHK_WHERE_CLS contains a where clause to select
-- rows from the table (which will have an alias of t).
-- Ex 1. 'MGR_EMP_ID is null'
-- Ex 2. 'NUM_SHIPPED > NUM_REQUESTED'
-- Ex 3. 'AA is not null and not exists
-- (select ''x'' from REFTAB r
-- where r.BB = t.AA)'
--
-- Note: In an SQL string constant, a single quote is
-- written as a pair of single quotes.
--
-- ROW_OUT_SPEC contains an SQL string expression that
-- will be selected by the rule (presumably to show the
-- data that caused the rule to select the row).
-- Examples: 'EMP_ID' or 'to_char(hire_date)'
--
-- ROW_OUT_DESC is a description or title for the data
-- selected by ROW_OUT_SPEC.
--
-- = = = = = = = = = =
-- Way 2: predefined checks
--
-- This is the easy way of specifying a rule; it is
-- generally used if possible.
--
-- COLUMN_NAME must be set, and these must be null:
-- CHK_WHERE_CLS, FOREIGN_KEY_LIST
--
-- COLUMN_TYPE_CNDO must be set if the table is not
-- owned by the user of the chkdb software;
-- otherwise it is optional - if it is not specified,
-- it will be determined from the system tables.
--
-- COLUMN_TYPE_CNDO indicates the general data type:
-- C means CHAR, VARCHAR, VARCHAR2, NCHAR etc.
-- N means NUMBER, NUMERIC, INTEGER, FLOAT, etc.
-- D means DATE, TIMESTAMP
-- O means Other
--
-- MIN_VALUE and MAX_VALUE may contain character strings,
-- numbers or dates in the form YYYY-MM-DD .
--
-- If a second column in the table is specified in
-- COLUMN_2_NAME, this column must have the same general
-- datatype as the column named in COLUMN_NAME.
--
-- IN_LIST may contain a list of acceptable values
-- separated by commas. DO NOT enclose the list in
-- parentheses. Spaces are removed before checking.
-- This check cannot be used when an acceptable value
-- contains a comma or meaningful spaces.
-- Ex: 'Y,N' or 'PRIMARY,SECONDARY,TERTIARY'
-- '1,2,3' or '1.00, 3.14'
-- IN_LIST does NOT support date columns.
--
-- NO_ILLEGAL_CHARS may contain characters that a
-- (character) value should not contain.
-- Ex: '[]{}()"''`'
-- A single quote is specified as two single quotes.
--
-- = = = = = = = = = =
-- Way 3: foreign key checks
--
-- This way is for checking a foreign key - a set of
-- columns that, for a row in the table being checked,
-- identify one (or more) other row(s) in some table.
--
-- To use this way... FOREIGN_KEY_LIST must be set and
-- these must be null: CHK_WHERE_CLS, COLUMN_NAME
--
-- FOREIGN_KEY_LIST identifies a key in the table being
-- checked.
--
-- REF_DATA_DOMAIN, REF_TABLE_NAME and REF_KEY_LIST
-- identify the key that the foreign key should match.
-- They are used only if FK_NOT_ORPHAN_YN = 'Y'.
--
-- A rule can check that...
-- - a code exists in the code-table
-- - a code in a code-table is used in a table
-- or, more generally...
-- - a child-table row has a row in the parent table
-- - a parent-table row has a row in a child table
--
-- At least one of these flags must be set to 'Y':
--
-- FK_NOT_ORPHAN_YN - the rule will select rows where
-- no column in the FOREIGN_KEY_LIST key is null
-- and there is no row in REF_TABLE_NAME where
-- the columns in REF_KEY_LIST match the columns
-- in FOREIGN_KEY_LIST.
-- - REF_KEY_LIST must be set
-- - if REF_TABLE_NAME is null, default is TABLE_NAME
-- - if REF_DATA_DOMAIN is null, default is DATA_DOMAIN
--
-- FK_NOT_NULL_YN - the rule will select rows where
-- all columns in the foreign are null.
--
-- FK_NOT_PARTIAL_YN - the rule will select rows where
-- - the last column in the key is not null
-- - some other column in the key is null
--
--
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-- CHKDB_RESULT
-- one row is: an instance of a row in a table (in a data
-- domain) being selected by a rule (in a rule set).
create table CHKDB_RESULT (
DATA_DOMAIN VARCHAR2(20),
RULESET_ID VARCHAR2(20),
TABLE_NAME VARCHAR2(30),
RULE_ID VARCHAR2(20),
SELECTED_ROWID ROWID,
ROW_PRIMARY_KEY VARCHAR2(600),
RULE_CLASS NUMBER(5),
ROW_OUT_DATA VARCHAR2(1000),
ROW_OUT_DESC VARCHAR2(1000),
CHECK_DATE DATE,
constraint CHKDB_RESULT_PK
primary key (DATA_DOMAIN, RULESET_ID, TABLE_NAME,
RULE_ID, SELECTED_ROWID)
);
-- If, in the rule's CHK_VALUE_SPEC is null,
-- then SELECTED_VALUES will contain null.
-- ROW_OUT_DATA may contain data from the row indicating
-- why it was selected by the rule. ROW_OUT_DESC describes
-- the contents of ROW_OUT_DATA.
-- Question for those who are interested:
-- Should CHECK_DATE be part of the primary key?
-- Or, should software that is going to insert rows have
-- to first delete rows for the table or rule-set?
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-- CHKDB_RULE_SQL
-- one row is: the sql generated for a rule when it was
-- last executed.
create table CHKDB_RULE_SQL (
DATA_DOMAIN VARCHAR2(20),
RULESET_ID VARCHAR2(20),
TABLE_NAME VARCHAR2(30),
RULE_ID VARCHAR2(20),
RULE_SQL_DATE DATE,
RULE_SQL VARCHAR2(3000),
constraint CHKDB_RULE_SQL_PK
primary key (DATA_DOMAIN, RULESET_ID, TABLE_NAME, RULE_ID)
);
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-- CHKDB_RULE_ERR
-- one row is: an instance of an attempt to apply a rule
-- but the rule was mal-formed and the SQL statement
-- resulted in an error.
create table CHKDB_RULE_ERR (
DATA_DOMAIN VARCHAR2(20),
RULESET_ID VARCHAR2(20),
TABLE_NAME VARCHAR2(30),
RULE_ID VARCHAR2(20),
RULE_ERR_DATE DATE,
ERR_NUMBER NUMBER,
ERR_MESSAGE VARCHAR2(100),
constraint CHKDB_RULE_ERR_PK
primary key (DATA_DOMAIN, RULESET_ID, TABLE_NAME,
RULE_ID)
);
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
|