ChkDB

Open Source Rule-Based Data Checking
Home
Contact
Download
Releases
Open Source
License
Project
Contributing
The PL/SQL
Using ChkDB
Installation
Read-Me
Rule-Sets
ChkDB Tables
E-R Diagram

Tiny Sample
Database
Data Domain
Implementation
Rule-Set
Do Checks
Queries
Results


External Links
Data Quality and
Data Profiling
(a glossary)

Sample Database

This is the sample_database.sql file that comes with a ChkDB distribution (without the license section)...

-- ==============================================================
--  ChkDB - Populate a tiny sample database 
--
--  This tiny sample database is an example of a corporate
--  database that is to be checked.
--
--  Note: Bad data has been deliberately included.

-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
--  Table Definitions

-- Note that FLOOR is a VARCHAR2 but it should contain numbers
create table CHKDB_SAMP_DEPT (
  DEPT_NAME        VARCHAR2(20),
  MANAGER          VARCHAR2(20),
  FLOOR            VARCHAR2(5),
  BUDGET_AMT       NUMBER(10,2),
  BUDGET_APPRV     NUMBER(10,2),
  BUDGET_EST       NUMBER(10,2),
  constraint CHKDB_SAMP_DEPT_PK
    primary key (DEPT_NAME)
);
 
create table CHKDB_SAMP_GROUP (
  DEPT_NAME        VARCHAR2(20),
  GROUP_NAME       VARCHAR2(20),
  ACTIVE_YN        CHAR(1)       NOT NULL,
  LONG_NAME        VARCHAR2(60),
  constraint CHKDB_SAMP_GROUP_PK
    primary key (DEPT_NAME,GROUP_NAME)
);
 
create table CHKDB_SAMP_EMPL (
  EMPL_ID          VARCHAR2(20),
  EMPL_NAME        VARCHAR2(60)  NOT NULL,
  DEPT_NAME        VARCHAR2(20),
  GROUP_NAME       VARCHAR2(20),
  POSITION         VARCHAR2(40),
  HIRE_DATE        DATE,
  LAST_REVIEW      DATE,
  PROBATION_ENDS   DATE,
  VACN_DAYS        NUMBER(6,2),
  VACN_USED        NUMBER(6,2),
  constraint CHKDB_SAMP_EMPL_PK
    primary key (EMPL_ID)
);
 
-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
--  Populate the sample tables

-- = = = = = = = = = = = = = =
--  Departments

-- Note: BUDGET_EST much greater BUDGET_AMT
insert into CHKDB_SAMP_DEPT 
    (DEPT_NAME, MANAGER, FLOOR, 
     BUDGET_AMT, BUDGET_APPRV, BUDGET_EST)
  values ('MANAGEMENT', 'E1', '1', 
          265000.00, 265000.00, 280000.00);

-- Note that FLOOR is not a number
insert into CHKDB_SAMP_DEPT 
    (DEPT_NAME, MANAGER, FLOOR, 
     BUDGET_AMT, BUDGET_APPRV, BUDGET_EST)
  values ('DEVELOPMENT', 'E2', '@', 
          190000.00, 190000.00, 185000.00);

-- Note bad BUDGET_EST - probably missing zero
-- Note BUDGET_AMT <> BUDGET_APPRV
-- Note missing FLOOR
insert into CHKDB_SAMP_DEPT 
    (DEPT_NAME, MANAGER, FLOOR, 
     BUDGET_AMT, BUDGET_APPRV, BUDGET_EST)
  values ('SALES', 'E3', null, 
          170000.00, 160000.00, 17400.00);

-- note missing MANAGER and bad FLOOR
-- note bad FLOOR
insert into CHKDB_SAMP_DEPT 
    (DEPT_NAME, MANAGER, FLOOR, 
     BUDGET_AMT, BUDGET_APPRV, BUDGET_EST)
  values ('DEPT_999', null, '444', 
          null, null, null);

-- = = = = = = = = = = = = = =
--  Groups

-- note bad DEPT_NAME
insert into CHKDB_SAMP_GROUP (DEPT_NAME, GROUP_NAME, 
                              ACTIVE_YN, LONG_NAME)
  values ('DEVELOPMENTTT', 'APP_TEAM', 
          'Y', 'Application Development Team');

-- note bad ACTIVE_YN: U - should be Y or N
insert into CHKDB_SAMP_GROUP (DEPT_NAME, GROUP_NAME, 
                              ACTIVE_YN, LONG_NAME)
  values ('DEVELOPMENT', 'WEB_TEAM', 
          'U', 'Web Development Team');

-- = = = = = = = = = = = = = =
--  Employees

-- note that LAST_REVIEW is the same as HIRE_DATE
insert into CHKDB_SAMP_EMPL (EMPL_ID, EMPL_NAME, 
                             DEPT_NAME, GROUP_NAME,
                             POSITION, VACN_DAYS, VACN_USED,
                             HIRE_DATE, LAST_REVIEW)
  values ('E1', 'Bob', 'MANAGEMENT', null,
          'President', 25.0, 2.0,
          to_date('1998-04-12','YYYY-MM-DD'),
          to_date('1998-04-12','YYYY-MM-DD'));

-- note that POSITION contains an illegal comma
insert into CHKDB_SAMP_EMPL (EMPL_ID, EMPL_NAME, 
                             DEPT_NAME, GROUP_NAME,
                             POSITION, VACN_DAYS, VACN_USED,
                             HIRE_DATE, LAST_REVIEW)
  values ('E2', 'Tom', 'DEVELOPMENT', null,
          'Manager, Development', 20, 5,
          to_date('1998-09-01','YYYY-MM-DD'),
          null);

-- note incorrect DEPT_NAME: Development - should be: Sales
insert into CHKDB_SAMP_EMPL (EMPL_ID, EMPL_NAME, 
                             DEPT_NAME, GROUP_NAME,
                             POSITION, VACN_DAYS, VACN_USED,
                             HIRE_DATE, LAST_REVIEW)
  values ('E3', 'Sue', 'DEVELOPMENT', null,
          'Manager of Sales', 20, 1,
          to_date('1999-05-01','YYYY-MM-DD'),
          to_date('2003-02-06','YYYY-MM-DD'));

-- note bad LAST_REVIEW - should not be after today
insert into CHKDB_SAMP_EMPL (EMPL_ID, EMPL_NAME, 
                             DEPT_NAME, GROUP_NAME,
                             POSITION, VACN_DAYS, VACN_USED,
                             HIRE_DATE, LAST_REVIEW)
  values ('E4', 'Eve', 'DEVELOPMENT', 'APP_TEAM',
          'Developer', 15, 0,
          to_date('1998-11-01','YYYY-MM-DD'),
          to_date('2303-01-27','YYYY-MM-DD'));

-- note bad partial foreign key (DEPT_NAME, GROUP_NAME)
--     where DEPT_NAME is null but GROUP_NAME is set
-- note bad VACN_USED: 33 - should not be > VACN_DAYS
-- note bad PROBATION_ENDS - should be after today
insert into CHKDB_SAMP_EMPL (EMPL_ID, EMPL_NAME, 
                             DEPT_NAME, GROUP_NAME,
                             POSITION, VACN_DAYS, VACN_USED,
                             HIRE_DATE, LAST_REVIEW,
                             PROBATION_ENDS)
  values ('E5', 'Jim', null, 'WEB_TEAM',
          'Developer', 15, 33,
          to_date('2003-03-01','YYYY-MM-DD'),
          null, 
          to_date('1903-06-01','YYYY-MM-DD'));

-- note bad POSITION: too long
-- note incorrect VACN_DAYS: 5 - should be >= 10
-- note bad LAST_REVIEW - should be after hire date
insert into CHKDB_SAMP_EMPL (EMPL_ID, EMPL_NAME, 
                             DEPT_NAME, GROUP_NAME,
                             POSITION, VACN_DAYS, VACN_USED,
                             HIRE_DATE, LAST_REVIEW)
  values ('E6', 'Sam', 'SALES', null,
          'Sales Representativeee', 5, 1,
          to_date('2001-02-01','YYYY-MM-DD'),
          to_date('1903-01-27','YYYY-MM-DD'));

-- note bad DEPT_NAME
-- note bad HIRE_DATE
-- note bad POSITION
insert into CHKDB_SAMP_EMPL (EMPL_ID, EMPL_NAME, 
                             DEPT_NAME, GROUP_NAME,
                             POSITION, VACN_DAYS, VACN_USED,
                             HIRE_DATE, LAST_REVIEW)
  values ('E7', 'Ann', 'SALESSS', null,
          null, 15, 0,
          to_date('1901-11-01','YYYY-MM-DD'),
          to_date('2003-01-24','YYYY-MM-DD'));

-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = =