The Amalgam Schema and Data Integration Test Suite
 
Renée J. Miller and Daniel Fisla and Mary Huang
and David Kalmuk and Fei Ku and Vivian Lee
Methodology. Four separate students were asked to do the following tasks.
To receive a tarred source of all the schemas and databases, please email miller@cs.toronto.edu.
To help determine if this benchmark may be of use in your project, we have included below the four schemas (together with the relational DDL statements needed to create the schemas). We will happily provide the data files and DBMS load statements (currently written for DB2) to any research project.
To reference this benchmark, please use the following citation:
@unpublished{Mil+01, author = {Ren\'{e}e J. Miller and Daniel Fisla and Mary Huang and David Kymlicka and Fei Ku and Vivian Lee}, title = {{The Amalgam Schema and Data Integration Test Suite}}, note = {www.cs.toronto.edu/~miller/amalgam}, year = 2001, }
The individual schemas are named s1, s2, s3 and s4. All schemas were created in a master SQL database named "amalgam".
-
please give time to load -
Schema S1 The original data source for s1 was a BIBTEX file was downloaded from the site: http://liinwww.ira.uka.de/bibliography/Database/Wiederhold/index.html
| |
1. ER
Diagram submitted by Fei Ku: |
|
create table s1.InProceedings \ |
|
2. ER
Diagram submitted by Vivian Lee: |
|
Schema S2 The original data source was the object-oriented bibliograph downloaded from the site: http://iamwww.unibe.ch/~scg/Resources/Bib/scg.bib |
|
create table s2.allBibs ( citKey char(20) not null, primary key (citKey) ) create table s2.citForm \ ( citKey char(20) not null, form char(20), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.authors \ ( citKey char(20) not null, autNm char(100) not null,
\ primary key (citKey, autNm), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.editors \ ( citKey char(20) not null, \ edNm char(100)
not null, primary key (citKey, edNm),
\ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.titles \ ( citKey char(20) not null, title char(200), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.schools \ ( citKey char(20) not null, schoolNm char(100), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.institutions \ ( citKey char(20) not null, institNm char(100), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.addresses \ ( citKey char(20) not null, address char(100), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.types \ ( citKey char(20) not null, type char(100), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.ISBN \ ( citKey char(20) not null, isbnNum char(20), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.notes \ ( citKey char(20) not null, note char(200) not null, \ primary key (citKey, note), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.volumes \ ( citKey char(20) not null, volNum char(50), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.numbers \ ( citKey char(20) not null, num char(50), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.months \ ( citKey char(20) not null, mon char(20), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.years \ ( citKey char(20) not null, yr char(20), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.pages \ ( citKey char(20) not null, pgRange char(50), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.abstracts \ ( citKey char(20) not null, txt varchar(3500), \ primary key (citKey), \ foreign key (citKey) references s2.allBibs on delete cascade ) create table s2.publisher \ (
pubNm char(100), pubID integer not null, \ primary key (pubID) ) create table s2.citPublisher \ ( citKey char(20) not null, pubID integer not null, \ primary key (citKey, pubID), \ foreign key (citKey) references s2.allBibs on delete cascade, \ foreign key (pubID) references s2.publisher on delete cascade ) create table s2.journal \ ( jrnlNm char(200), jrnlID integer not null, \ primary key (jrnlID) ) create table s2.citJournal \ ( citKey char(20) not null, jrnlID integer not null, \ primary key (citKey, jrnlID), \ foreign key (citKey) references s2.allBibs on delete cascade, \ foreign key (jrnlID) references s2.journal on delete cascade ) create table s2.series \ (
seriesNm char(50), seriesID integer not null, \ primary key (seriesID) ) create table s2.citSeries \ ( citKey char(20) not null, seriesID integer not null, \ primary key (citKey, seriesID), \ foreign key (citKey) references s2.allBibs on delete cascade, \ foreign key (seriesID) references s2.series on delete cascade ) create table s2.booktitle \ (
bkTitleNm char(100),
bktitleID integer not null, \ primary key (bktitleID) ) create table s2.citBkTitle \ ( citKey char(20) not null, bktitleID integer not null, \ primary key (citKey, bktitleID), \ foreign key (citKey) references s2.allBibs on delete cascade, \ foreign key (bktitleID) references s2.booktitle on delete cascade ) create table s2.keyWord \ ( word char(50), keyWdID integer not null, \ primary key (keyWdID) ) create table s2.citKeyWd \ ( citKey char(20) not null, keyWdID integer not null, \ primary key (citKey, keyWdID), \ foreign key (citKey) references s2.allBibs on delete cascade, \ foreign key (keyWdID) references s2.keyWord on delete cascade ) |
|
3. ER
Diagram submitted by Mary Huang: |
|
Schema S3 The original data source was downloaded from Information and Computation web site: http://theory.lcs.mit.edu/~iandc/homepage.html. |
|
create table s3.article (
\ articleID varchar(30) not null, \ title varchar(150) not null, \ volume int not null, \ number varchar(20)not null, \ pages varchar(30) not null, \ month varchar(100) not null, \ year int not null, \ refkey varchar(50), \ note varchar(150), \ remarks varchar(400), \ references varchar(2000), \ xxxreferences varchar(600), \ fullxxxreferences varchar(400), \ oldkey varchar(50), \ abstract varchar(3000), \ preliminary varchar(100), \ primary key (articleID)) IN CSC494TABLESPACE create table s3.author ( \ authorID int not null, \ name varchar(40) not null, \ primary key (authorID)) create table
s3.unpublished ( \ unpubID varchar(30) not null, \ title varchar(150) not null, \ refkey varchar(20), \ note varchar(50), \ preliminary varchar(100), \ references varchar(2000), \ primary key(unpubID)) create table
s3.articleAuthor ( \ articleID varchar(30) not null, \ authorID int not null, \ primary key (articleID, authorID), \ foreign key (articleID) references s3.article \ on delete cascade, \ foreign key (authorID) references s3.author \ on delete restrict) create table
s3.unpubAuthor ( \ unpubID varchar(30) not null, \ authorID int not null, \ primary key (unpubID, authorID), \ foreign key (unpubID) references s3.unpublished \ on delete cascade, \ foreign key (authorID) references s3.author \ on delete restrict) |
|
4. ER
Diagram submitted by Daniel Fisla: |
|
create
table s4.author \ (
AID INTEGER NOT NULL, Name VARCHAR(50) NOT NULL UNIQUE, \ Affiliations VARCHAR(200), \ PRIMARY KEY(AID) ) create
table s4.descriptor \ (
DID INTEGER NOT NULL, Descriptor VARCHAR(50) NOT NULL UNIQUE, \ PRIMARY KEY(DID) ) create
table s4.location \ (
LID INTEGER NOT NULL , CountryPub VARCHAR(50) NOT NULL, \ CountryOrigin VARCHAR(50) NOT NULL, \ UNIQUE(CountryPub,CountryOrigin), \ PRIMARY KEY(LID) ) CREATE
TABLE s4.publication \ (
PID INTEGER NOT NULL, Title VARCHAR(500) NOT NULL, \ TitleExt VARCHAR(100), Abstract
VARCHAR(3000) NOT NULL, \ AbstractInd VARCHAR(5) NOT NULL, Language
VARCHAR(50) NOT NULL, \ Journal VARCHAR(255), JournalAnn
VARCHAR(20), ConfInfo VARCHAR(500), \ Book VARCHAR(500), Category VARCHAR(100)
NOT NULL, \ PRIMARY KEY (PID) ) CREATE
TABLE s4.record \ (
RID INTEGER NOT NULL, Availability VARCHAR(255) NOT NULL, \ UpdateCode VARCHAR(25) NOT NULL, NumRef
VARCHAR(3), \ ContractNum VARCHAR(100), ISSN VARCHAR(25),
\ ISBN VARCHAR(25), Notes VARCHAR(100), \ Subfile VARCHAR(10) NOT NULL, Source
VARCHAR(255), \ Series VARCHAR(100), AccessionNum
VARCHAR(100) NOT NULL, \ PRIMARY KEY (RID) ) CREATE
TABLE s4.Described \ (PID INTEGER NOT NULL, DID INTEGER NOT
NULL, \ PRIMARY KEY(PID, DID), \ FOREIGN KEY(PID) REFERENCES
s4.publication, \ FOREIGN KEY(DID) REFERENCES s4.descriptor) CREATE
TABLE s4.Located \ (PID INTEGER NOT NULL, LID INTEGER NOT
NULL, \ PRIMARY KEY(PID, LID), \ FOREIGN KEY(PID) REFERENCES
s4.publication, \ FOREIGN
KEY(LID) REFERENCES s4.location) CREATE
TABLE s4.Recorded \ (PID INTEGER NOT NULL, RID INTEGER NOT
NULL, RecordType VARCHAR(100), \ PRIMARY KEY(PID, RID), \ FOREIGN KEY(PID) REFERENCES
s4.publication, \ FOREIGN KEY(RID) REFERENCES s4.record) CREATE
TABLE s4.Written \ (AID INTEGER NOT NULL, PID INTEGER NOT
NULL, \ PubYear VARCHAR(100) NOT NULL, Publisher
VARCHAR(100), \ PRIMARY KEY(PID, AID), \ FOREIGN KEY(PID) REFERENCES
s4.publication, \ FOREIGN KEY(AID) REFERENCES s4.author) |
|
|
|
|