Skip to content
Ayush Agarwal edited this page Dec 20, 2017 · 2 revisions

The database uses the following schema:

site_table

Column Type Additional Details
id integer not null SERIAL PRIMARY KEY
site_name character varying not null UNIQUE
site_url character varying not null
spider_name character varying not null UNIQUE

log_table

Column Type Additional Details
id integer not null SERIAL PRIMARY KEY
site integer not null FOREIGN KEY (site) REFERENCES site_table(id)
start_time timestamp with time zone not null default now()
end_time timestamp with time zone
urls_parsed integer default 0
urls_scraped integer default 0
urls_dropped integer default 0
urls_stored integer default 0
shutdown_reason character varying default 0
job_id character varying default 0

item_table

Column Type Additional Details
id integer not null SERIAL PRIMARY KEY
title character varying not null
link character varying not null UNIQUE
content text not null
image character varying not null
newsdate timestamp with time zone not null
datescraped timestamp with time zone not null
site_id smallint not null FOREIGN KEY (site_id) REFERENCES site_table(id)
log_id integer not null FOREIGN KEY (log_id) REFERENCES log_table(id)

Clone this wiki locally