diff --git a/.gitignore b/.gitignore
index ed367e8..e832dc4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,8 @@ node_modules
 
 # Optional REPL history
 .node_repl_history
+
+#rest_api folder cache and DS_Store
+rest_api/__pycache__
+rest_api/.DS_Store
+rest_api/templates/.DS_Store
diff --git a/CREATE b/CREATE
new file mode 100644
index 0000000..e69de29
diff --git a/rest_api/models.py b/rest_api/models.py
new file mode 100644
index 0000000..ef5b396
--- /dev/null
+++ b/rest_api/models.py
@@ -0,0 +1,55 @@
+"""SQLAlchemy models for the tables served by the REST API."""
+from restful import db
+
+
+class Backpagecontent(db.Model):
+    """Title and body text of a post; `backpagepostid` is unique, so at most one row per post."""
+    id = db.Column(db.Integer, primary_key=True)
+    backpagepostid = db.Column('backpagepostid', db.Integer, db.ForeignKey('backpagepost.id'), unique=True)
+    title = db.Column('title', db.Text(120))
+    body = db.Column('body', db.Unicode)
+    textsearch = db.Column('textsearch', db.String(80))
+
+
+class Backpageemail(db.Model):
+    """Email address keyed by post id."""
+    backpagepostid = db.Column('backpagepostid', db.Integer, primary_key=True)
+    # NOTE(review): the attribute is `email` but the mapped column is named 'name' - confirm against the schema.
+    email = db.Column('name', db.String(30))
+
+
+class Backpageentities(db.Model):
+    """Entity id paired with a post id."""
+    enitity_id = db.Column('enitity_id', db.Integer, primary_key=True)
+    backpagepostid = db.Column('backpagepostid', db.Integer)
+
+
+class Backpagephone(db.Model):
+    """Phone number keyed by post id; the key is also a foreign key to backpagepost.id."""
+    backpagepostid = db.Column('backpagepostid', db.Integer, db.ForeignKey('backpagepost.id'), primary_key=True)
+    number = db.Column('number', db.String(20))
+
+
+class Backpagepost(db.Model):
+    """A post row; `phone` and `content` are dynamic relationships with a `backpagepost` backref."""
+    id = db.Column(db.Integer, primary_key=True)
+    pageid = db.Column('pageid', db.Integer, unique=True)
+    oid = db.Column('oid', db.Integer)
+    posterage = db.Column('posterage', db.SmallInteger)
+    postdate = db.Column('postdate', db.DateTime(timezone=True))
+    phone = db.relationship('Backpagephone', backref='backpagepost', lazy='dynamic')
+    content = db.relationship("Backpagecontent", backref='backpagepost', lazy='dynamic')
+
+
+class Backpagesite(db.Model):
+    """Named site row; the REST API returns these rows as cities."""
+    id = db.Column(db.Integer, primary_key=True)
+    name = db.Column('name', db.String(120))
+
+
+class Crawler(db.Model):
+    """Crawler name and version."""
+    id = db.Column(db.Integer, primary_key=True)
+    name = db.Column('name', db.String(32))
+    version = db.Column('version', db.VARCHAR)
+
diff --git a/rest_api/restful.py b/rest_api/restful.py
new file mode 100644
index 0000000..296035a
--- /dev/null
+++ b/rest_api/restful.py
@@ -0,0 +1,172 @@
+"""Flask REST API over the crawler database."""
+from flask import Flask, jsonify, request, render_template
+from flask_sqlalchemy import SQLAlchemy
+from healthcheck import HealthCheck, EnvironmentDump
+import requests
+import json
+import re
+
+
+app = Flask(__name__)
+app.config['SQLALCHEMY_DATABASE_URI'] = "postgresql://localhost:5432/crawler"
+app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
+
+db = SQLAlchemy(app)
+
+# models.py runs `from restful import db`, so it can only be imported once
+# `db` exists; importing it any earlier makes `import restful` fail.
+from models import *  # noqa: E402,F401,F403
+
+
+# wrap the flask app and give a healthcheck url to make sure DB is ok
+health = HealthCheck(app, "/healthcheck")
+
+
+def health_database_status():
+    """Run a probe query and return ``(is_database_working, output)``.
+
+    ``output`` is 'database is ok' on success, otherwise the exception text.
+    """
+    is_database_working = True
+    output = 'database is ok'
+
+    try:
+        session = db.session()
+        session.execute('SELECT * from backpageemail')
+    except Exception as e:
+        output = str(e)
+        is_database_working = False
+
+    print(output)
+    return is_database_working, output
+
+
+# Register the probe so the /healthcheck URL actually reports the database state.
+health.add_check(health_database_status)
+# Also run it once at import time so the status is printed on startup.
+health_database_status()
+
+
+@app.route('/', methods=['GET'])
+def test():
+    """Return a fixed JSON message; useful as a smoke test."""
+    return jsonify({'message': 'It works!'})
+
+
+# Backpagecontent table API endpoints with integer params and text search - return id, post id, title
+@app.route('/api/backpage/content/<int:backpage_content_id>', methods=['GET'])
+def get_content(backpage_content_id):
+    """Return the content rows with this id, each with the phone number of its post.
+
+    ``number`` is None when no Backpagephone row exists for the post.
+    """
+    contents = Backpagecontent.query.filter_by(id=backpage_content_id).all()
+
+    data = []
+    for c in contents:
+        # backpagepostid is Backpagephone's primary key, so at most one row matches.
+        phone = Backpagephone.query.filter_by(backpagepostid=c.backpagepostid).first()
+        data.append(dict(
+            id=c.id,
+            postId=c.backpagepostid,
+            title=c.title,
+            number=phone.number if phone is not None else None,
+        ))
+
+    return jsonify({'data': data})
+
+
+@app.route('/api/backpage/content/q=<search>', methods=['GET'])
+def get_search_results(search):
+    """Return content rows whose title contains ``search`` as a whole word.
+
+    The SQL filter keeps titles containing the text; the regex then keeps only
+    the ones where it is delimited by word boundaries.
+    """
+    contents = Backpagecontent.query.filter(Backpagecontent.title.contains(search)).all()
+    # Escape the user-supplied text so regex metacharacters match literally
+    # instead of raising re.error or altering the pattern.
+    whole_word = re.compile(r'\b' + re.escape(search) + r'\b')
+
+    return jsonify({'data': [
+        dict(id=c.id, postId=c.backpagepostid, title=c.title)
+        for c in contents if whole_word.search(c.title)
+    ]})
+
+
+# Backpagesite table endpoints - return the cities stored in the table
+@app.route('/api/backpage/cities/', methods=['GET'])
+def get_all_cities():
+    """Return every Backpagesite row as {'data': [{'id': ..., 'city': ...}]}."""
+    cities = Backpagesite.query.all()
+
+    return jsonify({'data': [
+        dict(id=c.id, city=c.name)
+        for c in cities
+    ]})
+
+
+@app.route('/api/backpage/cities/q=<testsearch>', methods=['GET'])
+def search(testsearch):
+    """Return the cities whose name contains ``testsearch``; rows without a name are skipped."""
+    cities = Backpagesite.query.all()
+
+    return jsonify({'data': [
+        dict(id=c.id, city=c.name)
+        for c in cities if c.name and testsearch in c.name
+    ]})
+
+
+# Backpagephone table endpoints - return phone numbers and post ids
+@app.route('/api/backpage/phone/', methods=['GET'])
+def get_all_numbers():
+    """Return every Backpagephone row as {'data': [{'backpagepostid': ..., 'number': ...}]}."""
+    numbers = Backpagephone.query.all()
+
+    return jsonify({'data': [
+        dict(backpagepostid=n.backpagepostid, number=n.number)
+        for n in numbers
+    ]})
+
+
+@app.route('/api/backpage/phone/<int:backpagepost_id>', methods=['GET'])
+def get_number(backpagepost_id):
+    """Return the phone numbers stored for the given post id."""
+    numbers = Backpagephone.query.filter_by(backpagepostid=backpagepost_id).all()
+    return jsonify({'numbers': [n.number for n in numbers]})
+
+
+@app.route('/api/backpage/phone/number/<string:number>', methods=['GET'])
+def getid_from_number(number):
+    """Return the post ids for a phone number (own path segment, so digits never match get_number)."""
+    ids = Backpagephone.query.filter_by(number=number).all()
+    return jsonify({'backpagepost_ids': [i.backpagepostid for i in ids]})
+
+
+# Backpageemail endpoints - return emails and post ids
+@app.route('/api/backpage/email/<int:backpagepost_id>', methods=['GET'])
+def get_email(backpagepost_id):
+    """Return the emails stored for the given post id."""
+    emails = Backpageemail.query.filter_by(backpagepostid=backpagepost_id).all()
+    return jsonify({'Emails': [i.email for i in emails]})
+
+
+@app.route('/api/backpage/email/<string:email>', methods=['GET'])
+def getid_from_mail(email):
+    """Return the post ids whose stored email equals ``email``."""
+    ids = Backpageemail.query.filter_by(email=email).all()
+    return jsonify({'backpagepost_ids': [i.backpagepostid for i in ids]})
+
+
+# Backpagepost, Backpagephone and Backpagecontent tables joined at this endpoint
+@app.route('/api/backpage/<int:backpagepost_id>/title', methods=['GET'])
+def get_title_withID(backpagepost_id):
+    """Return the content titles of the post reached through this post id's phone rows."""
+    numbers = Backpagephone.query.filter_by(backpagepostid=backpagepost_id).all()
+    # The JSON key stays 'numbers' (even though it lists titles) so clients keep working.
+    return jsonify({'numbers': [content.title for n in numbers for content in n.backpagepost.content.all()]})
+
+
+# Development entry point; debug=True must not be used on a publicly reachable server.
+if __name__ == "__main__":
+    app.run(debug=True, port=8000)
diff --git a/rest_api/templates/index.html b/rest_api/templates/index.html
new file mode 100644
index 0000000..6fcfbb3
--- /dev/null
+++ b/rest_api/templates/index.html
@@ -0,0 +1,8 @@
+
+
+ Database API
+
+
+
+
+
\ No newline at end of file