Skip to content
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,8 @@ node_modules

# Optional REPL history
.node_repl_history

#rest_api folder cache and DS_Store
rest_api/__pycache__
rest_api/.DS_Store
rest_api/templates/.DS_Store
Empty file added CREATE
Empty file.
46 changes: 46 additions & 0 deletions rest_api/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from restful import db


class Backpagecontent(db.Model):
    """ORM model holding the title/body text attached to a ``Backpagepost``.

    No ``__tablename__`` is set, so the table name is whatever
    Flask-SQLAlchemy derives from the class name.
    """

    id = db.Column(db.Integer, primary_key=True)
    # Unique foreign key: at most one content row may reference a given post.
    backpagepostid = db.Column(
        'backpagepostid',
        db.Integer,
        db.ForeignKey('backpagepost.id'),
        unique=True,
    )
    title = db.Column('title', db.Text(120))
    body = db.Column('body', db.Unicode)
    textsearch = db.Column('textsearch', db.String(80))


class Backpageemail(db.Model):
    """ORM model pairing a post id with an e-mail address.

    ``backpagepostid`` is the primary key and, unlike ``Backpagephone``,
    carries no foreign-key constraint.
    """

    backpagepostid = db.Column(
        'backpagepostid',
        db.Integer,
        primary_key=True,
    )
    # NOTE(review): the Python attribute is ``email`` but the underlying
    # column is literally named 'name' -- confirm this matches the schema.
    email = db.Column(
        'name',
        db.String(30),
    )


class Backpageentities(db.Model):
    """ORM model linking an entity id to a post id."""

    # NOTE(review): 'enitity_id' looks like a misspelling of 'entity_id', but
    # it is the actual column name, so it is kept exactly as written.
    enitity_id = db.Column(
        'enitity_id',
        db.Integer,
        primary_key=True,
    )
    backpagepostid = db.Column(
        'backpagepostid',
        db.Integer,
    )


class Backpagephone(db.Model):
    """ORM model storing the phone number associated with a post.

    ``backpagepostid`` is both the primary key and a foreign key to
    ``backpagepost.id``. The ``backpagepost`` attribute used by the API
    endpoints is supplied by the backref declared on ``Backpagepost.phone``.
    """

    backpagepostid = db.Column(
        'backpagepostid',
        db.Integer,
        db.ForeignKey('backpagepost.id'),
        primary_key=True,
    )
    number = db.Column('number', db.String(20))


class Backpagepost(db.Model):
    """ORM model for a single post; parent of phone and content rows."""

    id = db.Column(db.Integer, primary_key=True)
    pageid = db.Column('pageid', db.Integer, unique=True)
    oid = db.Column('oid', db.Integer)
    posterage = db.Column('posterage', db.SmallInteger)
    postdate = db.Column('postdate', db.DateTime(timezone=True))

    # Both relationships are ``lazy='dynamic'``, so ``post.phone`` and
    # ``post.content`` are query objects (call ``.all()`` / ``.first()``).
    # Each also installs a ``backpagepost`` backref on the child model.
    phone = db.relationship(
        'Backpagephone',
        backref='backpagepost',
        lazy='dynamic',
    )
    content = db.relationship(
        'Backpagecontent',
        backref='backpagepost',
        lazy='dynamic',
    )


class Backpagesite(db.Model):
    """ORM model for a named site; the API exposes ``name`` as ``city``."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(
        'name',
        db.String(120),
    )


class Crawler(db.Model):
    """ORM model recording a crawler's name and version string."""

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(
        'name',
        db.String(32),
    )
    # VARCHAR is declared without a length.
    version = db.Column(
        'version',
        db.VARCHAR,
    )

139 changes: 139 additions & 0 deletions rest_api/restful.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from flask import Flask, jsonify, request, render_template
from flask_sqlalchemy import SQLAlchemy
from healthcheck import HealthCheck, EnvironmentDump
from models import *
import requests
import json
import re


# Flask application object; all routes below are registered on it.
app = Flask(__name__)
# NOTE(review): the connection URI is hard-coded (local PostgreSQL, database
# "crawler", no credentials) -- consider reading it from configuration.
app.config['SQLALCHEMY_DATABASE_URI'] = "postgresql://localhost:5432/crawler"
# Turn off Flask-SQLAlchemy's modification-tracking feature.
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

# NOTE(review): models.py does ``from restful import db`` while this module
# runs ``from models import *`` before ``db`` is assigned here, so the two
# modules import each other circularly -- verify the import order is sound.
db = SQLAlchemy(app)


# Wrap the Flask app and expose a healthcheck URL to make sure the DB is OK
health = HealthCheck(app, "/healthcheck")

# Probe the database by running a query against the ``backpageemail`` table.
# Returns a ``(is_database_working, output)`` tuple: ``(True, 'database is ok')``
# when the query succeeds, or ``(False, <exception text>)`` when it raises.
# NOTE(review): in the visible source this function is never registered with
# ``health.add_check``; it is only called directly once at module level.
def health_database_status():
is_database_working = True
output = 'database is ok'

try:
session = db.session()
# Any failure (connection, missing table, ...) is treated as "not working".
session.execute('SELECT * from backpageemail')
except Exception as e:
output = str(e)
is_database_working = False

# Debug output only; the same text is also part of the return value.
print (output)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this necessary? or just a debug statement?

Copy link
Collaborator

@kaushik316 kaushik316 Jun 22, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will add the CREATE, .pyc and .DS_Store files to the gitignore on the next commit.

The lines you mentioned and the entire healthcheck method can be deleted, I just added them to make sure the database was working properly on my machine (for clarity, all this code can be removed):

health = HealthCheck(app, "/healthcheck")

def health_database_status():
    is_database_working = True
    output = 'database is ok'

    try:
        session = db.session()
        session.execute('SELECT * from backpageemail')
    except Exception as e:
        output = str(e)
        is_database_working = False

    print (output)

I'm not a Flask wizard either so I'll look into the pagination and try to implement that if that's possible in Flask. Currently you can pass a specific number or id as a parameter to get a subset of results.

return is_database_working, output


# Runs the database probe once when the module is loaded; the returned
# (status, message) tuple is discarded, so only the print output is visible.
health_database_status()


@app.route('/', methods=['GET'])
def test():
    """Smoke-test endpoint: always answers with a fixed JSON message."""
    payload = {'message': 'It works!'}
    return jsonify(payload)


# Backpagecontent table API endpoints with integer params and text search - returns title, body, postid
@app.route('/api/backpage/content/<int:backpage_content_id>', methods=['GET'])
def get_content(backpage_content_id):
    """Return the content row(s) whose primary key is *backpage_content_id*.

    Args:
        backpage_content_id: integer id taken from the URL.

    Returns:
        JSON of the form ``{"data": [{"id", "postId", "title", "body"}, ...]}``.
        The list is empty when no row matches.
    """
    contents = Backpagecontent.query.filter_by(id=backpage_content_id).all()

    # The previous version emitted ``number=phonenumber.number``, but no
    # ``phonenumber`` name is defined in this function or imported in the
    # visible source, so every request that matched a row raised NameError.
    # The endpoint is documented as returning title, body and post id, so the
    # body is returned instead.
    return jsonify({'data': [
        dict(id=c.id, postId=c.backpagepostid, title=c.title, body=c.body)
        for c in contents
    ]})


@app.route('/api/backpage/content/q=<search>', methods=['GET'])
def get_search_results(search):
    """Return content rows whose title contains *search* as a whole word.

    The database narrows the candidates with a substring match on ``title``;
    the whole-word check is then applied in Python.

    Args:
        search: search term taken verbatim from the URL (untrusted input).

    Returns:
        JSON of the form ``{"data": [{"id", "postId", "title"}, ...]}``.
    """
    # ``search`` comes straight from the URL, so it must be escaped before it
    # is embedded in a pattern: unescaped, input such as "(" raised re.error
    # (an HTTP 500) and metacharacters such as "." changed what was matched.
    # Compiling once also avoids rebuilding the pattern for every row.
    whole_word = re.compile(r'\b' + re.escape(search) + r'\b')

    contents = Backpagecontent.query.filter(
        Backpagecontent.title.contains(search)
    ).all()

    return jsonify({'data': [
        dict(id=c.id, postId=c.backpagepostid, title=c.title)
        for c in contents if whole_word.search(c.title)
    ]})


# Backpagesite table endpoints - returns all cities in table
@app.route('/api/backpage/cities/', methods=['GET'])
def get_all_cities():
    """Return every ``Backpagesite`` row as ``{"id", "city"}`` pairs.

    Returns:
        JSON of the form ``{"data": [{"id": ..., "city": ...}, ...]}``.
    """
    cities = Backpagesite.query.all()

    # The unused ``citynames`` list that was rebuilt on every request has been
    # removed; ``[c.name for c in cities]`` yields the bare names if needed.
    return jsonify({'data': [
        dict(id=c.id, city=c.name)
        for c in cities
    ]})


@app.route('/api/backpage/cities/q=<testsearch>', methods=['GET'])
def search(testsearch):
    """Return the sites whose name contains *testsearch* (case-sensitive).

    Args:
        testsearch: substring taken from the URL.

    Returns:
        JSON of the form ``{"data": [{"id": ..., "city": ...}, ...]}``.
    """
    cities = Backpagesite.query.all()

    # ``name`` has no NOT NULL constraint, and ``testsearch in None`` raises
    # TypeError, so rows without a name are skipped rather than crashing the
    # request. The unused ``citynames`` local has been removed.
    return jsonify({'data': [
        dict(id=c.id, city=c.name)
        for c in cities
        if c.name is not None and testsearch in c.name
    ]})


# Backpagephone table endpoints - returns phone numbers and postids
@app.route('/api/backpage/phone/', methods=['GET'])
def get_all_numbers():
    """Return every phone row as ``{"backpagepostid", "number"}`` pairs."""
    rows = []
    for phone in Backpagephone.query.all():
        rows.append(dict(backpagepostid=phone.backpagepostid, number=phone.number))

    return jsonify({'data': rows})


@app.route('/api/backpage/phone/<int:backpagepost_id>', methods=['GET'])
def get_number(backpagepost_id):
    """Return the phone number(s) stored for the post *backpagepost_id*."""
    matches = Backpagephone.query.filter_by(backpagepostid=backpagepost_id)
    found = []
    for phone in matches.all():
        found.append(phone.number)

    return jsonify({'numbers': found})


@app.route('/api/backpage/phone/<string:number>', methods=['GET'])
def getid_from_number(number):
    """Return the post ids whose stored phone number equals *number*.

    NOTE(review): the sibling route ``/api/backpage/phone/<int:...>`` appears
    to capture any digits-only path segment first, so this endpoint may only
    be reachable for numbers containing non-digit characters -- confirm and
    consider a distinct URL prefix.
    """
    matches = Backpagephone.query.filter_by(number=number)
    post_ids = []
    for phone in matches.all():
        post_ids.append(phone.backpagepostid)

    return jsonify({'backpagepost_ids': post_ids})


# Backpageemail endpoints - returns emails and post ids
@app.route('/api/backpage/email/<int:backpagepost_id>', methods=['GET'])
def get_email(backpagepost_id):
    """Return the e-mail address(es) stored for the post *backpagepost_id*."""
    matches = Backpageemail.query.filter_by(backpagepostid=backpagepost_id)
    addresses = []
    for row in matches.all():
        addresses.append(row.email)

    return jsonify({'Emails': addresses})


@app.route('/api/backpage/email/<string:email>', methods=['GET'])
def getid_from_mail(email):
    """Return the post ids whose stored e-mail address equals *email*."""
    matches = Backpageemail.query.filter_by(email=email)
    post_ids = []
    for row in matches.all():
        post_ids.append(row.backpagepostid)

    return jsonify({'backpagepost_ids': post_ids})


# Backpagepost, Backpagephone and Backpagecontent tables joined at this endpoint
@app.route('/api/backpage/<int:backpagepost_id>/title', methods=['GET'])
def get_title_withID(backpagepost_id):
    """Return the content titles of the post *backpagepost_id*.

    The lookup starts from the phone rows of the post, follows each row's
    ``backpagepost`` backref and collects the titles of that post's content
    rows; a post without a phone row therefore yields an empty list.

    NOTE(review): the response key is 'numbers' although the values are
    titles; it is kept as-is so existing clients are not broken.
    """
    phone_rows = Backpagephone.query.filter_by(backpagepostid=backpagepost_id).all()

    titles = []
    for phone_row in phone_rows:
        for content in phone_row.backpagepost.content.all():
            titles.append(content.title)

    return jsonify({'numbers': titles})



if __name__ == "__main__":
    # Development entry point: serves the app on port 8000.
    # NOTE(review): debug=True turns on Flask's debug mode (interactive
    # debugger and auto-reload); it must not be enabled on a deployed server.
    app.run(debug=True, port = 8000)
8 changes: 8 additions & 0 deletions rest_api/templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<html>
<head>
<title>Database API</title>
</head>
<body>

</body>
</html>