Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion castrewinder/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
class Feed(db.Model):
__tablename__ = 'feed'
id = db.Column(db.Integer, primary_key=True)
url = db.Column(db.String, unique = True, index=True)
url = db.Column(db.String, unique=True, index=True)
etag = db.Column(db.String)
last_modified = db.Column(db.String)
last_published_element = db.Column(db.DateTime)
Expand All @@ -19,6 +19,8 @@ class Episode(db.Model):
published = db.Column(db.DateTime)
content = db.Column(db.Text)
feed_id = db.Column(db.Integer, db.ForeignKey('feed.id'), index=True)
enclosure_url = db.Column(db.Text)
enclosure_is_active = db.Column(db.Boolean, index=True)
# Use cascade='delete,all' to propagate the deletion of a Feed onto its Episodes
feed = db.relationship('Feed',
backref = db.backref('episodes',
Expand Down
35 changes: 19 additions & 16 deletions castrewinder/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,23 +304,25 @@ def build_xml_feed(feed_object, feed_entries, publication_dates, options, feed_f
fe.content(content = strip_tags(content['value']) if 'value' in content else '',
type = content['type'] if 'type' in content else '')

for media in episode.get('media_content', []):
if media.get('type') != 'application/x-shockwave-flash':
fe.enclosure(url = media.get('url', ''),
length = str(media.get('filesize', '')),
type = media.get('type', ''))

for enclosure in episode.get('enclosure', []):
if enclosure.get('type') != 'application/x-shockwave-flash':
fe.enclosure(url = enclosure.get('url', ''),
length = str(enclosure.get('filesize', '')),
type = enclosure.get('type', ''))
if entry.enclosure_is_active:
# don't add an enclosure or media if the enclosure link is not active
for media in episode.get('media_content', []):
if media.get('type') != 'application/x-shockwave-flash':
fe.enclosure(url = media.get('url', ''),
length = str(media.get('filesize', '')),
type = media.get('type', ''))

for enclosure in episode.get('enclosure', []):
if enclosure.get('type') != 'application/x-shockwave-flash':
fe.enclosure(url = enclosure.get('url', ''),
length = str(enclosure.get('filesize', '')),
type = enclosure.get('type', ''))

link = episode.get('link', '')
if link == '':
link = feed.get('link')
if link == '':
# This would be the perfect place
if link == '':
# to link to a special HTML format feed. TODO.
link = "%s#%s" % (request.url, "castrewinder_%s_%s" % (request.url, episode.get('id', '')))

Expand All @@ -329,10 +331,11 @@ def build_xml_feed(feed_object, feed_entries, publication_dates, options, feed_f
for link in episode.get('links', []):
if link.get('href','') != '':
if link.get('rel') == 'enclosure':
links.append({'rel' : 'enclosure',
'href' : link.get('href'),
'type' : link.get('type', ''),
'length': link.get('length', 0)})
if entry.enclosure_is_active:
links.append({'rel' : 'enclosure',
'href' : link.get('href'),
'type' : link.get('type', ''),
'length': link.get('length', 0)})
else:
links.append({'rel' : 'alternate',
'href' : link.get('href'),
Expand Down
88 changes: 85 additions & 3 deletions feed_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import calendar
import time
import json
from requests import get
from requests import get, head
from dateutil import parser
from urllib.parse import urlparse

Expand Down Expand Up @@ -63,10 +63,21 @@ def add_entries_to_db(feed, feed_url, ignore_date = False):
except:
published = datetime.datetime.today()

if feed_object.last_published_element < published or ignore_date:
enclosure_url = get_enclosure_url_from_episode_content(content = entry)

# # Calling the enclosure url status is too costly as of yet
# enclosure_status = get_url_status(url = enclosure_url)
# # If there was 301s, set last URL
# if enclosure_status[1] != '':
# enclosure_url = enclosure_status[1]

if feed_object.last_published_element < published or ignore_date == True:
new_entry = Episode(published = published,
content = json.dumps(entry, default=json_serial),
feed_id = feed_object.id)
feed_id = feed_object.id,
enclosure_url = enclosure_url,
# enclosure_is_active = enclosure_status[0])
enclosure_is_active = True)
db.session.add(new_entry)

if not ignore_date:
Expand Down Expand Up @@ -494,6 +505,73 @@ def update_feeds():

return True

def verify_links():
    """Check every active episode's enclosure link and record the result.

    For each episode currently flagged ``enclosure_is_active``:

    * if no ``enclosure_url`` is stored yet, it is extracted from the
      episode's stored JSON content and saved on the episode;
    * the URL is probed with ``get_url_status``; when the probe reports a
      replacement URL (permanent redirect), that URL is stored instead;
    * ``enclosure_is_active`` is set to the outcome of the probe.

    Episodes for which no enclosure URL can be found are left untouched:
    there is nothing to probe, and flagging them inactive would only hide
    whatever other media the feed builder emits for them.

    All changes are committed in a single transaction after the loop.

    Returns:
        True once the changes have been committed.
    """
    active_episodes = (
        db.session.query(Episode)
        # "== True" builds a SQL expression here, it is not a Python comparison.
        .filter(Episode.enclosure_is_active == True)  # noqa: E712
        .all()
    )

    for episode in active_episodes:
        if not episode.enclosure_url:
            # Backfill the URL from the stored episode content.
            episode.enclosure_url = get_enclosure_url_from_episode_content(
                content=json.loads(episode.content))

        if not episode.enclosure_url:
            # No enclosure at all: nothing to verify for this episode.
            continue

        is_active, redirected_url = get_url_status(url=episode.enclosure_url)

        # Only a non-empty value is a replacement URL; '' and None both mean
        # "keep the stored one", so a failed probe can never erase the URL.
        if redirected_url:
            episode.enclosure_url = redirected_url

        episode.enclosure_is_active = is_active

    db.session.commit()

    return True

def get_url_status(url, timeout=10):
    """Probe *url* with an HTTP HEAD request, following redirects.

    Args:
        url: The URL to check. An empty or missing URL is reported as
            inactive without touching the network.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.head``. Without it a stalled server would block the
            caller indefinitely.

    Returns:
        A 2-tuple ``(is_active, new_url)``:

        * ``is_active`` is True when the final response status is 2xx or
          3xx, False for any other status or when the request fails.
        * ``new_url`` is the final URL when the link is active and was
          reached through a permanent redirect (301/308) with no temporary
          redirect (302/303/307) in the chain; otherwise it is ``''``.
          It is always a string, never None, so callers can compare it
          against ``''`` safely.
    """
    if not url:
        return (False, '')

    try:
        response = head(url, allow_redirects=True, timeout=timeout)
    except Exception:
        # Deliberately broad: any failure to reach the server (DNS, TLS,
        # malformed URL, timeout, ...) simply means the link is not usable.
        return (False, '')

    if not 200 <= response.status_code < 400:
        return (False, '')

    # Status codes of every intermediate redirect response.
    hop_codes = {hop.status_code for hop in response.history}
    moved_permanently = bool(hop_codes & {301, 308})
    moved_temporarily = bool(hop_codes & {302, 303, 307})

    # Adopt the final URL only if nothing in the chain was temporary;
    # a temporary hop means the original URL is still the canonical one.
    new_url = response.url if moved_permanently and not moved_temporarily else ''

    return (True, new_url)


def get_enclosure_url_from_episode_content(content):
    """Return the enclosure URL found in a parsed episode, or None.

    Args:
        content: Mapping holding the parsed episode. Two keys are read:
            ``'enclosure'`` (RSS / JSON Feed style, a list of mappings with
            ``'url'`` and ``'type'``) and ``'links'`` (Atom style, a list of
            mappings with ``'rel'``, ``'href'`` and ``'type'``). Either key
            may be absent or null.

    Returns:
        The URL of the LAST usable entry in ``'enclosure'`` (as per RSS
        recommendations); failing that, the ``href`` of the FIRST usable
        ``rel="enclosure"`` link; None when neither exists. An entry is
        usable when it is not Flash content and actually carries a URL.
    """
    flash_type = 'application/x-shockwave-flash'

    # RSS (or JSON Feed): walk backwards so the last enclosure wins.
    for enclosure in reversed(content.get('enclosure') or []):
        if enclosure.get('type') != flash_type and enclosure.get('url'):
            return enclosure['url']

    # Atom: the first link[rel="enclosure"] wins.
    for link in content.get('links') or []:
        if (link.get('rel') == 'enclosure'
                and link.get('type') != flash_type
                and link.get('href')):
            return link['href']

    # Neither an <enclosure> nor a link[rel="enclosure"] was found.
    return None


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='You can import feeds into Cast Rewinder.',
Expand All @@ -503,6 +581,7 @@ def update_feeds():
parser.add_argument('-w','--which_feed',help='''Get a feed’s ID from URL''')
parser.add_argument('--feed_info',help='''Get a feed’s info from ID''')
parser.add_argument('-u','--update_feeds',help='''Updates all feeds''', action='store_true')
parser.add_argument('-l','--verify_links',help='''Check all podcast links''', action='store_true')

args = parser.parse_args()

Expand All @@ -523,5 +602,8 @@ def update_feeds():
if args.update_feeds:
update_feeds()

if args.verify_links:
verify_links()

if not any(vars(args).values()):
ask_for_url()
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Added enclosures info for episodes

Revision ID: b5adf7fac7ab
Revises: 4251f6c2a939
Create Date: 2018-07-19 00:25:39.920621

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'b5adf7fac7ab'
down_revision = '4251f6c2a939'
branch_labels = None
depends_on = None


def upgrade():
    """Add enclosure tracking to the ``episode`` table.

    Steps, in order:

    1. add the nullable columns ``enclosure_is_active`` (Boolean) and
       ``enclosure_url`` (Text), and index ``enclosure_is_active``;
    2. backfill ``enclosure_is_active`` with true for every existing row;
    3. make ``enclosure_is_active`` NOT NULL, which is only possible once
       step 2 has removed all NULLs.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column('episode', sa.Column('enclosure_is_active', sa.Boolean(), nullable=True))
    op.add_column('episode', sa.Column('enclosure_url', sa.Text(), nullable=True))
    op.create_index(op.f('ix_episode_enclosure_is_active'), 'episode', ['enclosure_is_active'], unique=False)
    # ### end Alembic commands ###

    # Populate the new column with True. A lightweight table construct lets
    # SQLAlchemy render the boolean literal for the active dialect instead of
    # relying on the string 'true' being coerced by the database.
    episode = sa.table('episode', sa.column('enclosure_is_active', sa.Boolean()))
    op.execute(episode.update().values(enclosure_is_active=sa.true()))

    # existing_type is needed by some backends (e.g. MySQL, SQL Server) to
    # emit a nullability change.
    op.alter_column('episode', 'enclosure_is_active',
                    existing_type=sa.Boolean(),
                    nullable=False)


def downgrade():
    """Undo the enclosure migration on the ``episode`` table.

    Drops the index on ``enclosure_is_active`` and then removes the
    ``enclosure_url`` and ``enclosure_is_active`` columns.
    """
    table_name = 'episode'

    # The index is dropped before the columns are removed.
    op.drop_index(op.f('ix_episode_enclosure_is_active'), table_name=table_name)

    for column_name in ('enclosure_url', 'enclosure_is_active'):
        op.drop_column(table_name, column_name)