Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,31 @@ eg:

eg: `python slackscrape.py -c C193MSB9J`

will write channel messages to `general.json` in `output/channels/general/messages/`
will write channel messages to `general.json` in `output/channels/<channel>/messages/`

## get channels messages

`python get\_channels\_messages.py -u [optional update existing] -a [optional include archived]`
`python get_channels_messages.py -u [optional update existing] -a [optional include archived]`

eg: `python get\_channels\_messages.py -u
eg: `python get_channels_messages.py -u`

Will get all channels' messages, update them with any new messages it finds, and write the result to
`output/channels/<channel>/messages/<channel>.json`

## get channels info

`python get\_channels\_info.py -u [optional update existing]`
`python get_channels_info.py -u [optional update existing]`

eg: `python get\_channels\_info.py -u
eg: `python get_channels_info.py -u`

Will get all channels' metadata and write it to
`output/channels/<channel>/info/<channel>.json`

## get users

`python get\_users.py -u [optional update existing]`
`python get_users.py -u [optional update existing]`

eg: `python get\_users.py -u
eg: `python get_users.py -u`

Will get all users and write to `output/users/members/<user>.json`

Expand Down
39 changes: 22 additions & 17 deletions get_channels_info.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,12 @@
#!/usr/bin/env python
from json_utils import load_json, dump_json
from slackscrape import scrape_slack
from json_utils import *
from slackclient import SlackClient
import operator
import argparse
import os

config = load_json('./env.json')

def ensure_dir(directory):
    """Create ``directory`` (with any missing parents) if absent; return it.

    Args:
        directory: Path of the directory that should exist.

    Returns:
        The ``directory`` argument, unchanged, so the call can be used
        inline when building paths.

    Raises:
        OSError: If the directory could not be created and nothing exists
            at ``directory`` afterwards (e.g. permission denied).
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Create first and inspect the failure afterwards: checking for
        # existence before creating leaves a window in which another
        # process can create the path and make makedirs() fail.
        # A path that already exists is left untouched, as before.
        if not os.path.exists(directory):
            raise

    return directory

if __name__ == '__main__':
ap = argparse.ArgumentParser()
ap.add_argument('-u', '--update', help = 'update channels', action="store_true")
args = vars(ap.parse_args())

def all_channels_info(args):
channel_args = {
'exclude_archived': 0,
}
Expand All @@ -27,15 +15,22 @@ def ensure_dir(directory):
response = sc.api_call('channels.list', **channel_args)
channels = response['channels']

return channels

def store_channel_info(args):
sc = SlackClient(config['token'])
channels = all_channels_info(args)

for idx, channel in enumerate(channels):
chan_name = channel['name']
chan_name = channel['name'].encode('utf-8')
print('{} | {} - {} MEMBERS'.format(idx, chan_name, channel['num_members']))

chan_path = ensure_dir('./output/channels/{}'.format(chan_name))
info_path = ensure_dir('./output/channels/{}/info'.format(chan_name))
output_path = '{}/{}.json'.format(info_path, chan_name)

try:
old_json = load_json('{}/{}.json'.format(info_path, chan_name))
old_json = load_json(output_path)
if not args['update']:
print('Already have channel {}, skipping ...'.format(chan_name))
continue
Expand All @@ -48,7 +43,17 @@ def ensure_dir(directory):

channel_info = sc.api_call('channels.info', **slack_args)
try:
dump_json('{}/{}.json'.format(info_path, chan_name), channel_info)
dump_json(output_path, channel_info)
except Exception as e:
print('ERROR DUMPING {}'.format(chan_name))
print(e)

return channels

if __name__ == '__main__':
    # Command-line entry point: read the flags, then fetch and store the
    # info for every channel.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-u', '--update',
        action="store_true",
        help='update channels',
    )
    args = vars(parser.parse_args())

    store_channel_info(args)

18 changes: 6 additions & 12 deletions get_channels_messages.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
from json_utils import load_json, dump_json
from json_utils import *
from slackscrape import scrape_slack
from slackclient import SlackClient
import argparse
Expand All @@ -8,12 +8,6 @@

config = load_json('./env.json')

def ensure_dir(directory):
    """Create ``directory`` (with any missing parents) if absent; return it.

    Args:
        directory: Path of the directory that should exist.

    Returns:
        The ``directory`` argument, unchanged, so the call can be used
        inline when building paths.

    Raises:
        OSError: If the directory could not be created and nothing exists
            at ``directory`` afterwards (e.g. permission denied).
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Create first and inspect the failure afterwards: checking for
        # existence before creating leaves a window in which another
        # process can create the path and make makedirs() fail.
        # A path that already exists is left untouched, as before.
        if not os.path.exists(directory):
            raise

    return directory

if __name__ == '__main__':
ap = argparse.ArgumentParser()
ap.add_argument('-a', '--archived', help = 'include archived channels', action="store_true")
Expand All @@ -31,14 +25,14 @@ def ensure_dir(directory):
sorted_channels = sorted(channels, key=lambda x: x['num_members'], reverse=True)

for idx, channel in enumerate(sorted_channels):
chan_name = channel['name']
chan_name = channel['name'].encode('utf-8')
print('{} | {} - {} MEMBERS'.format(idx, chan_name, channel['num_members']))
chan_path = ensure_dir('./output/channels/{}'.format(chan_name))
msg_path = ensure_dir('./output/channels/{}/messages'.format(chan_name))
output = './output/channels/{}/messages/{}.json'.format(chan_name, chan_name)
msg_path = ensure_dir('{}/messages'.format(chan_path))
dump_path = '{}/{}.json'.format(msg_path, chan_name)

try:
old_json = load_json(output)
old_json = load_json(dump_path)
if not args['update']:
print('Aready have messages, skipping...')
continue
Expand All @@ -55,4 +49,4 @@ def ensure_dir(directory):

if len(new_messages):
all_messages = new_messages + old_json
dump_json(output, all_messages)
dump_json(dump_path, all_messages)
7 changes: 7 additions & 0 deletions json_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from operator import itemgetter
from datetime import datetime
import time
import os

def load_json(path):
with io.open(path, encoding='utf-8') as f:
Expand All @@ -11,3 +12,9 @@ def load_json(path):
def dump_json(path, data):
    """Write ``data`` to the file at ``path`` as JSON indented by two spaces.

    The file is opened in write mode, so any existing content at ``path``
    is replaced.
    """
    with open(path, 'w') as out_file:
        json.dump(data, out_file, indent=2)

def ensure_dir(directory):
    """Create ``directory`` (with any missing parents) if absent; return it.

    Args:
        directory: Path of the directory that should exist.

    Returns:
        The ``directory`` argument, unchanged, so the call can be used
        inline when building paths.

    Raises:
        OSError: If the directory could not be created and nothing exists
            at ``directory`` afterwards (e.g. permission denied).
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Create first and inspect the failure afterwards: checking for
        # existence before creating leaves a window in which another
        # process can create the path and make makedirs() fail.
        # A path that already exists is left untouched, as before.
        if not os.path.exists(directory):
            raise

    return directory
28 changes: 21 additions & 7 deletions slackscrape.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#!/usr/bin/env python
from json_utils import load_json, dump_json
from json_utils import *
from slackclient import SlackClient
from get_channels_info import *
import argparse

def get_messages(sc, slack_args, messages, filter_func):
history = sc.api_call("channels.history", **slack_args)
last_ts = history['messages'][-1]['ts'] if history['has_more'] else False
filtered = list(filter(filter_func, history['messages']))
last_ts = history['messages'][-1]['ts'] if ('has_more' in history and history['has_more']) else False
hist_messages = history['messages'] if ('messages' in history) else []
filtered = list(filter(filter_func, hist_messages))
all_messages = messages + filtered
print('Fetched {} messages. {} Total now.'.format(len(filtered), len(all_messages)))

Expand All @@ -26,6 +28,12 @@ def scrape_slack(token, slack_args, filter_func = lambda x: x):
print('Done fetching messages. Found {} in total.'.format(len(results['messages'])))
return results['messages']

def find_channel_by(key, val, return_key='name'):
    """Look up a channel by one field and return another of its fields.

    Fetches the channel list via ``all_channels_info('')`` and scans it in
    order; the first channel whose ``key`` entry equals ``val`` wins.

    Args:
        key: Field to compare on each channel entry.
        val: Value the ``key`` field must equal.
        return_key: Field to return from the matching channel
            (defaults to ``'name'``).

    Returns:
        The matching channel's ``return_key`` value, or ``None`` when no
        channel matches.
    """
    matches = (
        entry[return_key]
        for entry in all_channels_info('')
        if entry[key] == val
    )
    # The generator is lazy, so scanning stops at the first match.
    return next(matches, None)

if __name__ == '__main__':
config = load_json('./env.json')

Expand All @@ -34,21 +42,27 @@ def scrape_slack(token, slack_args, filter_func = lambda x: x):
ap.add_argument('-o', '--output', help = 'file to save out')
args = vars(ap.parse_args())
channel = args['channel']
output = args['output']

channel_name = find_channel_by('id', channel)
print channel_name
output = args['output'] or channel_name

chan_path = ensure_dir('./output/channels/{}/messages/'.format(channel_name))
dump_path = '{}/{}.json'.format(chan_path, output)

try:
old_json = load_json(output)
old_json = load_json(dump_path)
except Exception as e:
old_json = []
print('No existing messages, starting from scratch...')

slack_args = {
'channel': config['channel_id'],
'channel': channel,
'oldest': old_json[0]['ts'] if len(old_json) else '',
}

new_messages = scrape_slack(config['token'], slack_args)

if len(new_messages):
all_messages = new_messages + old_json
dump_json(output, all_messages)
dump_json(dump_path, all_messages)