-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcollect.py
More file actions
46 lines (41 loc) · 1.47 KB
/
collect.py
File metadata and controls
46 lines (41 loc) · 1.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from requests import get
from pyquery import PyQuery as pq
from pathlib import Path
# AtCoder user whose accepted submissions are collected.
user_id = 'FLT'
# Per-contest submission list filtered to that user; '{}' takes the contest id.
submission_list_url = 'https://atcoder.jp/contests/{}/submissions?f.User=' + user_id

# crawl
# Ask the kenkoooo.com results endpoint for the user's submission results.
# requests applies no timeout by default, so pass one explicitly to keep the
# script from hanging forever on a stalled connection.
response = get(
    f'https://kenkoooo.com/atcoder/atcoder-api/results?user={user_id}',
    timeout=30,
)
# Abort on HTTP error statuses instead of trying to decode an error page.
response.raise_for_status()
# Decoded JSON payload; the next step iterates it and reads each entry's
# 'contest_id' and 'result' keys.
res = response.json()
# Get submission details
# Collect the detail-page link of every accepted submission, visiting each
# contest's submission list exactly once.
submission_urls = []
# A contest shows up in `res` once per accepted result, so de-duplicate the
# ids (dict.fromkeys keeps first-seen order) instead of downloading the same
# list page and appending the same links once per accepted submission.
ac_contests = dict.fromkeys(x['contest_id'] for x in res if x['result'] == "AC")
for contest in ac_contests:
    # Pass the address through `url=`: pyquery 2.0 no longer fetches a
    # positional "https://..." string.
    # NOTE(review): only this single list URL is requested per contest; if the
    # site paginates the list, submissions on later pages are missed - confirm.
    listing = pq(url=submission_list_url.format(contest))
    rows = listing('#main-container > div.row > div:nth-child(3) > div.panel.panel-default.panel-submission > div.table-responsive > table > tbody > tr')
    # Rows are visited in reverse document order.
    for row in reversed(rows):
        # Seventh cell holds the judge status (compared against 'AC'); the
        # tenth cell holds the link to the submission detail page.
        if pq(row[6]).text() == 'AC':
            detail_href = pq(row[9]).children('a').attr('href')
            submission_urls.append(detail_href)
            print(detail_href)
# parse
# Download every collected submission page and extract the contest id, the
# task title and the submitted source text into `parsed`.
parsed = []
for url in submission_urls:
    # `url` is a site-relative path; fetch it via the `url=` keyword because
    # pyquery 2.0 no longer fetches a positional "https://..." string.
    page = pq(url=f'https://atcoder.jp{url}')
    contest_href = page('.contest-title').attr('href')
    code_nodes = page('.linenums')
    # `.text()` may yield None on an empty selection in older pyquery releases.
    title = (page('.panel > table > tr:nth-child(2) > td').text() or '').replace(' ', '_')
    if not contest_href or not code_nodes or not title:
        # Skip pages that lack the expected elements rather than failing here
        # or later when the file is written.
        print(f'skipped {url}: expected elements not found')
        continue
    # The contest id is the last path segment of the contest link.
    contest_id = contest_href.split('/')[-1]
    # Read the element's text directly: `.html()` returns markup, so '<' and
    # '&' in the source would come back as entities and corrupt the saved file.
    code = ''.join(code_nodes[0].itertext())
    print(f'{title}')
    parsed.append({
        'contest_id': contest_id,
        'title': title,
        'code': code,
    })
# mkfile
# Write each parsed submission to atcoder/<contest_id>/<X>.py, where <X> is
# the first character of the task title. Entries that map to the same path
# overwrite each other, so the last one in `parsed` wins.
root_dirname = 'atcoder'
root_dir = Path(root_dirname)
root_dir.mkdir(exist_ok=True)
for item in parsed:
    contest_dir = root_dir / item['contest_id']
    contest_dir.mkdir(exist_ok=True)
    target = contest_dir / (item['title'][0] + '.py')
    # Write as UTF-8 explicitly: the platform default encoding can fail on
    # non-ASCII characters in the submitted source.
    with target.open(mode='w', encoding='utf-8') as f:
        f.write(item['code'])