Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions Task-1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import json
import xml.etree.ElementTree as ET
from urllib.request import urlopen


def main():
    """Download the lenta.ru RSS feed and save each item's date and title.

    Fetches ``https://lenta.ru/rss``, parses the body as XML and passes
    every ``<item>`` found under the first ``<channel>`` element to
    ``create_json``, which writes the ``pubDate`` and ``title`` values to
    ``news.json``.
    """
    # Use the response as a context manager so the connection is released
    # even when reading or decoding fails.
    with urlopen('https://lenta.ru/rss') as response:
        data = response.read().decode('utf8')
    root = ET.fromstring(data)
    items = root.find('channel').findall('item')
    tags = ['pubDate', 'title']
    create_json(items, tags, 'news.json')


def create_json(items, tags, filename):
    """Write the text of selected child elements of each item as JSON.

    Args:
        items: Iterable of ``xml.etree.ElementTree.Element`` objects.
        tags: Names of the child elements to extract from every item.
        filename: Path of the UTF-8 JSON file to (over)write.

    The file receives a JSON array holding exactly one object per item.
    Each object maps every name in ``tags`` to the text of the item's first
    child with that tag, or ``null`` when the child is missing or has no
    text.
    """
    # Build one record per item that covers *all* tags. The previous
    # tags[i] / tags[i+1] pairing was only correct for exactly two tags:
    # a single tag produced no output and three or more tags produced
    # several overlapping records per item.
    # getattr(..., 'text', None) tolerates find() returning None for an
    # absent child instead of raising AttributeError.
    result = [
        {tag: getattr(item.find(tag), 'text', None) for tag in tags}
        for item in items
    ]
    with open(filename, 'w', encoding='utf8') as file:
        json.dump(result, file, indent=1, ensure_ascii=False)


# Run the download only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
43 changes: 43 additions & 0 deletions Task-2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import json
import xml.etree.ElementTree as ET
from urllib.request import urlopen


def main():
    """Download the lenta.ru RSS feed and export its items to two files.

    Fetches ``https://lenta.ru/rss``, parses the body as XML and collects
    every ``<item>`` under the first ``<channel>`` element. ``create_json``
    writes the ``pubDate`` and ``title`` of each item to ``news.json``;
    ``parse_items`` writes all fields of each item to ``news1.json``.
    """
    # Use the response as a context manager so the connection is released
    # even when reading or decoding fails.
    with urlopen('https://lenta.ru/rss') as response:
        data = response.read().decode('utf8')
    root = ET.fromstring(data)
    items = root.find('channel').findall('item')
    tags = ['pubDate', 'title']
    create_json(items, tags, 'news.json')
    parse_items(items, 'news1.json')


def create_json(items, tags, filename):
    """Write the text of selected child elements of each item as JSON.

    Args:
        items: Iterable of ``xml.etree.ElementTree.Element`` objects.
        tags: Names of the child elements to extract from every item.
        filename: Path of the UTF-8 JSON file to (over)write.

    The file receives a JSON array holding exactly one object per item.
    Each object maps every name in ``tags`` to the text of the item's first
    child with that tag, or ``null`` when the child is missing or has no
    text.
    """
    result = []
    for item in items:
        # One record per item covering *all* tags. Pairing tags[i] with
        # tags[i+1] was only correct for exactly two tags: a single tag
        # produced no output, three or more produced overlapping records.
        record = {}
        for tag in tags:
            child = item.find(tag)
            # find() returns None for an absent child; store null rather
            # than failing on ``None.text``.
            record[tag] = None if child is None else child.text
        result.append(record)
    with open(filename, 'w', encoding='utf8') as file:
        json.dump(result, file, indent=1, ensure_ascii=False)


def parse_items(items, filename):
    """Dump every descendant element of each item to a JSON file.

    Args:
        items: Iterable of ``xml.etree.ElementTree.Element`` objects.
        filename: Path of the UTF-8 JSON file to (over)write.

    The file receives a JSON array with one object per item that maps the
    tag of each descendant element to its text. If a tag occurs more than
    once inside an item, the last occurrence wins; elements without text
    are stored as ``null``.
    """
    result = [
        # iter() also yields the item itself; exclude it by identity
        # (``is``) so only its descendants are recorded.
        {node.tag: node.text for node in item.iter() if node is not item}
        for item in items
    ]
    with open(filename, 'w', encoding='utf8') as file:
        json.dump(result, file, indent=1, ensure_ascii=False)


# Run the export only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
29 changes: 29 additions & 0 deletions Task-3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from urllib.request import urlopen
from json import loads
import itertools
import datetime


def main():
    """Write per-day revision counts of one Wikipedia article to a file.

    Queries the Russian Wikipedia API for up to 500 revisions of the
    article named in the ``titles`` query parameter and hands the request
    to ``get_statistic``, which stores the result in ``revisions.txt``.
    """
    api_url = (
        'https://ru.wikipedia.org/w/api.php'
        '?action=query&format=json&prop=revisions&rvlimit=500'
        '&titles=%D0%93%D1%80%D0%B0%D0%B4%D1%81%D0%BA%D0%B8%D0%B9'
        ',_%D0%90%D0%BB%D0%B5%D0%BA%D1%81%D0%B0%D0%BD%D0%B4%D1%80'
        '_%D0%91%D0%BE%D1%80%D0%B8%D1%81%D0%BE%D0%B2%D0%B8%D1%87'
    )
    # '183903' is the key under which the API response lists this page.
    get_statistic(api_url, '183903', 'revisions.txt')


def get_statistic(url, page_id, filename):
    """Count a page's revisions per calendar day and write the totals.

    Args:
        url: API URL whose JSON response contains the revisions under
            ``['query']['pages'][page_id]['revisions']``.
        page_id: Key of the page inside the response's ``pages`` mapping.
        filename: Path of the UTF-8 text file to (over)write.

    Each output line has the form ``<date> <count>``; dates are written
    newest first.

    Raises:
        KeyError: If the response lacks the expected keys (for example
            when ``page_id`` does not match the returned page).
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        data = loads(response.read().decode('utf8'))
    revisions = data['query']['pages'][page_id]['revisions']
    # groupby only merges adjacent entries, so sort by the same key first.
    ordered = sorted(revisions, key=get_date, reverse=True)
    with open(filename, 'w', encoding='utf8') as file:
        for date, group in itertools.groupby(ordered, key=get_date):
            # Only the size of each group is needed; count it lazily
            # rather than keeping every revision in memory.
            print(date, sum(1 for _ in group), file=file)


def get_date(x):
    """Return the calendar date of a revision's ``'timestamp'`` entry.

    ``x`` is a mapping whose ``'timestamp'`` value is a string formatted
    like ``2020-01-31T12:34:56Z``; the time-of-day part is discarded.
    """
    stamp = x['timestamp']
    moment = datetime.datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%SZ')
    return moment.date()


# Collect the statistics only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()
30 changes: 30 additions & 0 deletions Task-4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from urllib.request import urlopen
from json import loads
import itertools
import datetime


def main():
    """Write per-day revision counts of one Wikipedia article to a file.

    Queries the Russian Wikipedia API for up to 500 revisions of the
    article named in the ``titles`` query parameter and hands the request
    to ``get_statistic``, which stores the result in ``revisions1.txt``.
    """
    # The literal must not end with whitespace: http.client rejects URLs
    # containing spaces with InvalidURL, so the stray trailing blank that
    # used to follow the title has been removed.
    url = 'https://ru.wikipedia.org/w/api.php?action=query&format=json&prop=revisions&rvlimit=500&titles=%D0%91%D0' \
          '%B5%D0%BB%D1%8C%D0%BC%D0%BE%D0%BD%D0%B4%D0%BE,_%D0%96%D0%B0%D0%BD-%D0%9F%D0%BE%D0%BB%D1%8C'
    get_statistic(url, '192203', 'revisions1.txt')


def get_statistic(url, page_id, filename):
    """Count a page's revisions per calendar day and write the totals.

    Args:
        url: API URL whose JSON response contains the revisions under
            ``['query']['pages'][page_id]['revisions']``.
        page_id: Key of the page inside the response's ``pages`` mapping.
        filename: Path of the UTF-8 text file to (over)write.

    Each output line has the form ``<date> <count>``; dates are written
    newest first.

    Raises:
        KeyError: If the response lacks the expected keys (for example
            when ``page_id`` does not match the returned page).
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        payload = loads(response.read().decode('utf8'))
    revisions = payload['query']['pages'][page_id]['revisions']
    # groupby only merges adjacent entries, so sort by the same key first.
    newest_first = sorted(revisions, key=get_date, reverse=True)
    with open(filename, 'w', encoding='utf8') as file:
        for date, same_day in itertools.groupby(newest_first, key=get_date):
            # Only the size of each group is needed; count it lazily
            # rather than keeping every revision in memory.
            print(date, sum(1 for _ in same_day), file=file)


def get_date(x):
    """Extract the day on which a revision was made.

    Reads ``x['timestamp']``, a string such as ``2020-01-31T12:34:56Z``,
    and returns its date part as a ``datetime.date``.
    """
    parsed = datetime.datetime.strptime(x['timestamp'], '%Y-%m-%dT%H:%M:%SZ')
    day = parsed.date()
    return day


# Collect the statistics only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()
Loading