diff --git a/YYeTsFE b/YYeTsFE index b2792904c..52b9770c9 160000 --- a/YYeTsFE +++ b/YYeTsFE @@ -1 +1 @@ -Subproject commit b2792904c4de7d30383c48875ff1b69a6e1d173f +Subproject commit 52b9770c9654b5b64d97f99427db7acf64be5948 diff --git a/requirements.txt b/requirements.txt index c1ad363c0..27b2f6fec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,20 @@ requests==2.27.1 -pytelegrambotapi==4.4.0 -beautifulsoup4==4.10.0 +pytelegrambotapi==4.4.1 +beautifulsoup4==4.11.1 tgbot-ping==1.0.4 redis==4.1.4 apscheduler==3.9.1 -pymongo==4.0.2 +pymongo==4.1.1 tornado==6.1 captcha==0.4 passlib==1.7.4 fakeredis==1.7.1 -pytz==2021.3 +pytz==2022.1 filetype==1.0.10 requests[socks] -tqdm==4.63.0 +tqdm==4.64.0 retry==0.9.2 pymysql==1.0.2 git+https://github.com/tgbot-collection/python-akismet -openpyxl==3.0.9 \ No newline at end of file +openpyxl==3.1.2 +zhconv==1.4.3 \ No newline at end of file diff --git a/yyetsbot/yyetsbot.py b/yyetsbot/yyetsbot.py index cc4b4791d..19959bb82 100644 --- a/yyetsbot/yyetsbot.py +++ b/yyetsbot/yyetsbot.py @@ -7,6 +7,7 @@ import io import json import logging +import os import re import tempfile import time @@ -14,6 +15,7 @@ import requests import telebot +import zhconv from apscheduler.schedulers.background import BackgroundScheduler from telebot import apihelper, types from tgbot_ping import get_runtime @@ -175,6 +177,8 @@ def send_my_response(message): @bot.message_handler(content_types=["photo", "text"]) def send_search(message): + if str(message.chat.id) == os.getenv("SPECIAL_ID") and message.text == "❤️": + bot.reply_to(message, "❤️") # normal ordered search if message.text in ("Voice Chat started", "Voice Chat ended"): logging.warning("This is really funny %s", message.text) @@ -196,11 +200,13 @@ def ban_user(message): yy = fansub.YYeTsOffline() client = yy.mongo user_col = client["zimuzu"]["users"] + comment_col = client["zimuzu"]["comment"] text = "" for line in user_list: user, reason = line.split(maxsplit=1) ban = {"disable": True, "reason": reason} user_col.update_one({"username": user}, {"$set": {"status": ban}}) + comment_col.delete_many({"username": user}) status = f"{user} 已经被禁言,原因:{reason}\n" logging.info("Banning %s", status) text += status @@ -222,7 +228,7 @@ def base_send_search(message, instance=None): send_my_response(message) return - name = message.text + name = zhconv.convert(message.text, "zh-hans") logging.info('Receiving message: %s from user %s(%s)', name, message.chat.username, message.chat.id) if name is None: today_request("invalid") diff --git a/yyetsweb/Mongo.py b/yyetsweb/Mongo.py index 14e9d4a40..cdd6577b1 100644 --- a/yyetsweb/Mongo.py +++ b/yyetsweb/Mongo.py @@ -92,6 +92,15 @@ def reset_top(self): # reset self.db["yyets"].update_many({}, {"$set": {"data.info.views": 0}}) + def import_ban_user(self): + usernames = self.db["users"].find({"status.disable": True}, projection={"username": True}) + r = Redis().r + r.delete("user_blacklist") + logging.info("Importing ban users to redis...%s", usernames) + for username in [u["username"] for u in usernames]: + r.hset("user_blacklist", username, 100) + r.close() + class AnnouncementMongoResource(AnnouncementResource, Mongo): def get_announcement(self, page: int, size: int) -> dict: @@ -507,7 +516,8 @@ def get_resource_data(self, resource_id: int, username: str) -> dict: {"data.info.id": resource_id}, {'$inc': {'data.info.views': 1}}, {'_id': False}) - + if not data: + return {} if username: user_like_data = self.db["users"].find_one({"username": username}) if user_like_data and resource_id in user_like_data.get("like", []): @@ -653,13 +663,12 @@ def get_most(self) -> list: def get_top_resource(self) -> dict: area_dict = dict(ALL={"$regex": ".*"}, US="美国", JP="日本", KR="韩国", UK="英国") - all_data = {} + all_data = {"ALL": "全部"} for abbr, area in area_dict.items(): data = self.db["yyets"].find({"data.info.area": area, "data.info.id": {"$ne": 233}}, self.projection). \ sort("data.info.views", pymongo.DESCENDING).limit(15) all_data[abbr] = list(data) - area_dict["ALL"] = "全部" all_data["class"] = area_dict return all_data @@ -915,7 +924,13 @@ def get_notification(self, username, page, size): # .sort("_id", pymongo.DESCENDING).limit(size).skip((page - 1) * size) notify = self.db["notification"].find_one({"username": username}, projection={"_id": False}) if not notify: - return {} + return { + "username": username, + "unread_item": [], + "read_item": [], + "unread_count": 0, + "read_count": 0 + } # size is shared unread = notify.get("unread", []) diff --git a/yyetsweb/database.py b/yyetsweb/database.py index 80ef196e0..69b257c76 100644 --- a/yyetsweb/database.py +++ b/yyetsweb/database.py @@ -55,60 +55,6 @@ def inner(*args, **kwargs): return func -class AntiCrawler: - - def __init__(self, instance): - self.tornado = instance - self.redis = Redis() - - def execute(self) -> bool: - header_result = self.header_check() - ban_check = self.ban_check() - if header_result or ban_check: - return True - - def header_check(self): - referer = self.tornado.request.headers.get("Referer") - resource_id = self.tornado.get_query_argument("id") - uri = self.tornado.request.uri - logging.info("Verifying: Referer:[%s] uri:[%s]", referer, uri) - if referer is None: - return True - if resource_id not in uri: - return True - if resource_id not in referer: - return True - - def ban_check(self): - con = self.redis - ip = self.get_real_ip() - str_count = con.r.get(ip) - if str_count and int(str_count) > 10: - return True - - def imprisonment(self, ip): - con = self.redis - # don't use incr - we need to set expire time - if con.r.exists(ip): - count_str = con.r.get(ip) - count = int(count_str) - count += 1 - else: - count = 1 - # ban rule: (count-10)*600 - if count > 10: - ex = (count - 10) * 3600 - else: - ex = None - con.r.set(ip, count, ex) - - def get_real_ip(self): - x_real = self.tornado.request.headers.get("X-Real-IP") - remote_ip = self.tornado.request.remote_ip - logging.debug("X-Real-IP:%s, Remote-IP:%s", x_real, remote_ip) - return x_real or remote_ip - - class OtherResource(): def reset_top(self): pass diff --git a/yyetsweb/handler.py b/yyetsweb/handler.py index 0f8db0740..6f15c187f 100644 --- a/yyetsweb/handler.py +++ b/yyetsweb/handler.py @@ -23,10 +23,12 @@ from http import HTTPStatus import filetype +import zhconv from tornado import escape, gen, web from tornado.concurrent import run_on_executor -from database import AntiCrawler, CaptchaResource, Redis +from database import CaptchaResource, Redis +from utils import add_cf_blacklist escape.json_encode = lambda value: json.dumps(value, ensure_ascii=False) logging.basicConfig(level=logging.INFO) @@ -41,7 +43,68 @@ index = pathlib.Path(__file__).parent.joinpath("templates", "index.html").as_posix() -class BaseHandler(web.RequestHandler): +class SecurityHandler(web.RequestHandler): + key = "user_blacklist" + + def __init__(self, application, request, **kwargs): + super().__init__(application, request, **kwargs) + self.r = Redis().r + + def prepare(self): + if self.check_request(): + self.set_status(HTTPStatus.FORBIDDEN) + self.finish() + + def data_received(self, chunk): + pass + + def check_request(self): + ban = self.__ip_check() + user = self.__user_check() + result = ban or user + if result: + self.ban() + return result + + def get_real_ip(self): + x_real = self.request.headers.get("X-Real-IP") + remote_ip = self.request.remote_ip + logging.debug("X-Real-IP:%s, Remote-IP:%s", x_real, remote_ip) + return x_real or remote_ip + + def ban(self): + ip = self.get_real_ip() + self.r.incr(ip) + count = int(self.r.get(ip)) + # ban rule: (count-10)*600 + if count <= 10: + ex = 120 + else: + ex = (count - 10) * 600 + if count >= 30: + add_cf_blacklist(ip) + self.r.set(ip, count, ex) + user = self.get_current_user() + if user: + self.r.hincrby(self.key, user) + + def get_current_user(self) -> str: + username = self.get_secure_cookie("username") or b"" + return username.decode("u8") + + def __user_check(self): + count = self.r.hget(self.key, self.get_current_user()) or 0 + count = int(count) + if count >= 20: + return True + + def __ip_check(self): + d = self.r.get(self.get_real_ip()) or 0 + if int(d) >= 10: + return True + + +class BaseHandler(SecurityHandler): executor = ThreadPoolExecutor(200) class_name = f"Fake{adapter}Resource" adapter_module = importlib.import_module(f"{adapter}") @@ -55,18 +118,12 @@ def __init__(self, application, request, **kwargs): def write_error(self, status_code, **kwargs): if status_code in [HTTPStatus.FORBIDDEN, - HTTPStatus.INTERNAL_SERVER_ERROR, HTTPStatus.UNAUTHORIZED, - HTTPStatus.NOT_FOUND]: + HTTPStatus.NOT_FOUND, + HTTPStatus.INTERNAL_SERVER_ERROR, + ]: self.write(str(kwargs.get('exc_info'))) - def data_received(self, chunk): - pass - - def get_current_user(self) -> str: - username = self.get_secure_cookie("username") or b"" - return username.decode("u8") - class TopHandler(BaseHandler): class_name = f"Top{adapter}Resource" @@ -121,7 +178,7 @@ def login_user(self): password = data["password"] captcha = data.get("captcha") captcha_id = data.get("captcha_id", "") - ip = AntiCrawler(self).get_real_ip() + ip = self.get_real_ip() browser = self.request.headers['user-agent'] response = self.instance.login_user(username, password, captcha, captcha_id, ip, browser) @@ -161,7 +218,7 @@ def get(self): # everytime we receive a GET request to this api, we'll update last_date and last_ip username = self.get_current_user() if username: - now_ip = AntiCrawler(self).get_real_ip() + now_ip = self.get_real_ip() self.instance.update_user_last(username, now_ip) @gen.coroutine @@ -179,20 +236,12 @@ class ResourceHandler(BaseHandler): @run_on_executor() def get_resource_data(self): - ban = AntiCrawler(self) - if ban.execute(): - logging.warning("%s@%s make you happy:-(", self.request.headers.get("user-agent"), ban.get_real_ip()) - self.set_status(HTTPStatus.FORBIDDEN) - return {} - else: - resource_id = int(self.get_query_argument("id")) - username = self.get_current_user() - data = self.instance.get_resource_data(resource_id, username) + resource_id = int(self.get_query_argument("id")) + username = self.get_current_user() + data = self.instance.get_resource_data(resource_id, username) if not data: - # not found, dangerous - ip = ban.get_real_ip() - ban.imprisonment(ip) + self.ban() self.set_status(HTTPStatus.NOT_FOUND) data = {} @@ -201,6 +250,8 @@ def get_resource_data(self): @run_on_executor() def search_resource(self): kw = self.get_query_argument("keyword").lower() + # convert any text to zh-hans + kw = zhconv.convert(kw, "zh-hans") return self.instance.search_resource(kw) @gen.coroutine @@ -380,7 +431,7 @@ def add_comment(self): resource_id = payload["resource_id"] comment_id = payload.get("comment_id") - real_ip = AntiCrawler(self).get_real_ip() + real_ip = self.get_real_ip() username = self.get_current_user() browser = self.request.headers['user-agent'] @@ -538,7 +589,7 @@ def add_announcement(self): payload = self.json content = payload["content"] - real_ip = AntiCrawler(self).get_real_ip() + real_ip = self.get_real_ip() browser = self.request.headers['user-agent'] self.instance.add_announcement(username, content, real_ip, browser) @@ -713,6 +764,10 @@ def get(self): class NotFoundHandler(BaseHandler): def get(self): # for react app + # if self.request.uri not in ["/", "/home", "/discuss", "/login", "/404", "/search", + # "/resource", "/me", "/database", "help", "/statistics" + # ]: + # self.ban() self.render(index) @@ -929,7 +984,7 @@ def process(self, method): obj_id = self.json.get("obj_id") token = self.json.get("token") ua = self.request.headers['user-agent'] - ip = AntiCrawler(self).get_real_ip() + ip = self.get_real_ip() logging.info("Authentication %s(%s) for spam API now...", ua, ip) if token == os.getenv("TOKEN"): return getattr(self.instance, method)(obj_id) diff --git a/yyetsweb/server.py b/yyetsweb/server.py index 4f3fd2e5a..c8428a704 100644 --- a/yyetsweb/server.py +++ b/yyetsweb/server.py @@ -103,9 +103,11 @@ def run_server(port, host): scheduler = BackgroundScheduler(timezone=timez) scheduler.add_job(OtherMongoResource().reset_top, trigger=CronTrigger.from_crontab("0 0 1 * *")) scheduler.add_job(sync_douban, trigger=CronTrigger.from_crontab("1 1 1 * *")) - scheduler.add_job(entry_dump, trigger=CronTrigger.from_crontab("2 2 * * *")) + scheduler.add_job(entry_dump, trigger=CronTrigger.from_crontab("2 2 1 * *")) scheduler.add_job(ResourceLatestMongoResource().refresh_latest_resource, 'interval', hours=1) + scheduler.add_job(OtherMongoResource().import_ban_user, 'interval', seconds=300) scheduler.start() + options.define("p", default=8888, help="running port", type=int) options.define("h", default='127.0.0.1', help="listen address", type=str) options.parse_command_line() diff --git a/yyetsweb/utils.py b/yyetsweb/utils.py index 6079a8c6c..5d6a8bf3a 100644 --- a/yyetsweb/utils.py +++ b/yyetsweb/utils.py @@ -8,6 +8,7 @@ __author__ = "Benny " import contextlib +import logging import os import smtplib import time @@ -15,6 +16,7 @@ from email.mime.text import MIMEText from email.utils import formataddr, parseaddr +import requests from akismet import Akismet @@ -64,5 +66,23 @@ def check_spam(ip, ua, author, content) -> int: return 0 +def add_cf_blacklist(ip): + logging.warning("Cloudflare: Blacklisting %s", ip) + zone_id = "b8e2d2fa75c6f7dc3c2e478e27f3061b" + filter_id = "cc6c810f7f2941d28a672bfb6ac6bebe" + api = f"https://api.cloudflare.com/client/v4/zones/{zone_id}/filters/{filter_id}" + s = requests.Session() + s.headers.update({"Authorization": "Bearer %s" % os.getenv("CF_TOKEN")}) + expr = s.get(api).json()["result"]["expression"] + if ip not in expr: + body = { + "id": filter_id, + "paused": False, + "expression": f"{expr} or (ip.src eq {ip})" + } + resp = s.put(api, json=body) + print(resp.json()) + + if __name__ == '__main__': - send_mail("benny.think@gmail.com", "subj", 'aaaa
bbb') + add_cf_blacklist("192.168.2.1")