Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
requests==2.27.1
pytelegrambotapi==4.4.0
beautifulsoup4==4.10.0
pytelegrambotapi==4.4.1
beautifulsoup4==4.11.1
tgbot-ping==1.0.4
redis==4.1.4
apscheduler==3.9.1
pymongo==4.0.2
pymongo==4.1.1
tornado==6.1
captcha==0.4
passlib==1.7.4
fakeredis==1.7.1
pytz==2021.3
pytz==2022.1
filetype==1.0.10
requests[socks]
tqdm==4.63.0
tqdm==4.64.0
retry==0.9.2
pymysql==1.0.2
git+https://github.com/tgbot-collection/python-akismet
openpyxl==3.0.9
openpyxl==3.1.2
zhconv==1.4.3
8 changes: 7 additions & 1 deletion yyetsbot/yyetsbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
import io
import json
import logging
import os
import re
import tempfile
import time
from urllib.parse import quote_plus

import requests
import telebot
import zhconv
from apscheduler.schedulers.background import BackgroundScheduler
from telebot import apihelper, types
from tgbot_ping import get_runtime
Expand Down Expand Up @@ -175,6 +177,8 @@ def send_my_response(message):

@bot.message_handler(content_types=["photo", "text"])
def send_search(message):
if str(message.chat.id) == os.getenv("SPECIAL_ID") and message.text == "❤️":
bot.reply_to(message, "❤️")
# normal ordered search
if message.text in ("Voice Chat started", "Voice Chat ended"):
logging.warning("This is really funny %s", message.text)
Expand All @@ -196,11 +200,13 @@ def ban_user(message):
yy = fansub.YYeTsOffline()
client = yy.mongo
user_col = client["zimuzu"]["users"]
comment_col = client["zimuzu"]["comment"]
text = ""
for line in user_list:
user, reason = line.split(maxsplit=1)
ban = {"disable": True, "reason": reason}
user_col.update_one({"username": user}, {"$set": {"status": ban}})
comment_col.delete_many({"username": user})
status = f"{user} 已经被禁言,原因:{reason}\n"
logging.info("Banning %s", status)
text += status
Expand All @@ -222,7 +228,7 @@ def base_send_search(message, instance=None):
send_my_response(message)
return

name = message.text
name = zhconv.convert(message.text, "zh-hans")
logging.info('Receiving message: %s from user %s(%s)', name, message.chat.username, message.chat.id)
if name is None:
today_request("invalid")
Expand Down
23 changes: 19 additions & 4 deletions yyetsweb/Mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,15 @@ def reset_top(self):
# reset
self.db["yyets"].update_many({}, {"$set": {"data.info.views": 0}})

def import_ban_user(self):
    """Rebuild the Redis ``user_blacklist`` hash from the banned users in MongoDB.

    Every document in the ``users`` collection whose ``status.disable`` flag is
    true is written to the hash with the value 100. The previous contents of
    the hash are deleted first.
    """
    cursor = self.db["users"].find({"status.disable": True}, projection={"username": True})
    # Materialise the cursor once so the log line shows the real usernames
    # instead of the cursor object's repr.
    usernames = [doc["username"] for doc in cursor]

    r = Redis().r
    try:
        r.delete("user_blacklist")
        logging.info("Importing ban users to redis...%s", usernames)
        for username in usernames:
            r.hset("user_blacklist", username, 100)
    finally:
        # Release the connection even when one of the Redis commands fails.
        r.close()


class AnnouncementMongoResource(AnnouncementResource, Mongo):
def get_announcement(self, page: int, size: int) -> dict:
Expand Down Expand Up @@ -507,7 +516,8 @@ def get_resource_data(self, resource_id: int, username: str) -> dict:
{"data.info.id": resource_id},
{'$inc': {'data.info.views': 1}},
{'_id': False})

if not data:
return {}
if username:
user_like_data = self.db["users"].find_one({"username": username})
if user_like_data and resource_id in user_like_data.get("like", []):
Expand Down Expand Up @@ -653,13 +663,12 @@ def get_most(self) -> list:

def get_top_resource(self) -> dict:
    """Return the 15 most-viewed resources overall and for each area.

    The result maps every area abbreviation (``ALL``, ``US``, ``JP``, ``KR``,
    ``UK``) to a list of documents sorted by ``data.info.views`` in descending
    order, plus a ``class`` entry that maps each abbreviation to its display
    name. The resource whose id is 233 is excluded from every list.
    """
    # "ALL" matches any area through a regex; the other entries match the
    # area name exactly.
    area_dict = dict(ALL={"$regex": ".*"}, US="美国", JP="日本", KR="韩国", UK="英国")
    all_data = {}
    for abbr, area in area_dict.items():
        cursor = (
            self.db["yyets"]
            .find({"data.info.area": area, "data.info.id": {"$ne": 233}}, self.projection)
            .sort("data.info.views", pymongo.DESCENDING)
            .limit(15)
        )
        all_data[abbr] = list(cursor)

    # Publish a readable label for "ALL" instead of the regex used for querying,
    # without mutating the query mapping itself.
    all_data["class"] = {**area_dict, "ALL": "全部"}
    return all_data

Expand Down Expand Up @@ -915,7 +924,13 @@ def get_notification(self, username, page, size):
# .sort("_id", pymongo.DESCENDING).limit(size).skip((page - 1) * size)
notify = self.db["notification"].find_one({"username": username}, projection={"_id": False})
if not notify:
return {}
return {
"username": username,
"unread_item": [],
"read_item": [],
"unread_count": 0,
"read_count": 0
}

# size is shared
unread = notify.get("unread", [])
Expand Down
54 changes: 0 additions & 54 deletions yyetsweb/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,60 +55,6 @@ def inner(*args, **kwargs):
return func


class AntiCrawler:
    """Referer-based and counter-based crawler detection for a request handler.

    The wrapped ``instance`` must expose ``request.headers``, ``request.uri``,
    ``request.remote_ip`` and ``get_query_argument``. Strike counters are
    stored per IP through ``self.redis.r``.
    """

    def __init__(self, instance):
        self.tornado = instance
        self.redis = Redis()

    def execute(self) -> bool:
        """Return True when the request fails the header check or the IP is banned."""
        # Both checks are always evaluated, exactly as before; only the return
        # value is now a real bool instead of True/None.
        header_result = self.header_check()
        ban_check = self.ban_check()
        return header_result or ban_check

    def header_check(self) -> bool:
        """Return True when the Referer is missing or the ``id`` is absent from it or the URI."""
        referer = self.tornado.request.headers.get("Referer")
        resource_id = self.tornado.get_query_argument("id")
        uri = self.tornado.request.uri
        logging.info("Verifying: Referer:[%s] uri:[%s]", referer, uri)
        if referer is None:
            return True
        return resource_id not in uri or resource_id not in referer

    def ban_check(self) -> bool:
        """Return True when the caller's IP has more than 10 recorded strikes."""
        str_count = self.redis.r.get(self.get_real_ip())
        return bool(str_count) and int(str_count) > 10

    def imprisonment(self, ip):
        """Record one more strike for ``ip`` and refresh its expiry.

        Up to 10 strikes the key is stored without expiry; beyond that it
        expires after ``(count - 10) * 3600`` seconds.
        """
        con = self.redis
        # don't use incr - we need to set expire time
        # A single GET replaces the former EXISTS + GET pair, which raised
        # TypeError when the key expired between the two calls.
        count = int(con.r.get(ip) or 0) + 1
        # ban rule: (count-10)*3600
        ex = (count - 10) * 3600 if count > 10 else None
        con.r.set(ip, count, ex)

    def get_real_ip(self):
        """Return the ``X-Real-IP`` header when present, otherwise the socket's remote IP."""
        x_real = self.tornado.request.headers.get("X-Real-IP")
        remote_ip = self.tornado.request.remote_ip
        logging.debug("X-Real-IP:%s, Remote-IP:%s", x_real, remote_ip)
        return x_real or remote_ip


class OtherResource():
def reset_top(self):
pass
Expand Down
111 changes: 83 additions & 28 deletions yyetsweb/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
from http import HTTPStatus

import filetype
import zhconv
from tornado import escape, gen, web
from tornado.concurrent import run_on_executor

from database import AntiCrawler, CaptchaResource, Redis
from database import CaptchaResource, Redis
from utils import add_cf_blacklist

escape.json_encode = lambda value: json.dumps(value, ensure_ascii=False)
logging.basicConfig(level=logging.INFO)
Expand All @@ -41,7 +43,68 @@
index = pathlib.Path(__file__).parent.joinpath("templates", "index.html").as_posix()


class BaseHandler(web.RequestHandler):
class SecurityHandler(web.RequestHandler):
    """Request handler base class that answers 403 to banned IPs and users.

    Strike counters live in Redis: one string key per client IP and one
    ``user_blacklist`` hash keyed by username. ``prepare`` runs the checks
    before the HTTP verb method and finishes the request when one fails.
    """
    key = "user_blacklist"

    def __init__(self, application, request, **kwargs):
        super().__init__(application, request, **kwargs)
        self.r = Redis().r

    def prepare(self):
        """Reject the request with 403 Forbidden when the caller is banned."""
        if self.check_request():
            self.set_status(HTTPStatus.FORBIDDEN)
            self.finish()

    def data_received(self, chunk):
        pass

    def check_request(self):
        """Return True when the IP or the user is banned, adding a strike if so."""
        # Both checks are always evaluated, as before.
        ip_banned = self.__ip_check()
        user_banned = self.__user_check()
        result = ip_banned or user_banned
        if result:
            # A request made while banned counts as another strike.
            self.ban()
        return result

    def get_real_ip(self):
        """Return the ``X-Real-IP`` header when present, otherwise the socket's remote IP."""
        x_real = self.request.headers.get("X-Real-IP")
        remote_ip = self.request.remote_ip
        logging.debug("X-Real-IP:%s, Remote-IP:%s", x_real, remote_ip)
        return x_real or remote_ip

    def ban(self):
        """Add one strike for the caller's IP (and user, if logged in).

        The IP counter expires after 120 seconds for the first 10 strikes and
        after ``(count - 10) * 600`` seconds afterwards. From 30 strikes on the
        IP is also passed to ``add_cf_blacklist``.
        """
        ip = self.get_real_ip()
        # INCR already returns the new value; re-reading it with GET could
        # fail if the key expired in between.
        count = int(self.r.incr(ip))
        # ban rule: (count-10)*600
        if count <= 10:
            ex = 120
        else:
            ex = (count - 10) * 600
        if count >= 30:
            add_cf_blacklist(ip)
        # Only refresh the TTL: writing the value back with SET could
        # overwrite increments made concurrently by other requests.
        self.r.expire(ip, ex)
        user = self.get_current_user()
        if user:
            self.r.hincrby(self.key, user)

    def get_current_user(self) -> str:
        """Return the username from the secure ``username`` cookie, or ``""``."""
        username = self.get_secure_cookie("username") or b""
        return username.decode("u8")

    def __user_check(self):
        """Return True when the logged-in user has 20 or more strikes."""
        user = self.get_current_user()
        if not user:
            # Anonymous request: ban() never records an empty username, so an
            # entry stored under "" must not block every anonymous visitor.
            return False
        count = int(self.r.hget(self.key, user) or 0)
        return count >= 20

    def __ip_check(self):
        """Return True when the caller's IP has 10 or more strikes."""
        count = self.r.get(self.get_real_ip()) or 0
        return int(count) >= 10


class BaseHandler(SecurityHandler):
executor = ThreadPoolExecutor(200)
class_name = f"Fake{adapter}Resource"
adapter_module = importlib.import_module(f"{adapter}")
Expand All @@ -55,18 +118,12 @@ def __init__(self, application, request, **kwargs):

def write_error(self, status_code, **kwargs):
if status_code in [HTTPStatus.FORBIDDEN,
HTTPStatus.INTERNAL_SERVER_ERROR,
HTTPStatus.UNAUTHORIZED,
HTTPStatus.NOT_FOUND]:
HTTPStatus.NOT_FOUND,
HTTPStatus.INTERNAL_SERVER_ERROR,
]:
self.write(str(kwargs.get('exc_info')))

def data_received(self, chunk):
pass

def get_current_user(self) -> str:
username = self.get_secure_cookie("username") or b""
return username.decode("u8")


class TopHandler(BaseHandler):
class_name = f"Top{adapter}Resource"
Expand Down Expand Up @@ -121,7 +178,7 @@ def login_user(self):
password = data["password"]
captcha = data.get("captcha")
captcha_id = data.get("captcha_id", "")
ip = AntiCrawler(self).get_real_ip()
ip = self.get_real_ip()
browser = self.request.headers['user-agent']

response = self.instance.login_user(username, password, captcha, captcha_id, ip, browser)
Expand Down Expand Up @@ -161,7 +218,7 @@ def get(self):
# every time we receive a GET request to this API, we update last_date and last_ip
username = self.get_current_user()
if username:
now_ip = AntiCrawler(self).get_real_ip()
now_ip = self.get_real_ip()
self.instance.update_user_last(username, now_ip)

@gen.coroutine
Expand All @@ -179,20 +236,12 @@ class ResourceHandler(BaseHandler):

@run_on_executor()
def get_resource_data(self):
ban = AntiCrawler(self)
if ban.execute():
logging.warning("%s@%s make you happy:-(", self.request.headers.get("user-agent"), ban.get_real_ip())
self.set_status(HTTPStatus.FORBIDDEN)
return {}
else:
resource_id = int(self.get_query_argument("id"))
username = self.get_current_user()
data = self.instance.get_resource_data(resource_id, username)

resource_id = int(self.get_query_argument("id"))
username = self.get_current_user()
data = self.instance.get_resource_data(resource_id, username)
if not data:
# not found, dangerous
ip = ban.get_real_ip()
ban.imprisonment(ip)
self.ban()
self.set_status(HTTPStatus.NOT_FOUND)
data = {}

Expand All @@ -201,6 +250,8 @@ def get_resource_data(self):
@run_on_executor()
def search_resource(self):
    """Search resources by the ``keyword`` query argument.

    The keyword is lower-cased and converted to Simplified Chinese
    (``zh-hans``) before it is handed to the backing resource instance.
    """
    raw_keyword = self.get_query_argument("keyword")
    # convert any text to zh-hans
    simplified = zhconv.convert(raw_keyword.lower(), "zh-hans")
    return self.instance.search_resource(simplified)

@gen.coroutine
Expand Down Expand Up @@ -380,7 +431,7 @@ def add_comment(self):
resource_id = payload["resource_id"]
comment_id = payload.get("comment_id")

real_ip = AntiCrawler(self).get_real_ip()
real_ip = self.get_real_ip()
username = self.get_current_user()
browser = self.request.headers['user-agent']

Expand Down Expand Up @@ -538,7 +589,7 @@ def add_announcement(self):

payload = self.json
content = payload["content"]
real_ip = AntiCrawler(self).get_real_ip()
real_ip = self.get_real_ip()
browser = self.request.headers['user-agent']

self.instance.add_announcement(username, content, real_ip, browser)
Expand Down Expand Up @@ -713,6 +764,10 @@ def get(self):

class NotFoundHandler(BaseHandler):
    """Fallback handler that serves the front-end index page."""

    def get(self):  # for react app
        """Render the index template for any GET request routed here."""
        # Disabled: add a strike for requests to paths outside the known
        # front-end routes.
        # if self.request.uri not in ["/", "/home", "/discuss", "/login", "/404", "/search",
        #                             "/resource", "/me", "/database", "help", "/statistics"
        #                             ]:
        #     self.ban()
        self.render(index)


Expand Down Expand Up @@ -929,7 +984,7 @@ def process(self, method):
obj_id = self.json.get("obj_id")
token = self.json.get("token")
ua = self.request.headers['user-agent']
ip = AntiCrawler(self).get_real_ip()
ip = self.get_real_ip()
logging.info("Authentication %s(%s) for spam API now...", ua, ip)
if token == os.getenv("TOKEN"):
return getattr(self.instance, method)(obj_id)
Expand Down
Loading