diff --git a/README.md b/README.md index d99d08b..7265872 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,16 @@ # x-flow x infomation flow,推特信息流 +# 用法 +1. 设置环境变量: + - x_bearer_token + - EMAIL_PASSWORD(或 wangyi_emai_auth) + - EMAIL_FROM(可选,默认 19121220286@163.com) + - EMAILS(可选,逗号分隔)或 emails.txt(每行一个) + - AI_X_USERS(可选,逗号分隔) + - DAILY_TIME / ACTIVE_HOURS / MAX_RESULTS(可选) +2. 运行:python test.py + # 日志 - [x] 去年12月,获取推特信息流的想法,但是没做。 - [x] 2/24,测试grok3,发现它可以总结回答上周的AI大佬的文章,但每次回答准确度不同 diff --git a/test.py b/test.py index 096120a..ffd587f 100644 --- a/test.py +++ b/test.py @@ -1,83 +1,150 @@ import os -import tweepy import json import time -import yagmail +from datetime import datetime, timedelta, timezone + import schedule -from datetime import datetime +import tweepy +import yagmail + +DEFAULT_USERS = ["karpathy", "ylecun", "AndrewYNg", "geoffreyhinton", "sama"] +DATA_DIR = "./twitter_data" + + +def load_list(path): + if not os.path.exists(path): + return [] + with open(path, "r", encoding="utf-8") as f: + return [line.strip() for line in f if line.strip() and not line.startswith("#")] -def get_contents(path): - with open(path, 'r', encoding='utf-8') as f: - return f.read() def get_emails(path): - with open(path, 'r') as f: - return f.read().splitlines() + emails = load_list(path) + if emails: + return emails + env = os.getenv("EMAILS", "") + return [e.strip() for e in env.split(",") if e.strip()] + + +def get_users(): + env = os.getenv("AI_X_USERS", "") + if env.strip(): + return [u.strip().lstrip("@") for u in env.split(",") if u.strip()] + return DEFAULT_USERS + -def get_tweets(user): - client = tweepy.Client(os.getenv('x_bearer_token')) +def get_tweets(client, user, hours=24, max_results=10): + now = datetime.now(timezone.utc) + cutoff = now - timedelta(hours=hours) try: - user = client.get_user(username=user) - user_data = user.data - - tweets = client.get_users_tweets(id=user_data.id, max_results=10) - tweet_list = [tweet.text for tweet in tweets.data] - - result = { - 'user_data': str(user_data), - 'tweets': tweet_list, - 'timestamp': datetime.now().isoformat() + user_res = client.get_user(username=user) + user_data = user_res.data + if not user_data: + return { + "user": user, + "tweets": [], + "error": "user not found", + "timestamp": now.isoformat(), + } + tweets = client.get_users_tweets( + id=user_data.id, + max_results=max_results, + tweet_fields=["created_at"], + ) + tweet_list = [] + for tweet in tweets.data or []: + if tweet.created_at and tweet.created_at >= cutoff: + tweet_list.append( + {"text": tweet.text, "created_at": tweet.created_at.isoformat()} + ) + return { + "user": user, + "user_data": str(user_data), + "tweets": tweet_list, + "timestamp": now.isoformat(), } - - # 使用时间戳创建文件名 - filename = './twitter_data/' + f'{datetime.now().strftime("%Y_%m%d_%H_%M_%S")}.json' - with open(filename, 'w', encoding='utf-8') as f: - json.dump(result, f, ensure_ascii=False, indent=2) - - return result - except tweepy.TweepyException as e: - error_result = { - 'error': str(e), - 'timestamp': datetime.now().isoformat() + return { + "user": user, + "tweets": [], + "error": str(e), + "timestamp": now.isoformat(), } - filename = './twitter_data/' + f'{datetime.now().strftime("%Y_%m%d_%H_%M_%S")}.json' - with open(filename, 'w', encoding='utf-8') as f: - json.dump(error_result, f, ensure_ascii=False, indent=2) - return error_result -def send_email(src, dst, subject, contents): - pwd = os.environ.get('wangyi_emai_auth') +def build_email_content(items): + if not items: + return f"{datetime.now().strftime('%Y-%m-%d')} 无活跃AI大佬更新。" + lines = [f"{datetime.now().strftime('%Y-%m-%d')} AI大佬X信息流", ""] + for item in items: + lines.append(f"@{item['user']}") + for tweet in item["tweets"]: + lines.append(f"- {tweet['created_at']}: {tweet['text']}") + lines.append("") + return "\n".join(lines).strip() + + +def save_result(result): + os.makedirs(DATA_DIR, exist_ok=True) + filename = os.path.join( + DATA_DIR, f"{datetime.now().strftime('%Y_%m%d_%H_%M_%S')}.json" + ) + with open(filename, "w", encoding="utf-8") as f: + json.dump(result, f, ensure_ascii=False, indent=2) + - yag = yagmail.SMTP(user=src, password=pwd, host='smtp.163.com', port='465') +def send_email(src, dst, subject, contents): + pwd = os.getenv("EMAIL_PASSWORD") or os.getenv("wangyi_emai_auth") + if not pwd: + raise ValueError("missing EMAIL_PASSWORD") + host = os.getenv("EMAIL_HOST", "smtp.163.com") + port = int(os.getenv("EMAIL_PORT", "465")) + yag = yagmail.SMTP(user=src, password=pwd, host=host, port=port) yag.send(to=dst, subject=subject, contents=contents) yag.close() + def send_emails(src, tos, subject, contents): for to in tos: - send_email(src, to, subject, contents) + send_email(src, to, subject, contents) def daily_task(): try: - src = '19121220286@163.com' - tos = get_emails('emails.txt') - subject = '今日AI+头条项目' - contents = get_tweets('elonmusk') - print(contents) + token = os.getenv("x_bearer_token") + if not token: + print("missing x_bearer_token") + return + src = os.getenv("EMAIL_FROM", "19121220286@163.com") + tos = get_emails("emails.txt") + if not tos: + print("no recipients") + return + hours = int(os.getenv("ACTIVE_HOURS", "24")) + max_results = int(os.getenv("MAX_RESULTS", "10")) + subject = os.getenv("EMAIL_SUBJECT", "今日AI X信息流") + client = tweepy.Client(token) + users = get_users() + items = [] + for user in users: + data = get_tweets(client, user, hours, max_results) + if data.get("tweets"): + items.append(data) + contents = build_email_content(items) send_emails(src, tos, subject, contents) + save_result( + {"items": items, "users": users, "timestamp": datetime.now().isoformat()} + ) except Exception as e: print(f"{e} occured in daily_task") + if __name__ == "__main__": - # print(get_tweets('elonmusk')) try: - schedule.every().day.at('00:10').do(daily_task) - + daily_time = os.getenv("DAILY_TIME", "00:10") + schedule.every().day.at(daily_time).do(daily_task) while True: schedule.run_pending() time.sleep(1) - except Exception as e: print(f"{e} occured~") \ No newline at end of file