Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
# x-flow
x information flow,推特信息流

# 用法
1. 设置环境变量:
- x_bearer_token
- EMAIL_PASSWORD(或 wangyi_emai_auth)
- EMAIL_FROM(可选,默认 19121220286@163.com)
- EMAILS(可选,逗号分隔)或 emails.txt(每行一个)
- AI_X_USERS(可选,逗号分隔)
- DAILY_TIME / ACTIVE_HOURS / MAX_RESULTS(可选)
2. 运行:python test.py

# 日志
- [x] 去年12月,获取推特信息流的想法,但是没做。
- [x] 2/24,测试grok3,发现它可以总结回答上周的AI大佬的文章,但每次回答准确度不同
Expand Down
163 changes: 115 additions & 48 deletions test.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,150 @@
import os
import tweepy
import json
import time
import yagmail
from datetime import datetime, timedelta, timezone

import schedule
from datetime import datetime
import tweepy
import yagmail

DEFAULT_USERS = ["karpathy", "ylecun", "AndrewYNg", "geoffreyhinton", "sama"]
DATA_DIR = "./twitter_data"


def load_list(path):
    """Read a plain-text list file with one entry per line.

    Surrounding whitespace is stripped from every line. Blank lines and
    comment lines (first non-blank character is ``#``) are skipped.

    Args:
        path: Path of the file to read.

    Returns:
        The remaining entries in file order, or an empty list when ``path``
        does not exist.
    """
    if not os.path.exists(path):
        return []
    # utf-8-sig decodes plain UTF-8 identically but drops a leading BOM, which
    # would otherwise end up glued to the first entry.
    with open(path, "r", encoding="utf-8-sig") as f:
        # Strip before testing for "#" so indented comment lines are skipped too.
        entries = (line.strip() for line in f)
        return [entry for entry in entries if entry and not entry.startswith("#")]

def get_contents(path):
    """Return the whole content of the UTF-8 text file at ``path`` as a string."""
    with open(path, mode="r", encoding="utf-8") as source:
        text = source.read()
    return text

def get_emails(path):
    """Return the recipient addresses for the daily mail.

    The list file at ``path`` takes precedence (parsed by ``load_list``). When
    it is missing or yields no entries, the comma-separated ``EMAILS``
    environment variable is used instead.

    Args:
        path: Path of the recipients file, one address per line.

    Returns:
        A list of address strings; empty when neither source provides any.
    """
    emails = load_list(path)
    if emails:
        return emails
    env = os.getenv("EMAILS", "")
    return [e.strip() for e in env.split(",") if e.strip()]


def get_users():
    """Return the X usernames whose timelines should be fetched.

    Reads the comma-separated ``AI_X_USERS`` environment variable. Each entry
    has surrounding whitespace and any leading ``@`` removed; entries that are
    empty afterwards are dropped.

    Returns:
        The parsed usernames, or a copy of ``DEFAULT_USERS`` when the variable
        is unset or contains no usable entry.
    """
    env = os.getenv("AI_X_USERS", "")
    cleaned = (u.strip().lstrip("@").strip() for u in env.split(","))
    # Filter after removing "@" so an entry such as "@" does not become an
    # empty username.
    users = [u for u in cleaned if u]
    if users:
        return users
    # Hand out a copy so callers cannot mutate the module-level default.
    return list(DEFAULT_USERS)


def get_tweets(client, user, hours=24, max_results=10):
    """Fetch a user's tweets created within the last ``hours`` hours.

    Args:
        client: Object exposing ``get_user(username=...)`` and
            ``get_users_tweets(id=..., max_results=..., tweet_fields=...)``.
        user: Username to look up.
        hours: Size of the look-back window, in hours.
        max_results: Number of tweets to request. The X API v2 user-timeline
            endpoint only accepts 5-100, so the value is clamped to that range
            instead of letting the request fail.

    Returns:
        A dict with keys ``user``, ``tweets`` (list of ``{"text",
        "created_at"}`` dicts) and ``timestamp``. A successful lookup also
        carries ``user_data``. When the user is not found or the client raises
        ``tweepy.TweepyException``, ``tweets`` is empty and ``error`` holds
        the reason.
    """
    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(hours=hours)
    # An out-of-range value would be rejected by the API and surface only as
    # an empty result with an "error" key.
    max_results = max(5, min(100, max_results))
    try:
        user_res = client.get_user(username=user)
        user_data = user_res.data
        if not user_data:
            return {
                "user": user,
                "tweets": [],
                "error": "user not found",
                "timestamp": now.isoformat(),
            }
        tweets = client.get_users_tweets(
            id=user_data.id,
            max_results=max_results,
            tweet_fields=["created_at"],
        )
        # ``tweets.data`` may be None (no tweets); treat that as an empty list.
        tweet_list = [
            {"text": tweet.text, "created_at": tweet.created_at.isoformat()}
            for tweet in tweets.data or []
            if tweet.created_at and tweet.created_at >= cutoff
        ]
        return {
            "user": user,
            "user_data": str(user_data),
            "tweets": tweet_list,
            "timestamp": now.isoformat(),
        }
    except tweepy.TweepyException as e:
        return {
            "user": user,
            "tweets": [],
            "error": str(e),
            "timestamp": now.isoformat(),
        }


def send_email(src, dst, subject, contents):
pwd = os.environ.get('wangyi_emai_auth')
def build_email_content(items):
    """Render the plain-text body of the daily digest mail.

    Args:
        items: List of dicts, each with a ``user`` name and a ``tweets`` list
            whose elements carry ``created_at`` and ``text``.

    Returns:
        A dated "no updates" sentence when ``items`` is empty; otherwise a
        dated header followed by one ``@user`` section per item with one
        bullet line per tweet.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    if not items:
        return f"{today} 无活跃AI大佬更新。"
    sections = [f"{today} AI大佬X信息流", ""]
    for entry in items:
        sections.append(f"@{entry['user']}")
        sections.extend(
            f"- {post['created_at']}: {post['text']}" for post in entry["tweets"]
        )
        # Blank separator line after every user section.
        sections.append("")
    return "\n".join(sections).strip()


def save_result(result):
    """Write ``result`` as indented UTF-8 JSON into ``DATA_DIR``.

    The directory is created when missing, and the file is named after the
    current local time (``YYYY_mmdd_HH_MM_SS.json``).
    """
    stamp = datetime.now().strftime('%Y_%m%d_%H_%M_%S')
    target = os.path.join(DATA_DIR, f"{stamp}.json")
    os.makedirs(DATA_DIR, exist_ok=True)
    with open(target, "w", encoding="utf-8") as out:
        json.dump(result, out, ensure_ascii=False, indent=2)


yag = yagmail.SMTP(user=src, password=pwd, host='smtp.163.com', port='465')
def send_email(src, dst, subject, contents):
    """Send one message through an SMTP server using yagmail.

    The password is read from ``EMAIL_PASSWORD``, falling back to
    ``wangyi_emai_auth``. The server comes from ``EMAIL_HOST`` (default
    ``smtp.163.com``) and ``EMAIL_PORT`` (default ``465``).

    Args:
        src: Sender account, used as the SMTP user.
        dst: Recipient address.
        subject: Subject line.
        contents: Message body.

    Raises:
        ValueError: If no password variable is set, or ``EMAIL_PORT`` is not
            an integer.
    """
    pwd = os.getenv("EMAIL_PASSWORD") or os.getenv("wangyi_emai_auth")
    if not pwd:
        raise ValueError("missing EMAIL_PASSWORD")
    host = os.getenv("EMAIL_HOST", "smtp.163.com")
    port = int(os.getenv("EMAIL_PORT", "465"))
    yag = yagmail.SMTP(user=src, password=pwd, host=host, port=port)
    try:
        yag.send(to=dst, subject=subject, contents=contents)
    finally:
        # Close the connection even when sending fails, so it is not leaked.
        yag.close()


def send_emails(src, tos, subject, contents):
    """Send the same message to every recipient in ``tos``, one mail each.

    Args:
        src: Sender account passed through to ``send_email``.
        tos: Iterable of recipient addresses.
        subject: Subject line.
        contents: Message body.
    """
    for to in tos:
        # Exactly one call per recipient; a second call would deliver every
        # mail twice.
        send_email(src, to, subject, contents)


def daily_task():
try:
src = '19121220286@163.com'
tos = get_emails('emails.txt')
subject = '今日AI+头条项目'
contents = get_tweets('elonmusk')
print(contents)
token = os.getenv("x_bearer_token")
if not token:
print("missing x_bearer_token")
return
src = os.getenv("EMAIL_FROM", "19121220286@163.com")
tos = get_emails("emails.txt")
if not tos:
print("no recipients")
return
hours = int(os.getenv("ACTIVE_HOURS", "24"))
max_results = int(os.getenv("MAX_RESULTS", "10"))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Missing validation for MAX_RESULTS causes silent API failures

The Twitter API's get_users_tweets endpoint requires max_results to be between 5 and 100. The code reads this value from environment variable without validation.

Click to expand

How it happens

At test.py:124, the code reads MAX_RESULTS from environment:

max_results = int(os.getenv("MAX_RESULTS", "10"))

If a user sets MAX_RESULTS to a value less than 5 (e.g., MAX_RESULTS=3), the Twitter API will reject the request. The error is caught by the TweepyException handler at test.py:66-72, which returns {"tweets": [], "error": str(e)}.

At test.py:131, the code checks if data.get("tweets"): which is falsy for empty lists, so no users are included in items.

Impact

The email will be sent saying "无活跃AI大佬更新" (no active AI experts update) when in fact all API calls failed. Users receive a misleading email with no indication of the underlying error.

Recommendation: Add validation to ensure max_results is between 5 and 100: max_results = max(5, min(100, int(os.getenv("MAX_RESULTS", "10"))))

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

subject = os.getenv("EMAIL_SUBJECT", "今日AI X信息流")
client = tweepy.Client(token)
users = get_users()
items = []
for user in users:
data = get_tweets(client, user, hours, max_results)
if data.get("tweets"):
items.append(data)
contents = build_email_content(items)
send_emails(src, tos, subject, contents)
save_result(
{"items": items, "users": users, "timestamp": datetime.now().isoformat()}
)
except Exception as e:
print(f"{e} occured in daily_task")


if __name__ == "__main__":
    # Register the daily job exactly once, at DAILY_TIME ("HH:MM", default
    # 00:10), then poll the scheduler once per second. Registering it a second
    # time with a hard-coded time would run the task twice a day.
    try:
        daily_time = os.getenv("DAILY_TIME", "00:10")
        schedule.every().day.at(daily_time).do(daily_task)
        while True:
            schedule.run_pending()
            time.sleep(1)
    except Exception as e:
        print(f"{e} occured~")