mirror of
https://github.com/ermaozi/get_subscribe.git
synced 2026-07-02 15:54:44 +00:00
修复无法获取连接的问题
This commit is contained in:
parent
fcd3e48420
commit
6b835d3a53
193
main.py
193
main.py
@ -3,11 +3,14 @@ import re
|
||||
import smtplib
|
||||
import sys
|
||||
import time
|
||||
import html
|
||||
import xml.etree.ElementTree as ET
|
||||
from email.mime.text import MIMEText
|
||||
from email.utils import formataddr
|
||||
|
||||
import feedparser
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
# Turn off urllib3's warnings for the whole process. Presumably this is meant
# to silence the certificate warnings caused by the verify=False requests
# made further down in this file -- confirm before removing.
requests.packages.urllib3.disable_warnings()
|
||||
|
||||
@ -27,6 +30,98 @@ def write_log(content, level="INFO"):
|
||||
with open(f'./log/{time.strftime("%Y-%m", time.localtime(time.time()))}-update.log', 'a', encoding="utf-8") as f:
|
||||
f.write(update_log)
|
||||
|
||||
|
||||
def _extract_urls(summary):
|
||||
decoded = html.unescape(summary)
|
||||
urls = []
|
||||
for raw_url in re.findall(r"https?://[^\s\"'<>]+", decoded):
|
||||
url = raw_url.strip().rstrip('.,;)')
|
||||
if url not in urls:
|
||||
urls.append(url)
|
||||
return urls, decoded
|
||||
|
||||
|
||||
def _pick_url(urls, mode):
|
||||
if mode == "v2ray":
|
||||
for suffix in (".txt", ".json"):
|
||||
for url in urls:
|
||||
if url.lower().endswith(suffix):
|
||||
return url
|
||||
if mode == "clash":
|
||||
for suffix in (".yaml", ".yml"):
|
||||
for url in urls:
|
||||
if url.lower().endswith(suffix):
|
||||
return url
|
||||
return ""
|
||||
|
||||
|
||||
def _pick_urls(urls, mode):
|
||||
matched = []
|
||||
suffixes = (".txt", ".json") if mode == "v2ray" else (".yaml", ".yml")
|
||||
for suffix in suffixes:
|
||||
for url in urls:
|
||||
if url.lower().endswith(suffix) and url not in matched:
|
||||
matched.append(url)
|
||||
return matched
|
||||
|
||||
|
||||
def _download_with_retry(urls):
|
||||
if not urls:
|
||||
return None, None
|
||||
for url in urls:
|
||||
for _ in range(3):
|
||||
try:
|
||||
req = requests.request(
|
||||
"GET",
|
||||
url,
|
||||
verify=False,
|
||||
timeout=20,
|
||||
headers={"User-Agent": "Mozilla/5.0"},
|
||||
)
|
||||
except requests.RequestException as e:
|
||||
print(f"请求 {url} 失败: {e}")
|
||||
continue
|
||||
if req.status_code in ok_code:
|
||||
return req, url
|
||||
return None, urls[0]
|
||||
|
||||
|
||||
def _build_session():
    """Create the ``requests`` session used for all downloads.

    The session is configured with:

    * a urllib3 ``Retry`` policy (total/connect/read = 3, ``backoff_factor``
      of 1, retrying GET on status 429, 500, 502, 503 and 504), attached to
      both the ``http://`` and ``https://`` prefixes through one shared
      adapter;
    * a ``Mozilla/5.0`` User-Agent header;
    * an optional proxy for both schemes, read from the ``SUBSCRIBE_PROXY``
      environment variable (ignored when unset or blank).

    Returns:
        The configured ``requests.Session``.
    """
    retry_policy = Retry(
        allowed_methods=["GET"],
        status_forcelist=[429, 500, 502, 503, 504],
        backoff_factor=1,
        read=3,
        connect=3,
        total=3,
    )
    shared_adapter = HTTPAdapter(max_retries=retry_policy)

    session = requests.Session()
    for scheme in ("http://", "https://"):
        session.mount(scheme, shared_adapter)
    session.headers.update({"User-Agent": "Mozilla/5.0"})

    proxy = os.environ.get("SUBSCRIBE_PROXY", "").strip()
    if proxy:
        session.proxies.update(dict.fromkeys(("http", "https"), proxy))

    return session
|
||||
|
||||
|
||||
def _download_candidates(session, urls):
|
||||
if not urls:
|
||||
return None, None
|
||||
for url in urls:
|
||||
try:
|
||||
req = session.get(url, verify=False, timeout=20)
|
||||
except requests.RequestException as e:
|
||||
write_log(f"请求失败:{url} - {e}", "WARN")
|
||||
continue
|
||||
if req.status_code in ok_code:
|
||||
return req, url
|
||||
write_log(f"请求失败:{url} - {req.status_code}", "WARN")
|
||||
return None, urls[0]
|
||||
|
||||
def get_subscribe_url():
|
||||
dirs = './subscribe'
|
||||
if not os.path.exists(dirs):
|
||||
@ -35,38 +130,90 @@ def get_subscribe_url():
|
||||
if not os.path.exists(log_dir):
|
||||
os.makedirs(log_dir)
|
||||
|
||||
rss = feedparser.parse('https://www.cfmem.com/feeds/posts/default?alt=rss')
|
||||
entries = rss.get("entries")
|
||||
if not entries:
|
||||
write_log("更新失败!无法拉取原网站内容", "ERROR")
|
||||
return
|
||||
update_list = []
|
||||
summary = entries[0].get("summary")
|
||||
session = _build_session()
|
||||
try:
|
||||
rss_req = session.get(
|
||||
'https://www.cfmem.com/feeds/posts/default?alt=rss',
|
||||
timeout=20,
|
||||
)
|
||||
except requests.RequestException as ex:
|
||||
write_log(f"更新失败!拉取 RSS 异常: {ex}", "ERROR")
|
||||
return
|
||||
|
||||
if rss_req.status_code not in ok_code:
|
||||
write_log(f"更新失败!无法拉取原网站内容 - {rss_req.status_code}", "ERROR")
|
||||
return
|
||||
|
||||
try:
|
||||
root = ET.fromstring(rss_req.text)
|
||||
except ET.ParseError as ex:
|
||||
write_log(f"更新失败!RSS 解析失败: {ex}", "ERROR")
|
||||
return
|
||||
|
||||
item = root.find("./channel/item")
|
||||
if item is None:
|
||||
write_log("更新失败!RSS 中未找到可用条目", "ERROR")
|
||||
return
|
||||
|
||||
summary = item.findtext("description")
|
||||
if not summary:
|
||||
write_log("暂时没有可用的订阅更新", "WARN")
|
||||
return
|
||||
v2ray_list = re.findall(r">V2Ray/XRay -> (.*?)</span>", summary)
|
||||
|
||||
urls, decoded_summary = _extract_urls(summary)
|
||||
|
||||
v2ray_url = _pick_url(urls, "v2ray")
|
||||
clash_url = _pick_url(urls, "clash")
|
||||
v2ray_candidates = _pick_urls(urls, "v2ray")
|
||||
clash_candidates = _pick_urls(urls, "clash")
|
||||
|
||||
# 兼容旧页面结构,通用提取失败时再尝试历史规则
|
||||
if not v2ray_url:
|
||||
v2ray_list = re.findall(r">V2Ray/XRay -> (.*?)</span>", summary)
|
||||
if not v2ray_list:
|
||||
v2ray_list = re.findall(r">V2Ray/XRay -> (.*?)</span>", decoded_summary)
|
||||
if any(v2ray_list):
|
||||
v2ray_url = v2ray_list[-1].replace('amp;', '')
|
||||
if v2ray_url not in v2ray_candidates:
|
||||
v2ray_candidates.append(v2ray_url)
|
||||
|
||||
if not clash_url:
|
||||
clash_list = re.findall(r">clash -> (.*?)</span>", summary)
|
||||
if not clash_list:
|
||||
clash_list = re.findall(r">clash -> (.*?)</span>", decoded_summary)
|
||||
if any(clash_list) and not clash_list[-1].startswith("订阅地址生成失败"):
|
||||
clash_url = clash_list[-1].replace('amp;', '')
|
||||
if clash_url not in clash_candidates:
|
||||
clash_candidates.append(clash_url)
|
||||
|
||||
# 获取普通订阅链接
|
||||
if any(v2ray_list):
|
||||
v2ray_url = v2ray_list[-1].replace('amp;', '')
|
||||
v2ray_req = requests.request("GET", v2ray_url, verify=False)
|
||||
v2ray_code = v2ray_req.status_code
|
||||
if v2ray_code not in ok_code:
|
||||
write_log(f"获取 v2ray 订阅失败:{v2ray_url} - {v2ray_code}", "WARN")
|
||||
if v2ray_url:
|
||||
v2ray_req, used_v2ray_url = _download_candidates(session, v2ray_candidates)
|
||||
if not v2ray_req:
|
||||
cache_file = dirs + '/v2ray.txt'
|
||||
if os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
|
||||
update_list.append("v2ray: cache")
|
||||
write_log(f"获取 v2ray 订阅失败,已保留本地缓存:{used_v2ray_url}", "WARN")
|
||||
else:
|
||||
write_log(f"获取 v2ray 订阅失败:{used_v2ray_url}", "WARN")
|
||||
else:
|
||||
update_list.append(f"v2ray: {v2ray_code}")
|
||||
update_list.append(f"v2ray: {v2ray_req.status_code}")
|
||||
with open(dirs + '/v2ray.txt', 'w', encoding="utf-8") as f:
|
||||
f.write(v2ray_req.text)
|
||||
clash_list = re.findall(r">clash -> (.*?)</span>", summary)
|
||||
|
||||
# 获取clash订阅链接
|
||||
if any(clash_list) and not clash_list[-1].startswith("订阅地址生成失败"):
|
||||
clash_url = clash_list[-1].replace('amp;', '')
|
||||
clash_req = requests.request("GET", clash_url, verify=False)
|
||||
clash_code = clash_req.status_code
|
||||
if clash_code not in ok_code:
|
||||
write_log(f"获取 clash 订阅失败:{clash_url} - {clash_code}", "WARN")
|
||||
if clash_url and not clash_url.startswith("订阅地址生成失败"):
|
||||
clash_req, used_clash_url = _download_candidates(session, clash_candidates)
|
||||
if not clash_req:
|
||||
cache_file = dirs + '/clash.yml'
|
||||
if os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
|
||||
update_list.append("clash: cache")
|
||||
write_log(f"获取 clash 订阅失败,已保留本地缓存:{used_clash_url}", "WARN")
|
||||
else:
|
||||
write_log(f"获取 clash 订阅失败:{used_clash_url}", "WARN")
|
||||
else:
|
||||
update_list.append(f"clash: {clash_code}")
|
||||
update_list.append(f"clash: {clash_req.status_code}")
|
||||
with open(dirs + '/clash.yml', 'w', encoding="utf-8") as f:
|
||||
clash_content = clash_req.content.decode("utf-8")
|
||||
f.write(clash_content)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user