mirror of
https://github.com/ermaozi/get_subscribe.git
synced 2026-07-02 15:54:44 +00:00
修复无法获取连接的问题
This commit is contained in:
parent
fcd3e48420
commit
6b835d3a53
191
main.py
191
main.py
@ -3,11 +3,14 @@ import re
|
|||||||
import smtplib
|
import smtplib
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import html
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
from email.mime.text import MIMEText
|
from email.mime.text import MIMEText
|
||||||
from email.utils import formataddr
|
from email.utils import formataddr
|
||||||
|
|
||||||
import feedparser
|
|
||||||
import requests
|
import requests
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
|
from urllib3.util.retry import Retry
|
||||||
|
|
||||||
requests.packages.urllib3.disable_warnings()
|
requests.packages.urllib3.disable_warnings()
|
||||||
|
|
||||||
@ -27,6 +30,98 @@ def write_log(content, level="INFO"):
|
|||||||
with open(f'./log/{time.strftime("%Y-%m", time.localtime(time.time()))}-update.log', 'a', encoding="utf-8") as f:
|
with open(f'./log/{time.strftime("%Y-%m", time.localtime(time.time()))}-update.log', 'a', encoding="utf-8") as f:
|
||||||
f.write(update_log)
|
f.write(update_log)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_urls(summary):
|
||||||
|
decoded = html.unescape(summary)
|
||||||
|
urls = []
|
||||||
|
for raw_url in re.findall(r"https?://[^\s\"'<>]+", decoded):
|
||||||
|
url = raw_url.strip().rstrip('.,;)')
|
||||||
|
if url not in urls:
|
||||||
|
urls.append(url)
|
||||||
|
return urls, decoded
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_url(urls, mode):
|
||||||
|
if mode == "v2ray":
|
||||||
|
for suffix in (".txt", ".json"):
|
||||||
|
for url in urls:
|
||||||
|
if url.lower().endswith(suffix):
|
||||||
|
return url
|
||||||
|
if mode == "clash":
|
||||||
|
for suffix in (".yaml", ".yml"):
|
||||||
|
for url in urls:
|
||||||
|
if url.lower().endswith(suffix):
|
||||||
|
return url
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_urls(urls, mode):
|
||||||
|
matched = []
|
||||||
|
suffixes = (".txt", ".json") if mode == "v2ray" else (".yaml", ".yml")
|
||||||
|
for suffix in suffixes:
|
||||||
|
for url in urls:
|
||||||
|
if url.lower().endswith(suffix) and url not in matched:
|
||||||
|
matched.append(url)
|
||||||
|
return matched
|
||||||
|
|
||||||
|
|
||||||
|
def _download_with_retry(urls):
|
||||||
|
if not urls:
|
||||||
|
return None, None
|
||||||
|
for url in urls:
|
||||||
|
for _ in range(3):
|
||||||
|
try:
|
||||||
|
req = requests.request(
|
||||||
|
"GET",
|
||||||
|
url,
|
||||||
|
verify=False,
|
||||||
|
timeout=20,
|
||||||
|
headers={"User-Agent": "Mozilla/5.0"},
|
||||||
|
)
|
||||||
|
except requests.RequestException as e:
|
||||||
|
print(f"请求 {url} 失败: {e}")
|
||||||
|
continue
|
||||||
|
if req.status_code in ok_code:
|
||||||
|
return req, url
|
||||||
|
return None, urls[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _build_session():
    """Create the ``requests`` session used for the downloads.

    The session retries GET requests (3 total / connect / read retries,
    backoff factor 1) on HTTP 429, 500, 502, 503 and 504, sends a
    ``Mozilla/5.0`` User-Agent, and routes both http and https traffic
    through the proxy named by the ``SUBSCRIBE_PROXY`` environment variable
    when that variable is set to a non-blank value.

    Returns:
        The configured ``requests.Session``.
    """
    retry_policy = Retry(
        total=3,
        connect=3,
        read=3,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET"],
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    session = requests.Session()
    for scheme in ("http://", "https://"):
        session.mount(scheme, retrying_adapter)
    session.headers.update({"User-Agent": "Mozilla/5.0"})

    proxy_url = os.environ.get("SUBSCRIBE_PROXY", "").strip()
    if proxy_url:
        # The same proxy endpoint serves both schemes.
        session.proxies.update(dict.fromkeys(("http", "https"), proxy_url))
    return session
|
||||||
|
|
||||||
|
|
||||||
|
def _download_candidates(session, urls):
|
||||||
|
if not urls:
|
||||||
|
return None, None
|
||||||
|
for url in urls:
|
||||||
|
try:
|
||||||
|
req = session.get(url, verify=False, timeout=20)
|
||||||
|
except requests.RequestException as e:
|
||||||
|
write_log(f"请求失败:{url} - {e}", "WARN")
|
||||||
|
continue
|
||||||
|
if req.status_code in ok_code:
|
||||||
|
return req, url
|
||||||
|
write_log(f"请求失败:{url} - {req.status_code}", "WARN")
|
||||||
|
return None, urls[0]
|
||||||
|
|
||||||
def get_subscribe_url():
|
def get_subscribe_url():
|
||||||
dirs = './subscribe'
|
dirs = './subscribe'
|
||||||
if not os.path.exists(dirs):
|
if not os.path.exists(dirs):
|
||||||
@ -35,38 +130,90 @@ def get_subscribe_url():
|
|||||||
if not os.path.exists(log_dir):
|
if not os.path.exists(log_dir):
|
||||||
os.makedirs(log_dir)
|
os.makedirs(log_dir)
|
||||||
|
|
||||||
rss = feedparser.parse('https://www.cfmem.com/feeds/posts/default?alt=rss')
|
|
||||||
entries = rss.get("entries")
|
|
||||||
if not entries:
|
|
||||||
write_log("更新失败!无法拉取原网站内容", "ERROR")
|
|
||||||
return
|
|
||||||
update_list = []
|
update_list = []
|
||||||
summary = entries[0].get("summary")
|
session = _build_session()
|
||||||
|
try:
|
||||||
|
rss_req = session.get(
|
||||||
|
'https://www.cfmem.com/feeds/posts/default?alt=rss',
|
||||||
|
timeout=20,
|
||||||
|
)
|
||||||
|
except requests.RequestException as ex:
|
||||||
|
write_log(f"更新失败!拉取 RSS 异常: {ex}", "ERROR")
|
||||||
|
return
|
||||||
|
|
||||||
|
if rss_req.status_code not in ok_code:
|
||||||
|
write_log(f"更新失败!无法拉取原网站内容 - {rss_req.status_code}", "ERROR")
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
root = ET.fromstring(rss_req.text)
|
||||||
|
except ET.ParseError as ex:
|
||||||
|
write_log(f"更新失败!RSS 解析失败: {ex}", "ERROR")
|
||||||
|
return
|
||||||
|
|
||||||
|
item = root.find("./channel/item")
|
||||||
|
if item is None:
|
||||||
|
write_log("更新失败!RSS 中未找到可用条目", "ERROR")
|
||||||
|
return
|
||||||
|
|
||||||
|
summary = item.findtext("description")
|
||||||
if not summary:
|
if not summary:
|
||||||
write_log("暂时没有可用的订阅更新", "WARN")
|
write_log("暂时没有可用的订阅更新", "WARN")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
urls, decoded_summary = _extract_urls(summary)
|
||||||
|
|
||||||
|
v2ray_url = _pick_url(urls, "v2ray")
|
||||||
|
clash_url = _pick_url(urls, "clash")
|
||||||
|
v2ray_candidates = _pick_urls(urls, "v2ray")
|
||||||
|
clash_candidates = _pick_urls(urls, "clash")
|
||||||
|
|
||||||
|
# 兼容旧页面结构,通用提取失败时再尝试历史规则
|
||||||
|
if not v2ray_url:
|
||||||
v2ray_list = re.findall(r">V2Ray/XRay -> (.*?)</span>", summary)
|
v2ray_list = re.findall(r">V2Ray/XRay -> (.*?)</span>", summary)
|
||||||
# 获取普通订阅链接
|
if not v2ray_list:
|
||||||
|
v2ray_list = re.findall(r">V2Ray/XRay -> (.*?)</span>", decoded_summary)
|
||||||
if any(v2ray_list):
|
if any(v2ray_list):
|
||||||
v2ray_url = v2ray_list[-1].replace('amp;', '')
|
v2ray_url = v2ray_list[-1].replace('amp;', '')
|
||||||
v2ray_req = requests.request("GET", v2ray_url, verify=False)
|
if v2ray_url not in v2ray_candidates:
|
||||||
v2ray_code = v2ray_req.status_code
|
v2ray_candidates.append(v2ray_url)
|
||||||
if v2ray_code not in ok_code:
|
|
||||||
write_log(f"获取 v2ray 订阅失败:{v2ray_url} - {v2ray_code}", "WARN")
|
if not clash_url:
|
||||||
else:
|
|
||||||
update_list.append(f"v2ray: {v2ray_code}")
|
|
||||||
with open(dirs + '/v2ray.txt', 'w', encoding="utf-8") as f:
|
|
||||||
f.write(v2ray_req.text)
|
|
||||||
clash_list = re.findall(r">clash -> (.*?)</span>", summary)
|
clash_list = re.findall(r">clash -> (.*?)</span>", summary)
|
||||||
# 获取clash订阅链接
|
if not clash_list:
|
||||||
|
clash_list = re.findall(r">clash -> (.*?)</span>", decoded_summary)
|
||||||
if any(clash_list) and not clash_list[-1].startswith("订阅地址生成失败"):
|
if any(clash_list) and not clash_list[-1].startswith("订阅地址生成失败"):
|
||||||
clash_url = clash_list[-1].replace('amp;', '')
|
clash_url = clash_list[-1].replace('amp;', '')
|
||||||
clash_req = requests.request("GET", clash_url, verify=False)
|
if clash_url not in clash_candidates:
|
||||||
clash_code = clash_req.status_code
|
clash_candidates.append(clash_url)
|
||||||
if clash_code not in ok_code:
|
|
||||||
write_log(f"获取 clash 订阅失败:{clash_url} - {clash_code}", "WARN")
|
# 获取普通订阅链接
|
||||||
|
if v2ray_url:
|
||||||
|
v2ray_req, used_v2ray_url = _download_candidates(session, v2ray_candidates)
|
||||||
|
if not v2ray_req:
|
||||||
|
cache_file = dirs + '/v2ray.txt'
|
||||||
|
if os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
|
||||||
|
update_list.append("v2ray: cache")
|
||||||
|
write_log(f"获取 v2ray 订阅失败,已保留本地缓存:{used_v2ray_url}", "WARN")
|
||||||
else:
|
else:
|
||||||
update_list.append(f"clash: {clash_code}")
|
write_log(f"获取 v2ray 订阅失败:{used_v2ray_url}", "WARN")
|
||||||
|
else:
|
||||||
|
update_list.append(f"v2ray: {v2ray_req.status_code}")
|
||||||
|
with open(dirs + '/v2ray.txt', 'w', encoding="utf-8") as f:
|
||||||
|
f.write(v2ray_req.text)
|
||||||
|
|
||||||
|
# 获取clash订阅链接
|
||||||
|
if clash_url and not clash_url.startswith("订阅地址生成失败"):
|
||||||
|
clash_req, used_clash_url = _download_candidates(session, clash_candidates)
|
||||||
|
if not clash_req:
|
||||||
|
cache_file = dirs + '/clash.yml'
|
||||||
|
if os.path.exists(cache_file) and os.path.getsize(cache_file) > 0:
|
||||||
|
update_list.append("clash: cache")
|
||||||
|
write_log(f"获取 clash 订阅失败,已保留本地缓存:{used_clash_url}", "WARN")
|
||||||
|
else:
|
||||||
|
write_log(f"获取 clash 订阅失败:{used_clash_url}", "WARN")
|
||||||
|
else:
|
||||||
|
update_list.append(f"clash: {clash_req.status_code}")
|
||||||
with open(dirs + '/clash.yml', 'w', encoding="utf-8") as f:
|
with open(dirs + '/clash.yml', 'w', encoding="utf-8") as f:
|
||||||
clash_content = clash_req.content.decode("utf-8")
|
clash_content = clash_req.content.decode("utf-8")
|
||||||
f.write(clash_content)
|
f.write(clash_content)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user