自动制作屏蔽词脚本
使用到的仓库: 57ing/Sensitive-word(屏蔽词库)、gfwlist/gfwlist(GFW 列表)
import requests
import os
# Clean the working folder.
# Create ./source first: on a fresh run it does not exist yet, the `cd` below
# would fail, and the download step that writes into ./source would raise
# FileNotFoundError.
os.makedirs("source", exist_ok=True)
# Remove every entry matched by the shell glob ./* (dotfiles are not matched).
os.system("cd source && rm -rf ./*")
# Download the block-word archive (zip of the repository's master branch).
block_word_url = "https://codeload.github.com/57ing/Sensitive-word/zip/refs/heads/master"
# The timeout keeps the script from hanging forever on a stalled connection;
# the context manager releases the streamed connection once the copy is done.
with requests.get(block_word_url, stream=True, timeout=60) as response:
    # Abort on an HTTP error instead of saving the error body as a zip file.
    response.raise_for_status()
    with open("./source/block_words.zip", 'wb') as f:
        # Stream to disk in small chunks so the archive is never held in memory.
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
# Unpack the block-word archive and flatten it into ./source.
# The commands run strictly in this order; exit codes are not checked.
unpack_commands = (
    # Extract the archive next to itself, then delete the archive.
    "unzip ./source/block_words.zip -d ./source/",
    "rm ./source/block_words.zip",
    # Drop the files that are not word lists.
    "cd ./source/Sensitive-word-master && rm README.md key.txt 敏感词库表统计.xlsx 敏感词库表统计.txt",
    # Move the remaining files up into ./source and remove the extracted folder.
    "cd ./source/Sensitive-word-master && mv ./* ..",
    "cd ./source && rm -rf ./Sensitive-word-master",
)
for shell_command in unpack_commands:
    os.system(shell_command)
# Convert the block-word files' encoding: every entry in ./source is read as
# GB18030 and then rewritten in place as UTF-8.
for entry_name in os.listdir("./source"):
    entry_path = f"./source/{entry_name}"
    # Read the whole file first; it is closed before being reopened for writing.
    with open(entry_path, 'r', encoding="gb18030") as src:
        text = src.read()
    with open(entry_path, 'w', encoding="utf-8") as dst:
        dst.write(text)
# Download the GFW list (the file is base64-decoded below).
gfw_url = "https://raw.githubusercontent.com/gfwlist/gfwlist/refs/heads/master/gfwlist.txt"
# The timeout prevents an indefinite hang; the context manager releases the
# streamed connection once the copy is done.
with requests.get(gfw_url, stream=True, timeout=60) as response:
    # Abort on an HTTP error, matching the block-word download; otherwise an
    # error page would be saved and fed to the base64 decoder.
    response.raise_for_status()
    with open("./source/gfwlist_encode.txt", 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
# Decode with the system base64 tool, then delete the encoded copy.
os.system("cd ./source && base64 -d gfwlist_encode.txt > gfwlist_decode.txt")
os.system("cd ./source && rm gfwlist_encode.txt")
# Filter the GFW list.
with open("./source/gfwlist_decode.txt", "r", encoding="utf-8") as decoded:
    rules = [row for row in decoded.read().split("\n") if row != ""]
# Only rules that start with one of these markers are kept, with the marker
# stripped. "||" is tried before "|" so that both bars are removed.
markers = ("||", "|", ".")
entries = []
for rule in rules:
    # Rules containing a wildcard are skipped entirely.
    if "*" in rule:
        continue
    for marker in markers:
        if rule.startswith(marker):
            entries.append(rule[len(marker):])
            break
# Append one kept entry per line to gfwlist.txt.
with open("./source/gfwlist.txt", "a", encoding="utf-8") as out:
    out.writelines(entry + "\n" for entry in entries)
# The decoded intermediate file is no longer needed.
os.system("cd ./source && rm gfwlist_decode.txt")
# Merge every file in ./source into block-words.txt, deleting each input
# once its non-empty lines have been appended.
output_name = "block-words.txt"
# List the inputs before the output file is opened (and possibly created).
file_list = os.listdir("./source")
# Open the output once instead of reopening it for every input file.
with open(f"./source/{output_name}", 'a', encoding="utf-8") as f_out:
    for file_item in file_list:
        # Never treat the merged output as an input: it would be appended to
        # itself and then deleted.
        if file_item == output_name:
            continue
        with open(f"./source/{file_item}", 'r', encoding="utf-8") as f_in:
            content = [i for i in f_in.read().split("\n") if i != ""]
        f_out.writelines(line + "\n" for line in content)
        # os.remove takes the name literally, so names containing spaces or
        # shell metacharacters are deleted correctly (an unquoted `rm` through
        # the shell would split or interpret them), and a failure raises
        # instead of being silently ignored.
        os.remove(f"./source/{file_item}")
评论已关闭