compliance/cleanup_large_files.sh
2025-08-07 17:14:40 +08:00

229 lines
5.4 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Large-file cleanup script.
# Cleans large files out of the Git repository, based on earlier scan results.
set -e

printf '%s\n' "🧹 Git大文件清理脚本" "===================="

# Guard: refuse to run unless the current directory is inside a Git repository.
git rev-parse --git-dir > /dev/null 2>&1 || {
  echo "❌ 当前目录不是Git仓库"
  exit 1
}
# Report the (hard-coded) findings of the earlier scan.
printf '%s\n' \
  "📋 发现的大文件问题:" \
  "- Git仓库大小: 1.2GB" \
  "- 主要问题: build/, dist/, 日志文件, 字体文件, 压缩包" \
  ""

# Individual files to untrack. Order matters: it is the order in which they
# are listed and removed later in the script.
LARGE_FILES_TO_REMOVE=()
# Entries annotated with the size recorded for them.
LARGE_FILES_TO_REMOVE+=(
  "history_local"   # 87.84MB
  "mvp.zip"         # 53.65MB
  "log.log"         # 3.25MB
  "log_dms1.txt"    # 2.32MB
  "log_dms.txt"     # 2.32MB
  "log_stage.txt"   # 2.07MB
  "归档.zip"        # 103MB
)
# One database file followed by two log files.
LARGE_FILES_TO_REMOVE+=("users.db" "dms.log" "post_output.log")

# Directories to untrack recursively.
DIRECTORIES_TO_REMOVE=("build/" "dist/")
# Print the human-readable size of a local regular file (5th column of
# `ls -lh`), or "N/A" when the file is absent from the working tree or the
# size cannot be read. The fallback is applied to the captured value because
# the exit status of the `ls | awk` pipeline is awk's and so never signals a
# missing file.
#   $1 - path of the file, relative to the current directory
file_size_label() {
  local path=$1
  local size=""
  if [[ -f "$path" ]]; then
    size=$(ls -lh -- "$path" 2>/dev/null | awk '{print $5}')
  fi
  printf '%s\n' "${size:-N/A}"
}

# Succeed when the Git index holds at least one entry matching the pathspec.
# Asking Git directly (instead of grepping `git ls-files` output) avoids two
# problems: non-ASCII paths are printed C-quoted by default (core.quotePath),
# so they never match a plain-text pattern, and regex metacharacters such as
# "." in a file name would match unrelated paths.
#   $1 - file or directory pathspec, relative to the current directory
is_tracked() {
  git ls-files --error-unmatch -- "$1" > /dev/null 2>&1
}

echo "⚠️ 将要从Git跟踪中移除以下文件和目录:"
echo ""
echo "📁 目录:"
for dir in "${DIRECTORIES_TO_REMOVE[@]}"; do
  if is_tracked "$dir"; then
    echo " - $dir"
  fi
done
echo ""
echo "📄 大文件:"
for file in "${LARGE_FILES_TO_REMOVE[@]}"; do
  if is_tracked "$file"; then
    echo " - $file ($(file_size_label "$file"))"
  fi
done
echo ""
echo "注意: 这只会移除Git跟踪不会删除本地文件"
echo ""
# Ask for a single-keystroke confirmation; anything other than y/Y cancels
# the run with exit status 1.
read -r -n 1 -p "是否继续清理? (y/N): "
echo
case "$REPLY" in
  [Yy]) ;;
  *)
    echo "❌ 操作已取消"
    exit 1
    ;;
esac
echo ""
echo "🔄 开始清理..."

# Tracked-ness is checked with `git ls-files --error-unmatch` rather than by
# grepping `git ls-files` output: Git prints non-ASCII paths C-quoted by
# default (core.quotePath), so such names never matched the grep, and regex
# metacharacters in a name ("." in "log.log") could match unrelated paths.
# git's own stderr is left visible so a failed removal explains itself.

# Untrack directories recursively; working-tree files are left in place.
for dir in "${DIRECTORIES_TO_REMOVE[@]}"; do
  if git ls-files --error-unmatch -- "$dir" > /dev/null 2>&1; then
    echo "📁 移除目录: $dir"
    git rm -r --cached -- "$dir" || echo "⚠️ $dir 移除失败"
  fi
done

# Untrack the individually listed large files.
for file in "${LARGE_FILES_TO_REMOVE[@]}"; do
  if git ls-files --error-unmatch -- "$file" > /dev/null 2>&1; then
    echo "📄 移除文件: $file"
    git rm --cached -- "$file" || echo "⚠️ $file 移除失败"
  fi
done
# Untrack every indexed path matching a Git pathspec, leaving working-tree
# files untouched. This is best-effort: a failure prints a warning and the
# script carries on.
#
# The index is queried through a pathspec instead of walking the disk with
# `find -exec git rm`: that spawned one git process per file found (including
# untracked files and anything under .git/), hid every error, and missed
# tracked files that had already been deleted locally. --ignore-unmatch makes
# "nothing matched" a success.
#   $1 - pathspec, relative to the current directory
untrack_matching() {
  git rm --cached --ignore-unmatch -- "$1" || echo "⚠️ $1 移除失败"
}

# With the ":(glob)" magic, "**/" matches in every directory including the
# current one, and "*" does not cross "/" - i.e. a basename match at any depth.

# Python bytecode files.
echo "🐍 移除Python编译文件..."
untrack_matching ':(glob)**/*.pyc'

# Log files.
echo "📝 移除日志文件..."
untrack_matching ':(glob)**/*.log'

# Database files.
echo "🗄️ 移除数据库文件..."
untrack_matching ':(glob)**/*.db'

# macOS Finder metadata files.
echo "🍎 移除macOS系统文件..."
untrack_matching ':(glob)**/.DS_Store'
echo ""
echo "📊 清理后的状态:"
git status --short
echo ""
echo "🔄 更新.gitignore文件..."

# Append the cleanup ignore rules to ./.gitignore exactly once.
#
# The first line of the rule block doubles as a marker: when it is already
# present as a whole line, the file is left untouched, so re-running the
# script no longer piles up duplicate rules. If the existing file does not end
# with a newline, one is added first so the first appended line cannot fuse
# with the last existing rule.
update_gitignore() {
  local marker="# 大文件和构建产物"

  if grep -qxF -- "$marker" .gitignore 2>/dev/null; then
    echo "ℹ️ .gitignore已包含这些规则,跳过追加"
    return 0
  fi

  # Command substitution strips a trailing newline, so a non-empty result
  # means the last byte of the file is not a newline.
  if [[ -s .gitignore && -n "$(tail -c 1 .gitignore)" ]]; then
    echo >> .gitignore
  fi

  # The here-doc body is written verbatim (quoted delimiter, no expansion) and
  # must stay at column 0 so no leading whitespace ends up in the patterns.
  cat >> .gitignore << 'EOF'
# 大文件和构建产物
build/
dist/
*.zip
*.tar.gz
*.rar
*.7z
# 日志文件
*.log
*.log.*
dms.log
post_output.log
# 数据库文件
*.db
*.sqlite
*.sqlite3
users.db
# Python编译文件
*.pyc
*.pyo
__pycache__/
# 系统文件
.DS_Store
Thumbs.db
# 临时文件
*.tmp
*.temp
*~
# 大的字体文件(如果不需要版本控制)
# assets/fonts/*.ttc
# assets/fonts/*.otf
# 二进制文件
*.bin
*.exe
*.pkg
# 历史文件
history_local
mvp.zip
归档.zip
EOF
  echo "✅ .gitignore已更新"
}

update_gitignore
echo ""
# Offer to commit the staged removals together with the .gitignore update.
# EOF or a read error is treated as "no" rather than letting `set -e` abort
# the script without any guidance.
read -p "是否提交这些更改? (y/N): " -n 1 -r || REPLY=""
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
  echo "💾 提交更改..."
  git add .gitignore
  # Each -m becomes its own paragraph, so the subject line is separated from
  # the body by a blank line and stays a one-line summary in `git log`.
  # A failed commit (for example, nothing staged) is reported explicitly.
  if ! git commit \
    -m "Remove large files and build artifacts from Git tracking" \
    -m "- Remove build/ and dist/ directories (>500MB total)
- Remove large log files (log.log, log_dms*.txt, etc.)
- Remove binary files (history_local, mvp.zip, 归档.zip)
- Remove Python compiled files (*.pyc)
- Remove database files (*.db)
- Remove system files (.DS_Store)
- Update .gitignore to prevent future tracking of these files" \
    -m "This reduces repository size significantly and follows best practices
for version control by excluding generated/temporary files."; then
    echo "❌ 提交失败" >&2
    exit 1
  fi
  echo ""
  echo "✅ 更改已提交"

  # Estimated effect of the cleanup.
  # NOTE(review): `git rm --cached` only stops tracking; blobs already in
  # history stay in the repository until history is rewritten, so the size
  # figures below are optimistic - confirm the wording with the script owner.
  echo ""
  echo "📈 清理效果预估:"
  echo "- 移除的大文件总计: ~500MB+"
  echo "- Git仓库大小将显著减少"
  echo "- 远程仓库在推送后会更新"
  echo ""
  read -p "是否推送到远程仓库? (y/N): " -n 1 -r || REPLY=""
  echo
  if [[ $REPLY =~ ^[Yy]$ ]]; then
    echo "🚀 推送到远程..."
    # Report a failed push explicitly; the commit itself is already in place.
    if ! git push; then
      echo "❌ 推送失败,请稍后手动执行: git push" >&2
      exit 1
    fi
    echo "✅ 已推送到远程仓库"
    echo ""
    echo "🎉 清理完成!"
    echo ""
    echo "💡 后续建议:"
    echo "1. 其他开发者需要拉取更新: git pull"
    echo "2. 如果需要进一步减小仓库大小,可以考虑:"
    echo " git gc --aggressive --prune=now"
    echo "3. 对于大的字体文件考虑使用Git LFS"
  else
    echo "⚠️ 记得稍后推送: git push"
  fi
else
  # Nothing was committed: show the manual follow-up commands.
  echo "⚠️ 更改未提交,记得稍后提交:"
  echo " git add .gitignore"
  echo " git commit -m 'Remove large files from Git tracking'"
  echo " git push"
fi
# Closing summary of what the cleanup did.
printf '%s\n' \
  "" \
  "🎯 清理总结:" \
  "- 移除了build/和dist/目录的跟踪" \
  "- 移除了大的日志文件和压缩包" \
  "- 移除了Python编译文件和数据库文件" \
  "- 更新了.gitignore防止未来跟踪这些文件" \
  "- 本地文件仍然存在只是不再被Git跟踪"