From 6d222792b2d3588c4286fd980ed3c3ebcce1ef2e Mon Sep 17 00:00:00 2001 From: BBear0115 <2267057785@qq.com> Date: Mon, 13 Apr 2026 00:01:17 +0800 Subject: [PATCH 1/4] feat: add comprehensive deployment scripts with SSL certificate handling - Add deploy.sh: main deployment script with automatic SSL certificate fixes - Add scripts/fix_ssl_server.sh: server-side SSL certificate repair - Add scripts/diagnose.sh: diagnostic tool for deployment issues - Add scripts/README.md: detailed documentation and troubleshooting guide - Update CLAUDE.md: document GitHub Potential Users feature and public site Key features: - Automatic Python SSL certificate detection and repair (macOS/Linux) - Detailed error handling for 8 common deployment issues - Pre-flight checks and post-deployment verification - Support for Chinese error messages and fix instructions - Color-coded logging for better readability --- CLAUDE.md | 10 +- deploy.sh | 704 ++++++++++++++++++++++++++++++++++++++ scripts/README.md | 424 +++++++++++++++++++++++ scripts/diagnose.sh | 371 ++++++++++++++++++++ scripts/fix_ssl_server.sh | 113 ++++++ 5 files changed, 1620 insertions(+), 2 deletions(-) create mode 100755 deploy.sh create mode 100644 scripts/README.md create mode 100755 scripts/diagnose.sh create mode 100755 scripts/fix_ssl_server.sh diff --git a/CLAUDE.md b/CLAUDE.md index d12820e..af5e04a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -31,11 +31,14 @@ OpenCMO is an open-source AI Chief Marketing Officer — a multi-agent system fo - `report_pipeline.py` — Multi-agent deep report pipeline (6 phases): Reflection → Insight Distiller → Outline Planner → Section Writers → Section Grader → Report Synthesizer. Uses `asyncio.Semaphore` to limit concurrent LLM calls (`_MAX_CONCURRENT_LLM_CALLS = 5`) - `llm.py` — Centralized LLM client with per-request key isolation via ContextVar. Solves BYOK concurrency bug. Key resolution: ContextVar → os.environ → DB settings. 
Includes retry logic with exponential backoff - `background/` — Background task queue system for long-running operations (scans, reports). Tasks have status tracking and progress events +- `agents/github.py` + `tools/github_discovery.py` — GitHub Potential Users (developer outreach): discovers users from seed profiles via social graph, enriches profiles in background (bio, repos, languages, stars), scores by outreach priority, generates personalized messages (email/Twitter/GitHub issue). All outreach queued for approval, never auto-sent +- `services/github_service.py` + `storage/github.py` — GitHub lead management: CRUD for leads, batch scoring, opt-out tracking, enrichment status ### Frontend layers -- `pages/` — Route-level components (Dashboard, SEO, GEO, SERP, Community, Graph, Chat, Approvals, Monitors) -- `components/` — Organized by domain: `charts/` (recharts + react-force-graph-3d), `chat/` (SSE streaming), `monitors/`, `auth/`, `layout/`, `dashboard/`, `project/` +- `pages/` — Route-level components (Dashboard, SEO, GEO, SERP, Community, Graph, Chat, Approvals, Monitors, GitHubLeadsPage). Also includes public marketing pages: `LandingPage.tsx`, `BlogPage.tsx` +- `components/` — Organized by domain: `charts/` (recharts + react-force-graph-3d), `chat/` (SSE streaming), `monitors/`, `auth/`, `layout/`, `dashboard/`, `project/`, `marketing/` (PublicSiteHeader, SectionReveal for landing page) +- `content/marketing.ts` — Centralized content for public site: landing page copy, blog articles, FAQs, navigation items. Supports i18n - `hooks/` — TanStack Query hooks per domain (`useProjects`, `useSeoData`, `useGraphData`, etc.). Stale time 30s, retry 1. `useChat` manages local state + SSE via async generator - `api/client.ts` — `apiFetch()` adds Bearer token, dispatches `opencmo:unauthorized` on 401. 
Domain modules export typed wrappers around `apiJson()` - `i18n/` — React context-based EN + ZH translations @@ -53,6 +56,8 @@ OpenCMO is an open-source AI Chief Marketing Officer — a multi-agent system fo - **Frontend proxies `/api` to `http://127.0.0.1:8080` in dev (vite.config.ts) - **Report generation optimization**: Data aggregation parallelized with `asyncio.gather()`. LLM concurrency limited to 5 simultaneous calls via `asyncio.Semaphore`. Grader threshold at 3.5/5.0 with max 1 retry to balance quality and speed - **BYOK (Bring Your Own Key)**: Per-request API keys isolated via ContextVar in `llm.py`. Never use `os.environ` for request-scoped keys to avoid concurrency bugs +- **Public marketing site**: Landing page and blog served at root `/` for unauthenticated users. Static fallback HTML for crawlers (SEO). Authenticated app at `/app/*`. Content centralized in `frontend/src/content/marketing.ts` +- **GitHub Potential Users workflow**: 1) Discover from seed username → 2) Background enrichment (async) → 3) Score leads by priority → 4) Generate personalized outreach → 5) Human approval required before sending ## Commands @@ -104,6 +109,7 @@ Key optional variables — see `.env.example` for full list: - `DATAFORSEO_LOGIN/PASSWORD` — SERP tracking - `OPENCMO_AUTO_PUBLISH=1` + Reddit/Twitter credentials — auto-publishing - `OPENCMO_SMTP_*` + `OPENCMO_REPORT_EMAIL` — email reports +- `GITHUB_TOKEN` — GitHub API access for Potential Users feature (discovery, enrichment, outreach) ## Performance Optimization Guidelines diff --git a/deploy.sh b/deploy.sh new file mode 100755 index 0000000..2879870 --- /dev/null +++ b/deploy.sh @@ -0,0 +1,704 @@ +#!/bin/bash + +################################################################################ +# OpenCMO Deployment Script +# Handles Python SSL certificates, dependencies, and deployment to BWG server +################################################################################ + +set -e # Exit on error (disabled in 
sections with custom error handling) + +# Color codes for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Deployment configuration +REMOTE_HOST="97.64.16.217" +REMOTE_PORT="2222" +REMOTE_USER="root" +REMOTE_PATH="/opt/OpenCMO" +SERVICE_NAME="opencmo" + +################################################################################ +# Logging functions +################################################################################ + +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +log_step() { + echo -e "\n${GREEN}==>${NC} $1" +} + +################################################################################ +# Error handling functions +################################################################################ + +handle_ssl_error() { + log_error "SSL 证书验证失败" + echo "可能的原因:" + echo " 1. Python 未安装系统 CA 证书" + echo " 2. certifi 包版本过旧" + echo " 3. 系统时间不正确" + echo "" + echo "尝试修复..." + + # 尝试修复 1: 安装/更新 certifi + log_info "更新 certifi 包..." + if pip install --upgrade certifi pip setuptools 2>/dev/null; then + log_success "certifi 更新成功" + else + log_warning "certifi 更新失败,尝试其他方法" + fi + + # 尝试修复 2: 安装系统 CA 证书到 Python + if [[ "$OSTYPE" == "darwin"* ]]; then + log_info "检测到 macOS,运行 Install Certificates.command..." + PYTHON_VERSION=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2) + CERT_SCRIPT="/Applications/Python ${PYTHON_VERSION}/Install Certificates.command" + if [ -f "$CERT_SCRIPT" ]; then + bash "$CERT_SCRIPT" 2>/dev/null || log_warning "证书安装脚本执行失败" + else + log_warning "未找到 Python 证书安装脚本" + log_info "尝试手动安装证书..." + pip install --upgrade certifi + python3 -c "import certifi; print(certifi.where())" + fi + elif [[ "$OSTYPE" == "linux-gnu"* ]]; then + log_info "检测到 Linux,更新系统 CA 证书..." 
+ if command -v update-ca-certificates &> /dev/null; then + sudo update-ca-certificates 2>/dev/null || log_warning "CA 证书更新失败" + fi + fi + + # 尝试修复 3: 设置环境变量 + log_info "配置 SSL 环境变量..." + export REQUESTS_CA_BUNDLE=$(python3 -c "import certifi; print(certifi.where())" 2>/dev/null || echo "") + export SSL_CERT_FILE=$REQUESTS_CA_BUNDLE + + if [ -n "$REQUESTS_CA_BUNDLE" ]; then + log_success "SSL 证书路径: $REQUESTS_CA_BUNDLE" + else + log_error "无法获取 SSL 证书路径" + return 1 + fi +} + +handle_pip_install_error() { + local exit_code=$1 + log_error "依赖安装失败 (退出码: $exit_code)" + + echo "可能的原因:" + echo " 1. 网络连接问题" + echo " 2. PyPI 镜像源不可用" + echo " 3. 依赖冲突" + echo " 4. 磁盘空间不足" + echo "" + + # 检查磁盘空间 + log_info "检查磁盘空间..." + df -h . | tail -1 + + # 尝试使用国内镜像源 + log_info "尝试使用清华大学 PyPI 镜像源..." + if pip install -e ".[all]" -i https://pypi.tuna.tsinghua.edu.cn/simple 2>&1 | tee /tmp/pip_install.log; then + log_success "使用镜像源安装成功" + return 0 + fi + + log_warning "镜像源安装失败,尝试分步安装..." + + # 分步安装核心依赖 + local core_deps=("fastapi" "uvicorn" "aiosqlite" "openai" "anthropic") + for dep in "${core_deps[@]}"; do + log_info "安装 $dep..." + if ! pip install "$dep" 2>/dev/null; then + log_error "无法安装 $dep" + return 1 + fi + done + + log_info "安装项目(跳过可选依赖)..." + pip install -e . 2>&1 | tee -a /tmp/pip_install.log +} + +handle_git_error() { + local exit_code=$1 + log_error "Git 操作失败 (退出码: $exit_code)" + + echo "可能的原因:" + echo " 1. 本地有未提交的更改" + echo " 2. 远程仓库不可达" + echo " 3. 分支冲突" + echo "" + + log_info "检查 Git 状态..." + git status + + echo "" + echo "建议操作:" + echo " 1. 提交或暂存本地更改: git add . && git commit -m 'your message'" + echo " 2. 或者放弃本地更改: git reset --hard HEAD" + echo " 3. 检查远程连接: git remote -v" +} + +handle_ssh_error() { + local exit_code=$1 + log_error "SSH 连接失败 (退出码: $exit_code)" + + echo "可能的原因:" + echo " 1. SSH 密钥未配置" + echo " 2. 服务器不可达" + echo " 3. 端口被防火墙阻止" + echo " 4. 用户权限不足" + echo "" + + log_info "测试 SSH 连接..." 
+ if ssh -p "$REMOTE_PORT" -o ConnectTimeout=10 "$REMOTE_USER@$REMOTE_HOST" "echo 'SSH 连接成功'" 2>&1; then + log_success "SSH 连接正常" + else + log_error "SSH 连接失败" + echo "" + echo "修复步骤:" + echo " 1. 检查 SSH 密钥: ssh-add -l" + echo " 2. 添加密钥: ssh-add ~/.ssh/id_rsa" + echo " 3. 测试连接: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST" + echo " 4. 检查防火墙: telnet $REMOTE_HOST $REMOTE_PORT" + return 1 + fi +} + +handle_frontend_build_error() { + local exit_code=$1 + log_error "前端构建失败 (退出码: $exit_code)" + + echo "可能的原因:" + echo " 1. Node.js 版本不兼容" + echo " 2. 依赖未安装或版本冲突" + echo " 3. 内存不足 (需要 >2GB)" + echo " 4. TypeScript 类型错误" + echo "" + + log_info "检查 Node.js 版本..." + node --version + npm --version + + log_info "检查可用内存..." + if [[ "$OSTYPE" == "darwin"* ]]; then + vm_stat | grep "Pages free" + else + free -h | grep Mem + fi + + echo "" + echo "修复步骤:" + echo " 1. 清理缓存: cd frontend && rm -rf node_modules dist && npm install" + echo " 2. 增加 Node 内存: export NODE_OPTIONS='--max-old-space-size=4096'" + echo " 3. 检查类型错误: npm run type-check" + echo " 4. 跳过类型检查构建: npm run build -- --mode production" +} + +handle_service_error() { + local exit_code=$1 + log_error "服务启动失败 (退出码: $exit_code)" + + echo "可能的原因:" + echo " 1. 端口 8080 已被占用" + echo " 2. 环境变量未配置" + echo " 3. 数据库文件损坏" + echo " 4. 依赖缺失" + echo "" + + log_info "检查服务状态..." + ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl status $SERVICE_NAME" || true + + log_info "检查服务日志..." + ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "journalctl -u $SERVICE_NAME -n 50 --no-pager" || true + + echo "" + echo "修复步骤:" + echo " 1. 检查端口占用: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'lsof -i:8080'" + echo " 2. 检查配置文件: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'cat $REMOTE_PATH/.env'" + echo " 3. 手动启动测试: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'cd $REMOTE_PATH && opencmo-web'" + echo " 4. 
重置数据库: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'rm ~/.opencmo/data.db'" +} + +################################################################################ +# Pre-flight checks +################################################################################ + +preflight_checks() { + log_step "执行部署前检查" + + local has_error=0 + + # Check if we're in a git repository + if ! git rev-parse --git-dir > /dev/null 2>&1; then + log_error "当前目录不是 Git 仓库" + has_error=1 + else + log_success "Git 仓库检查通过" + fi + + # Check if we're on main branch + local current_branch=$(git branch --show-current) + if [ "$current_branch" != "main" ]; then + log_warning "当前分支是 '$current_branch',不是 'main'" + read -p "是否继续部署? (y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + log_info "部署已取消" + exit 0 + fi + else + log_success "分支检查通过 (main)" + fi + + # Check for uncommitted changes + if ! git diff-index --quiet HEAD --; then + log_warning "存在未提交的更改" + git status --short + read -p "是否继续部署? (y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + log_info "部署已取消" + exit 0 + fi + else + log_success "工作区检查通过 (无未提交更改)" + fi + + # Check Python version + if ! command -v python3 &> /dev/null; then + log_error "未找到 python3" + has_error=1 + else + local python_version=$(python3 --version | cut -d' ' -f2) + log_success "Python 版本: $python_version" + fi + + # Check Node.js version + if ! command -v node &> /dev/null; then + log_error "未找到 node" + has_error=1 + else + local node_version=$(node --version) + log_success "Node.js 版本: $node_version" + fi + + # Check SSH connectivity + log_info "测试 SSH 连接到服务器..." + if ssh -p "$REMOTE_PORT" -o ConnectTimeout=10 "$REMOTE_USER@$REMOTE_HOST" "echo 'SSH OK'" > /dev/null 2>&1; then + log_success "SSH 连接正常" + else + log_error "无法连接到服务器" + handle_ssh_error 1 + has_error=1 + fi + + # Check SSL certificates + log_info "检查 Python SSL 证书..." 
+ if python3 -c "import ssl; import certifi; print(certifi.where())" > /dev/null 2>&1; then + local cert_path=$(python3 -c "import certifi; print(certifi.where())") + log_success "SSL 证书路径: $cert_path" + else + log_warning "SSL 证书检查失败,将在安装时修复" + handle_ssl_error || log_warning "SSL 证书修复失败,继续部署..." + fi + + if [ $has_error -eq 1 ]; then + log_error "部署前检查失败,请修复上述问题后重试" + exit 1 + fi + + log_success "所有部署前检查通过" +} + +################################################################################ +# Fix SSL certificates +################################################################################ + +fix_ssl_certificates() { + log_step "修复 Python SSL 证书" + + set +e # Disable exit on error for this section + + # Update certifi + log_info "更新 certifi 包..." + if pip install --upgrade certifi pip setuptools 2>&1 | tee /tmp/certifi_install.log; then + log_success "certifi 更新成功" + else + log_warning "certifi 更新失败" + cat /tmp/certifi_install.log + handle_ssl_error + fi + + # Platform-specific certificate installation + if [[ "$OSTYPE" == "darwin"* ]]; then + log_info "macOS 系统,安装证书到 Python..." + + # Find Python installation + PYTHON_VERSION=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2) + CERT_SCRIPT="/Applications/Python ${PYTHON_VERSION}/Install Certificates.command" + + if [ -f "$CERT_SCRIPT" ]; then + log_info "运行 Python 证书安装脚本..." + bash "$CERT_SCRIPT" 2>&1 | tee /tmp/cert_install.log || log_warning "证书安装脚本执行失败" + else + log_warning "未找到证书安装脚本: $CERT_SCRIPT" + log_info "使用 certifi 作为证书源..." + fi + + elif [[ "$OSTYPE" == "linux-gnu"* ]]; then + log_info "Linux 系统,更新系统证书..." + if command -v update-ca-certificates &> /dev/null; then + sudo update-ca-certificates 2>&1 | tee /tmp/cert_update.log || log_warning "证书更新失败" + fi + fi + + # Set environment variables + log_info "配置 SSL 环境变量..." 
+ CERT_PATH=$(python3 -c "import certifi; print(certifi.where())" 2>/dev/null) + + if [ -n "$CERT_PATH" ] && [ -f "$CERT_PATH" ]; then + export REQUESTS_CA_BUNDLE="$CERT_PATH" + export SSL_CERT_FILE="$CERT_PATH" + export CURL_CA_BUNDLE="$CERT_PATH" + + log_success "SSL 证书配置完成" + log_info "证书路径: $CERT_PATH" + + # Add to shell profile for persistence + if [ -f ~/.zshrc ]; then + if ! grep -q "REQUESTS_CA_BUNDLE" ~/.zshrc; then + echo "" >> ~/.zshrc + echo "# OpenCMO SSL certificates" >> ~/.zshrc + echo "export REQUESTS_CA_BUNDLE=\"$CERT_PATH\"" >> ~/.zshrc + echo "export SSL_CERT_FILE=\"$CERT_PATH\"" >> ~/.zshrc + log_info "已添加环境变量到 ~/.zshrc" + fi + fi + + else + log_error "无法获取有效的证书路径" + return 1 + fi + + # Test SSL connection + log_info "测试 SSL 连接..." + if python3 -c "import urllib.request; urllib.request.urlopen('https://pypi.org')" 2>/dev/null; then + log_success "SSL 连接测试通过" + else + log_warning "SSL 连接测试失败,但继续部署..." + fi + + set -e # Re-enable exit on error +} + +################################################################################ +# Install dependencies +################################################################################ + +install_dependencies() { + log_step "安装 Python 依赖" + + set +e # Disable exit on error + + log_info "安装项目依赖 (包含所有可选依赖)..." + if pip install -e ".[all]" 2>&1 | tee /tmp/pip_install.log; then + log_success "依赖安装成功" + else + local exit_code=$? + log_error "依赖安装失败" + cat /tmp/pip_install.log + handle_pip_install_error $exit_code + + # Check if installation succeeded after retry + if [ $? -ne 0 ]; then + log_error "依赖安装失败,无法继续部署" + exit 1 + fi + fi + + # Initialize crawl4ai + log_info "初始化 crawl4ai..." + if command -v crawl4ai-setup &> /dev/null; then + if crawl4ai-setup 2>&1 | tee /tmp/crawl4ai_setup.log; then + log_success "crawl4ai 初始化成功" + else + log_warning "crawl4ai 初始化失败,但继续部署..." 
+ cat /tmp/crawl4ai_setup.log + fi + else + log_warning "未找到 crawl4ai-setup 命令,跳过初始化" + fi + + set -e # Re-enable exit on error +} + +################################################################################ +# Build frontend +################################################################################ + +build_frontend() { + log_step "构建前端" + + if [ ! -d "frontend" ]; then + log_error "未找到 frontend 目录" + exit 1 + fi + + cd frontend + + set +e # Disable exit on error + + # Install npm dependencies + log_info "安装 npm 依赖..." + if npm install 2>&1 | tee /tmp/npm_install.log; then + log_success "npm 依赖安装成功" + else + log_error "npm 依赖安装失败" + cat /tmp/npm_install.log + cd .. + exit 1 + fi + + # Build frontend + log_info "构建前端 (这可能需要几分钟)..." + + # Increase Node.js memory limit + export NODE_OPTIONS="--max-old-space-size=4096" + + if npm run build 2>&1 | tee /tmp/npm_build.log; then + log_success "前端构建成功" + else + local exit_code=$? + log_error "前端构建失败" + cat /tmp/npm_build.log + handle_frontend_build_error $exit_code + cd .. + exit 1 + fi + + # Verify build output + if [ ! -d "dist" ] || [ -z "$(ls -A dist)" ]; then + log_error "构建输出目录为空" + cd .. + exit 1 + fi + + log_success "前端构建完成,输出目录: frontend/dist" + + cd .. + + set -e # Re-enable exit on error +} + +################################################################################ +# Deploy to server +################################################################################ + +deploy_to_server() { + log_step "部署到服务器" + + set +e # Disable exit on error + + # Push code to git + log_info "推送代码到 Git 仓库..." + if git push origin main 2>&1 | tee /tmp/git_push.log; then + log_success "代码推送成功" + else + local exit_code=$? + log_warning "代码推送失败" + cat /tmp/git_push.log + handle_git_error $exit_code + + read -p "是否继续部署? (y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + log_info "部署已取消" + exit 0 + fi + fi + + # Deploy backend + log_info "部署后端代码到服务器..." 
+ + local deploy_cmd=" + set -e + cd $REMOTE_PATH || exit 1 + echo '拉取最新代码...' + git pull origin main || exit 1 + echo '安装依赖...' + pip install -e '.[all]' -q || exit 1 + echo '重启服务...' + systemctl restart $SERVICE_NAME || exit 1 + sleep 2 + echo '检查服务状态...' + systemctl is-active $SERVICE_NAME || exit 1 + " + + if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "$deploy_cmd" 2>&1 | tee /tmp/deploy_backend.log; then + log_success "后端部署成功" + else + local exit_code=$? + log_error "后端部署失败" + cat /tmp/deploy_backend.log + handle_service_error $exit_code + exit 1 + fi + + # Deploy frontend + log_info "部署前端静态文件到服务器..." + + if rsync -avz --delete frontend/dist/ "$REMOTE_USER@$REMOTE_HOST:$REMOTE_PATH/frontend/dist/" -e "ssh -p $REMOTE_PORT" 2>&1 | tee /tmp/deploy_frontend.log; then + log_success "前端部署成功" + else + log_error "前端部署失败" + cat /tmp/deploy_frontend.log + exit 1 + fi + + set -e # Re-enable exit on error +} + +################################################################################ +# Verify deployment +################################################################################ + +verify_deployment() { + log_step "验证部署" + + set +e # Disable exit on error + + # Check service status + log_info "检查服务状态..." + if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl is-active $SERVICE_NAME" > /dev/null 2>&1; then + log_success "服务运行正常" + else + log_error "服务未运行" + ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl status $SERVICE_NAME" + exit 1 + fi + + # Check HTTP endpoint + log_info "检查 HTTP 端点..." 
+ local health_check=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8080/api/v1/health" 2>/dev/null) + + if [ "$health_check" = "200" ]; then + log_success "健康检查通过 (HTTP 200)" + else + log_warning "健康检查返回: HTTP $health_check" + fi + + # Show recent logs + log_info "最近的服务日志:" + ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "journalctl -u $SERVICE_NAME -n 20 --no-pager" + + set -e # Re-enable exit on error + + log_success "部署验证完成" +} + +################################################################################ +# Main deployment flow +################################################################################ + +main() { + echo "================================" + echo "OpenCMO 部署脚本" + echo "================================" + echo "" + + # Parse command line arguments + local skip_checks=0 + local skip_frontend=0 + local skip_backend=0 + + while [[ $# -gt 0 ]]; do + case $1 in + --skip-checks) + skip_checks=1 + shift + ;; + --skip-frontend) + skip_frontend=1 + shift + ;; + --skip-backend) + skip_backend=1 + shift + ;; + --help) + echo "用法: $0 [选项]" + echo "" + echo "选项:" + echo " --skip-checks 跳过部署前检查" + echo " --skip-frontend 跳过前端构建" + echo " --skip-backend 跳过后端部署" + echo " --help 显示此帮助信息" + exit 0 + ;; + *) + log_error "未知选项: $1" + echo "使用 --help 查看帮助" + exit 1 + ;; + esac + done + + # Run deployment steps + if [ $skip_checks -eq 0 ]; then + preflight_checks + else + log_warning "跳过部署前检查" + fi + + fix_ssl_certificates + + if [ $skip_backend -eq 0 ]; then + install_dependencies + else + log_warning "跳过后端依赖安装" + fi + + if [ $skip_frontend -eq 0 ]; then + build_frontend + else + log_warning "跳过前端构建" + fi + + deploy_to_server + verify_deployment + + echo "" + log_success "=========================================" + log_success "部署完成!" 
+ log_success "=========================================" + echo "" + echo "访问地址: https://aidcmo.com" + echo "" + echo "有用的命令:" + echo " 查看日志: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'journalctl -u $SERVICE_NAME -f'" + echo " 重启服务: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl restart $SERVICE_NAME'" + echo " 检查状态: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl status $SERVICE_NAME'" + echo "" +} + +# Run main function +main "$@" diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..04880a9 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,424 @@ +# OpenCMO 部署脚本使用指南 + +本目录包含 OpenCMO 项目的部署和诊断脚本,重点解决 Python SSL 证书问题。 + +## 脚本列表 + +### 1. `deploy.sh` - 主部署脚本 + +完整的自动化部署脚本,包含详细的错误处理和修复流程。 + +**功能:** +- 部署前检查(Git、Python、Node.js、SSH 连接) +- 自动修复 Python SSL 证书问题 +- 安装 Python 依赖 +- 构建前端 +- 部署到 BWG 服务器 +- 部署后验证 + +**使用方法:** + +```bash +# 完整部署 +./deploy.sh + +# 跳过部署前检查 +./deploy.sh --skip-checks + +# 仅部署后端(跳过前端构建) +./deploy.sh --skip-frontend + +# 仅部署前端(跳过后端) +./deploy.sh --skip-backend + +# 查看帮助 +./deploy.sh --help +``` + +**部署流程:** + +1. **部署前检查** + - 验证 Git 仓库状态 + - 检查 Python/Node.js 版本 + - 测试 SSH 连接 + - 检查 SSL 证书配置 + +2. **修复 SSL 证书** + - 更新 certifi 包 + - macOS: 运行 Python 证书安装脚本 + - Linux: 更新系统 CA 证书 + - 设置环境变量(REQUESTS_CA_BUNDLE, SSL_CERT_FILE) + - 测试 SSL 连接 + +3. **安装依赖** + - 安装 Python 包(包含所有可选依赖) + - 初始化 crawl4ai + - 失败时自动尝试国内镜像源 + +4. **构建前端** + - 安装 npm 依赖 + - 构建生产版本(增加 Node.js 内存限制) + - 验证构建输出 + +5. **部署到服务器** + - 推送代码到 Git + - SSH 到服务器拉取最新代码 + - 安装服务器端依赖 + - 重启 systemd 服务 + - rsync 前端静态文件 + +6. **验证部署** + - 检查服务状态 + - HTTP 健康检查 + - 显示最近日志 + +### 2. `scripts/fix_ssl_server.sh` - 服务器端 SSL 修复 + +在 BWG 服务器上运行,修复 Python SSL 证书问题。 + +**使用方法:** + +```bash +# 上传并运行 +scp -P 2222 scripts/fix_ssl_server.sh root@97.64.16.217:/tmp/ +ssh -p 2222 root@97.64.16.217 'bash /tmp/fix_ssl_server.sh' +``` + +**功能:** +- 更新系统 CA 证书 +- 更新 Python certifi 包 +- 配置 systemd 服务环境变量 +- 添加环境变量到 ~/.bashrc +- 测试 SSL 连接 + +### 3. 
`scripts/diagnose.sh` - 诊断工具 + +快速诊断部署问题,提供详细的系统状态和修复建议。 + +**使用方法:** + +```bash +# 完整诊断 +./scripts/diagnose.sh + +# 仅诊断本地环境 +./scripts/diagnose.sh local + +# 仅诊断远程服务器 +./scripts/diagnose.sh remote + +# 仅诊断网络连接 +./scripts/diagnose.sh network + +# 显示常见问题修复方法 +./scripts/diagnose.sh fixes +``` + +**诊断内容:** + +**本地环境:** +- Git 仓库状态 +- Python 版本和 SSL 证书 +- Node.js/npm 版本 +- 前端构建状态 +- 磁盘空间和内存 + +**远程服务器:** +- SSH 连接状态 +- 服务运行状态 +- 健康检查端点 +- 端口监听状态 +- 代码版本 +- SSL 证书配置 +- 磁盘和内存使用 +- 最近的服务日志 +- Nginx 状态 +- HTTPS 证书过期时间 + +**网络连接:** +- 公网访问测试 +- API 健康检查 +- DNS 解析 +- Ping 测试 + +## 常见问题和解决方案 + +### 1. SSL 证书验证失败 + +**错误信息:** +``` +SSL: CERTIFICATE_VERIFY_FAILED +``` + +**原因:** +- Python 未安装系统 CA 证书 +- certifi 包版本过旧 +- 系统时间不正确 + +**解决方案:** + +本地修复: +```bash +# 方法 1: 运行部署脚本(自动修复) +./deploy.sh + +# 方法 2: 手动修复 +pip install --upgrade certifi pip setuptools + +# macOS 特定 +/Applications/Python\ 3.x/Install\ Certificates.command + +# 设置环境变量 +export REQUESTS_CA_BUNDLE=$(python3 -c "import certifi; print(certifi.where())") +export SSL_CERT_FILE=$REQUESTS_CA_BUNDLE +``` + +服务器修复: +```bash +scp -P 2222 scripts/fix_ssl_server.sh root@97.64.16.217:/tmp/ +ssh -p 2222 root@97.64.16.217 'bash /tmp/fix_ssl_server.sh' +ssh -p 2222 root@97.64.16.217 'systemctl restart opencmo' +``` + +### 2. 依赖安装失败 + +**错误信息:** +``` +ERROR: Could not find a version that satisfies the requirement +``` + +**解决方案:** + +```bash +# 使用国内镜像源 +pip install -e ".[all]" -i https://pypi.tuna.tsinghua.edu.cn/simple + +# 检查磁盘空间 +df -h + +# 清理 pip 缓存 +pip cache purge + +# 分步安装 +pip install fastapi uvicorn aiosqlite openai anthropic +pip install -e . +``` + +### 3. 前端构建内存不足 + +**错误信息:** +``` +FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory +``` + +**解决方案:** + +```bash +# 增加 Node.js 内存限制 +export NODE_OPTIONS='--max-old-space-size=4096' +cd frontend && npm run build + +# 清理后重建 +cd frontend +rm -rf node_modules dist +npm install +npm run build +``` + +### 4. 
SSH 连接失败 + +**错误信息:** +``` +Permission denied (publickey) +``` + +**解决方案:** + +```bash +# 检查 SSH 密钥 +ssh-add -l + +# 添加密钥 +ssh-add ~/.ssh/id_rsa + +# 测试连接 +ssh -p 2222 root@97.64.16.217 + +# 使用密码登录(如果密钥失败) +ssh -p 2222 -o PreferredAuthentications=password root@97.64.16.217 +``` + +### 5. 服务启动失败 + +**错误信息:** +``` +Job for opencmo.service failed +``` + +**解决方案:** + +```bash +# 查看详细日志 +ssh -p 2222 root@97.64.16.217 'journalctl -u opencmo -n 100 --no-pager' + +# 检查端口占用 +ssh -p 2222 root@97.64.16.217 'lsof -i:8080' + +# 手动启动测试 +ssh -p 2222 root@97.64.16.217 'cd /opt/OpenCMO && opencmo-web' + +# 检查配置文件 +ssh -p 2222 root@97.64.16.217 'cat /opt/OpenCMO/.env' + +# 重置数据库(谨慎使用) +ssh -p 2222 root@97.64.16.217 'cp ~/.opencmo/data.db ~/.opencmo/data.db.backup' +ssh -p 2222 root@97.64.16.217 'rm ~/.opencmo/data.db' +ssh -p 2222 root@97.64.16.217 'systemctl restart opencmo' +``` + +### 6. Git 推送失败 + +**错误信息:** +``` +error: failed to push some refs +``` + +**解决方案:** + +```bash +# 查看状态 +git status + +# 提交本地更改 +git add . +git commit -m "your message" +git push + +# 拉取远程更改 +git pull --rebase origin main +git push + +# 强制推送(谨慎使用) +git push -f origin main +``` + +### 7. 
Nginx 502 错误 + +**原因:** +- 后端服务未运行 +- 端口配置错误 +- 其他 server block 冲突 + +**解决方案:** + +```bash +# 检查服务状态 +ssh -p 2222 root@97.64.16.217 'systemctl status opencmo' + +# 检查 Nginx 配置 +ssh -p 2222 root@97.64.16.217 'nginx -t' + +# 查看 Nginx 错误日志 +ssh -p 2222 root@97.64.16.217 'tail -50 /var/log/nginx/error.log' + +# 检查冲突的 server block +ssh -p 2222 root@97.64.16.217 'grep -r "server_name.*aidcmo.com" /etc/nginx/conf.d/' + +# 重启 Nginx +ssh -p 2222 root@97.64.16.217 'systemctl restart nginx' +``` + +## 部署检查清单 + +部署前确认: + +- [ ] 本地代码已提交到 Git +- [ ] 所有测试通过 +- [ ] .env 文件配置正确 +- [ ] Python 版本 >= 3.9 +- [ ] Node.js 版本 >= 18 +- [ ] SSH 密钥已配置 +- [ ] 磁盘空间充足(本地 >5GB,服务器 >2GB) + +部署后验证: + +- [ ] 服务状态正常:`systemctl status opencmo` +- [ ] 健康检查通过:`curl http://127.0.0.1:8080/api/v1/health` +- [ ] 公网可访问:`curl https://aidcmo.com` +- [ ] 前端加载正常 +- [ ] 登录功能正常 +- [ ] 查看日志无错误:`journalctl -u opencmo -n 50` + +## 有用的命令 + +```bash +# 查看实时日志 +ssh -p 2222 root@97.64.16.217 'journalctl -u opencmo -f' + +# 重启服务 +ssh -p 2222 root@97.64.16.217 'systemctl restart opencmo' + +# 检查服务状态 +ssh -p 2222 root@97.64.16.217 'systemctl status opencmo' + +# 查看端口占用 +ssh -p 2222 root@97.64.16.217 'lsof -i:8080' + +# 查看磁盘使用 +ssh -p 2222 root@97.64.16.217 'df -h' + +# 查看内存使用 +ssh -p 2222 root@97.64.16.217 'free -h' + +# 备份数据库 +ssh -p 2222 root@97.64.16.217 'cp ~/.opencmo/data.db ~/.opencmo/data.db.$(date +%Y%m%d_%H%M%S)' + +# 查看 Nginx 日志 +ssh -p 2222 root@97.64.16.217 'tail -f /var/log/nginx/access.log' +ssh -p 2222 root@97.64.16.217 'tail -f /var/log/nginx/error.log' + +# 测试 Nginx 配置 +ssh -p 2222 root@97.64.16.217 'nginx -t' + +# 重载 Nginx +ssh -p 2222 root@97.64.16.217 'systemctl reload nginx' +``` + +## 紧急回滚 + +如果部署后出现严重问题: + +```bash +# 1. 回滚代码(先在服务器上用 git log --oneline 确认要回滚到的提交,再将 <commit-hash> 替换为该提交的哈希;不带提交参数的 git reset --hard 只会重置到当前 HEAD,不会回滚) +ssh -p 2222 root@97.64.16.217 " + cd /opt/OpenCMO && + git log --oneline -5 && + git reset --hard <commit-hash> && + systemctl restart opencmo +" + +# 2.
恢复数据库备份 +ssh -p 2222 root@97.64.16.217 " + ls -lh ~/.opencmo/data.db* && + cp ~/.opencmo/data.db.backup ~/.opencmo/data.db && + systemctl restart opencmo +" + +# 3. 恢复前端 +rsync -avz --delete frontend/dist.backup/ root@97.64.16.217:/opt/OpenCMO/frontend/dist/ -e "ssh -p 2222" +``` + +## 技术支持 + +如果遇到脚本无法解决的问题: + +1. 运行诊断工具:`./scripts/diagnose.sh` +2. 查看详细日志:`ssh -p 2222 root@97.64.16.217 'journalctl -u opencmo -n 200'` +3. 检查 GitHub Issues:https://github.com/anthropics/opencmo/issues +4. 联系开发团队 + +## 脚本维护 + +这些脚本会随着项目演进而更新。如果发现问题或有改进建议,请提交 PR 或 Issue。 + +**最后更新:** 2026-04-12 diff --git a/scripts/diagnose.sh b/scripts/diagnose.sh new file mode 100755 index 0000000..d676f1c --- /dev/null +++ b/scripts/diagnose.sh @@ -0,0 +1,371 @@ +#!/bin/bash + +################################################################################ +# OpenCMO Deployment Diagnostics Script +# Quick health check for deployment issues +################################################################################ + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[✓]${NC} $1" +} + +log_error() { + echo -e "${RED}[✗]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[!]${NC} $1" +} + +log_section() { + echo "" + echo -e "${GREEN}========================================${NC}" + echo -e "${GREEN}$1${NC}" + echo -e "${GREEN}========================================${NC}" +} + +REMOTE_HOST="97.64.16.217" +REMOTE_PORT="2222" +REMOTE_USER="root" +SERVICE_NAME="opencmo" + +################################################################################ +# Local diagnostics +################################################################################ + +diagnose_local() { + log_section "本地环境诊断" + + # Git status + echo -n "Git 仓库: " + if git rev-parse --git-dir > /dev/null 2>&1; then + log_success "正常" + echo " 分支: $(git branch --show-current)" + echo " 最新提交: $(git 
log -1 --oneline)" + + if git diff-index --quiet HEAD --; then + log_success "工作区干净" + else + log_warning "存在未提交的更改" + git status --short | head -10 + fi + else + log_error "不是 Git 仓库" + fi + + # Python + echo -n "Python: " + if command -v python3 &> /dev/null; then + local version=$(python3 --version) + log_success "$version" + else + log_error "未安装" + fi + + # Python SSL certificates + echo -n "Python SSL 证书: " + if python3 -c "import ssl; import certifi; print(certifi.where())" > /dev/null 2>&1; then + local cert_path=$(python3 -c "import certifi; print(certifi.where())") + log_success "$cert_path" + + # Test SSL connection + echo -n "SSL 连接测试: " + if python3 -c "import urllib.request; urllib.request.urlopen('https://pypi.org', timeout=5)" 2>/dev/null; then + log_success "通过" + else + log_error "失败" + echo " 修复: ./deploy.sh 会自动修复此问题" + fi + else + log_error "未配置" + echo " 修复: pip install --upgrade certifi" + fi + + # Node.js + echo -n "Node.js: " + if command -v node &> /dev/null; then + local version=$(node --version) + log_success "$version" + else + log_error "未安装" + fi + + # npm + echo -n "npm: " + if command -v npm &> /dev/null; then + local version=$(npm --version) + log_success "$version" + else + log_error "未安装" + fi + + # Frontend build + if [ -d "frontend/dist" ]; then + echo -n "前端构建: " + local file_count=$(find frontend/dist -type f | wc -l | tr -d ' ') + log_success "存在 ($file_count 个文件)" + else + echo -n "前端构建: " + log_warning "不存在 (需要运行 cd frontend && npm run build)" + fi + + # Disk space + echo -n "磁盘空间: " + local available=$(df -h . 
| tail -1 | awk '{print $4}') + log_info "$available 可用" + + # Memory + echo -n "可用内存: " + if [[ "$OSTYPE" == "darwin"* ]]; then + local free_mem=$(vm_stat | grep "Pages free" | awk '{print $3}' | sed 's/\.//') + local free_gb=$((free_mem * 4096 / 1024 / 1024 / 1024)) + log_info "${free_gb}GB" + else + local free_mem=$(free -h | grep Mem | awk '{print $4}') + log_info "$free_mem" + fi +} + +################################################################################ +# Remote diagnostics +################################################################################ + +diagnose_remote() { + log_section "远程服务器诊断" + + # SSH connectivity + echo -n "SSH 连接: " + if ssh -p "$REMOTE_PORT" -o ConnectTimeout=5 "$REMOTE_USER@$REMOTE_HOST" "echo 'OK'" > /dev/null 2>&1; then + log_success "正常" + else + log_error "失败" + echo " 检查: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST" + return 1 + fi + + # Server info + log_info "服务器信息:" + ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" " + echo ' 操作系统: '$(cat /etc/os-release | grep PRETTY_NAME | cut -d'\"' -f2) + echo ' 内核: '$(uname -r) + echo ' 运行时间: '$(uptime -p) + " 2>/dev/null + + # Service status + echo -n "服务状态: " + if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl is-active $SERVICE_NAME" > /dev/null 2>&1; then + log_success "运行中" + else + log_error "未运行" + echo " 查看详情: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl status $SERVICE_NAME'" + fi + + # Service health + echo -n "健康检查: " + local health_code=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8080/api/v1/health" 2>/dev/null) + if [ "$health_code" = "200" ]; then + log_success "HTTP $health_code" + else + log_error "HTTP $health_code" + fi + + # Port listening + echo -n "端口 8080: " + if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "lsof -i:8080" > /dev/null 2>&1; then + log_success "监听中" + else + log_error "未监听" + fi + + # Code version + echo -n "代码版本: " + local 
remote_commit=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "cd /opt/OpenCMO && git log -1 --oneline" 2>/dev/null) + if [ -n "$remote_commit" ]; then + log_info "$remote_commit" + else + log_error "无法获取" + fi + + # Python SSL on server + echo -n "服务器 SSL 证书: " + local server_cert=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "python3 -c 'import certifi; print(certifi.where())'" 2>/dev/null) + if [ -n "$server_cert" ]; then + log_success "$server_cert" + else + log_error "未配置" + echo " 修复: scp -P $REMOTE_PORT scripts/fix_ssl_server.sh $REMOTE_USER@$REMOTE_HOST:/tmp/ && ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'bash /tmp/fix_ssl_server.sh'" + fi + + # Disk space on server + echo -n "服务器磁盘: " + ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "df -h / | tail -1 | awk '{print \"使用 \" \$3 \" / \" \$2 \" (\" \$5 \")\"}'" 2>/dev/null + + # Memory on server + echo -n "服务器内存: " + ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "free -h | grep Mem | awk '{print \"使用 \" \$3 \" / \" \$2}'" 2>/dev/null + + # Recent logs + log_info "最近的服务日志 (最后 10 行):" + ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "journalctl -u $SERVICE_NAME -n 10 --no-pager" 2>/dev/null | sed 's/^/ /' + + # Nginx status + echo -n "Nginx 状态: " + if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl is-active nginx" > /dev/null 2>&1; then + log_success "运行中" + else + log_error "未运行" + fi + + # SSL certificate expiry + echo -n "HTTPS 证书: " + local cert_expiry=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "echo | openssl s_client -servername aidcmo.com -connect aidcmo.com:443 2>/dev/null | openssl x509 -noout -dates | grep notAfter | cut -d= -f2" 2>/dev/null) + if [ -n "$cert_expiry" ]; then + log_info "过期时间: $cert_expiry" + else + log_warning "无法获取" + fi +} + +################################################################################ +# Network diagnostics +################################################################################ + +diagnose_network() { + 
log_section "网络诊断" + + # Public site accessibility + echo -n "公网访问 (https://aidcmo.com): " + local http_code=$(curl -s -o /dev/null -w '%{http_code}' -m 10 https://aidcmo.com 2>/dev/null) + if [ "$http_code" = "200" ]; then + log_success "HTTP $http_code" + else + log_error "HTTP $http_code" + fi + + # API health endpoint + echo -n "API 健康检查: " + local api_code=$(curl -s -o /dev/null -w '%{http_code}' -m 10 https://aidcmo.com/api/v1/health 2>/dev/null) + if [ "$api_code" = "200" ]; then + log_success "HTTP $api_code" + else + log_error "HTTP $api_code" + fi + + # DNS resolution + echo -n "DNS 解析: " + local resolved_ip=$(dig +short aidcmo.com | tail -1) + if [ "$resolved_ip" = "$REMOTE_HOST" ]; then + log_success "$resolved_ip" + else + log_warning "解析为 $resolved_ip (期望 $REMOTE_HOST)" + fi + + # Ping test + echo -n "Ping 测试: " + if ping -c 1 -W 2 "$REMOTE_HOST" > /dev/null 2>&1; then + log_success "可达" + else + log_error "不可达" + fi +} + +################################################################################ +# Common issues and fixes +################################################################################ + +show_common_fixes() { + log_section "常见问题修复" + + echo "1. SSL 证书问题" + echo " 本地修复: ./deploy.sh 会自动修复" + echo " 服务器修复: scp -P $REMOTE_PORT scripts/fix_ssl_server.sh $REMOTE_USER@$REMOTE_HOST:/tmp/ && ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'bash /tmp/fix_ssl_server.sh'" + echo "" + + echo "2. 服务未运行" + echo " 重启服务: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl restart $SERVICE_NAME'" + echo " 查看日志: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'journalctl -u $SERVICE_NAME -f'" + echo "" + + echo "3. 前端构建失败" + echo " 清理重建: cd frontend && rm -rf node_modules dist && npm install && npm run build" + echo " 增加内存: export NODE_OPTIONS='--max-old-space-size=4096' && npm run build" + echo "" + + echo "4. 
端口被占用" + echo " 查看占用: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'lsof -i:8080'" + echo " 杀死进程: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'kill -9 \$(lsof -t -i:8080)'" + echo "" + + echo "5. Git 推送失败" + echo " 查看状态: git status" + echo " 提交更改: git add . && git commit -m 'your message' && git push" + echo " 强制推送: git push -f origin main (谨慎使用)" + echo "" + + echo "6. 数据库问题" + echo " 备份数据库: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'cp ~/.opencmo/data.db ~/.opencmo/data.db.backup'" + echo " 重置数据库: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'rm ~/.opencmo/data.db && systemctl restart $SERVICE_NAME'" + echo "" + + echo "7. Nginx 配置问题" + echo " 测试配置: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'nginx -t'" + echo " 重载配置: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl reload nginx'" + echo "" +} + +################################################################################ +# Main +################################################################################ + +main() { + echo "================================" + echo "OpenCMO 部署诊断工具" + echo "================================" + + case "${1:-all}" in + local) + diagnose_local + ;; + remote) + diagnose_remote + ;; + network) + diagnose_network + ;; + fixes) + show_common_fixes + ;; + all) + diagnose_local + diagnose_remote + diagnose_network + show_common_fixes + ;; + *) + echo "用法: $0 [local|remote|network|fixes|all]" + echo "" + echo "选项:" + echo " local - 仅诊断本地环境" + echo " remote - 仅诊断远程服务器" + echo " network - 仅诊断网络连接" + echo " fixes - 显示常见问题修复方法" + echo " all - 完整诊断 (默认)" + exit 1 + ;; + esac + + echo "" + log_success "诊断完成" +} + +main "$@" diff --git a/scripts/fix_ssl_server.sh b/scripts/fix_ssl_server.sh new file mode 100755 index 0000000..b43b538 --- /dev/null +++ b/scripts/fix_ssl_server.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +################################################################################ +# Server-side SSL Certificate Fix Script +# Run this on the BWG server to fix 
Python SSL certificate issues +################################################################################ + +set -e + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +log_step() { + echo -e "\n${GREEN}==>${NC} $1" +} + +log_step "修复服务器 Python SSL 证书" + +# Update system CA certificates +log_info "更新系统 CA 证书..." +if command -v update-ca-certificates &> /dev/null; then + update-ca-certificates 2>&1 || log_error "CA 证书更新失败" + log_success "系统 CA 证书已更新" +fi + +# Update certifi package +log_info "更新 Python certifi 包..." +pip install --upgrade certifi pip setuptools 2>&1 || log_error "certifi 更新失败" +log_success "certifi 已更新" + +# Get certificate path +CERT_PATH=$(python3 -c "import certifi; print(certifi.where())" 2>/dev/null) + +if [ -n "$CERT_PATH" ] && [ -f "$CERT_PATH" ]; then + log_success "证书路径: $CERT_PATH" + + # Set environment variables in systemd service + log_info "配置 systemd 服务环境变量..." + + SERVICE_FILE="/etc/systemd/system/opencmo.service" + + if [ -f "$SERVICE_FILE" ]; then + # Backup original service file + cp "$SERVICE_FILE" "${SERVICE_FILE}.backup" + + # Check if Environment variables already exist + if grep -q "REQUESTS_CA_BUNDLE" "$SERVICE_FILE"; then + log_info "环境变量已存在,更新中..." + sed -i "s|Environment=\"REQUESTS_CA_BUNDLE=.*\"|Environment=\"REQUESTS_CA_BUNDLE=$CERT_PATH\"|g" "$SERVICE_FILE" + sed -i "s|Environment=\"SSL_CERT_FILE=.*\"|Environment=\"SSL_CERT_FILE=$CERT_PATH\"|g" "$SERVICE_FILE" + else + log_info "添加环境变量到服务文件..." + # Add Environment variables after [Service] section + sed -i "/\[Service\]/a Environment=\"REQUESTS_CA_BUNDLE=$CERT_PATH\"\nEnvironment=\"SSL_CERT_FILE=$CERT_PATH\"" "$SERVICE_FILE" + fi + + log_success "服务文件已更新" + + # Reload systemd + log_info "重载 systemd 配置..." 
+ systemctl daemon-reload + log_success "systemd 配置已重载" + + else + log_error "未找到服务文件: $SERVICE_FILE" + fi + + # Add to .bashrc for interactive sessions + if [ -f ~/.bashrc ]; then + if ! grep -q "REQUESTS_CA_BUNDLE" ~/.bashrc; then + echo "" >> ~/.bashrc + echo "# Python SSL certificates" >> ~/.bashrc + echo "export REQUESTS_CA_BUNDLE=\"$CERT_PATH\"" >> ~/.bashrc + echo "export SSL_CERT_FILE=\"$CERT_PATH\"" >> ~/.bashrc + log_success "环境变量已添加到 ~/.bashrc" + fi + fi + +else + log_error "无法获取证书路径" + exit 1 +fi + +# Test SSL connection +log_info "测试 SSL 连接..." +if python3 -c "import urllib.request; urllib.request.urlopen('https://pypi.org', timeout=10)" 2>/dev/null; then + log_success "SSL 连接测试通过" +else + log_error "SSL 连接测试失败" + exit 1 +fi + +log_success "SSL 证书修复完成" +echo "" +echo "下一步:" +echo " 1. 重启服务: systemctl restart opencmo" +echo " 2. 检查状态: systemctl status opencmo" +echo " 3. 查看日志: journalctl -u opencmo -f" From ce35051b8a9d6351f1ead12c4f7c3e6a4dff73f5 Mon Sep 17 00:00:00 2001 From: BBear0115 <2267057785@qq.com> Date: Wed, 6 May 2026 23:32:17 +0800 Subject: [PATCH 2/4] feat: add visual report charts --- src/opencmo/report_charts.py | 241 ++++++++++++++++++++++++++++++ src/opencmo/report_pipeline.py | 8 + src/opencmo/reports.py | 96 ++++++++++++ src/opencmo/web/routers/report.py | 12 +- tests/test_report_charts.py | 98 ++++++++++++ 5 files changed, 454 insertions(+), 1 deletion(-) create mode 100644 src/opencmo/report_charts.py create mode 100644 tests/test_report_charts.py diff --git a/src/opencmo/report_charts.py b/src/opencmo/report_charts.py new file mode 100644 index 0000000..309b155 --- /dev/null +++ b/src/opencmo/report_charts.py @@ -0,0 +1,241 @@ +"""Deterministic SVG chart generation for persisted AI CMO reports.""" + +from __future__ import annotations + +import html +import os +import re +import uuid +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any + + +@dataclass(frozen=True) +class ReportChart: + title: 
str + description: str + data_source: str + points_count: int + asset_id: str + markdown: str + degraded: bool = False + + def to_meta(self) -> dict[str, Any]: + payload = asdict(self) + payload.pop("markdown", None) + return payload + + +def get_report_asset_dir() -> Path: + configured = os.environ.get("OPENCMO_REPORT_ASSET_DIR") + if configured: + return Path(configured) + db_path = os.environ.get("OPENCMO_DB_PATH") + if db_path: + return Path(db_path).expanduser().parent / "report_assets" + return Path.home() / ".opencmo" / "report_assets" + + +def get_report_asset_path(asset_id: str) -> Path | None: + if not re.fullmatch(r"[a-f0-9]{32}", asset_id): + return None + return get_report_asset_dir() / f"{asset_id}.svg" + + +def charts_to_markdown(charts: list[ReportChart]) -> str: + if not charts: + return "当前数据不足,未生成图表。" + blocks = [] + for chart in charts: + blocks.append( + "\n".join( + [ + f"### {chart.title}", + chart.markdown, + f"图表说明:{chart.description}", + f"数据来源:`{chart.data_source}`;数据点:{chart.points_count}。", + "数据限制:图表只使用系统已采集到的真实数据,缺失值不会被补造。", + ] + ) + ) + return "\n\n".join(blocks) + + +def build_report_charts(kind: str, facts: dict, meta: dict) -> list[ReportChart]: + charts: list[ReportChart] = [] + charts.extend(_strategic_charts(facts, meta) if kind == "strategic" else _periodic_charts(facts, meta)) + return charts[:4] + + +def _asset_id() -> str: + return uuid.uuid4().hex + + +def _write_svg(asset_id: str, svg: str) -> None: + directory = get_report_asset_dir() + directory.mkdir(parents=True, exist_ok=True) + (directory / f"{asset_id}.svg").write_text(svg, encoding="utf-8") + + +def _markdown(asset_id: str, title: str) -> str: + return f"![{title}](/api/v1/report-assets/{asset_id}.svg)" + + +def _to_percent(value: Any) -> float | None: + if value is None: + return None + try: + num = float(value) + except (TypeError, ValueError): + return None + if 0 <= num <= 1: + return round(num * 100, 1) + return round(num, 1) + + +def _number(value: Any) -> 
float | None: + if value is None: + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + +def _latest_first(items: list[dict], date_key: str) -> list[dict]: + return sorted(items or [], key=lambda item: str(item.get(date_key) or ""), reverse=True) + + +def _strategic_charts(facts: dict, meta: dict) -> list[ReportChart]: + charts: list[ReportChart] = [] + latest = facts.get("latest_scans") or {} + kpis = [ + ("SEO", _to_percent((latest.get("seo") or {}).get("score"))), + ("GEO", _number((latest.get("geo") or {}).get("score"))), + ("Citability", _to_percent((facts.get("citability") or [{}])[0].get("avg_score") if facts.get("citability") else None)), + ("Brand", _number((facts.get("brand_presence") or [{}])[0].get("footprint_score") if facts.get("brand_presence") else None)), + ("Community", _number((latest.get("community") or {}).get("total_hits"))), + ] + kpis = [(label, value) for label, value in kpis if value is not None] + if kpis: + charts.append(_bar_chart("关键指标快照", kpis, "latest_scans/citability/brand_presence", "SEO、GEO、AI 引文可信度、品牌足迹与社区命中的当前快照。")) + + serp = [ + (str(item.get("keyword") or "keyword")[:24], _number(item.get("position"))) + for item in (facts.get("serp_latest") or []) + if item.get("position") is not None + ][:8] + if serp: + charts.append(_bar_chart("SERP 当前排名(数字越小越靠前)", serp, "serp_latest.position", "已跟踪关键词的当前自然搜索排名。")) + + coverage = [ + ("有数据", _number(meta.get("sample_count"))), + ("总数据源", _number(meta.get("total_data_sources"))), + ] + if all(value is not None for _, value in coverage): + charts.append(_bar_chart("数据覆盖度", coverage, "meta.sample_count/meta.total_data_sources", "本报告事实包的数据源覆盖情况。")) + + distribution = _finding_distribution(facts) + if distribution: + charts.append(_bar_chart("风险与建议分布", distribution, "findings/recommendations", "近期发现与建议按优先级聚合后的执行压力。")) + return charts + + +def _periodic_charts(facts: dict, meta: dict) -> list[ReportChart]: + charts: list[ReportChart] = [] + trend_series 
= [ + ("SEO", [(item.get("scanned_at"), _to_percent(item.get("score_performance"))) for item in _latest_first(facts.get("seo_history") or [], "scanned_at")]), + ("GEO", [(item.get("scanned_at"), _number(item.get("geo_score"))) for item in _latest_first(facts.get("geo_history") or [], "scanned_at")]), + ("Community", [(item.get("scanned_at"), _number(item.get("total_hits"))) for item in _latest_first(facts.get("community_history") or [], "scanned_at")]), + ] + for title, series in trend_series: + points = [(label, value) for label, value in reversed(series) if value is not None] + if len(points) >= 2: + charts.append(_line_chart(f"{title} 趋势", points[-10:], f"{title.lower()}_history", f"{title} 在本报告窗口内的真实历史走势。")) + + citability = [(item.get("created_at") or item.get("scanned_at"), _to_percent(item.get("avg_score"))) for item in _latest_first(facts.get("citability") or [], "created_at")] + citability_points = [(label, value) for label, value in reversed(citability) if value is not None] + if len(citability_points) >= 2: + charts.append(_line_chart("AI 引文可信度趋势", citability_points[-10:], "citability.avg_score", "AI 引文可信度在最近样本中的走势。")) + + distribution = _finding_distribution(facts) + if distribution: + charts.append(_bar_chart("本周风险与建议分布", distribution, "findings/recommendations", "本周期可行动问题按优先级聚合后的分布。")) + return charts + + +def _finding_distribution(facts: dict) -> list[tuple[str, float]]: + counts = {"high": 0, "medium": 0, "low": 0, "unknown": 0} + for item in facts.get("findings") or []: + priority = str((item.get("severity") or item.get("priority") or "unknown")).lower() + counts[priority if priority in counts else "unknown"] += 1 + for item in facts.get("recommendations") or []: + priority = str(item.get("priority") or "unknown").lower() + counts[priority if priority in counts else "unknown"] += 1 + return [(label, count) for label, count in counts.items() if count] + + +def _bar_chart(title: str, values: list[tuple[str, float | None]], source: str, description: 
str) -> ReportChart: + values = [(label, float(value)) for label, value in values if value is not None] + asset_id = _asset_id() + max_value = max((value for _, value in values), default=1) or 1 + width = 760 + row_h = 42 + height = 110 + row_h * len(values) + rows = [] + for index, (label, value) in enumerate(values): + y = 76 + index * row_h + bar_w = max(4, int((value / max_value) * 460)) + rows.append(f'{html.escape(label)}') + rows.append(f'') + rows.append(f'{value:g}') + svg = _svg_frame(width, height, title, "\n".join(rows)) + _write_svg(asset_id, svg) + return ReportChart(title, description, source, len(values), asset_id, _markdown(asset_id, title)) + + +def _line_chart(title: str, points: list[tuple[Any, float]], source: str, description: str) -> ReportChart: + points = [(str(label or index + 1), float(value)) for index, (label, value) in enumerate(points)] + asset_id = _asset_id() + width = 760 + height = 320 + min_v = min(value for _, value in points) + max_v = max(value for _, value in points) + span = max(max_v - min_v, 1) + left, right, top, bottom = 70, 700, 70, 250 + coords = [] + for index, (_, value) in enumerate(points): + x = left + (right - left) * (index / max(len(points) - 1, 1)) + y = bottom - ((value - min_v) / span) * (bottom - top) + coords.append((x, y, value)) + path = " ".join(("M" if index == 0 else "L") + f" {x:.1f} {y:.1f}" for index, (x, y, _) in enumerate(coords)) + circles = "\n".join( + f'{value:g}' + for x, y, value in coords + ) + labels = "\n".join( + f'{html.escape(label[:10])}' + for (label, _), (x, _, _) in zip(points, coords) + ) + body = ( + f'' + f'' + f'{max_v:g}' + f'{min_v:g}' + f'' + f"{circles}{labels}" + ) + svg = _svg_frame(width, height, title, body) + _write_svg(asset_id, svg) + return ReportChart(title, description, source, len(points), asset_id, _markdown(asset_id, title)) + + +def _svg_frame(width: int, height: int, title: str, body: str) -> str: + return ( + f'' + '' + f'' + f'{html.escape(title)}' + 
f"{body}" + ) diff --git a/src/opencmo/report_pipeline.py b/src/opencmo/report_pipeline.py index f15890f..d8672fa 100644 --- a/src/opencmo/report_pipeline.py +++ b/src/opencmo/report_pipeline.py @@ -458,6 +458,8 @@ async def _bounded_distill(dim): 3. 每个章节必须指定使用哪些 insights (用 id 引用) 作为论据 4. 章节数量:4-6 个主体章节 5. 引言和战略建议章节标记为 is_final_section: true(它们最后写) +6. 标题层级必须清晰:最终 Markdown 只能使用 `#`、`##`、`###`,不能规划更深层级 +7. 必须规划一个图表解释章节,用于解释后端提供的真实图表,不要要求模型自行创造图表数据 输出 JSON 格式: { @@ -493,6 +495,8 @@ async def _phase_plan_outline( f" 类别:{project['category']}\n" f" 网址:{project['url']}\n" f" 数据质量:{reflection.get('data_quality_score', '?')}/100\n\n" + f"可用真实图表:\n" + f"{facts.get('report_charts_markdown', '当前数据不足,未生成图表。')}\n\n" f"分析发现(共 {len(distilled.get('insights', []))} 条):\n" f"{_json_dump(distilled)}" ) @@ -535,6 +539,8 @@ async def _phase_plan_outline( 11. 对于问题诊断,必须进行根因分析(回答"为什么会这样"),列出2-3个可能原因 12. 如果有历史趋势数据,说明趋势方向和变化速度(如"过去3个月下降30%") 13. 每个问题都要关联到商业影响(流量、收入、市场份额等) +14. 标题层级只能使用 `##` 和 `###`,禁止使用 `####` 或更深标题 +15. 不要自行生成图表数据;如果需要提到图表,只能引用输入中已经提供的图表 输出纯 Markdown 文本(不要 JSON,不要代码块包裹)。 以 ## 开头写章节标题,然后是正文段落。""" @@ -691,6 +697,7 @@ async def _phase_revise_section( 4. 明确指出 1-3 个最高优先级行动和建议时间窗口,但不要编造 ROI、流量损失或竞品增速 5. 添加紧迫性提示,但只能基于输入中已经给出的事实和趋势 6. 面向CMO决策者,30秒内让人理解"为什么现在必须行动" +7. 
如果输入包含真实图表,必须把图表作为证据引用,但不能改写图表数字 输出纯 Markdown(以 ## 执行摘要 开头)。""" @@ -778,6 +785,7 @@ async def _bounded_summarize(sec, content): f"网址:{project['url']}\n" f"报告标题:{outline.get('report_title', '深度分析报告')}\n" f"叙事线索:{outline.get('narrative_arc', '无')}\n\n" + f"真实图表证据:\n{facts.get('report_charts_markdown', '当前数据不足,未生成图表。')}\n\n" f"核心发现要点:\n" + "\n".join(f"- {p}" for p in distilled.get("executive_summary_points", [])) + f"\n\n贯穿主题:{', '.join(distilled.get('cross_cutting_themes', []))}\n\n" diff --git a/src/opencmo/reports.py b/src/opencmo/reports.py index d1263e9..479b32f 100644 --- a/src/opencmo/reports.py +++ b/src/opencmo/reports.py @@ -6,6 +6,7 @@ import html import json import logging +import re from datetime import datetime, timedelta, timezone from opencmo import storage @@ -120,6 +121,13 @@ def close_list() -> None: if not stripped: close_list() continue + image_match = re.fullmatch(r"!\[([^\]]*)\]\(([^)]+)\)", stripped) + if image_match: + close_list() + alt = html.escape(image_match.group(1)) + src = html.escape(image_match.group(2), quote=True) + html_lines.append(f'
{alt}
{alt}
') + continue if stripped.startswith("### "): close_list() html_lines.append(f"

{html.escape(stripped[4:])}

") @@ -145,6 +153,74 @@ def close_list() -> None: return "\n".join(html_lines) +def _normalize_report_headings(markdown_text: str) -> str: + """Keep report heading hierarchy to H1/H2/H3 only.""" + lines: list[str] = [] + seen_h1 = False + for raw_line in markdown_text.splitlines(): + match = re.match(r"^(#{1,6})\s+(.+?)\s*$", raw_line) + if not match: + lines.append(raw_line.rstrip()) + continue + level = len(match.group(1)) + title = match.group(2).strip() + if level == 1 and not seen_h1: + seen_h1 = True + lines.append(f"# {title}") + elif level == 1: + lines.append(f"## {title}") + elif level == 2: + lines.append(f"## {title}") + else: + lines.append(f"### {title}") + return "\n".join(lines).strip() + + +def _insert_after_first_section(markdown_text: str, section: str) -> str: + lines = markdown_text.splitlines() + h2_indices = [idx for idx, line in enumerate(lines) if line.startswith("## ")] + if len(h2_indices) >= 2: + insert_at = h2_indices[1] + elif len(lines) >= 1 and lines[0].startswith("# "): + insert_at = 1 + else: + insert_at = 0 + return "\n".join([*lines[:insert_at], "", section.strip(), "", *lines[insert_at:]]).strip() + + +def _postprocess_human_report_content(content: str, charts_markdown: str) -> str: + content = _normalize_report_headings(content) + if "## 数据图表速览" in content or "## 2. 数据图表速览" in content: + return content + chart_section = f"## 2. 
数据图表速览\n\n{charts_markdown or '当前数据不足,未生成图表。'}" + return _insert_after_first_section(content, chart_section) + + +def _prepare_report_charts(kind: str, facts: dict, meta: dict) -> tuple[dict, dict, str]: + """Generate deterministic charts and return facts/meta copies enriched for prompts.""" + enriched_facts = dict(facts) + enriched_meta = dict(meta) + try: + from opencmo.report_charts import build_report_charts, charts_to_markdown + + charts = build_report_charts(kind, facts, meta) + charts_markdown = charts_to_markdown(charts) + enriched_facts["report_charts"] = [chart.to_meta() | {"markdown": chart.markdown} for chart in charts] + enriched_facts["report_charts_markdown"] = charts_markdown + enriched_meta["charts"] = [chart.to_meta() for chart in charts] + enriched_meta["chart_count"] = len(charts) + return enriched_facts, enriched_meta, charts_markdown + except Exception as exc: + logger.exception("Report chart generation failed for %s", kind) + enriched_meta["chart_error"] = str(exc) or exc.__class__.__name__ + charts_markdown = "当前数据不足或图表生成失败,未生成图表。" + enriched_facts["report_charts"] = [] + enriched_facts["report_charts_markdown"] = charts_markdown + enriched_meta["charts"] = [] + enriched_meta["chart_count"] = 0 + return enriched_facts, enriched_meta, charts_markdown + + async def _generate_llm_markdown(system_prompt: str, user_prompt: str, *, model_override: str | None = None) -> str: """Generate markdown with the configured LLM.""" from opencmo import llm @@ -637,6 +713,11 @@ def _prompts(kind: str, audience: str, facts: dict, meta: dict, previous_exists: if kind == "strategic" and audience == "human": system = _compose_report_system_prompt( "你的任务是生成一份极其深入的战略分析报告。输出 Markdown,报告总长度应在 2000-4000 字之间。\n\n" + "【标题与可视化硬性要求】\n" + "- `#` 只用于报告总标题,`##` 只用于一级章节,`###` 只用于二级小节,禁止使用 `####` 或更深标题。\n" + "- 必须保留并解释输入中提供的真实图表 Markdown,不能修改图表链接、标题或图表数字。\n" + "- 必须包含 `## 数据图表速览`,每张图后解释:图表说明、业务含义、数据限制。\n" + "- 一级章节标题要清晰可扫读,二级标题必须是结论型标题,不要写空泛标题。\n\n" "严格按以下 6 
大模块结构生成,每个模块都必须展开详细论述,不能用简短的一两句话敷衍:\n\n" "## 1. 执行摘要与项目定性 (Executive Summary)\n" " - 一句话定义项目当前所处的增长阶段\n" @@ -675,6 +756,8 @@ def _prompts(kind: str, audience: str, facts: dict, meta: dict, previous_exists: f"版本是否已有历史报告:{previous_exists}\n" f"数据来源覆盖度:{meta.get('sample_count', 0)}/{meta.get('total_data_sources', 0)} 个数据源有数据\n" f"摘要元数据:{_json_dump(meta)}\n\n" + f"=== 必须引用的真实图表(由后端基于事实包生成,不得改写数字或链接)===\n" + f"{facts.get('report_charts_markdown', '当前数据不足,未生成图表。')}\n\n" f"=== 完整事实包(来自所有智能体的采集结果)===\n{_json_dump(facts)}" ) return system, user @@ -706,6 +789,11 @@ def _prompts(kind: str, audience: str, facts: dict, meta: dict, previous_exists: if kind == "periodic" and audience == "human": system = _compose_report_system_prompt( "你的任务是生成一份深度周报。输出 Markdown,报告总长度应在 1500-3000 字之间。\n\n" + "【标题与可视化硬性要求】\n" + "- `#` 只用于报告总标题,`##` 只用于一级章节,`###` 只用于二级小节,禁止使用 `####` 或更深标题。\n" + "- 必须保留并解释输入中提供的真实图表 Markdown,不能修改图表链接、标题或图表数字。\n" + "- 必须包含 `## 数据图表速览`,每张图后解释:图表说明、业务含义、数据限制。\n" + "- 少于 2 个时间点的指标不能写成趋势,只能写成当前快照。\n\n" "严格按以下结构生成,每个模块都要做深入的业务推导,不能停留在数据罗列层面:\n\n" "## 1. 本周最重要的变化 (Top Changes)\n" " - 列出 3-5 个最重要的变化,每个变化不仅要说「发生了什么」,还要解释「为什么重要」「对增长意味着什么」\n" @@ -733,6 +821,8 @@ def _prompts(kind: str, audience: str, facts: dict, meta: dict, previous_exists: f"统计窗口:{meta.get('window_start', '未知')} 到 {meta.get('window_end', '未知')}\n" f"数据来源覆盖度:{meta.get('sample_count', 0)}/{meta.get('total_data_sources', 0)} 个数据源有数据\n" f"元数据:{_json_dump(meta)}\n\n" + f"=== 必须引用的真实图表(由后端基于事实包生成,不得改写数字或链接)===\n" + f"{facts.get('report_charts_markdown', '当前数据不足,未生成图表。')}\n\n" f"=== 完整事实包(来自所有智能体的采集结果)===\n{_json_dump(facts)}" ) return system, user @@ -766,6 +856,10 @@ async def _generate_report_record( model = await _get_report_model() report_model = model content = "" + charts_markdown = "" + + if audience == "human": + facts, meta, charts_markdown = _prepare_report_charts(kind, facts, meta) # Human reports use the deep multi-agent pipeline; # Agent briefs stay single-call (they need to be concise). 
@@ -780,6 +874,7 @@ async def _generate_report_record( used_pipeline = True if not content.strip(): raise RuntimeError("Pipeline returned empty report.") + content = _postprocess_human_report_content(content, charts_markdown) except Exception as pipeline_exc: pipeline_error = str(pipeline_exc) or pipeline_exc.__class__.__name__ logger.warning( @@ -797,6 +892,7 @@ async def _generate_report_record( used_fallback = True if fallback_model: report_model = fallback_model + content = _postprocess_human_report_content(content, charts_markdown) except Exception as exc: llm_error = str(exc) or exc.__class__.__name__ logger.exception("Report generation failed for %s/%s", kind, audience) diff --git a/src/opencmo/web/routers/report.py b/src/opencmo/web/routers/report.py index 97137c5..6cd2ea2 100644 --- a/src/opencmo/web/routers/report.py +++ b/src/opencmo/web/routers/report.py @@ -5,7 +5,7 @@ import asyncio from fastapi import APIRouter, Request -from fastapi.responses import JSONResponse +from fastapi.responses import FileResponse, JSONResponse from opencmo import storage from opencmo.background import service as bg_service @@ -85,6 +85,16 @@ async def api_v1_report_detail(report_id: int): return JSONResponse(report) +@router.api_route("/report-assets/{asset_id}.svg", methods=["GET", "HEAD"]) +async def api_v1_report_asset(asset_id: str): + from opencmo.report_charts import get_report_asset_path + + asset_path = get_report_asset_path(asset_id) + if not asset_path or not asset_path.exists() or not asset_path.is_file(): + return JSONResponse({"error": "Not found"}, status_code=404) + return FileResponse(asset_path, media_type="image/svg+xml") + + @router.post("/projects/{project_id}/reports/{kind}/regenerate") async def api_v1_regenerate_report(project_id: int, kind: str, request: Request): project = await storage.get_project(project_id) diff --git a/tests/test_report_charts.py b/tests/test_report_charts.py new file mode 100644 index 0000000..e98eca0 --- /dev/null +++ 
b/tests/test_report_charts.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import pytest +from fastapi.responses import FileResponse, JSONResponse + +from opencmo.report_charts import build_report_charts, get_report_asset_path +from opencmo.reports import ( + _normalize_report_headings, + _postprocess_human_report_content, + _simple_markdown_to_html, +) +from opencmo.web.routers.report import api_v1_report_asset + + +def test_strategic_chart_builder_uses_real_fact_values(tmp_path, monkeypatch): + monkeypatch.setenv("OPENCMO_REPORT_ASSET_DIR", str(tmp_path)) + facts = { + "latest_scans": { + "seo": {"score": 0.82}, + "geo": {"score": 57}, + "community": {"total_hits": 12}, + }, + "citability": [{"avg_score": 0.41}], + "brand_presence": [{"footprint_score": 33}], + "serp_latest": [{"keyword": "ai cmo", "position": 4}], + "findings": [{"severity": "high"}], + "recommendations": [{"priority": "medium"}], + } + charts = build_report_charts("strategic", facts, {"sample_count": 3, "total_data_sources": 5}) + + assert charts + assert charts[0].markdown.startswith("![") + svg = get_report_asset_path(charts[0].asset_id).read_text(encoding="utf-8") + assert "SEO" in svg + assert "82" in svg + assert "57" in svg + + +def test_periodic_chart_builder_requires_two_points_for_trends(tmp_path, monkeypatch): + monkeypatch.setenv("OPENCMO_REPORT_ASSET_DIR", str(tmp_path)) + facts = { + "seo_history": [{"scanned_at": "2026-05-01T00:00:00", "score_performance": 0.7}], + "geo_history": [ + {"scanned_at": "2026-05-01T00:00:00", "geo_score": 40}, + {"scanned_at": "2026-05-02T00:00:00", "geo_score": 50}, + ], + "community_history": [], + "citability": [], + "findings": [], + "recommendations": [], + } + charts = build_report_charts("periodic", facts, {"sample_count": 2, "total_data_sources": 8}) + + assert [chart.title for chart in charts] == ["GEO 趋势"] + svg = get_report_asset_path(charts[0].asset_id).read_text(encoding="utf-8") + assert "40" in svg + assert "50" in svg + + 
+def test_report_heading_normalization_and_chart_section_insertion(): + content = "# 总标题\n\n## 1. 执行摘要\n\n正文\n\n#### 深层标题\n\n内容" + + normalized = _normalize_report_headings(content) + assert "####" not in normalized + assert "### 深层标题" in normalized + + processed = _postprocess_human_report_content(normalized, "### 图表\n![图](/api/v1/report-assets/abc.svg)") + assert "## 2. 数据图表速览" in processed + assert processed.count("# 总标题") == 1 + + +def test_simple_markdown_to_html_supports_images(): + html = _simple_markdown_to_html("![关键指标](/api/v1/report-assets/abc.svg)") + + assert '关键指标' in html + assert "
关键指标
" in html + + +@pytest.mark.asyncio +async def test_report_asset_route_serves_svg(tmp_path, monkeypatch): + monkeypatch.setenv("OPENCMO_REPORT_ASSET_DIR", str(tmp_path)) + asset_id = "a" * 32 + (tmp_path / f"{asset_id}.svg").write_text("", encoding="utf-8") + + response = await api_v1_report_asset(asset_id) + + assert isinstance(response, FileResponse) + assert response.media_type == "image/svg+xml" + + +@pytest.mark.asyncio +async def test_report_asset_route_rejects_missing_or_invalid_assets(tmp_path, monkeypatch): + monkeypatch.setenv("OPENCMO_REPORT_ASSET_DIR", str(tmp_path)) + + response = await api_v1_report_asset("../bad") + + assert isinstance(response, JSONResponse) + assert response.status_code == 404 From 34305f912dd40f6f17d892057a047be1d59b854c Mon Sep 17 00:00:00 2001 From: JingWen Fan Date: Sat, 9 May 2026 14:03:49 +0800 Subject: [PATCH 3/4] chore: drop unrelated deploy scripts from chart PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove deploy.sh and scripts/* (BWG-targeted, port 8080 — conflict with current production topology where newyork:8081 is primary). Revert CLAUDE.md text describing GitHub-leads/marketing-site features that belong to a separate change set. Keep only the report-charts work. 
--- CLAUDE.md | 10 +- deploy.sh | 704 -------------------------------------- scripts/README.md | 424 ----------------------- scripts/diagnose.sh | 371 -------------------- scripts/fix_ssl_server.sh | 113 ------ 5 files changed, 2 insertions(+), 1620 deletions(-) delete mode 100755 deploy.sh delete mode 100644 scripts/README.md delete mode 100755 scripts/diagnose.sh delete mode 100755 scripts/fix_ssl_server.sh diff --git a/CLAUDE.md b/CLAUDE.md index bba0545..8dca079 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -31,14 +31,11 @@ OpenCMO is an open-source AI Chief Marketing Officer — a multi-agent system fo - `report_pipeline.py` — Multi-agent deep report pipeline (6 phases): Reflection → Insight Distiller → Outline Planner → Section Writers → Section Grader → Report Synthesizer. Uses `asyncio.Semaphore` to limit concurrent LLM calls (`_MAX_CONCURRENT_LLM_CALLS = 5`) - `llm.py` — Centralized LLM client with per-request key isolation via ContextVar. Solves BYOK concurrency bug. Key resolution: ContextVar → os.environ → DB settings. Includes retry logic with exponential backoff - `background/` — Background task queue system for long-running operations (scans, reports). Tasks have status tracking and progress events -- `agents/github.py` + `tools/github_discovery.py` — GitHub Potential Users (developer outreach): discovers users from seed profiles via social graph, enriches profiles in background (bio, repos, languages, stars), scores by outreach priority, generates personalized messages (email/Twitter/GitHub issue). All outreach queued for approval, never auto-sent -- `services/github_service.py` + `storage/github.py` — GitHub lead management: CRUD for leads, batch scoring, opt-out tracking, enrichment status ### Frontend layers -- `pages/` — Route-level components (Dashboard, SEO, GEO, SERP, Community, Graph, Chat, Approvals, Monitors, GitHubLeadsPage). 
Also includes public marketing pages: `LandingPage.tsx`, `BlogPage.tsx` -- `components/` — Organized by domain: `charts/` (recharts + react-force-graph-3d), `chat/` (SSE streaming), `monitors/`, `auth/`, `layout/`, `dashboard/`, `project/`, `marketing/` (PublicSiteHeader, SectionReveal for landing page) -- `content/marketing.ts` — Centralized content for public site: landing page copy, blog articles, FAQs, navigation items. Supports i18n +- `pages/` — Route-level components (Dashboard, SEO, GEO, SERP, Community, Graph, Chat, Approvals, Monitors) +- `components/` — Organized by domain: `charts/` (recharts + react-force-graph-3d), `chat/` (SSE streaming), `monitors/`, `auth/`, `layout/`, `dashboard/`, `project/` - `hooks/` — TanStack Query hooks per domain (`useProjects`, `useSeoData`, `useGraphData`, etc.). Stale time 30s, retry 1. `useChat` manages local state + SSE via async generator - `api/client.ts` — `apiFetch()` adds Bearer token, dispatches `opencmo:unauthorized` on 401. Domain modules export typed wrappers around `apiJson()` - `i18n/` — React context-based EN/ZH/JA/KO/ES translations @@ -56,8 +53,6 @@ OpenCMO is an open-source AI Chief Marketing Officer — a multi-agent system fo - **Frontend proxies `/api` to `http://127.0.0.1:8080` in dev (vite.config.ts) - **Report generation optimization**: Data aggregation parallelized with `asyncio.gather()`. LLM concurrency limited to 5 simultaneous calls via `asyncio.Semaphore`. Grader threshold at 3.5/5.0 with max 1 retry to balance quality and speed - **BYOK (Bring Your Own Key)**: Per-request API keys isolated via ContextVar in `llm.py`. Never use `os.environ` for request-scoped keys to avoid concurrency bugs -- **Public marketing site**: Landing page and blog served at root `/` for unauthenticated users. Static fallback HTML for crawlers (SEO). Authenticated app at `/app/*`. 
Content centralized in `frontend/src/content/marketing.ts` -- **GitHub Potential Users workflow**: 1) Discover from seed username → 2) Background enrichment (async) → 3) Score leads by priority → 4) Generate personalized outreach → 5) Human approval required before sending ## Commands @@ -109,7 +104,6 @@ Key optional variables — see `.env.example` for full list: - `DATAFORSEO_LOGIN/PASSWORD` — SERP tracking - `OPENCMO_AUTO_PUBLISH=1` + Reddit/Twitter credentials — auto-publishing - `OPENCMO_SMTP_*` + `OPENCMO_REPORT_EMAIL` — email reports -- `GITHUB_TOKEN` — GitHub API access for Potential Users feature (discovery, enrichment, outreach) ## Performance Optimization Guidelines diff --git a/deploy.sh b/deploy.sh deleted file mode 100755 index 2879870..0000000 --- a/deploy.sh +++ /dev/null @@ -1,704 +0,0 @@ -#!/bin/bash - -################################################################################ -# OpenCMO Deployment Script -# Handles Python SSL certificates, dependencies, and deployment to BWG server -################################################################################ - -set -e # Exit on error (disabled in sections with custom error handling) - -# Color codes for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Deployment configuration -REMOTE_HOST="97.64.16.217" -REMOTE_PORT="2222" -REMOTE_USER="root" -REMOTE_PATH="/opt/OpenCMO" -SERVICE_NAME="opencmo" - -################################################################################ -# Logging functions -################################################################################ - -log_info() { - echo -e "${BLUE}[INFO]${NC} $1" -} - -log_success() { - echo -e "${GREEN}[SUCCESS]${NC} $1" -} - -log_warning() { - echo -e "${YELLOW}[WARNING]${NC} $1" -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $1" -} - -log_step() { - echo -e "\n${GREEN}==>${NC} $1" -} - 
-################################################################################ -# Error handling functions -################################################################################ - -handle_ssl_error() { - log_error "SSL 证书验证失败" - echo "可能的原因:" - echo " 1. Python 未安装系统 CA 证书" - echo " 2. certifi 包版本过旧" - echo " 3. 系统时间不正确" - echo "" - echo "尝试修复..." - - # 尝试修复 1: 安装/更新 certifi - log_info "更新 certifi 包..." - if pip install --upgrade certifi pip setuptools 2>/dev/null; then - log_success "certifi 更新成功" - else - log_warning "certifi 更新失败,尝试其他方法" - fi - - # 尝试修复 2: 安装系统 CA 证书到 Python - if [[ "$OSTYPE" == "darwin"* ]]; then - log_info "检测到 macOS,运行 Install Certificates.command..." - PYTHON_VERSION=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2) - CERT_SCRIPT="/Applications/Python ${PYTHON_VERSION}/Install Certificates.command" - if [ -f "$CERT_SCRIPT" ]; then - bash "$CERT_SCRIPT" 2>/dev/null || log_warning "证书安装脚本执行失败" - else - log_warning "未找到 Python 证书安装脚本" - log_info "尝试手动安装证书..." - pip install --upgrade certifi - python3 -c "import certifi; print(certifi.where())" - fi - elif [[ "$OSTYPE" == "linux-gnu"* ]]; then - log_info "检测到 Linux,更新系统 CA 证书..." - if command -v update-ca-certificates &> /dev/null; then - sudo update-ca-certificates 2>/dev/null || log_warning "CA 证书更新失败" - fi - fi - - # 尝试修复 3: 设置环境变量 - log_info "配置 SSL 环境变量..." - export REQUESTS_CA_BUNDLE=$(python3 -c "import certifi; print(certifi.where())" 2>/dev/null || echo "") - export SSL_CERT_FILE=$REQUESTS_CA_BUNDLE - - if [ -n "$REQUESTS_CA_BUNDLE" ]; then - log_success "SSL 证书路径: $REQUESTS_CA_BUNDLE" - else - log_error "无法获取 SSL 证书路径" - return 1 - fi -} - -handle_pip_install_error() { - local exit_code=$1 - log_error "依赖安装失败 (退出码: $exit_code)" - - echo "可能的原因:" - echo " 1. 网络连接问题" - echo " 2. PyPI 镜像源不可用" - echo " 3. 依赖冲突" - echo " 4. 磁盘空间不足" - echo "" - - # 检查磁盘空间 - log_info "检查磁盘空间..." - df -h . | tail -1 - - # 尝试使用国内镜像源 - log_info "尝试使用清华大学 PyPI 镜像源..." 
- if pip install -e ".[all]" -i https://pypi.tuna.tsinghua.edu.cn/simple 2>&1 | tee /tmp/pip_install.log; then - log_success "使用镜像源安装成功" - return 0 - fi - - log_warning "镜像源安装失败,尝试分步安装..." - - # 分步安装核心依赖 - local core_deps=("fastapi" "uvicorn" "aiosqlite" "openai" "anthropic") - for dep in "${core_deps[@]}"; do - log_info "安装 $dep..." - if ! pip install "$dep" 2>/dev/null; then - log_error "无法安装 $dep" - return 1 - fi - done - - log_info "安装项目(跳过可选依赖)..." - pip install -e . 2>&1 | tee -a /tmp/pip_install.log -} - -handle_git_error() { - local exit_code=$1 - log_error "Git 操作失败 (退出码: $exit_code)" - - echo "可能的原因:" - echo " 1. 本地有未提交的更改" - echo " 2. 远程仓库不可达" - echo " 3. 分支冲突" - echo "" - - log_info "检查 Git 状态..." - git status - - echo "" - echo "建议操作:" - echo " 1. 提交或暂存本地更改: git add . && git commit -m 'your message'" - echo " 2. 或者放弃本地更改: git reset --hard HEAD" - echo " 3. 检查远程连接: git remote -v" -} - -handle_ssh_error() { - local exit_code=$1 - log_error "SSH 连接失败 (退出码: $exit_code)" - - echo "可能的原因:" - echo " 1. SSH 密钥未配置" - echo " 2. 服务器不可达" - echo " 3. 端口被防火墙阻止" - echo " 4. 用户权限不足" - echo "" - - log_info "测试 SSH 连接..." - if ssh -p "$REMOTE_PORT" -o ConnectTimeout=10 "$REMOTE_USER@$REMOTE_HOST" "echo 'SSH 连接成功'" 2>&1; then - log_success "SSH 连接正常" - else - log_error "SSH 连接失败" - echo "" - echo "修复步骤:" - echo " 1. 检查 SSH 密钥: ssh-add -l" - echo " 2. 添加密钥: ssh-add ~/.ssh/id_rsa" - echo " 3. 测试连接: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST" - echo " 4. 检查防火墙: telnet $REMOTE_HOST $REMOTE_PORT" - return 1 - fi -} - -handle_frontend_build_error() { - local exit_code=$1 - log_error "前端构建失败 (退出码: $exit_code)" - - echo "可能的原因:" - echo " 1. Node.js 版本不兼容" - echo " 2. 依赖未安装或版本冲突" - echo " 3. 内存不足 (需要 >2GB)" - echo " 4. TypeScript 类型错误" - echo "" - - log_info "检查 Node.js 版本..." - node --version - npm --version - - log_info "检查可用内存..." - if [[ "$OSTYPE" == "darwin"* ]]; then - vm_stat | grep "Pages free" - else - free -h | grep Mem - fi - - echo "" - echo "修复步骤:" - echo " 1. 
清理缓存: cd frontend && rm -rf node_modules dist && npm install" - echo " 2. 增加 Node 内存: export NODE_OPTIONS='--max-old-space-size=4096'" - echo " 3. 检查类型错误: npm run type-check" - echo " 4. 跳过类型检查构建: npm run build -- --mode production" -} - -handle_service_error() { - local exit_code=$1 - log_error "服务启动失败 (退出码: $exit_code)" - - echo "可能的原因:" - echo " 1. 端口 8080 已被占用" - echo " 2. 环境变量未配置" - echo " 3. 数据库文件损坏" - echo " 4. 依赖缺失" - echo "" - - log_info "检查服务状态..." - ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl status $SERVICE_NAME" || true - - log_info "检查服务日志..." - ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "journalctl -u $SERVICE_NAME -n 50 --no-pager" || true - - echo "" - echo "修复步骤:" - echo " 1. 检查端口占用: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'lsof -i:8080'" - echo " 2. 检查配置文件: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'cat $REMOTE_PATH/.env'" - echo " 3. 手动启动测试: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'cd $REMOTE_PATH && opencmo-web'" - echo " 4. 重置数据库: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'rm ~/.opencmo/data.db'" -} - -################################################################################ -# Pre-flight checks -################################################################################ - -preflight_checks() { - log_step "执行部署前检查" - - local has_error=0 - - # Check if we're in a git repository - if ! git rev-parse --git-dir > /dev/null 2>&1; then - log_error "当前目录不是 Git 仓库" - has_error=1 - else - log_success "Git 仓库检查通过" - fi - - # Check if we're on main branch - local current_branch=$(git branch --show-current) - if [ "$current_branch" != "main" ]; then - log_warning "当前分支是 '$current_branch',不是 'main'" - read -p "是否继续部署? (y/N): " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - log_info "部署已取消" - exit 0 - fi - else - log_success "分支检查通过 (main)" - fi - - # Check for uncommitted changes - if ! git diff-index --quiet HEAD --; then - log_warning "存在未提交的更改" - git status --short - read -p "是否继续部署? 
(y/N): " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - log_info "部署已取消" - exit 0 - fi - else - log_success "工作区检查通过 (无未提交更改)" - fi - - # Check Python version - if ! command -v python3 &> /dev/null; then - log_error "未找到 python3" - has_error=1 - else - local python_version=$(python3 --version | cut -d' ' -f2) - log_success "Python 版本: $python_version" - fi - - # Check Node.js version - if ! command -v node &> /dev/null; then - log_error "未找到 node" - has_error=1 - else - local node_version=$(node --version) - log_success "Node.js 版本: $node_version" - fi - - # Check SSH connectivity - log_info "测试 SSH 连接到服务器..." - if ssh -p "$REMOTE_PORT" -o ConnectTimeout=10 "$REMOTE_USER@$REMOTE_HOST" "echo 'SSH OK'" > /dev/null 2>&1; then - log_success "SSH 连接正常" - else - log_error "无法连接到服务器" - handle_ssh_error 1 - has_error=1 - fi - - # Check SSL certificates - log_info "检查 Python SSL 证书..." - if python3 -c "import ssl; import certifi; print(certifi.where())" > /dev/null 2>&1; then - local cert_path=$(python3 -c "import certifi; print(certifi.where())") - log_success "SSL 证书路径: $cert_path" - else - log_warning "SSL 证书检查失败,将在安装时修复" - handle_ssl_error || log_warning "SSL 证书修复失败,继续部署..." - fi - - if [ $has_error -eq 1 ]; then - log_error "部署前检查失败,请修复上述问题后重试" - exit 1 - fi - - log_success "所有部署前检查通过" -} - -################################################################################ -# Fix SSL certificates -################################################################################ - -fix_ssl_certificates() { - log_step "修复 Python SSL 证书" - - set +e # Disable exit on error for this section - - # Update certifi - log_info "更新 certifi 包..." 
- if pip install --upgrade certifi pip setuptools 2>&1 | tee /tmp/certifi_install.log; then - log_success "certifi 更新成功" - else - log_warning "certifi 更新失败" - cat /tmp/certifi_install.log - handle_ssl_error - fi - - # Platform-specific certificate installation - if [[ "$OSTYPE" == "darwin"* ]]; then - log_info "macOS 系统,安装证书到 Python..." - - # Find Python installation - PYTHON_VERSION=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2) - CERT_SCRIPT="/Applications/Python ${PYTHON_VERSION}/Install Certificates.command" - - if [ -f "$CERT_SCRIPT" ]; then - log_info "运行 Python 证书安装脚本..." - bash "$CERT_SCRIPT" 2>&1 | tee /tmp/cert_install.log || log_warning "证书安装脚本执行失败" - else - log_warning "未找到证书安装脚本: $CERT_SCRIPT" - log_info "使用 certifi 作为证书源..." - fi - - elif [[ "$OSTYPE" == "linux-gnu"* ]]; then - log_info "Linux 系统,更新系统证书..." - if command -v update-ca-certificates &> /dev/null; then - sudo update-ca-certificates 2>&1 | tee /tmp/cert_update.log || log_warning "证书更新失败" - fi - fi - - # Set environment variables - log_info "配置 SSL 环境变量..." - CERT_PATH=$(python3 -c "import certifi; print(certifi.where())" 2>/dev/null) - - if [ -n "$CERT_PATH" ] && [ -f "$CERT_PATH" ]; then - export REQUESTS_CA_BUNDLE="$CERT_PATH" - export SSL_CERT_FILE="$CERT_PATH" - export CURL_CA_BUNDLE="$CERT_PATH" - - log_success "SSL 证书配置完成" - log_info "证书路径: $CERT_PATH" - - # Add to shell profile for persistence - if [ -f ~/.zshrc ]; then - if ! grep -q "REQUESTS_CA_BUNDLE" ~/.zshrc; then - echo "" >> ~/.zshrc - echo "# OpenCMO SSL certificates" >> ~/.zshrc - echo "export REQUESTS_CA_BUNDLE=\"$CERT_PATH\"" >> ~/.zshrc - echo "export SSL_CERT_FILE=\"$CERT_PATH\"" >> ~/.zshrc - log_info "已添加环境变量到 ~/.zshrc" - fi - fi - - else - log_error "无法获取有效的证书路径" - return 1 - fi - - # Test SSL connection - log_info "测试 SSL 连接..." - if python3 -c "import urllib.request; urllib.request.urlopen('https://pypi.org')" 2>/dev/null; then - log_success "SSL 连接测试通过" - else - log_warning "SSL 连接测试失败,但继续部署..." 
- fi - - set -e # Re-enable exit on error -} - -################################################################################ -# Install dependencies -################################################################################ - -install_dependencies() { - log_step "安装 Python 依赖" - - set +e # Disable exit on error - - log_info "安装项目依赖 (包含所有可选依赖)..." - if pip install -e ".[all]" 2>&1 | tee /tmp/pip_install.log; then - log_success "依赖安装成功" - else - local exit_code=$? - log_error "依赖安装失败" - cat /tmp/pip_install.log - handle_pip_install_error $exit_code - - # Check if installation succeeded after retry - if [ $? -ne 0 ]; then - log_error "依赖安装失败,无法继续部署" - exit 1 - fi - fi - - # Initialize crawl4ai - log_info "初始化 crawl4ai..." - if command -v crawl4ai-setup &> /dev/null; then - if crawl4ai-setup 2>&1 | tee /tmp/crawl4ai_setup.log; then - log_success "crawl4ai 初始化成功" - else - log_warning "crawl4ai 初始化失败,但继续部署..." - cat /tmp/crawl4ai_setup.log - fi - else - log_warning "未找到 crawl4ai-setup 命令,跳过初始化" - fi - - set -e # Re-enable exit on error -} - -################################################################################ -# Build frontend -################################################################################ - -build_frontend() { - log_step "构建前端" - - if [ ! -d "frontend" ]; then - log_error "未找到 frontend 目录" - exit 1 - fi - - cd frontend - - set +e # Disable exit on error - - # Install npm dependencies - log_info "安装 npm 依赖..." - if npm install 2>&1 | tee /tmp/npm_install.log; then - log_success "npm 依赖安装成功" - else - log_error "npm 依赖安装失败" - cat /tmp/npm_install.log - cd .. - exit 1 - fi - - # Build frontend - log_info "构建前端 (这可能需要几分钟)..." - - # Increase Node.js memory limit - export NODE_OPTIONS="--max-old-space-size=4096" - - if npm run build 2>&1 | tee /tmp/npm_build.log; then - log_success "前端构建成功" - else - local exit_code=$? - log_error "前端构建失败" - cat /tmp/npm_build.log - handle_frontend_build_error $exit_code - cd .. 
- exit 1 - fi - - # Verify build output - if [ ! -d "dist" ] || [ -z "$(ls -A dist)" ]; then - log_error "构建输出目录为空" - cd .. - exit 1 - fi - - log_success "前端构建完成,输出目录: frontend/dist" - - cd .. - - set -e # Re-enable exit on error -} - -################################################################################ -# Deploy to server -################################################################################ - -deploy_to_server() { - log_step "部署到服务器" - - set +e # Disable exit on error - - # Push code to git - log_info "推送代码到 Git 仓库..." - if git push origin main 2>&1 | tee /tmp/git_push.log; then - log_success "代码推送成功" - else - local exit_code=$? - log_warning "代码推送失败" - cat /tmp/git_push.log - handle_git_error $exit_code - - read -p "是否继续部署? (y/N): " -n 1 -r - echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then - log_info "部署已取消" - exit 0 - fi - fi - - # Deploy backend - log_info "部署后端代码到服务器..." - - local deploy_cmd=" - set -e - cd $REMOTE_PATH || exit 1 - echo '拉取最新代码...' - git pull origin main || exit 1 - echo '安装依赖...' - pip install -e '.[all]' -q || exit 1 - echo '重启服务...' - systemctl restart $SERVICE_NAME || exit 1 - sleep 2 - echo '检查服务状态...' - systemctl is-active $SERVICE_NAME || exit 1 - " - - if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "$deploy_cmd" 2>&1 | tee /tmp/deploy_backend.log; then - log_success "后端部署成功" - else - local exit_code=$? - log_error "后端部署失败" - cat /tmp/deploy_backend.log - handle_service_error $exit_code - exit 1 - fi - - # Deploy frontend - log_info "部署前端静态文件到服务器..." 
- - if rsync -avz --delete frontend/dist/ "$REMOTE_USER@$REMOTE_HOST:$REMOTE_PATH/frontend/dist/" -e "ssh -p $REMOTE_PORT" 2>&1 | tee /tmp/deploy_frontend.log; then - log_success "前端部署成功" - else - log_error "前端部署失败" - cat /tmp/deploy_frontend.log - exit 1 - fi - - set -e # Re-enable exit on error -} - -################################################################################ -# Verify deployment -################################################################################ - -verify_deployment() { - log_step "验证部署" - - set +e # Disable exit on error - - # Check service status - log_info "检查服务状态..." - if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl is-active $SERVICE_NAME" > /dev/null 2>&1; then - log_success "服务运行正常" - else - log_error "服务未运行" - ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl status $SERVICE_NAME" - exit 1 - fi - - # Check HTTP endpoint - log_info "检查 HTTP 端点..." - local health_check=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8080/api/v1/health" 2>/dev/null) - - if [ "$health_check" = "200" ]; then - log_success "健康检查通过 (HTTP 200)" - else - log_warning "健康检查返回: HTTP $health_check" - fi - - # Show recent logs - log_info "最近的服务日志:" - ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "journalctl -u $SERVICE_NAME -n 20 --no-pager" - - set -e # Re-enable exit on error - - log_success "部署验证完成" -} - -################################################################################ -# Main deployment flow -################################################################################ - -main() { - echo "================================" - echo "OpenCMO 部署脚本" - echo "================================" - echo "" - - # Parse command line arguments - local skip_checks=0 - local skip_frontend=0 - local skip_backend=0 - - while [[ $# -gt 0 ]]; do - case $1 in - --skip-checks) - skip_checks=1 - shift - ;; - --skip-frontend) - skip_frontend=1 - shift - ;; - 
--skip-backend) - skip_backend=1 - shift - ;; - --help) - echo "用法: $0 [选项]" - echo "" - echo "选项:" - echo " --skip-checks 跳过部署前检查" - echo " --skip-frontend 跳过前端构建" - echo " --skip-backend 跳过后端部署" - echo " --help 显示此帮助信息" - exit 0 - ;; - *) - log_error "未知选项: $1" - echo "使用 --help 查看帮助" - exit 1 - ;; - esac - done - - # Run deployment steps - if [ $skip_checks -eq 0 ]; then - preflight_checks - else - log_warning "跳过部署前检查" - fi - - fix_ssl_certificates - - if [ $skip_backend -eq 0 ]; then - install_dependencies - else - log_warning "跳过后端依赖安装" - fi - - if [ $skip_frontend -eq 0 ]; then - build_frontend - else - log_warning "跳过前端构建" - fi - - deploy_to_server - verify_deployment - - echo "" - log_success "=========================================" - log_success "部署完成!" - log_success "=========================================" - echo "" - echo "访问地址: https://aidcmo.com" - echo "" - echo "有用的命令:" - echo " 查看日志: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'journalctl -u $SERVICE_NAME -f'" - echo " 重启服务: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl restart $SERVICE_NAME'" - echo " 检查状态: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl status $SERVICE_NAME'" - echo "" -} - -# Run main function -main "$@" diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index 04880a9..0000000 --- a/scripts/README.md +++ /dev/null @@ -1,424 +0,0 @@ -# OpenCMO 部署脚本使用指南 - -本目录包含 OpenCMO 项目的部署和诊断脚本,重点解决 Python SSL 证书问题。 - -## 脚本列表 - -### 1. `deploy.sh` - 主部署脚本 - -完整的自动化部署脚本,包含详细的错误处理和修复流程。 - -**功能:** -- 部署前检查(Git、Python、Node.js、SSH 连接) -- 自动修复 Python SSL 证书问题 -- 安装 Python 依赖 -- 构建前端 -- 部署到 BWG 服务器 -- 部署后验证 - -**使用方法:** - -```bash -# 完整部署 -./deploy.sh - -# 跳过部署前检查 -./deploy.sh --skip-checks - -# 仅部署后端(跳过前端构建) -./deploy.sh --skip-frontend - -# 仅部署前端(跳过后端) -./deploy.sh --skip-backend - -# 查看帮助 -./deploy.sh --help -``` - -**部署流程:** - -1. **部署前检查** - - 验证 Git 仓库状态 - - 检查 Python/Node.js 版本 - - 测试 SSH 连接 - - 检查 SSL 证书配置 - -2. 
**修复 SSL 证书** - - 更新 certifi 包 - - macOS: 运行 Python 证书安装脚本 - - Linux: 更新系统 CA 证书 - - 设置环境变量(REQUESTS_CA_BUNDLE, SSL_CERT_FILE) - - 测试 SSL 连接 - -3. **安装依赖** - - 安装 Python 包(包含所有可选依赖) - - 初始化 crawl4ai - - 失败时自动尝试国内镜像源 - -4. **构建前端** - - 安装 npm 依赖 - - 构建生产版本(增加 Node.js 内存限制) - - 验证构建输出 - -5. **部署到服务器** - - 推送代码到 Git - - SSH 到服务器拉取最新代码 - - 安装服务器端依赖 - - 重启 systemd 服务 - - rsync 前端静态文件 - -6. **验证部署** - - 检查服务状态 - - HTTP 健康检查 - - 显示最近日志 - -### 2. `scripts/fix_ssl_server.sh` - 服务器端 SSL 修复 - -在 BWG 服务器上运行,修复 Python SSL 证书问题。 - -**使用方法:** - -```bash -# 上传并运行 -scp -P 2222 scripts/fix_ssl_server.sh root@97.64.16.217:/tmp/ -ssh -p 2222 root@97.64.16.217 'bash /tmp/fix_ssl_server.sh' -``` - -**功能:** -- 更新系统 CA 证书 -- 更新 Python certifi 包 -- 配置 systemd 服务环境变量 -- 添加环境变量到 ~/.bashrc -- 测试 SSL 连接 - -### 3. `scripts/diagnose.sh` - 诊断工具 - -快速诊断部署问题,提供详细的系统状态和修复建议。 - -**使用方法:** - -```bash -# 完整诊断 -./scripts/diagnose.sh - -# 仅诊断本地环境 -./scripts/diagnose.sh local - -# 仅诊断远程服务器 -./scripts/diagnose.sh remote - -# 仅诊断网络连接 -./scripts/diagnose.sh network - -# 显示常见问题修复方法 -./scripts/diagnose.sh fixes -``` - -**诊断内容:** - -**本地环境:** -- Git 仓库状态 -- Python 版本和 SSL 证书 -- Node.js/npm 版本 -- 前端构建状态 -- 磁盘空间和内存 - -**远程服务器:** -- SSH 连接状态 -- 服务运行状态 -- 健康检查端点 -- 端口监听状态 -- 代码版本 -- SSL 证书配置 -- 磁盘和内存使用 -- 最近的服务日志 -- Nginx 状态 -- HTTPS 证书过期时间 - -**网络连接:** -- 公网访问测试 -- API 健康检查 -- DNS 解析 -- Ping 测试 - -## 常见问题和解决方案 - -### 1. 
SSL 证书验证失败 - -**错误信息:** -``` -SSL: CERTIFICATE_VERIFY_FAILED -``` - -**原因:** -- Python 未安装系统 CA 证书 -- certifi 包版本过旧 -- 系统时间不正确 - -**解决方案:** - -本地修复: -```bash -# 方法 1: 运行部署脚本(自动修复) -./deploy.sh - -# 方法 2: 手动修复 -pip install --upgrade certifi pip setuptools - -# macOS 特定 -/Applications/Python\ 3.x/Install\ Certificates.command - -# 设置环境变量 -export REQUESTS_CA_BUNDLE=$(python3 -c "import certifi; print(certifi.where())") -export SSL_CERT_FILE=$REQUESTS_CA_BUNDLE -``` - -服务器修复: -```bash -scp -P 2222 scripts/fix_ssl_server.sh root@97.64.16.217:/tmp/ -ssh -p 2222 root@97.64.16.217 'bash /tmp/fix_ssl_server.sh' -ssh -p 2222 root@97.64.16.217 'systemctl restart opencmo' -``` - -### 2. 依赖安装失败 - -**错误信息:** -``` -ERROR: Could not find a version that satisfies the requirement -``` - -**解决方案:** - -```bash -# 使用国内镜像源 -pip install -e ".[all]" -i https://pypi.tuna.tsinghua.edu.cn/simple - -# 检查磁盘空间 -df -h - -# 清理 pip 缓存 -pip cache purge - -# 分步安装 -pip install fastapi uvicorn aiosqlite openai anthropic -pip install -e . -``` - -### 3. 前端构建内存不足 - -**错误信息:** -``` -FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory -``` - -**解决方案:** - -```bash -# 增加 Node.js 内存限制 -export NODE_OPTIONS='--max-old-space-size=4096' -cd frontend && npm run build - -# 清理后重建 -cd frontend -rm -rf node_modules dist -npm install -npm run build -``` - -### 4. SSH 连接失败 - -**错误信息:** -``` -Permission denied (publickey) -``` - -**解决方案:** - -```bash -# 检查 SSH 密钥 -ssh-add -l - -# 添加密钥 -ssh-add ~/.ssh/id_rsa - -# 测试连接 -ssh -p 2222 root@97.64.16.217 - -# 使用密码登录(如果密钥失败) -ssh -p 2222 -o PreferredAuthentications=password root@97.64.16.217 -``` - -### 5. 
服务启动失败 - -**错误信息:** -``` -Job for opencmo.service failed -``` - -**解决方案:** - -```bash -# 查看详细日志 -ssh -p 2222 root@97.64.16.217 'journalctl -u opencmo -n 100 --no-pager' - -# 检查端口占用 -ssh -p 2222 root@97.64.16.217 'lsof -i:8080' - -# 手动启动测试 -ssh -p 2222 root@97.64.16.217 'cd /opt/OpenCMO && opencmo-web' - -# 检查配置文件 -ssh -p 2222 root@97.64.16.217 'cat /opt/OpenCMO/.env' - -# 重置数据库(谨慎使用) -ssh -p 2222 root@97.64.16.217 'cp ~/.opencmo/data.db ~/.opencmo/data.db.backup' -ssh -p 2222 root@97.64.16.217 'rm ~/.opencmo/data.db' -ssh -p 2222 root@97.64.16.217 'systemctl restart opencmo' -``` - -### 6. Git 推送失败 - -**错误信息:** -``` -error: failed to push some refs -``` - -**解决方案:** - -```bash -# 查看状态 -git status - -# 提交本地更改 -git add . -git commit -m "your message" -git push - -# 拉取远程更改 -git pull --rebase origin main -git push - -# 强制推送(谨慎使用) -git push -f origin main -``` - -### 7. Nginx 502 错误 - -**原因:** -- 后端服务未运行 -- 端口配置错误 -- 其他 server block 冲突 - -**解决方案:** - -```bash -# 检查服务状态 -ssh -p 2222 root@97.64.16.217 'systemctl status opencmo' - -# 检查 Nginx 配置 -ssh -p 2222 root@97.64.16.217 'nginx -t' - -# 查看 Nginx 错误日志 -ssh -p 2222 root@97.64.16.217 'tail -50 /var/log/nginx/error.log' - -# 检查冲突的 server block -ssh -p 2222 root@97.64.16.217 'grep -r "server_name.*aidcmo.com" /etc/nginx/conf.d/' - -# 重启 Nginx -ssh -p 2222 root@97.64.16.217 'systemctl restart nginx' -``` - -## 部署检查清单 - -部署前确认: - -- [ ] 本地代码已提交到 Git -- [ ] 所有测试通过 -- [ ] .env 文件配置正确 -- [ ] Python 版本 >= 3.9 -- [ ] Node.js 版本 >= 18 -- [ ] SSH 密钥已配置 -- [ ] 磁盘空间充足(本地 >5GB,服务器 >2GB) - -部署后验证: - -- [ ] 服务状态正常:`systemctl status opencmo` -- [ ] 健康检查通过:`curl http://127.0.0.1:8080/api/v1/health` -- [ ] 公网可访问:`curl https://aidcmo.com` -- [ ] 前端加载正常 -- [ ] 登录功能正常 -- [ ] 查看日志无错误:`journalctl -u opencmo -n 50` - -## 有用的命令 - -```bash -# 查看实时日志 -ssh -p 2222 root@97.64.16.217 'journalctl -u opencmo -f' - -# 重启服务 -ssh -p 2222 root@97.64.16.217 'systemctl restart opencmo' - -# 检查服务状态 -ssh -p 2222 root@97.64.16.217 'systemctl status opencmo' - -# 
查看端口占用 -ssh -p 2222 root@97.64.16.217 'lsof -i:8080' - -# 查看磁盘使用 -ssh -p 2222 root@97.64.16.217 'df -h' - -# 查看内存使用 -ssh -p 2222 root@97.64.16.217 'free -h' - -# 备份数据库 -ssh -p 2222 root@97.64.16.217 'cp ~/.opencmo/data.db ~/.opencmo/data.db.$(date +%Y%m%d_%H%M%S)' - -# 查看 Nginx 日志 -ssh -p 2222 root@97.64.16.217 'tail -f /var/log/nginx/access.log' -ssh -p 2222 root@97.64.16.217 'tail -f /var/log/nginx/error.log' - -# 测试 Nginx 配置 -ssh -p 2222 root@97.64.16.217 'nginx -t' - -# 重载 Nginx -ssh -p 2222 root@97.64.16.217 'systemctl reload nginx' -``` - -## 紧急回滚 - -如果部署后出现严重问题: - -```bash -# 1. 回滚代码 -ssh -p 2222 root@97.64.16.217 " - cd /opt/OpenCMO && - git log --oneline -5 && - git reset --hard && - systemctl restart opencmo -" - -# 2. 恢复数据库备份 -ssh -p 2222 root@97.64.16.217 " - ls -lh ~/.opencmo/data.db* && - cp ~/.opencmo/data.db.backup ~/.opencmo/data.db && - systemctl restart opencmo -" - -# 3. 恢复前端 -rsync -avz --delete frontend/dist.backup/ root@97.64.16.217:/opt/OpenCMO/frontend/dist/ -e "ssh -p 2222" -``` - -## 技术支持 - -如果遇到脚本无法解决的问题: - -1. 运行诊断工具:`./scripts/diagnose.sh` -2. 查看详细日志:`ssh -p 2222 root@97.64.16.217 'journalctl -u opencmo -n 200'` -3. 检查 GitHub Issues:https://github.com/anthropics/opencmo/issues -4. 
联系开发团队 - -## 脚本维护 - -这些脚本会随着项目演进而更新。如果发现问题或有改进建议,请提交 PR 或 Issue。 - -**最后更新:** 2026-04-12 diff --git a/scripts/diagnose.sh b/scripts/diagnose.sh deleted file mode 100755 index d676f1c..0000000 --- a/scripts/diagnose.sh +++ /dev/null @@ -1,371 +0,0 @@ -#!/bin/bash - -################################################################################ -# OpenCMO Deployment Diagnostics Script -# Quick health check for deployment issues -################################################################################ - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' - -log_info() { - echo -e "${BLUE}[INFO]${NC} $1" -} - -log_success() { - echo -e "${GREEN}[✓]${NC} $1" -} - -log_error() { - echo -e "${RED}[✗]${NC} $1" -} - -log_warning() { - echo -e "${YELLOW}[!]${NC} $1" -} - -log_section() { - echo "" - echo -e "${GREEN}========================================${NC}" - echo -e "${GREEN}$1${NC}" - echo -e "${GREEN}========================================${NC}" -} - -REMOTE_HOST="97.64.16.217" -REMOTE_PORT="2222" -REMOTE_USER="root" -SERVICE_NAME="opencmo" - -################################################################################ -# Local diagnostics -################################################################################ - -diagnose_local() { - log_section "本地环境诊断" - - # Git status - echo -n "Git 仓库: " - if git rev-parse --git-dir > /dev/null 2>&1; then - log_success "正常" - echo " 分支: $(git branch --show-current)" - echo " 最新提交: $(git log -1 --oneline)" - - if git diff-index --quiet HEAD --; then - log_success "工作区干净" - else - log_warning "存在未提交的更改" - git status --short | head -10 - fi - else - log_error "不是 Git 仓库" - fi - - # Python - echo -n "Python: " - if command -v python3 &> /dev/null; then - local version=$(python3 --version) - log_success "$version" - else - log_error "未安装" - fi - - # Python SSL certificates - echo -n "Python SSL 证书: " - if python3 -c "import ssl; import certifi; print(certifi.where())" > 
/dev/null 2>&1; then - local cert_path=$(python3 -c "import certifi; print(certifi.where())") - log_success "$cert_path" - - # Test SSL connection - echo -n "SSL 连接测试: " - if python3 -c "import urllib.request; urllib.request.urlopen('https://pypi.org', timeout=5)" 2>/dev/null; then - log_success "通过" - else - log_error "失败" - echo " 修复: ./deploy.sh 会自动修复此问题" - fi - else - log_error "未配置" - echo " 修复: pip install --upgrade certifi" - fi - - # Node.js - echo -n "Node.js: " - if command -v node &> /dev/null; then - local version=$(node --version) - log_success "$version" - else - log_error "未安装" - fi - - # npm - echo -n "npm: " - if command -v npm &> /dev/null; then - local version=$(npm --version) - log_success "$version" - else - log_error "未安装" - fi - - # Frontend build - if [ -d "frontend/dist" ]; then - echo -n "前端构建: " - local file_count=$(find frontend/dist -type f | wc -l | tr -d ' ') - log_success "存在 ($file_count 个文件)" - else - echo -n "前端构建: " - log_warning "不存在 (需要运行 cd frontend && npm run build)" - fi - - # Disk space - echo -n "磁盘空间: " - local available=$(df -h . 
| tail -1 | awk '{print $4}') - log_info "$available 可用" - - # Memory - echo -n "可用内存: " - if [[ "$OSTYPE" == "darwin"* ]]; then - local free_mem=$(vm_stat | grep "Pages free" | awk '{print $3}' | sed 's/\.//') - local free_gb=$((free_mem * 4096 / 1024 / 1024 / 1024)) - log_info "${free_gb}GB" - else - local free_mem=$(free -h | grep Mem | awk '{print $4}') - log_info "$free_mem" - fi -} - -################################################################################ -# Remote diagnostics -################################################################################ - -diagnose_remote() { - log_section "远程服务器诊断" - - # SSH connectivity - echo -n "SSH 连接: " - if ssh -p "$REMOTE_PORT" -o ConnectTimeout=5 "$REMOTE_USER@$REMOTE_HOST" "echo 'OK'" > /dev/null 2>&1; then - log_success "正常" - else - log_error "失败" - echo " 检查: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST" - return 1 - fi - - # Server info - log_info "服务器信息:" - ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" " - echo ' 操作系统: '$(cat /etc/os-release | grep PRETTY_NAME | cut -d'\"' -f2) - echo ' 内核: '$(uname -r) - echo ' 运行时间: '$(uptime -p) - " 2>/dev/null - - # Service status - echo -n "服务状态: " - if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl is-active $SERVICE_NAME" > /dev/null 2>&1; then - log_success "运行中" - else - log_error "未运行" - echo " 查看详情: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl status $SERVICE_NAME'" - fi - - # Service health - echo -n "健康检查: " - local health_code=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8080/api/v1/health" 2>/dev/null) - if [ "$health_code" = "200" ]; then - log_success "HTTP $health_code" - else - log_error "HTTP $health_code" - fi - - # Port listening - echo -n "端口 8080: " - if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "lsof -i:8080" > /dev/null 2>&1; then - log_success "监听中" - else - log_error "未监听" - fi - - # Code version - echo -n "代码版本: " - local 
remote_commit=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "cd /opt/OpenCMO && git log -1 --oneline" 2>/dev/null) - if [ -n "$remote_commit" ]; then - log_info "$remote_commit" - else - log_error "无法获取" - fi - - # Python SSL on server - echo -n "服务器 SSL 证书: " - local server_cert=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "python3 -c 'import certifi; print(certifi.where())'" 2>/dev/null) - if [ -n "$server_cert" ]; then - log_success "$server_cert" - else - log_error "未配置" - echo " 修复: scp -P $REMOTE_PORT scripts/fix_ssl_server.sh $REMOTE_USER@$REMOTE_HOST:/tmp/ && ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'bash /tmp/fix_ssl_server.sh'" - fi - - # Disk space on server - echo -n "服务器磁盘: " - ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "df -h / | tail -1 | awk '{print \"使用 \" \$3 \" / \" \$2 \" (\" \$5 \")\"}'" 2>/dev/null - - # Memory on server - echo -n "服务器内存: " - ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "free -h | grep Mem | awk '{print \"使用 \" \$3 \" / \" \$2}'" 2>/dev/null - - # Recent logs - log_info "最近的服务日志 (最后 10 行):" - ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "journalctl -u $SERVICE_NAME -n 10 --no-pager" 2>/dev/null | sed 's/^/ /' - - # Nginx status - echo -n "Nginx 状态: " - if ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "systemctl is-active nginx" > /dev/null 2>&1; then - log_success "运行中" - else - log_error "未运行" - fi - - # SSL certificate expiry - echo -n "HTTPS 证书: " - local cert_expiry=$(ssh -p "$REMOTE_PORT" "$REMOTE_USER@$REMOTE_HOST" "echo | openssl s_client -servername aidcmo.com -connect aidcmo.com:443 2>/dev/null | openssl x509 -noout -dates | grep notAfter | cut -d= -f2" 2>/dev/null) - if [ -n "$cert_expiry" ]; then - log_info "过期时间: $cert_expiry" - else - log_warning "无法获取" - fi -} - -################################################################################ -# Network diagnostics -################################################################################ - -diagnose_network() { - 
log_section "网络诊断" - - # Public site accessibility - echo -n "公网访问 (https://aidcmo.com): " - local http_code=$(curl -s -o /dev/null -w '%{http_code}' -m 10 https://aidcmo.com 2>/dev/null) - if [ "$http_code" = "200" ]; then - log_success "HTTP $http_code" - else - log_error "HTTP $http_code" - fi - - # API health endpoint - echo -n "API 健康检查: " - local api_code=$(curl -s -o /dev/null -w '%{http_code}' -m 10 https://aidcmo.com/api/v1/health 2>/dev/null) - if [ "$api_code" = "200" ]; then - log_success "HTTP $api_code" - else - log_error "HTTP $api_code" - fi - - # DNS resolution - echo -n "DNS 解析: " - local resolved_ip=$(dig +short aidcmo.com | tail -1) - if [ "$resolved_ip" = "$REMOTE_HOST" ]; then - log_success "$resolved_ip" - else - log_warning "解析为 $resolved_ip (期望 $REMOTE_HOST)" - fi - - # Ping test - echo -n "Ping 测试: " - if ping -c 1 -W 2 "$REMOTE_HOST" > /dev/null 2>&1; then - log_success "可达" - else - log_error "不可达" - fi -} - -################################################################################ -# Common issues and fixes -################################################################################ - -show_common_fixes() { - log_section "常见问题修复" - - echo "1. SSL 证书问题" - echo " 本地修复: ./deploy.sh 会自动修复" - echo " 服务器修复: scp -P $REMOTE_PORT scripts/fix_ssl_server.sh $REMOTE_USER@$REMOTE_HOST:/tmp/ && ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'bash /tmp/fix_ssl_server.sh'" - echo "" - - echo "2. 服务未运行" - echo " 重启服务: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl restart $SERVICE_NAME'" - echo " 查看日志: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'journalctl -u $SERVICE_NAME -f'" - echo "" - - echo "3. 前端构建失败" - echo " 清理重建: cd frontend && rm -rf node_modules dist && npm install && npm run build" - echo " 增加内存: export NODE_OPTIONS='--max-old-space-size=4096' && npm run build" - echo "" - - echo "4. 
端口被占用" - echo " 查看占用: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'lsof -i:8080'" - echo " 杀死进程: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'kill -9 \$(lsof -t -i:8080)'" - echo "" - - echo "5. Git 推送失败" - echo " 查看状态: git status" - echo " 提交更改: git add . && git commit -m 'your message' && git push" - echo " 强制推送: git push -f origin main (谨慎使用)" - echo "" - - echo "6. 数据库问题" - echo " 备份数据库: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'cp ~/.opencmo/data.db ~/.opencmo/data.db.backup'" - echo " 重置数据库: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'rm ~/.opencmo/data.db && systemctl restart $SERVICE_NAME'" - echo "" - - echo "7. Nginx 配置问题" - echo " 测试配置: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'nginx -t'" - echo " 重载配置: ssh -p $REMOTE_PORT $REMOTE_USER@$REMOTE_HOST 'systemctl reload nginx'" - echo "" -} - -################################################################################ -# Main -################################################################################ - -main() { - echo "================================" - echo "OpenCMO 部署诊断工具" - echo "================================" - - case "${1:-all}" in - local) - diagnose_local - ;; - remote) - diagnose_remote - ;; - network) - diagnose_network - ;; - fixes) - show_common_fixes - ;; - all) - diagnose_local - diagnose_remote - diagnose_network - show_common_fixes - ;; - *) - echo "用法: $0 [local|remote|network|fixes|all]" - echo "" - echo "选项:" - echo " local - 仅诊断本地环境" - echo " remote - 仅诊断远程服务器" - echo " network - 仅诊断网络连接" - echo " fixes - 显示常见问题修复方法" - echo " all - 完整诊断 (默认)" - exit 1 - ;; - esac - - echo "" - log_success "诊断完成" -} - -main "$@" diff --git a/scripts/fix_ssl_server.sh b/scripts/fix_ssl_server.sh deleted file mode 100755 index b43b538..0000000 --- a/scripts/fix_ssl_server.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/bin/bash - -################################################################################ -# Server-side SSL Certificate Fix Script -# Run this on the BWG server to 
fix Python SSL certificate issues -################################################################################ - -set -e - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' - -log_info() { - echo -e "${BLUE}[INFO]${NC} $1" -} - -log_success() { - echo -e "${GREEN}[SUCCESS]${NC} $1" -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $1" -} - -log_step() { - echo -e "\n${GREEN}==>${NC} $1" -} - -log_step "修复服务器 Python SSL 证书" - -# Update system CA certificates -log_info "更新系统 CA 证书..." -if command -v update-ca-certificates &> /dev/null; then - update-ca-certificates 2>&1 || log_error "CA 证书更新失败" - log_success "系统 CA 证书已更新" -fi - -# Update certifi package -log_info "更新 Python certifi 包..." -pip install --upgrade certifi pip setuptools 2>&1 || log_error "certifi 更新失败" -log_success "certifi 已更新" - -# Get certificate path -CERT_PATH=$(python3 -c "import certifi; print(certifi.where())" 2>/dev/null) - -if [ -n "$CERT_PATH" ] && [ -f "$CERT_PATH" ]; then - log_success "证书路径: $CERT_PATH" - - # Set environment variables in systemd service - log_info "配置 systemd 服务环境变量..." - - SERVICE_FILE="/etc/systemd/system/opencmo.service" - - if [ -f "$SERVICE_FILE" ]; then - # Backup original service file - cp "$SERVICE_FILE" "${SERVICE_FILE}.backup" - - # Check if Environment variables already exist - if grep -q "REQUESTS_CA_BUNDLE" "$SERVICE_FILE"; then - log_info "环境变量已存在,更新中..." - sed -i "s|Environment=\"REQUESTS_CA_BUNDLE=.*\"|Environment=\"REQUESTS_CA_BUNDLE=$CERT_PATH\"|g" "$SERVICE_FILE" - sed -i "s|Environment=\"SSL_CERT_FILE=.*\"|Environment=\"SSL_CERT_FILE=$CERT_PATH\"|g" "$SERVICE_FILE" - else - log_info "添加环境变量到服务文件..." - # Add Environment variables after [Service] section - sed -i "/\[Service\]/a Environment=\"REQUESTS_CA_BUNDLE=$CERT_PATH\"\nEnvironment=\"SSL_CERT_FILE=$CERT_PATH\"" "$SERVICE_FILE" - fi - - log_success "服务文件已更新" - - # Reload systemd - log_info "重载 systemd 配置..." 
- systemctl daemon-reload - log_success "systemd 配置已重载" - - else - log_error "未找到服务文件: $SERVICE_FILE" - fi - - # Add to .bashrc for interactive sessions - if [ -f ~/.bashrc ]; then - if ! grep -q "REQUESTS_CA_BUNDLE" ~/.bashrc; then - echo "" >> ~/.bashrc - echo "# Python SSL certificates" >> ~/.bashrc - echo "export REQUESTS_CA_BUNDLE=\"$CERT_PATH\"" >> ~/.bashrc - echo "export SSL_CERT_FILE=\"$CERT_PATH\"" >> ~/.bashrc - log_success "环境变量已添加到 ~/.bashrc" - fi - fi - -else - log_error "无法获取证书路径" - exit 1 -fi - -# Test SSL connection -log_info "测试 SSL 连接..." -if python3 -c "import urllib.request; urllib.request.urlopen('https://pypi.org', timeout=10)" 2>/dev/null; then - log_success "SSL 连接测试通过" -else - log_error "SSL 连接测试失败" - exit 1 -fi - -log_success "SSL 证书修复完成" -echo "" -echo "下一步:" -echo " 1. 重启服务: systemctl restart opencmo" -echo " 2. 检查状态: systemctl status opencmo" -echo " 3. 查看日志: journalctl -u opencmo -f" From c1121e09d174bb7e437d1a18e3231ac95a698bc1 Mon Sep 17 00:00:00 2001 From: JingWen Fan Date: Sat, 9 May 2026 15:04:14 +0800 Subject: [PATCH 4/4] fix: harden chart asset rendering, dedup, cleanup, and tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Restrict chart rendering to /api/v1/report-assets/{32-hex}.svg whitelist; non-matching markdown images fall back to escaped paragraph. - Detect existing chart references by asset URL instead of literal heading text (LLM produces variants that bypassed the old "## 数据图表速览" check and caused duplicate chart sections). - Delete generated SVG assets when human-report generation fails after the fallback path, preventing orphan files in ~/.opencmo/report_assets/. - Tests now exercise the asset endpoint via TestClient (real ASGI path), lock the URL whitelist policy with rejection cases, cover the new asset-URL dedup, and verify the cleanup helper. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/opencmo/report_charts.py | 13 ++++++- src/opencmo/reports.py | 29 +++++++++++---- tests/test_report_charts.py | 68 +++++++++++++++++++++++++++--------- 3 files changed, 87 insertions(+), 23 deletions(-) diff --git a/src/opencmo/report_charts.py b/src/opencmo/report_charts.py index 309b155..d72b7af 100644 --- a/src/opencmo/report_charts.py +++ b/src/opencmo/report_charts.py @@ -8,7 +8,7 @@ import uuid from dataclasses import asdict, dataclass from pathlib import Path -from typing import Any +from typing import Any, Iterable @dataclass(frozen=True) @@ -43,6 +43,17 @@ def get_report_asset_path(asset_id: str) -> Path | None: return get_report_asset_dir() / f"{asset_id}.svg" +def delete_chart_assets(asset_ids: Iterable[str]) -> None: + for asset_id in asset_ids: + asset_path = get_report_asset_path(asset_id) + if not asset_path: + continue + try: + asset_path.unlink(missing_ok=True) + except OSError: + continue + + def charts_to_markdown(charts: list[ReportChart]) -> str: if not charts: return "当前数据不足,未生成图表。" diff --git a/src/opencmo/reports.py b/src/opencmo/reports.py index 479b32f..3bba67d 100644 --- a/src/opencmo/reports.py +++ b/src/opencmo/reports.py @@ -17,6 +17,8 @@ _REPORT_MODEL_DEFAULT = "gpt-5.4" _PERIODIC_WINDOW_DAYS = 7 _REPORT_LLM_TIMEOUT_SECONDS = 300.0 +_CHART_ASSET_SRC_RE = re.compile(r"^/api/v1/report-assets/[a-f0-9]{32}\.svg$") +_CHART_ASSET_REF_RE = re.compile(r"/api/v1/report-assets/[a-f0-9]{32}\.svg") _REPORT_SYSTEM_COMMON = ( "你是 AI CMO(首席营销官),拥有完整的多智能体营销系统:SEO审计专家、GEO(AI搜索可见性)分析师、" "SERP排名追踪器、社区舆情监控(Reddit/HN/Dev.to/知乎/V2EX/掘金等)、AI引文可信度(Citability)评估引擎、" @@ -123,11 +125,13 @@ def close_list() -> None: continue image_match = re.fullmatch(r"!\[([^\]]*)\]\(([^)]+)\)", stripped) if image_match: - close_list() - alt = html.escape(image_match.group(1)) - src = html.escape(image_match.group(2), quote=True) - html_lines.append(f'
{alt}
{alt}
') - continue + raw_src = image_match.group(2) + if _CHART_ASSET_SRC_RE.fullmatch(raw_src): + close_list() + alt = html.escape(image_match.group(1)) + src = html.escape(raw_src, quote=True) + html_lines.append(f'
{alt}
{alt}
') + continue if stripped.startswith("### "): close_list() html_lines.append(f"

{html.escape(stripped[4:])}

") @@ -190,7 +194,7 @@ def _insert_after_first_section(markdown_text: str, section: str) -> str: def _postprocess_human_report_content(content: str, charts_markdown: str) -> str: content = _normalize_report_headings(content) - if "## 数据图表速览" in content or "## 2. 数据图表速览" in content: + if _CHART_ASSET_REF_RE.search(content): return content chart_section = f"## 2. 数据图表速览\n\n{charts_markdown or '当前数据不足,未生成图表。'}" return _insert_after_first_section(content, chart_section) @@ -857,9 +861,15 @@ async def _generate_report_record( report_model = model content = "" charts_markdown = "" + chart_asset_ids: list[str] = [] if audience == "human": facts, meta, charts_markdown = _prepare_report_charts(kind, facts, meta) + chart_asset_ids = [ + chart["asset_id"] + for chart in meta.get("charts", []) + if isinstance(chart.get("asset_id"), str) + ] # Human reports use the deep multi-agent pipeline; # Agent briefs stay single-call (they need to be concise). @@ -896,6 +906,13 @@ async def _generate_report_record( except Exception as exc: llm_error = str(exc) or exc.__class__.__name__ logger.exception("Report generation failed for %s/%s", kind, audience) + if chart_asset_ids: + try: + from opencmo.report_charts import delete_chart_assets + + delete_chart_assets(chart_asset_ids) + except Exception: + logger.exception("Failed to clean up chart assets for failed %s/%s report", kind, audience) return _failed_report_payload( meta, model, diff --git a/tests/test_report_charts.py b/tests/test_report_charts.py index e98eca0..dc7b875 100644 --- a/tests/test_report_charts.py +++ b/tests/test_report_charts.py @@ -1,15 +1,14 @@ from __future__ import annotations -import pytest -from fastapi.responses import FileResponse, JSONResponse +from fastapi.testclient import TestClient -from opencmo.report_charts import build_report_charts, get_report_asset_path +from opencmo.report_charts import build_report_charts, delete_chart_assets, get_report_asset_path from opencmo.reports import ( 
_normalize_report_headings, _postprocess_human_report_content, _simple_markdown_to_html, ) -from opencmo.web.routers.report import api_v1_report_asset +from opencmo.web.app import app def test_strategic_chart_builder_uses_real_fact_values(tmp_path, monkeypatch): @@ -70,29 +69,66 @@ def test_report_heading_normalization_and_chart_section_insertion(): def test_simple_markdown_to_html_supports_images(): - html = _simple_markdown_to_html("![关键指标](/api/v1/report-assets/abc.svg)") + asset_id = "a" * 32 + html = _simple_markdown_to_html(f"![关键指标](/api/v1/report-assets/{asset_id}.svg)") - assert '关键指标' in html + assert f'关键指标' in html assert "
关键指标
" in html -@pytest.mark.asyncio -async def test_report_asset_route_serves_svg(tmp_path, monkeypatch): +def test_simple_markdown_to_html_rejects_external_images(): + html = _simple_markdown_to_html("![x](https://attacker.com/pixel.gif)") + + assert "![x](https://attacker.com/pixel.gif)

" in html + + +def test_simple_markdown_to_html_rejects_javascript_url(): + html = _simple_markdown_to_html("![x](javascript:alert(1))") + + assert "![x](javascript:alert(1))

" in html + + +def test_postprocess_skips_chart_section_when_already_referenced(): + asset_id = "b" * 32 + content = f"# 总标题\n\n## 二、数据图表速览\n\n正文 /api/v1/report-assets/{asset_id}.svg" + + processed = _postprocess_human_report_content(content, "### 图表\n![图](/api/v1/report-assets/c.svg)") + + assert processed == content + assert "## 2. 数据图表速览" not in processed + + +def test_delete_chart_assets_removes_files_and_ignores_missing(tmp_path, monkeypatch): + monkeypatch.setenv("OPENCMO_REPORT_ASSET_DIR", str(tmp_path)) + asset_ids = ["c" * 32, "d" * 32] + for asset_id in asset_ids: + (tmp_path / f"{asset_id}.svg").write_text("", encoding="utf-8") + + delete_chart_assets([*asset_ids, "e" * 32, "not-valid"]) + + assert not (tmp_path / f"{asset_ids[0]}.svg").exists() + assert not (tmp_path / f"{asset_ids[1]}.svg").exists() + + +def test_report_asset_route_serves_svg(tmp_path, monkeypatch): monkeypatch.setenv("OPENCMO_REPORT_ASSET_DIR", str(tmp_path)) asset_id = "a" * 32 (tmp_path / f"{asset_id}.svg").write_text("", encoding="utf-8") - response = await api_v1_report_asset(asset_id) + response = TestClient(app).get(f"/api/v1/report-assets/{asset_id}.svg") - assert isinstance(response, FileResponse) - assert response.media_type == "image/svg+xml" + assert response.status_code == 200 + assert response.headers["content-type"].startswith("image/svg+xml") -@pytest.mark.asyncio -async def test_report_asset_route_rejects_missing_or_invalid_assets(tmp_path, monkeypatch): +def test_report_asset_route_rejects_missing_or_invalid_assets(tmp_path, monkeypatch): monkeypatch.setenv("OPENCMO_REPORT_ASSET_DIR", str(tmp_path)) + client = TestClient(app) - response = await api_v1_report_asset("../bad") + invalid_response = client.get("/api/v1/report-assets/not-valid.svg") + missing_response = client.get(f"/api/v1/report-assets/{'f' * 32}.svg") - assert isinstance(response, JSONResponse) - assert response.status_code == 404 + assert invalid_response.status_code == 404 + assert 
missing_response.status_code == 404