rohitdash08 · zhaog100 · Mar 20, 2026 · Mar 20, 2026
@@ -393,3 +393,172 @@ def _invalidate_expense_cache(uid: int, at: str):
             f"user:{uid}:dashboard_summary:*",
         ]
     )
+
+
+# ============================================================================
+# BULK IMPORT ENDPOINTS - Issue #115
+# ============================================================================
+
+@bp.post("/import/preview")
+@jwt_required()
+def preview_import():
+    """Validate an uploaded expense file and return a preview of the result.
+
+    Expects a multipart upload under the ``file`` field. The bytes are parsed
+    by ``_parse_uploaded_file`` and the rows are checked by
+    ``validate_bulk_import``; this handler itself performs no database writes.
+
+    Returns:
+        200 with the validation result as JSON; 400 when no file was sent or
+        parsing/validation raised ``ValueError``; 500 on any other failure.
+    """
+    # Function-scope import; only the validator is used by this handler.
+    from ..services.expense_import import validate_bulk_import
+
+    uid = int(get_jwt_identity())
+
+    upload = request.files.get('file')
+    if upload is None:
+        return jsonify(error="No file provided"), 400
+
+    try:
+        # Reading inside the try so an I/O failure is reported like any other error.
+        rows = _parse_uploaded_file(upload, upload.read())
+        validation_result = validate_bulk_import(rows)
+
+        logger.info("Preview import user=%s total=%s valid=%s errors=%s",
+                    uid, validation_result["total"],
+                    validation_result["valid_count"],
+                    validation_result["error_count"])
+
+        return jsonify(validation_result), 200
+
+    except ValueError as e:
+        # ValueError is the parsers' signal for "bad user input".
+        return jsonify(error=str(e)), 400
+    except Exception as e:
+        # logger.exception keeps the traceback, which plain logger.error dropped.
+        logger.exception("Preview import error user=%s error=%s", uid, str(e))
+        return jsonify(error="Failed to process file"), 500
+
+
+@bp.post("/import/confirm")
+@jwt_required()
+def confirm_import():
+    """Persist the rows a client confirmed after previewing an import.
+
+    Expects a JSON object with a ``valid_rows`` list. Each row is read for
+    ``amount``, ``date``, ``currency``, ``category_id``, ``description`` and
+    ``expense_type``. Rows that cannot be converted are skipped and reported
+    in ``errors``; the remaining rows are committed in a single transaction.
+
+    Returns:
+        201 with ``imported_count``, ``errors`` and ``status`` when at least
+        one row was stored; 400 when the payload has no usable rows or no row
+        could be imported; 500 when the commit itself fails.
+    """
+    uid = int(get_jwt_identity())
+    # silent=True: a missing or malformed JSON body yields None instead of raising.
+    payload = request.get_json(silent=True)
+    valid_rows = payload.get('valid_rows') if isinstance(payload, dict) else None
+
+    if not valid_rows or not isinstance(valid_rows, list):
+        return jsonify(error="No valid rows to import"), 400
+
+    imported_count = 0
+    errors = []
+
+    for idx, row in enumerate(valid_rows, 1):
+        try:
+            amount = _parse_amount(row.get('amount'))
+            if amount is None:
+                errors.append(f"Row {idx}: Invalid amount")
+                continue
+
+            raw_date = row.get('date')
+            if not raw_date:
+                errors.append(f"Row {idx}: Missing date")
+                continue
+
+            # `or` rather than a .get() default: a key that is present but null
+            # (JSON null) must fall back too.
+            description = row.get('description') or ''
+
+            expense = Expense(
+                user_id=uid,
+                amount=amount,
+                currency=row.get('currency') or 'USD',
+                category_id=row.get('category_id'),
+                notes=description,
+                spent_at=date.fromisoformat(raw_date),
+                expense_type=_infer_expense_type(row.get('expense_type'), description, amount),
+            )
+            db.session.add(expense)
+            imported_count += 1
+
+        except Exception as e:
+            # Best effort per row: one bad row must not abort the whole batch.
+            errors.append(f"Row {idx}: {str(e)}")
+            logger.warning("Import row error user=%s row=%s error=%s", uid, idx, str(e))
+
+    if imported_count > 0:
+        try:
+            db.session.commit()
+        except Exception:
+            # Leave the session usable and report the failure instead of a bare 500.
+            db.session.rollback()
+            logger.exception("Import commit failed user=%s count=%s", uid, imported_count)
+            return jsonify(error="Failed to import expenses"), 500
+
+        logger.info("Imported expenses user=%s count=%s", uid, imported_count)
+
+        # Invalidate caches; the dashboard summary pattern is included because
+        # the single-expense invalidation helper in this file clears it as well.
+        cache_delete_patterns([
+            f"user:{uid}:monthly_summary:*",
+            f"insights:{uid}:*",
+            f"user:{uid}:dashboard_summary:*",
+        ])
+
+    if imported_count == 0:
+        status = "failed"
+    elif errors:
+        status = "partial"
+    else:
+        status = "success"
+
+    return jsonify({
+        "imported_count": imported_count,
+        "errors": errors,
+        "status": status,
+    }), 201 if imported_count > 0 else 400
+
+
+def _parse_uploaded_file(file, data):
+    """Dispatch uploaded bytes to the CSV or Excel parser.
+
+    Args:
+        file: Upload object; only ``filename`` and ``content_type`` are read.
+        data: Raw bytes of the upload.
+
+    Returns:
+        The result of ``_parse_csv_rows`` or ``_parse_excel_rows``.
+
+    Raises:
+        ValueError: If neither the file name nor the content type indicates
+            a CSV or Excel file.
+    """
+    filename = (file.filename or "").lower()
+    content_type = (file.content_type or "").lower()
+
+    if filename.endswith('.csv') or 'csv' in content_type:
+        return _parse_csv_rows(data)
+
+    # .xlsx is served as "...openxmlformats-officedocument.spreadsheetml.sheet",
+    # which does not contain "excel"; only "application/vnd.ms-excel" does.
+    if (
+        filename.endswith('.xlsx')
+        or 'excel' in content_type
+        or 'spreadsheetml' in content_type
+    ):
+        return _parse_excel_rows(data)
+
+    raise ValueError("Only CSV and Excel files are supported")
+
+
+def _parse_csv_rows(data):
+    """Parse CSV bytes into a list of import-row dicts.
+
+    The first line is treated as the header. Header names are matched
+    case-insensitively and with surrounding whitespace ignored; an exactly
+    named lowercase column always takes precedence over a folded one.
+
+    Args:
+        data: Raw CSV bytes; decoded as UTF-8 (a BOM is tolerated and
+            undecodable bytes are dropped).
+
+    Returns:
+        One dict per data line with the keys ``date``, ``amount``,
+        ``description``, ``category_id`` and ``currency``. ``date`` falls
+        back to a ``spent_at`` column, ``description`` to ``notes``, and
+        ``currency`` to ``"USD"``.
+    """
+    import csv
+    import io
+
+    text = data.decode('utf-8-sig', errors='ignore')
+    reader = csv.DictReader(io.StringIO(text))
+    out = []
+    for row in reader:
+        # Folded view of the header names so "Date" or " amount " are found too.
+        lookup = {}
+        for key, value in row.items():
+            if isinstance(key, str):
+                lookup.setdefault(key.strip().lower(), value)
+        # Exact header names win, so files that already worked behave the same.
+        lookup.update(row)
+
+        out.append({
+            "date": lookup.get("date") or lookup.get("spent_at"),
+            "amount": lookup.get("amount"),
+            "description": lookup.get("description") or lookup.get("notes"),
+            "category_id": lookup.get("category_id"),
+            "currency": lookup.get("currency") or "USD",
+        })
+    return out
+
+
+def _parse_excel_rows(data):
+    """Parse Excel workbook bytes into a list of row dicts.
+
+    The workbook is read with ``pandas.read_excel`` using its default sheet
+    selection. Cell values are made JSON-friendly: missing cells (NaN/NaT)
+    become ``None`` and datetime cells become ISO ``YYYY-MM-DD`` strings, so
+    the rows look like the string-based rows the CSV parser produces.
+
+    Args:
+        data: Raw bytes of the uploaded workbook.
+
+    Returns:
+        One dict per sheet row, keyed by the sheet's column headers.
+
+    Raises:
+        ValueError: If pandas (or its Excel engine) is unavailable, or the
+            bytes cannot be parsed as a workbook.
+    """
+    # Imported here because this block cannot rely on a module-level `io`.
+    import datetime as _datetime
+    import io
+
+    try:
+        import pandas as pd
+        df = pd.read_excel(io.BytesIO(data))
+        records = df.to_dict('records')
+    except ImportError as e:
+        raise ValueError("Excel support requires pandas library") from e
+    except Exception as e:
+        raise ValueError(f"Failed to parse Excel file: {str(e)}") from e
+
+    def _plain(value):
+        # NaN is the only float that differs from itself; NaT is a singleton.
+        if value is pd.NaT or (isinstance(value, float) and value != value):
+            return None
+        # pandas Timestamps subclass datetime; keep only the calendar date.
+        if isinstance(value, _datetime.datetime):
+            return value.date().isoformat()
+        return value
+
+    return [
+        {key: _plain(value) for key, value in record.items()}
+        for record in records
+    ]
+
+
+def _infer_expense_type(raw_type, description, amount):
+    """Infer whether a row is income or an expense.
+
+    Args:
+        raw_type: Type supplied with the row, if any; honoured when it is
+            ``INCOME`` or ``EXPENSE`` (case and surrounding spaces ignored).
+        description: Free-text description; may be ``None``.
+        amount: Numeric amount; a negative value is always an expense.
+
+    Returns:
+        ``"INCOME"`` or ``"EXPENSE"``.
+    """
+    t = str(raw_type or "").strip().upper()
+    if t in {"INCOME", "EXPENSE"}:
+        return t
+
+    # Checked before the keywords so a negative "refund" stays an expense.
+    if amount < 0:
+        return "EXPENSE"
+
+    # Rows without a description arrive as None; treat that as empty text.
+    text = str(description or "").upper()
+    income_keywords = ("SALARY", "PAYROLL", "REFUND", "INTEREST", "DIVIDEND", "CREDIT")
+    if any(k in text for k in income_keywords):
+        return "INCOME"
+
+    return "EXPENSE"
@@ -0,0 +1,200 @@
+"""Payee alias management routes for FinMind."""
+
+import re
+import logging
+from collections import Counter
+from flask import Blueprint, jsonify, request
+from flask_jwt_extended import jwt_required, get_jwt_identity
+from ..extensions import db
+from ..models import PayeeAlias, Expense
+
+# Blueprint that the payee-alias route decorators below register on.
+bp = Blueprint("payee_aliases", __name__)
+# Module logger, published under the "finmind.payee_aliases" name.
+logger = logging.getLogger("finmind.payee_aliases")
+
+
+@bp.get("")
+@jwt_required()
+def list_aliases():
+    """Return the caller's payee aliases as JSON, ordered by canonical name."""
+    owner_id = int(get_jwt_identity())
+    owned = PayeeAlias.query.filter_by(user_id=owner_id)
+    ordered = owned.order_by(PayeeAlias.canonical_name)
+
+    payload = []
+    for alias in ordered.all():
+        payload.append(alias.to_dict())
+    return jsonify(payload)
+
+
+@bp.post("")
+@jwt_required()
+def create_alias():
+    """Create a payee alias for the authenticated user.
+
+    Reads ``canonical_name``, ``alias_pattern`` and an optional
+    ``match_type`` (default ``"exact"``) from the JSON body. All three must
+    be strings; regex patterns must compile.
+
+    Returns:
+        201 with the new alias; 400 on missing/invalid fields; 409 when the
+        commit fails (reported as a duplicate pattern).
+    """
+    uid = int(get_jwt_identity())
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        # A JSON array or scalar body has no fields to read.
+        data = {}
+
+    fields = {}
+    for key, default in (("canonical_name", ""), ("alias_pattern", ""), ("match_type", "exact")):
+        value = data.get(key) or default
+        if not isinstance(value, str):
+            # Numbers, lists, etc. would otherwise crash on .strip().
+            return jsonify({"error": f"{key} must be a string"}), 400
+        fields[key] = value.strip()
+
+    canonical = fields["canonical_name"]
+    pattern = fields["alias_pattern"]
+    match_type = fields["match_type"]
+
+    if not canonical or not pattern:
+        return jsonify({"error": "canonical_name and alias_pattern are required"}), 400
+
+    valid_types = {"exact", "case_insensitive", "contains", "regex"}
+    if match_type not in valid_types:
+        # sorted() keeps the message stable; set order varies between runs.
+        return jsonify({"error": f"match_type must be one of {sorted(valid_types)}"}), 400
+
+    if match_type == "regex":
+        try:
+            re.compile(pattern)
+        except re.error as e:
+            return jsonify({"error": f"Invalid regex: {e}"}), 400
+
+    alias = PayeeAlias(user_id=uid, canonical_name=canonical, alias_pattern=pattern, match_type=match_type)
+    db.session.add(alias)
+    try:
+        db.session.commit()
+    except Exception:
+        db.session.rollback()
+        # NOTE(review): every commit failure is reported as a duplicate; the
+        # log keeps the real cause visible in case it is something else.
+        logger.warning("Alias create commit failed user=%s", uid, exc_info=True)
+        return jsonify({"error": "Alias pattern already exists for this user"}), 409
+
+    return jsonify(alias.to_dict()), 201
+
+
+@bp.put("/<int:alias_id>")
+@jwt_required()
+def update_alias(alias_id):
+    """Update one of the caller's payee aliases.
+
+    Any of ``canonical_name``, ``alias_pattern`` and ``match_type`` may be
+    present in the JSON body. The whole request is validated before the
+    stored alias is touched, so a rejected request changes nothing.
+
+    Returns:
+        200 with the updated alias; 400 on invalid fields; 404 when the
+        alias does not exist for this user; 409 when the commit fails
+        (reported as a duplicate pattern, matching ``create_alias``).
+    """
+    uid = int(get_jwt_identity())
+    alias = PayeeAlias.query.filter_by(id=alias_id, user_id=uid).first()
+    if not alias:
+        return jsonify({"error": "Alias not found"}), 404
+
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+
+    updates = {}
+    for key in ("canonical_name", "alias_pattern"):
+        if key in data:
+            value = data[key]
+            # Same rule as creation: these fields are required, non-empty text.
+            if not isinstance(value, str) or not value.strip():
+                return jsonify({"error": f"{key} must be a non-empty string"}), 400
+            updates[key] = value.strip()
+
+    if "match_type" in data:
+        valid_types = {"exact", "case_insensitive", "contains", "regex"}
+        match_type = data["match_type"]
+        # isinstance first: an unhashable value would make the set test raise.
+        if not isinstance(match_type, str) or match_type not in valid_types:
+            return jsonify({"error": f"match_type must be one of {sorted(valid_types)}"}), 400
+        updates["match_type"] = match_type
+
+    # Check the regex against the values the alias will have after the update.
+    effective_type = updates.get("match_type", alias.match_type)
+    effective_pattern = updates.get("alias_pattern", alias.alias_pattern)
+    if effective_type == "regex":
+        try:
+            re.compile(effective_pattern)
+        except re.error as e:
+            return jsonify({"error": f"Invalid regex: {e}"}), 400
+
+    for key, value in updates.items():
+        setattr(alias, key, value)
+
+    try:
+        db.session.commit()
+    except Exception:
+        db.session.rollback()
+        # NOTE(review): as in create_alias, any commit failure is reported as
+        # a duplicate; the log keeps the real cause visible.
+        logger.warning("Alias update commit failed user=%s alias=%s", uid, alias_id, exc_info=True)
+        return jsonify({"error": "Alias pattern already exists for this user"}), 409
+
+    return jsonify(alias.to_dict())
+
+
+@bp.delete("/<int:alias_id>")
+@jwt_required()
+def delete_alias(alias_id):
+    """Remove one of the caller's payee aliases, or answer 404 if none matches."""
+    owner_id = int(get_jwt_identity())
+    target = PayeeAlias.query.filter_by(id=alias_id, user_id=owner_id).first()
+
+    if target:
+        db.session.delete(target)
+        db.session.commit()
+        return jsonify({"message": "Deleted"}), 200
+
+    return jsonify({"error": "Alias not found"}), 404
+
+
+@bp.post("/resolve")
+@jwt_required()
+def resolve_payees():
+    """Report which of the caller's expenses match one of their alias rules.
+
+    Each expense's ``payee`` (falling back to ``notes``) is tested against
+    the rules in query order; the first rule that yields a canonical name
+    wins. Nothing is modified; the matches are only returned.
+    """
+    uid = int(get_jwt_identity())
+    aliases = PayeeAlias.query.filter_by(user_id=uid).all()
+    if not aliases:
+        return jsonify({"resolved": [], "message": "No aliases configured"})
+
+    def as_rule(alias):
+        # Regex aliases become (compiled_pattern, canonical); others stay (alias,).
+        if alias.match_type != "regex":
+            return (alias,)
+        return (re.compile(alias.alias_pattern, re.IGNORECASE), alias.canonical_name)
+
+    rules = []
+    for alias in aliases:
+        try:
+            rules.append(as_rule(alias))
+        except re.error:
+            # A stored pattern that no longer compiles is skipped.
+            continue
+
+    expenses = Expense.query.filter_by(user_id=uid).all()
+    resolved = []
+    for expense in expenses:
+        raw = expense.payee or expense.notes or ""
+        if not raw.strip():
+            continue
+        # Lazily evaluate the rules and stop at the first truthy result.
+        outcomes = (_match_rule(rule, raw) for rule in rules)
+        hit = next((name for name in outcomes if name), None)
+        if hit:
+            resolved.append({
+                "expense_id": expense.id,
+                "raw_payee": raw[:100],
+                "resolved_to": hit,
+            })
+
+    summary = {
+        "resolved": resolved,
+        "rules_count": len(rules),
+        "expenses_checked": len(expenses),
+    }
+    return jsonify(summary)
+
+
+def _build_alias_suggestions(raw_payees, limit=20):
+    """Propose aliases for payee strings that differ only in case or punctuation.
+
+    Strings are grouped by a normalized key (lowercased, everything except
+    ``a-z0-9`` removed). In every group with at least two distinct spellings
+    the most frequent spelling becomes the canonical name and each other
+    spelling yields one suggestion.
+
+    Args:
+        raw_payees: Iterable of payee strings; ``None`` and blank entries
+            are ignored.
+        limit: Maximum number of suggestions to return.
+
+    Returns:
+        A list of dicts with ``canonical_name``, ``alias_pattern``,
+        ``match_type`` and ``count`` (total occurrences in the group).
+    """
+    groups = {}  # normalized key -> Counter of the spellings seen for it
+    for raw in raw_payees:
+        text = (raw or "").strip()
+        if not text:
+            continue
+        norm = re.sub(r"[^a-z0-9]", "", text.lower())
+        if not norm:
+            # Nothing alphanumeric survives (e.g. "---"); such strings would
+            # otherwise all collapse into one meaningless group.
+            continue
+        groups.setdefault(norm, Counter())[text] += 1
+
+    suggestions = []
+    for variants in groups.values():
+        if len(variants) < 2:
+            continue
+        canonical = variants.most_common(1)[0][0]
+        total = sum(variants.values())
+        # The suggested match type is case-insensitive, so spellings that
+        # differ only in case need a single suggestion between them.
+        emitted = set()
+        for variant, _ in variants.most_common():
+            folded = variant.lower()
+            if variant == canonical or folded in emitted:
+                continue
+            emitted.add(folded)
+            suggestions.append({
+                "canonical_name": canonical,
+                "alias_pattern": variant,
+                "match_type": "case_insensitive",
+                "count": total,
+            })
+            if len(suggestions) >= limit:
+                return suggestions
+    return suggestions
+
+
+@bp.post("/auto-suggest")
+@jwt_required()
+def auto_suggest():
+    """Suggest potential payee aliases based on existing expense data.
+
+    Uses each expense's ``payee`` (falling back to ``notes``) and returns at
+    most 20 suggestions under the ``suggestions`` key.
+    """
+    uid = int(get_jwt_identity())
+    expenses = Expense.query.filter_by(user_id=uid).all()
+    raw_payees = [exp.payee or exp.notes or "" for exp in expenses]
+    return jsonify({"suggestions": _build_alias_suggestions(raw_payees)})
+
+
+def _match_rule(rule, raw):
+    """Match a payee string against one alias rule.
+
+    Args:
+        rule: Either a 1-tuple ``(alias,)`` whose element exposes
+            ``match_type``, ``alias_pattern`` and ``canonical_name``, or a
+            2-tuple ``(compiled_regex, canonical_name)``.
+        raw: The payee text to test.
+
+    Returns:
+        The canonical name when the rule matches, otherwise ``None``.
+    """
+    if len(rule) != 1:
+        regex, canonical = rule
+        return canonical if regex.search(raw) else None
+
+    alias = rule[0]
+    pattern = alias.alias_pattern
+    # Patterns are stored stripped, so compare against stripped payee text.
+    candidate = raw.strip()
+
+    if alias.match_type == "exact":
+        # Exact means case-sensitive; otherwise it would duplicate "case_insensitive".
+        matched = candidate == pattern
+    elif alias.match_type == "case_insensitive":
+        matched = candidate.lower() == pattern.lower()
+    elif alias.match_type == "contains":
+        matched = pattern.lower() in candidate.lower()
+    else:
+        matched = False
+
+    return alias.canonical_name if matched else None