Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions packages/backend/app/routes/expenses.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,3 +393,172 @@ def _invalidate_expense_cache(uid: int, at: str):
f"user:{uid}:dashboard_summary:*",
]
)


# ============================================================================
# BULK IMPORT ENDPOINTS - Issue #115
# ============================================================================

@bp.post("/import/preview")
@jwt_required()
def preview_import():
    """Preview a bulk expense import without persisting anything.

    Workflow:
      1. Read the uploaded ``file`` part of the multipart request.
      2. Parse it into rows and validate them with ``validate_bulk_import``.
      3. Return the validation result (preview and warnings) as JSON.

    Returns:
        ``200`` with the validation result on success, ``400`` when no file
        was supplied or parsing/validation raised ``ValueError``, and ``500``
        for any other failure.
    """
    # Only the validator is needed here; importing names that are never used
    # would turn an unrelated rename in the service module into a request-time
    # ImportError.
    from ..services.expense_import import validate_bulk_import

    uid = int(get_jwt_identity())

    if 'file' not in request.files:
        return jsonify(error="No file provided"), 400

    file = request.files['file']
    data = file.read()

    try:
        # Parse the upload into a list of row dicts.
        rows = _parse_uploaded_file(file, data)

        # Validate the parsed rows.
        validation_result = validate_bulk_import(rows)

        logger.info("Preview import user=%s total=%s valid=%s errors=%s",
                    uid, validation_result["total"],
                    validation_result["valid_count"],
                    validation_result["error_count"])

        return jsonify(validation_result), 200

    except ValueError as e:
        # ValueError is how the parsers report user-correctable problems.
        return jsonify(error=str(e)), 400
    except Exception as e:
        # logger.exception keeps the traceback, which the plain error log lost.
        logger.exception("Preview import error user=%s error=%s", uid, str(e))
        return jsonify(error="Failed to process file"), 500


@bp.post("/import/confirm")
@jwt_required()
def confirm_import():
    """Persist the rows the client accepted from the import preview.

    Workflow:
      1. Read ``valid_rows`` from the JSON request body.
      2. Create one ``Expense`` per usable row and commit them in one batch.
      3. Report how many rows were imported and which rows were rejected.

    Returns:
        ``201`` when at least one row was imported, ``400`` when the body is
        not a JSON object, contains no rows, or no row could be imported, and
        ``500`` when the database commit fails. The JSON payload carries
        ``imported_count``, ``errors`` and ``status`` (``"success"``,
        ``"partial"`` or ``"failed"``).
    """
    uid = int(get_jwt_identity())

    # silent=True yields None instead of raising on a missing/invalid body,
    # so a malformed request gets a clean 400 rather than an AttributeError.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify(error="Request body must be a JSON object"), 400

    valid_rows = data.get('valid_rows') or []

    if not isinstance(valid_rows, list) or not valid_rows:
        return jsonify(error="No valid rows to import"), 400

    imported_count = 0
    errors = []

    for idx, row in enumerate(valid_rows, 1):
        if not isinstance(row, dict):
            errors.append(f"Row {idx}: Invalid row")
            continue

        try:
            # A None result is treated as "could not parse the amount".
            amount = _parse_amount(row.get('amount'))
            if amount is None:
                errors.append(f"Row {idx}: Invalid amount")
                continue

            raw_date = row.get('date')
            if not raw_date:
                errors.append(f"Row {idx}: Missing date")
                continue

            # ``row.get(key, default)`` does not apply the default when the
            # key is present with a null value, which is what the CSV parser
            # emits for empty cells; coalesce explicitly instead.
            description = str(row.get('description') or '')

            # NOTE(review): category_id comes straight from the client and is
            # not checked against the user's own categories here - confirm
            # that this is enforced elsewhere.
            expense = Expense(
                user_id=uid,
                amount=amount,
                currency=row.get('currency') or 'USD',
                category_id=row.get('category_id'),
                notes=description,
                spent_at=date.fromisoformat(raw_date),
                expense_type=_infer_expense_type(row.get('expense_type'), description, amount),
            )
            db.session.add(expense)
            imported_count += 1

        except Exception as e:
            errors.append(f"Row {idx}: {str(e)}")
            logger.warning("Import row error user=%s row=%s error=%s", uid, idx, str(e))

    if imported_count > 0:
        try:
            db.session.commit()
        except Exception:
            # Leave the session usable and report the failure instead of
            # letting a half-finished transaction bubble up.
            db.session.rollback()
            logger.exception("Import commit failed user=%s count=%s", uid, imported_count)
            return jsonify(error="Failed to save imported expenses"), 500

        logger.info("Imported expenses user=%s count=%s", uid, imported_count)

        # Invalidate caches, including the dashboard summary family that the
        # single-expense invalidation path in this module also clears.
        cache_delete_patterns([
            f"user:{uid}:monthly_summary:*",
            f"user:{uid}:dashboard_summary:*",
            f"insights:{uid}:*",
        ])

    if imported_count == 0:
        status = "failed"
    elif errors:
        status = "partial"
    else:
        status = "success"

    return jsonify({
        "imported_count": imported_count,
        "errors": errors,
        "status": status,
    }), 201 if imported_count > 0 else 400


def _parse_uploaded_file(file, data):
    """Dispatch an uploaded file to the CSV or Excel parser.

    Args:
        file: The uploaded file object; only its ``filename`` and
            ``content_type`` attributes are read.
        data: The raw bytes already read from ``file``.

    Returns:
        Whatever the selected parser returns (a list of row dicts).

    Raises:
        ValueError: If the upload is neither CSV nor Excel.
    """
    filename = (file.filename or "").lower()
    content_type = (file.content_type or "").lower()

    # The extension is the more reliable signal (browsers on Windows often
    # label .csv uploads as application/vnd.ms-excel), so it is checked first.
    if filename.endswith('.csv'):
        return _parse_csv_rows(data)
    if filename.endswith('.xlsx'):
        return _parse_excel_rows(data)

    # Fall back to the declared content type. The official .xlsx MIME type is
    # application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, which
    # does not contain the word "excel", so it is matched separately.
    if 'csv' in content_type:
        return _parse_csv_rows(data)
    if 'excel' in content_type or 'spreadsheetml' in content_type:
        return _parse_excel_rows(data)

    raise ValueError("Only CSV and Excel files are supported")


def _parse_csv_rows(data):
    """Parse CSV bytes into a list of normalised expense row dicts.

    Header names are matched case-insensitively and with surrounding
    whitespace ignored, so ``Date``, `` amount `` and ``NOTES`` are all
    recognised. ``spent_at`` is accepted as an alias for ``date`` and
    ``notes`` as an alias for ``description``.

    Args:
        data: Raw file bytes. Decoded as UTF-8 (a leading BOM is dropped and
            undecodable bytes are ignored).

    Returns:
        A list of dicts with the keys ``date``, ``amount``, ``description``,
        ``category_id`` and ``currency`` (defaulting to ``"USD"``). Values are
        the raw cell strings, or ``None`` when the column is absent or empty
        for ``date``/``description``.
    """
    import csv
    import io

    text = data.decode('utf-8-sig', errors='ignore')
    reader = csv.DictReader(io.StringIO(text))
    out = []
    for raw_row in reader:
        # DictReader uses a None key for surplus cells on over-long rows;
        # those are skipped while the real headers are normalised.
        row = {
            key.strip().lower(): value
            for key, value in raw_row.items()
            if isinstance(key, str)
        }
        out.append({
            "date": row.get("date") or row.get("spent_at"),
            "amount": row.get("amount"),
            "description": row.get("description") or row.get("notes"),
            "category_id": row.get("category_id"),
            "currency": row.get("currency") or "USD",
        })
    return out


def _parse_excel_rows(data):
    """Parse Excel workbook bytes into a list of row dicts.

    Args:
        data: Raw bytes of the uploaded workbook.

    Returns:
        One dict per sheet row, keyed by the sheet's column headers. Blank
        cells are returned as ``None``.

    Raises:
        ValueError: If pandas is not installed or the workbook cannot be read.
    """
    # Imported locally, like the CSV parser does, so this function does not
    # depend on a module-level ``import io`` being present.
    import io

    try:
        import pandas as pd
    except ImportError as exc:
        raise ValueError("Excel support requires pandas library") from exc

    try:
        df = pd.read_excel(io.BytesIO(data))
        # Blank cells are read as NaN/NaT, which are truthy (defeating
        # ``value or default`` checks) and not valid JSON; use None instead.
        df = df.astype(object).where(df.notna(), None)
        return df.to_dict('records')
    except Exception as e:
        # Includes a missing Excel engine, which pandas reports as an
        # ImportError raised from read_excel rather than from importing pandas.
        raise ValueError(f"Failed to parse Excel file: {str(e)}") from e


def _infer_expense_type(raw_type, description, amount):
    """Infer whether a row is income or an expense.

    Resolution order:
      1. An explicit ``raw_type`` of ``INCOME``/``EXPENSE`` (any case,
         surrounding whitespace ignored) wins.
      2. A negative ``amount`` is an expense.
      3. A description containing an income keyword is income.
      4. Everything else is an expense.

    Args:
        raw_type: The type supplied with the row, if any.
        description: Free-text description; ``None`` is treated as empty.
        amount: Numeric amount; ``None`` is treated as non-negative.

    Returns:
        ``"INCOME"`` or ``"EXPENSE"``.
    """
    t = str(raw_type or "").strip().upper()
    if t in {"INCOME", "EXPENSE"}:
        return t

    if amount is not None and amount < 0:
        return "EXPENSE"

    # Rows parsed from files carry None for an empty description; coalesce so
    # the keyword scan cannot raise AttributeError.
    text = str(description or "").upper()
    income_keywords = ("SALARY", "PAYROLL", "REFUND", "INTEREST", "DIVIDEND", "CREDIT")
    if any(k in text for k in income_keywords):
        return "INCOME"

    return "EXPENSE"
200 changes: 200 additions & 0 deletions packages/backend/app/routes/payee_aliases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
"""Payee alias management routes for FinMind."""

import re
import logging
from collections import Counter
from flask import Blueprint, jsonify, request
from flask_jwt_extended import jwt_required, get_jwt_identity
from ..extensions import db
from ..models import PayeeAlias, Expense

bp = Blueprint("payee_aliases", __name__)
logger = logging.getLogger("finmind.payee_aliases")


@bp.get("")
@jwt_required()
def list_aliases():
    """Return every payee alias owned by the caller, ordered by canonical name."""
    user_id = int(get_jwt_identity())
    owned = PayeeAlias.query.filter_by(user_id=user_id)
    ordered = owned.order_by(PayeeAlias.canonical_name).all()
    payload = []
    for alias in ordered:
        payload.append(alias.to_dict())
    return jsonify(payload)


@bp.post("")
@jwt_required()
def create_alias():
    """Create a new payee alias for the authenticated user.

    Reads ``canonical_name``, ``alias_pattern`` and an optional ``match_type``
    (default ``"exact"``) from the JSON body.

    Returns:
        ``201`` with the created alias, ``400`` for missing, non-string or
        invalid fields (including an uncompilable regex), and ``409`` when the
        commit fails.
    """
    uid = int(get_jwt_identity())
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # A JSON array or scalar has no fields; treat it like an empty body.
        data = {}

    fields = {
        name: data.get(name)
        for name in ("canonical_name", "alias_pattern", "match_type")
    }
    # Reject non-string values up front; calling .strip() on a number or a
    # list would otherwise surface as a 500.
    if any(value is not None and not isinstance(value, str) for value in fields.values()):
        return jsonify({"error": "canonical_name, alias_pattern and match_type must be strings"}), 400

    canonical = (fields["canonical_name"] or "").strip()
    pattern = (fields["alias_pattern"] or "").strip()
    match_type = (fields["match_type"] or "exact").strip()

    if not canonical or not pattern:
        return jsonify({"error": "canonical_name and alias_pattern are required"}), 400

    valid_types = {"exact", "case_insensitive", "contains", "regex"}
    if match_type not in valid_types:
        return jsonify({"error": f"match_type must be one of {valid_types}"}), 400

    if match_type == "regex":
        # NOTE(review): the pattern is user-supplied and later run against
        # every expense; only syntax is checked here, not its runtime cost.
        try:
            re.compile(pattern)
        except re.error as e:
            return jsonify({"error": f"Invalid regex: {e}"}), 400

    alias = PayeeAlias(user_id=uid, canonical_name=canonical, alias_pattern=pattern, match_type=match_type)
    db.session.add(alias)
    try:
        db.session.commit()
    except Exception:
        db.session.rollback()
        # Every commit failure is reported as a conflict; log the real cause
        # so that other database errors are not silently hidden behind a 409.
        logger.warning("Create alias failed user=%s", uid, exc_info=True)
        return jsonify({"error": "Alias pattern already exists for this user"}), 409

    return jsonify(alias.to_dict()), 201


@bp.put("/<int:alias_id>")
@jwt_required()
def update_alias(alias_id):
    """Update an existing payee alias owned by the authenticated user.

    Any of ``canonical_name``, ``alias_pattern`` and ``match_type`` may be
    supplied in the JSON body; omitted fields keep their current value. All
    validation happens before the alias is modified, so a rejected request
    leaves the stored alias untouched.

    Args:
        alias_id: Primary key of the alias, taken from the URL.

    Returns:
        ``200`` with the updated alias, ``404`` when the alias does not exist
        for this user, ``400`` for non-string, empty or invalid fields, and
        ``409`` when the commit fails.
    """
    uid = int(get_jwt_identity())
    alias = PayeeAlias.query.filter_by(id=alias_id, user_id=uid).first()
    if not alias:
        return jsonify({"error": "Alias not found"}), 404

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    for field in ("canonical_name", "alias_pattern", "match_type"):
        if field in data and not isinstance(data[field], str):
            return jsonify({"error": f"{field} must be a string"}), 400

    # Stage the new values and validate them before touching the entity.
    canonical = data["canonical_name"].strip() if "canonical_name" in data else alias.canonical_name
    pattern = data["alias_pattern"].strip() if "alias_pattern" in data else alias.alias_pattern
    match_type = data["match_type"].strip() if "match_type" in data else alias.match_type

    # Same rule as alias creation: neither name may be blank.
    if not canonical or not pattern:
        return jsonify({"error": "canonical_name and alias_pattern are required"}), 400

    if "match_type" in data:
        valid_types = {"exact", "case_insensitive", "contains", "regex"}
        if match_type not in valid_types:
            return jsonify({"error": f"match_type must be one of {valid_types}"}), 400

    if match_type == "regex":
        try:
            re.compile(pattern)
        except re.error as e:
            return jsonify({"error": f"Invalid regex: {e}"}), 400

    alias.canonical_name = canonical
    alias.alias_pattern = pattern
    alias.match_type = match_type

    try:
        db.session.commit()
    except Exception:
        # Mirrors alias creation: a failed commit (e.g. the pattern now
        # collides with another alias) is reported as a conflict.
        db.session.rollback()
        logger.warning("Update alias failed user=%s alias=%s", uid, alias_id, exc_info=True)
        return jsonify({"error": "Alias pattern already exists for this user"}), 409

    return jsonify(alias.to_dict())


@bp.delete("/<int:alias_id>")
@jwt_required()
def delete_alias(alias_id):
    """Remove one of the caller's payee aliases.

    Responds with 404 when no alias with ``alias_id`` belongs to the caller,
    otherwise deletes it, commits, and responds with 200.
    """
    owner_id = int(get_jwt_identity())
    target = PayeeAlias.query.filter_by(id=alias_id, user_id=owner_id).first()

    if target:
        db.session.delete(target)
        db.session.commit()
        return jsonify({"message": "Deleted"}), 200

    return jsonify({"error": "Alias not found"}), 404


@bp.post("/resolve")
@jwt_required()
def resolve_payees():
    """Report which of the caller's expenses match one of their alias rules.

    Each expense's payee (falling back to its notes) is tested against the
    rules in turn; the first rule yielding a canonical name wins. Nothing is
    written back - the response only lists the matches.
    """
    user_id = int(get_jwt_identity())
    alias_rows = PayeeAlias.query.filter_by(user_id=user_id).all()
    if not alias_rows:
        return jsonify({"resolved": [], "message": "No aliases configured"})

    # Regex aliases become (compiled_pattern, canonical_name) pairs; every
    # other alias is wrapped in a 1-tuple so _match_rule can tell them apart.
    compiled_rules = []
    for alias in alias_rows:
        if alias.match_type != "regex":
            compiled_rules.append((alias,))
            continue
        try:
            compiled = re.compile(alias.alias_pattern, re.IGNORECASE)
        except re.error:
            # An uncompilable stored pattern is skipped rather than failing
            # the whole request.
            continue
        compiled_rules.append((compiled, alias.canonical_name))

    user_expenses = Expense.query.filter_by(user_id=user_id).all()
    matches = []
    for expense in user_expenses:
        raw_payee = expense.payee or expense.notes or ""
        if not raw_payee.strip():
            continue

        outcomes = (_match_rule(rule, raw_payee) for rule in compiled_rules)
        winner = next((name for name in outcomes if name), None)
        if winner:
            matches.append({
                "expense_id": expense.id,
                "raw_payee": raw_payee[:100],
                "resolved_to": winner,
            })

    summary = {
        "resolved": matches,
        "rules_count": len(compiled_rules),
        "expenses_checked": len(user_expenses),
    }
    return jsonify(summary)


@bp.post("/auto-suggest")
@jwt_required()
def auto_suggest():
    """Suggest payee aliases based on the caller's existing expense data.

    Payee strings (falling back to notes) that differ only in case,
    spacing or punctuation are grouped together; for each group with more
    than one spelling, the most frequent spelling is proposed as the
    canonical name and every other spelling as an alias for it.

    Returns:
        JSON ``{"suggestions": [...]}`` with at most 20 entries, each holding
        ``canonical_name``, ``alias_pattern``, ``match_type`` and ``count``.
    """
    uid = int(get_jwt_identity())
    expenses = Expense.query.filter_by(user_id=uid).all()

    payees = []
    for exp in expenses:
        raw = (exp.payee or exp.notes or "").strip()
        if raw:
            payees.append(raw)

    return jsonify({"suggestions": _suggest_aliases_from_payees(payees)})


def _suggest_aliases_from_payees(payees, limit=20):
    """Build alias suggestions from a list of non-empty payee strings.

    Args:
        payees: Stripped payee strings, one per expense (duplicates expected).
        limit: Maximum number of suggestions to return.

    Returns:
        A list of suggestion dicts, in order of first appearance of each
        group. ``count`` is the number of expenses in the whole group.
    """
    # Group spellings by their normalised form (lowercase alphanumerics only)
    # in a single pass, counting how often each exact spelling occurs.
    variants_by_key = {}
    for raw in payees:
        key = re.sub(r"[^a-z0-9]", "", raw.lower())
        if not key:
            # Strings with no ASCII letters or digits all normalise to "";
            # grouping them would merge unrelated payees.
            continue
        variants_by_key.setdefault(key, Counter())[raw] += 1

    suggestions = []
    for variants in variants_by_key.values():
        # A payee that is always spelled the same way needs no alias.
        if len(variants) < 2:
            continue

        # Most frequent spelling wins; ties go to the one seen first.
        canonical = max(variants.items(), key=lambda item: item[1])[0]
        total = sum(variants.values())

        # Matching is case-insensitive, so spellings that differ only in case
        # would yield redundant patterns; keep one per lowercase form.
        seen_patterns = set()
        for variant in variants:
            if variant == canonical:
                continue
            folded = variant.lower()
            if folded in seen_patterns:
                continue
            seen_patterns.add(folded)
            suggestions.append({
                "canonical_name": canonical,
                "alias_pattern": variant,
                "match_type": "case_insensitive",
                "count": total,
            })
            if len(suggestions) >= limit:
                return suggestions

    return suggestions


def _match_rule(rule, raw):
    """Match a payee string against one alias rule.

    Args:
        rule: Either a 1-tuple ``(alias,)`` whose element exposes
            ``match_type``, ``alias_pattern`` and ``canonical_name``, or a
            2-tuple ``(compiled_regex, canonical_name)``.
        raw: The payee text to test.

    Returns:
        The rule's canonical name when ``raw`` matches, otherwise ``None``.
    """
    if len(rule) != 1:
        regex, canonical = rule
        return canonical if regex.search(raw) else None

    alias = rule[0]
    pattern = alias.alias_pattern
    # Patterns are stored stripped, so surrounding whitespace on the raw
    # value must not defeat an equality match.
    candidate = raw.strip()

    if alias.match_type == "exact":
        # "exact" is case-sensitive; comparing lowercased values here would
        # make it indistinguishable from "case_insensitive".
        matched = candidate == pattern
    elif alias.match_type == "case_insensitive":
        matched = candidate.lower() == pattern.lower()
    elif alias.match_type == "contains":
        matched = pattern.lower() in raw.lower()
    else:
        matched = False

    return alias.canonical_name if matched else None
Loading