/*
 * Karnataka Bank statement processor.
 *
 * Recovered from the patch that adds
 * Services/BankStatementProcessor/bankStatementProcessor-KARNATAKA.js. The same
 * patch registers `KARNATAKA: "karnataka"` in the BANK map of src/BANK.js.
 */

/**
 * Converts the rows of a bank-statement spreadsheet into account details and a
 * list of transactions.
 *
 * The function works in three passes:
 *  1. the first 50 rows are scanned for account metadata (bank name, IFSC,
 *     account number, holder name, ...);
 *  2. the table header is located, either as a row whose cell VALUES are column
 *     labels, or (fallback) from the object KEYS when the sheet was parsed with
 *     its header row as keys;
 *  3. every following row that carries a non-zero debit or credit becomes a
 *     transaction. A row without a parseable date inherits the date of the
 *     previous accepted row.
 *
 * @param {Array<Object>} rawData - Rows of the statement, one object per row.
 *   Anything that is not an array is treated as an empty statement.
 * @returns {{bank_details: Object, transactions: Array<Object>}}
 *   `bank_details` always has the keys bank_name, opening_balance (default 0),
 *   ifsc, address, city, account_no, account_holder_name, branch_name and
 *   branch_code (null when not found). Each transaction has date (YYYY-MM-DD),
 *   voucher_number (1-based counter), amount (positive), desc, from, to,
 *   type ("withdrawal" | "deposit") and balance (number or null).
 */
function processBankStatement(rawData) {
  const rows = Array.isArray(rawData) ? rawData : [];

  // ---------------------------------------------------------------- helpers
  const toStr = (v) => (v === null || v === undefined ? "" : String(v));
  const normalize = (s) => toStr(s).replace(/\s+/g, " ").trim();
  const normLower = (s) => normalize(s).toLowerCase();
  const stripPunc = (s) => normLower(s).replace(/[^\w\s]/g, "");
  const isEmptyLike = (v) =>
    v === null || v === undefined || (typeof v === "string" && v.trim() === "");
  // Own enumerable entries only; non-object rows contribute nothing.
  const entriesOf = (row) =>
    row !== null && typeof row === "object" ? Object.entries(row) : [];
  const pad2 = (n) => String(n).padStart(2, "0");

  // ------------------------------------------------------------------ dates
  const MS_PER_DAY = 86400000;
  const EXCEL_EPOCH_MS = Date.UTC(1899, 11, 30);
  const MONTHS = new Map([
    ["jan", 1], ["feb", 2], ["mar", 3], ["apr", 4], ["may", 5], ["jun", 6],
    ["jul", 7], ["aug", 8], ["sep", 9], ["oct", 10], ["nov", 11], ["dec", 12],
  ]);
  // Trailing clock time, e.g. " 10:30", "T10:30:00.000Z", " 9:05 PM +05:30".
  const TIME_SUFFIX =
    /[ T]\d{1,2}:\d{2}(?::\d{2}(?:\.\d+)?)?\s*(?:[ap]m)?\s*(?:z|[+-]\d{2}:?\d{2})?$/i;

  // Returns YYYY-MM-DD only for a real calendar date between 1900 and 2100.
  // Date.UTC silently rolls 31 Feb over to March, so the parts are compared
  // after the round trip instead of trusting the constructor.
  const toIsoDate = (y, mo, d) => {
    if (![y, mo, d].every(Number.isInteger)) return null;
    if (y < 1900 || y > 2100) return null;
    const dt = new Date(Date.UTC(y, mo - 1, d));
    const sameDay =
      dt.getUTCFullYear() === y && dt.getUTCMonth() === mo - 1 && dt.getUTCDate() === d;
    return sameDay ? `${y}-${pad2(mo)}-${pad2(d)}` : null;
  };

  // Two-digit years: 70-99 -> 19xx, 00-69 -> 20xx.
  const expandYear = (y) => (y < 100 ? y + (y >= 70 ? 1900 : 2000) : y);

  // Excel serial -> YYYY-MM-DD. The fractional part is the time of day, so it
  // is floored; rounding would push afternoon timestamps to the next day.
  const excelSerialToDateString = (serial) => {
    const dt = new Date(EXCEL_EPOCH_MS + Math.floor(serial) * MS_PER_DAY);
    return toIsoDate(dt.getUTCFullYear(), dt.getUTCMonth() + 1, dt.getUTCDate());
  };

  /**
   * Parses a date cell. Returns `lastDate` (or null) when the cell cannot be
   * read as a valid date. Numeric text dates are read day-first.
   */
  const parseDate = (value, lastDate) => {
    const fallback = lastDate || null;
    if (value === null || value === undefined) return fallback;

    if (value instanceof Date) {
      if (Number.isNaN(value.getTime())) return fallback;
      return toIsoDate(value.getFullYear(), value.getMonth() + 1, value.getDate()) || fallback;
    }
    if (typeof value === "number" && Number.isFinite(value)) {
      return excelSerialToDateString(value) || fallback;
    }

    const text = normalize(value);
    if (!text) return fallback;
    // Drop the clock time so "05/01/2024 10:30" is still read day-first
    // instead of falling through to Date.parse (which reads it month-first).
    const datePart = text.replace(TIME_SUFFIX, "").trim();

    // YYYY-MM-DD or YYYY/MM/DD
    let m = datePart.match(/^(\d{4})[\/\-](\d{1,2})[\/\-](\d{1,2})$/);
    if (m) return toIsoDate(+m[1], +m[2], +m[3]) || fallback;

    // DD/MM/YYYY, DD-MM-YYYY, DD.MM.YYYY; month-first only when day-first is impossible.
    m = datePart.match(/^(\d{1,2})[\/\-.](\d{1,2})[\/\-.](\d{2}|\d{4})$/);
    if (m) {
      const first = +m[1];
      const second = +m[2];
      const y = expandYear(+m[3]);
      const dayFirst = toIsoDate(y, second, first);
      if (dayFirst) return dayFirst;
      // Purely numeric dates never reach Date.parse: it would reinterpret them month-first.
      return (second > 12 ? toIsoDate(y, first, second) : null) || fallback;
    }

    // DD-Mon-YYYY, DD Mon YYYY, DD/Mon/YY
    m = datePart.match(/^(\d{1,2})[ \/\-]([A-Za-z]{3,})[ \/\-](\d{2}|\d{4})$/);
    if (m) {
      const mo = MONTHS.get(m[2].toLowerCase().slice(0, 3));
      const iso = mo ? toIsoDate(expandYear(+m[3]), mo, +m[1]) : null;
      if (iso) return iso;
    }

    // Mon DD YYYY, Mon DD, YYYY
    m = datePart.match(/^([A-Za-z]{3,})[ \-](\d{1,2}),?[ \-](\d{2}|\d{4})$/);
    if (m) {
      const mo = MONTHS.get(m[1].toLowerCase().slice(0, 3));
      const iso = mo ? toIsoDate(expandYear(+m[3]), mo, +m[2]) : null;
      if (iso) return iso;
    }

    // Last resort: the engine's own parser, read back in local time as before.
    const ms = Date.parse(text);
    if (!Number.isNaN(ms)) {
      const dt = new Date(ms);
      return toIsoDate(dt.getFullYear(), dt.getMonth() + 1, dt.getDate()) || fallback;
    }
    return fallback;
  };

  // ---------------------------------------------------------------- amounts
  /**
   * Parses an amount cell. Returns null when no number can be read.
   * Values wrapped in parentheses are negative.
   */
  const safeNum = (val) => {
    if (val === null || val === undefined) return null;
    if (typeof val === "number") return Number.isFinite(val) ? val : null;
    // Remove "Rs." / "INR" first: otherwise the dot of "Rs." survives the
    // cleanup below and "Rs. 1,500.00" is parsed as 0.15.
    const withoutCurrency = String(val).replace(/\b(?:rs|inr)\b\.?/gi, "");
    const cleaned = withoutCurrency.replace(/[^0-9.\-()]+/g, "");
    if (!cleaned) return null;
    const negative = cleaned.includes("(") && cleaned.includes(")");
    const num = Number.parseFloat(cleaned.replace(/[()]/g, ""));
    if (Number.isNaN(num)) return null;
    return negative ? -Math.abs(num) : num;
  };

  // ----------------------------------------------------------- bank details
  // Returns the first IFSC-shaped token (4 letters, "0", 6 alphanumerics) or null.
  const looksLikeIfsc = (s) => {
    const m = toStr(s).toUpperCase().match(/\b([A-Z]{4}0[A-Z0-9]{6})\b/);
    return m ? m[1] : null;
  };

  // Splits "Label : value" / "Label :- value" / "Label   value" into one pair.
  // The separator run is consumed as a whole so ":-" does not leave "- " in
  // front of the value.
  const extractKVPairsFromText = (text) => {
    const t = toStr(text);
    const m =
      t.match(/(.{0,60}?)(?:\s*[:=\-]+\s*|\s{2,})(.{1,200})$/) ||
      t.match(/(.{0,60}?)(?:\s*[:-]+\s*)([^\s].{0,200})/);
    return m ? [{ key: normalize(m[1]), value: normalize(m[2]) }] : [];
  };

  // Stores `v` into the field(s) of `details` that the label `k` names.
  const tryAssignDetail = (k, v, details) => {
    const key = normLower(k);
    const value = normalize(v);
    if (!stripPunc(k).trim() || !value) return;

    const names = (re) => re.test(key);
    const isBankName = names(/(^|\s)(bank name|name of bank)(\s|$)/);
    const isBranch = names(/(^|\s)branch(\s|$)/);
    const isBranchCode = names(/(^|\s)(branch code|branch id|sol|ifsc branch code)(\s|$)/);
    const isIfsc = names(/(^|\s)(ifsc|ifsc code|ifs code)(\s|$)/);
    const isAddress = names(/(^|\s)(address|addr)(\s|$)/);
    // A bare "name" label is the account holder unless it is the bank's or
    // the branch's name.
    const isHolder =
      names(/(^|\s)(account holder name|account name|a\/c name|ac name|name)(\s|$)/) &&
      !isBankName &&
      !isBranch;

    if (isBankName && !details.bank_name) details.bank_name = value;
    if (isHolder && !details.account_holder_name) {
      details.account_holder_name = value.replace(/^:?\s*/, "");
    }
    if (
      names(
        /(^|\s)(account no|account number|a\/c no|ac no|a\/c number|a\/c#|account #|a c no|statement of account no)(\s|$)/
      )
    ) {
      const m = value.match(/[A-Za-z0-9\-]{6,}/);
      if (m && !details.account_no) details.account_no = m[0];
    }
    if (isIfsc) {
      const code = looksLikeIfsc(value) || looksLikeIfsc(k);
      if (code) details.ifsc = code;
    }
    // "Branch Code", "Branch Address" and "Branch IFSC" are not the branch name.
    if (isBranch && !isBranchCode && !isAddress && !isIfsc && !details.branch_name) {
      details.branch_name = value;
    }
    if (isBranchCode && !details.branch_code) {
      details.branch_code = value.replace(/[^\w\-]/g, "");
    }
    if (isAddress && !details.address) details.address = value;
    if (names(/(^|\s)(city|place|location|town)(\s|$)/) && !details.city) {
      details.city = value;
    }
    if (names(/(^|\s)(opening bal|opening balance)(\s|$)/)) {
      const n = safeNum(value);
      if (n !== null) details.opening_balance = n;
    }
  };

  const bank_details = {
    bank_name: null,
    opening_balance: 0,
    ifsc: null,
    address: null,
    city: null,
    account_no: null,
    account_holder_name: null,
    branch_name: null,
    branch_code: null,
  };

  // Metadata lives above the table, so only the first rows are inspected.
  const preScanLimit = Math.min(rows.length, 50);
  for (let i = 0; i < preScanLimit; i++) {
    for (const [key, cell] of entriesOf(rows[i])) {
      const kStr = toStr(key);
      const vStr = toStr(cell);
      const vNorm = normalize(vStr);

      if (!bank_details.ifsc) {
        bank_details.ifsc = looksLikeIfsc(kStr) || looksLikeIfsc(vStr);
      }

      // Both the column key and the cell may hold "Label : value" text.
      for (const text of [kStr, vStr]) {
        for (const pair of extractKVPairsFromText(text)) {
          tryAssignDetail(pair.key, pair.value, bank_details);
        }
      }

      // Inline "Opening Balance : 1234.56"
      const opening = vNorm.match(/opening\s*bal(?:ance)?\s*[:\-]*\s*([-(₹,.\d\s]+)/i);
      if (opening) {
        const n = safeNum(opening[1]);
        if (n !== null) bank_details.opening_balance = n;
      }

      if (!bank_details.account_no) {
        const acct =
          vNorm.match(
            /(?:account\s*(?:no|number|#)|a\/c\s*(?:no|number)|ac\s*no)\s*[:\-]*\s*([A-Za-z0-9\- ]{6,})/i
          ) || vNorm.match(/statement of account no\s*[-:\s]*([A-Za-z0-9\- ]{6,})/i);
        if (acct) bank_details.account_no = normalize(acct[1]).replace(/\s/g, "");
      }

      // "Name :- ABC" in either the key or the cell.
      for (const text of [kStr, vStr]) {
        if (bank_details.account_holder_name || !/^name\b/i.test(text)) continue;
        const holder = text.match(/name\s*[:-]+\s*(.+)$/i);
        if (holder) bank_details.account_holder_name = normalize(holder[1]);
      }

      // Bank name: first early text mentioning "bank" that is not a title or an IFSC line.
      if (!bank_details.bank_name) {
        const cand = [kStr, vStr]
          .map(normalize)
          .find((s) => /\bbank\b/i.test(s) && !/bank statement|banking/i.test(s));
        if (cand && !/ifsc/i.test(cand)) {
          const m = cand.match(/([A-Z ]*BANK(?: [A-Z ]+)*)/i);
          bank_details.bank_name = m ? normalize(m[1]) : cand;
        }
      }

      // Branch code is sometimes labelled SOL.
      if (!bank_details.branch_code && /\bsol\b/i.test(vStr)) {
        const m = vStr.match(/\bsol\b[:\-]*\s*([A-Za-z0-9\-]+)/i);
        if (m) bank_details.branch_code = normalize(m[1]);
      }

      if (!bank_details.address && /\baddress\b/i.test(vStr)) {
        const m = vStr.match(/address\s*[:-]*\s*(.+)$/i);
        if (m) bank_details.address = normalize(m[1]);
      }
    }
  }

  // IFSC may sit below the pre-scan window (e.g. in a footer).
  if (!bank_details.ifsc) {
    for (const row of rows) {
      for (const [key, cell] of entriesOf(row)) {
        bank_details.ifsc = looksLikeIfsc(key) || looksLikeIfsc(cell);
        if (bank_details.ifsc) break;
      }
      if (bank_details.ifsc) break;
    }
  }

  // ------------------------------------------------------- header detection
  const escapeRe = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const cleanLabel = (cell) => normLower(cell).replace(/\./g, "").trim();

  // Labels are matched as whole words (optionally plural). Plain substring
  // matching made "Description" look like a credit column ("cr") and
  // "Address" look like a debit column ("dr").
  const HEADER_LABELS = [
    { slot: "dateKey", labels: ["date", "txn date", "transaction date", "tran date", "value date", "post date", "date value", "trans date"] },
    { slot: "narrationKey", labels: ["narration", "description", "details", "particulars", "remarks", "transaction details", "transaction description", "particular"] },
    { slot: "debitKey", labels: ["debit", "withdrawal", "dr", "withdraw", "wdl", "debit amount", "withdrawal amount"] },
    { slot: "creditKey", labels: ["credit", "deposit", "cr", "credit amount", "deposits", "dep"] },
    { slot: "balanceKey", labels: ["balance", "bal", "closing balance", "running balance", "available balance", "balance amount", "bal."] },
  ].map(({ slot, labels }) => {
    const plain = labels.map((l) => l.replace(/\./g, ""));
    return {
      slot,
      exact: new Set(plain),
      pattern: new RegExp(`(?:^|[^a-z0-9])(?:${plain.map(escapeRe).join("|")})s?(?:[^a-z0-9]|$)`),
    };
  });
  const exactLabels = Object.fromEntries(HEADER_LABELS.map((h) => [h.slot, h.exact]));

  // Maps [column, labelText] pairs to table roles. Each column takes at most
  // one role and each role goes to the first column that names it. Returns
  // null unless the pairs look like a header: a date plus debit or credit,
  // or any three roles.
  const mapColumns = (pairs) => {
    const mapping = { dateKey: null, narrationKey: null, debitKey: null, creditKey: null, balanceKey: null };
    for (const [column, label] of pairs) {
      if (isEmptyLike(label)) continue;
      const text = cleanLabel(label);
      const hit = HEADER_LABELS.find((h) => mapping[h.slot] === null && h.pattern.test(text));
      if (hit) mapping[hit.slot] = column;
    }
    const matched = Object.values(mapping).filter((c) => c !== null).length;
    const hasAmountColumn = mapping.debitKey !== null || mapping.creditKey !== null;
    return (mapping.dateKey !== null && hasAmountColumn) || matched >= 3 ? mapping : null;
  };

  // Finds the table: first a row whose VALUES are labels; otherwise the
  // object KEYS themselves (sheet parsed with its header row as keys).
  const locateTable = () => {
    for (let i = 0; i < rows.length; i++) {
      const mapping = mapColumns(entriesOf(rows[i]));
      if (mapping) return { startIndex: i + 1, mapping };
    }
    // Blank cells may be omitted from a row, so keys are collected across all rows.
    const columns = new Set();
    for (const row of rows) {
      for (const [key] of entriesOf(row)) columns.add(key);
    }
    const mapping = mapColumns([...columns].map((c) => [c, c]));
    return mapping ? { startIndex: 0, mapping } : null;
  };

  // ------------------------------------------------------------- from / to
  const SKIP_TOKENS = new Set([
    "upi", "p2m", "p2a", "imps", "neft", "rtgs", "atm", "pos", "paid v", "pay",
    "payment", "credit", "debit", "transfer", "cheque", "chqno", "visa",
    "mastercard", "axis bank", "icici bank", "hdfc bank", "bank", "ltd", "ref",
  ]);

  // First "/", "|" or "-" separated token that is not a payment keyword.
  const guessCounterpartyName = (text) => {
    const tokens = text.split(/[\/|\-]+/).map((t) => normalize(t)).filter(Boolean);
    for (const token of tokens) {
      const lower = token.toLowerCase();
      if (SKIP_TOKENS.has(lower)) continue;
      if (/[A-Za-z]/.test(token) && lower.length > 2) return token;
    }
    return null;
  };

  // Counterparty: a UPI id when present, otherwise a name-like token.
  // It is the payee for withdrawals and the payer for deposits.
  const parseFromTo = (description, txType) => {
    const text = toStr(description);
    let party = null;
    if (text) {
      const upi = text.match(/\b([a-z0-9.\-_]+@[a-z]+)\b/i);
      party = upi ? upi[1] : guessCounterpartyName(text);
    }
    return txType === "withdrawal" ? { from: null, to: party } : { from: party, to: null };
  };

  // ----------------------------------------------------------- transactions
  const SUMMARY_ROW = /\b(?:totals?|summary|closing|available balance|carried forward)\b|\b[bc]\/f\b/i;
  const isBlankCell = (v) =>
    isEmptyLike(v) || normalize(v) === "-" || normLower(v) === "nan";

  const transactions = [];
  const table = locateTable();
  if (table) {
    const { mapping } = table;
    const mappedColumns = new Set(Object.values(mapping).filter((c) => c !== null));
    const cellOf = (row, column) => (column !== null ? row[column] : null);
    let voucher = 1;
    let lastDate = null;

    for (let i = table.startIndex; i < rows.length; i++) {
      const row = rows[i] !== null && typeof rows[i] === "object" ? rows[i] : {};
      if (Object.values(row).every(isBlankCell)) continue;

      const dateCell = cellOf(row, mapping.dateKey);
      const narrationCell = toStr(cellOf(row, mapping.narrationKey));

      // Narration, or when that is blank: every other textual cell joined.
      let desc = normalize(narrationCell);
      if (desc === "" || desc === "-") {
        const parts = [];
        for (const [column, cell] of entriesOf(row)) {
          if (mappedColumns.has(column)) continue;
          const val = normalize(cell);
          const isDateLike = /^\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4}$/.test(val);
          const isNumberLike = /^[-,.\d\s]+$/.test(val);
          if (val && !isDateLike && !isNumberLike && val !== "-") parts.push(val);
        }
        desc = normalize(parts.join(" | "));
      }

      const ownDate = parseDate(dateCell, null);

      // Totals / carried-forward lines have no date of their own. Rows that do
      // are kept, so a narration merely containing "total" is not discarded.
      if (ownDate === null && SUMMARY_ROW.test(desc)) continue;

      // A header repeated on a new page: the cell IS a label, not merely a
      // narration that mentions "debit" or "credit".
      const isRepeatedHeader =
        exactLabels.narrationKey.has(cleanLabel(narrationCell)) ||
        exactLabels.dateKey.has(cleanLabel(dateCell));
      if (isRepeatedHeader) continue;

      const debit = safeNum(cellOf(row, mapping.debitKey));
      const credit = safeNum(cellOf(row, mapping.creditKey));
      const balance = safeNum(cellOf(row, mapping.balanceKey));
      const hasDebit = debit !== null && debit !== 0;
      const hasCredit = credit !== null && credit !== 0;
      const hasAmount = hasDebit || hasCredit;

      if (!hasAmount && desc === "") continue;

      const date = ownDate || lastDate;
      if (!date) continue; // no date seen yet: cannot place the row
      lastDate = date;

      if (!hasAmount) continue;

      // When both columns are filled the debit wins.
      const type = hasDebit ? "withdrawal" : "deposit";
      const amount = Math.abs(hasDebit ? debit : credit);
      const { from, to } = parseFromTo(desc, type);

      transactions.push({
        date,
        voucher_number: voucher++,
        amount,
        desc,
        from,
        to,
        type,
        balance: balance !== null && Number.isFinite(balance) ? balance : null,
      });
    }
  }

  return { bank_details, transactions };
}

// Guarded so the file also loads where the CommonJS `module` object is absent.
if (typeof module !== "undefined" && module.exports) {
  module.exports = processBankStatement;
}