/**
 * Bank statement processor (file: Services/BankStatementProcessor/bankStatementProcessor-KARNATAKA.js).
 *
 * The change set that introduced this file also adds `KARNATAKA: "karnataka"` to the
 * BANK map in src/BANK.js.
 *
 * Processes bank statement data from Excel.
 *
 * The rows are expected to be plain objects (one per sheet row) whose values hold the
 * cell contents. The header row of the transaction table is located by scoring every
 * row's cell values against well-known column titles; rows above it are treated as
 * account metadata, rows below it as transactions.
 *
 * @param {Array} rawData - Array of objects from bank statement Excel. Anything that is
 *   not an array is treated as an empty statement.
 * @returns {{bank_details: Object, transactions: Array<Object>}} Processed bank statement
 *   data. `bank_details` always contains bank_name, opening_balance (number, 0 when
 *   unknown), ifsc, address, city, account_no, account_holder_name, branch_name and
 *   branch_code (null when not found). Each transaction has date (YYYY-MM-DD),
 *   voucher_number (1-based running counter), amount (absolute value), desc, from, to,
 *   type ('withdrawal' | 'deposit') and balance (number or null).
 */
function processBankStatement(rawData) {
  // Labels of this length or shorter ('dr', 'cr', 'bal', 'sol', 'to', 'pay', ...) are only
  // accepted as whole words. Longer labels may also match as the start of a word so that
  // plurals and inflections ("credits", "withdrawals", "transferred") keep matching.
  const SHORT_LABEL_MAX = 3;

  const MONTHS = {
    jan: 1, feb: 2, mar: 3, apr: 4, may: 5, jun: 6,
    jul: 7, aug: 8, sep: 9, sept: 9, oct: 10, nov: 11, dec: 12
  };

  // ---------------------------------------------------------------------------
  // Generic helpers
  // ---------------------------------------------------------------------------

  // [key, value] pairs of an object in key order; null/undefined yields [].
  function entries(obj) {
    return Object.keys(obj || {}).map((k) => [k, obj[k]]);
  }

  function isEmptyValue(v) {
    return v === null || v === undefined || (typeof v === 'string' && v.trim() === '');
  }

  function toStr(v) {
    if (v === null || v === undefined) return '';
    if (typeof v === 'string') return v;
    if (typeof v === 'number') return String(v);
    if (v && typeof v.toString === 'function') return v.toString();
    try { return JSON.stringify(v); } catch (e) { return String(v); }
  }

  // Collapses runs of whitespace and trims.
  function normalize(str) {
    return toStr(str)
      .replace(/\s+/g, ' ')
      .trim();
  }

  // Lower-cased, punctuation-light form used for all label comparisons.
  function normAlpha(str) {
    return normalize(str)
      .toLowerCase()
      .replace(/[_\-():,.;]/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();
  }

  function isWordChar(ch) {
    return /[a-z0-9]/.test(ch);
  }

  /**
   * Word-aware label test on an already normAlpha()-ed text.
   * A plain substring test is not safe here: 'cr' is a substring of "description",
   * 'to' of "victor", 'date' of "update". The label must therefore start at a word
   * boundary, and short labels (or every label when wholeWordOnly is set) must also
   * end at one.
   */
  function hasLabel(text, label, wholeWordOnly) {
    if (!label) return false;
    let from = 0;
    while (from <= text.length - label.length) {
      const at = text.indexOf(label, from);
      if (at === -1) return false;
      const end = at + label.length;
      const startsWord = at === 0 || !isWordChar(text[at - 1]);
      const endsWord = end === text.length || !isWordChar(text[end]);
      const prefixAllowed = !wholeWordOnly && label.length > SHORT_LABEL_MAX;
      if (startsWord && (endsWord || prefixAllowed)) return true;
      from = at + 1;
    }
    return false;
  }

  function includesAny(str, labels) {
    const n = normAlpha(str);
    return labels.some((label) => hasLabel(n, label, false));
  }

  /**
   * Converts a cell value to a number, or null when it holds no number.
   * Strips currency symbols / thousands separators and treats "(123.45)" as negative.
   */
  function toNumber(val) {
    if (val === null || val === undefined) return null;
    if (typeof val === 'number') {
      return Number.isFinite(val) ? val : null;
    }
    const s = toStr(val).trim();
    if (!s) return null;

    // Accounting-style negatives are wrapped in parentheses.
    const isNeg = /^\s*\(.*\)\s*$/.test(s);
    let clean = s.replace(/[()]/g, '');

    // Drop everything that is not a digit, dot or minus sign.
    clean = clean.replace(/[^\d.\-]/g, '');
    // With several dots, only the last one is the decimal point.
    const parts = clean.split('.');
    if (parts.length > 2) {
      const last = parts.pop();
      clean = parts.join('') + '.' + last;
    }
    const num = parseFloat(clean);
    if (Number.isNaN(num)) return null;
    return isNeg ? -Math.abs(num) : num;
  }

  // ---------------------------------------------------------------------------
  // Dates
  // ---------------------------------------------------------------------------

  function isLikelyExcelSerial(n) {
    return typeof n === 'number' && n > 20000 && n < 80000; // roughly years 1955-2120
  }

  function excelSerialToISODate(n) {
    // 1900 date system; the 1899-12-30 epoch absorbs Excel's phantom 1900-02-29.
    const epoch = new Date(Date.UTC(1899, 11, 30));
    const ms = epoch.getTime() + Math.round(n * 86400000);
    const d = new Date(ms);
    if (Number.isNaN(d.getTime())) return null;
    return d.toISOString().slice(0, 10);
  }

  function pad2(n) {
    return (n < 10 ? '0' : '') + n;
  }

  function toISO(y, m, d) {
    return `${y}-${pad2(m)}-${pad2(d)}`;
  }

  // True only for dates that exist on the calendar (rejects 31/02, 30/02, 31/04, ...).
  function isValidYMD(y, m, d) {
    if (!(m >= 1 && m <= 12 && d >= 1)) return false;
    // Day 0 of the following month is the last day of month m.
    return d <= new Date(Date.UTC(y, m, 0)).getUTCDate();
  }

  function expandYear(y) {
    return y < 100 ? y + (y >= 50 ? 1900 : 2000) : y;
  }

  /**
   * Parses a cell value into a YYYY-MM-DD string, or null.
   * Supports Excel serial numbers, Date instances, yyyy-mm-dd, dd/mm/yyyy (also with
   * '-' or '.'), dd-Mon-yyyy, each optionally followed by a time of day.
   *
   * @param {*} val - cell value
   * @param {boolean} allowNative - whether to fall back to the engine's Date parser for
   *   other formats. The native parser is very lenient, so the fallback is additionally
   *   restricted to strings that contain a 4-digit year.
   */
  function parseDate(val, allowNative) {
    if (val === null || val === undefined || val === '') return null;
    if (val instanceof Date) {
      if (Number.isNaN(val.getTime())) return null;
      // Local components: toISOString() would shift the day by the UTC offset.
      return toISO(val.getFullYear(), val.getMonth() + 1, val.getDate());
    }
    if (typeof val === 'number' && isLikelyExcelSerial(val)) {
      return excelSerialToISODate(val);
    }
    const s = toStr(val).trim();
    if (!s) return null;

    // Drop a trailing time of day ("01/04/2024 10:30:00", "2024-04-01T00:00:00Z") so the
    // day-first patterns below see it instead of the month-first native parser.
    const core = s.replace(/(\d)(?:T|\s+)\d{1,2}:\d{2}.*$/, '$1');

    // yyyy-mm-dd or yyyy/mm/dd
    const isoMatch = core.match(/^(\d{4})[\/\-](\d{1,2})[\/\-](\d{1,2})$/);
    if (isoMatch) {
      const y = parseInt(isoMatch[1], 10);
      const m = parseInt(isoMatch[2], 10);
      const d = parseInt(isoMatch[3], 10);
      if (isValidYMD(y, m, d)) return toISO(y, m, d);
    }

    // dd/mm/yyyy, dd-mm-yyyy or dd.mm.yyyy
    const dmy = core.match(/^(\d{1,2})[\/\-\.](\d{1,2})[\/\-\.](\d{2,4})$/);
    if (dmy) {
      const d = parseInt(dmy[1], 10);
      const m = parseInt(dmy[2], 10);
      const y = expandYear(parseInt(dmy[3], 10));
      if (isValidYMD(y, m, d)) return toISO(y, m, d);
    }

    // dd-Mon-yyyy, dd Mon yyyy, dd/Mon/yy
    const dMonY = core.match(/^(\d{1,2})[\-\/\s]([A-Za-z]{3,4})[\-\/\s](\d{2,4})$/);
    if (dMonY) {
      const d = parseInt(dMonY[1], 10);
      const m = MONTHS[dMonY[2].toLowerCase()];
      const y = expandYear(parseInt(dMonY[3], 10));
      if (m && isValidYMD(y, m, d)) return toISO(y, m, d);
    }

    // Last resort: native parsing, read back with local getters (see above).
    if (allowNative && /\d{4}/.test(s)) {
      const dt = new Date(s);
      if (!Number.isNaN(dt.getTime())) {
        return toISO(dt.getFullYear(), dt.getMonth() + 1, dt.getDate());
      }
    }

    return null;
  }

  // ---------------------------------------------------------------------------
  // Header detection
  // ---------------------------------------------------------------------------

  const KNOWN_HEADER_TITLES = [
    'date', 'txn date', 'tran date', 'transaction date', 'value date', 'posting date',
    'narration', 'description', 'details', 'particulars', 'transaction details', 'remarks',
    'withdrawal', 'withdrawals', 'dr', 'debit', 'debit amount',
    'deposit', 'deposits', 'cr', 'credit', 'credit amount',
    'balance', 'bal', 'closing balance', 'running balance', 'balance amount'
  ];
  const HEADER_HINTS = ['date', 'narration', 'description', 'particulars', 'debit', 'credit', 'balance', 'dr', 'cr', 'withdrawal', 'deposit'];

  function looksLikeHeaderValue(val) {
    return KNOWN_HEADER_TITLES.includes(normAlpha(val));
  }

  // +2 for every cell that is exactly a known column title, +1 for every cell that
  // mentions one of the hint words.
  function scoreHeaderRow(row) {
    let score = 0;
    for (const [, v] of entries(row)) {
      if (isEmptyValue(v)) continue;
      const s = toStr(v);
      if (looksLikeHeaderValue(s)) score += 2;
      if (includesAny(s, HEADER_HINTS)) score += 1;
    }
    return score;
  }

  /**
   * Maps the header row's cells to the column keys of date / narration / debit /
   * credit / balance. First matching cell wins for each role.
   *
   * Pass 1 uses word-aware matching. Pass 2 keeps the permissive substring matching
   * (e.g. "DrAmt", "CrAmt") but only for roles still unmapped and only on columns
   * that pass 1 did not assign, so "Description" can no longer be taken as the
   * credit column because it contains "cr".
   */
  function mapHeaderKeys(row) {
    const map = {
      dateKey: null,
      narrationKey: null,
      debitKey: null,
      creditKey: null,
      balanceKey: null
    };
    const roles = [
      { key: 'dateKey', names: ['date', 'txn date', 'tran date', 'transaction date', 'value date', 'posting date'] },
      { key: 'narrationKey', names: ['narration', 'description', 'details', 'particulars', 'transaction details', 'remarks'] },
      { key: 'debitKey', names: ['debit', 'dr', 'withdrawal', 'withdrawals', 'debit amount'] },
      { key: 'creditKey', names: ['credit', 'cr', 'deposit', 'deposits', 'credit amount'] },
      { key: 'balanceKey', names: ['balance', 'bal', 'closing balance', 'running balance', 'balance amount', 'bal.'] }
    ];

    const cells = entries(row)
      .filter(([, v]) => !isEmptyValue(v))
      .map(([k, v]) => ({ column: k, text: normAlpha(v) }));

    for (const cell of cells) {
      for (const role of roles) {
        if (map[role.key] === null && role.names.some((w) => hasLabel(cell.text, w, false))) {
          map[role.key] = cell.column;
        }
      }
    }

    const taken = new Set(roles.map((role) => map[role.key]).filter((c) => c !== null));
    for (const role of roles) {
      if (map[role.key] !== null) continue;
      const cell = cells.find((c) => !taken.has(c.column) && role.names.some((w) => c.text.includes(w)));
      if (cell) {
        map[role.key] = cell.column;
        taken.add(cell.column);
      }
    }
    return map;
  }

  // Picks the highest-scoring row as the header (earliest row wins ties).
  function identifyHeader(data) {
    let bestIdx = -1;
    let bestScore = 0;
    for (let i = 0; i < data.length; i++) {
      const sc = scoreHeaderRow(data[i] || {});
      if (sc > bestScore) {
        bestScore = sc;
        bestIdx = i;
      }
    }
    if (bestIdx >= 0) {
      const headerMap = mapHeaderKeys(data[bestIdx] || {});
      const hasAny = headerMap.dateKey || headerMap.narrationKey || headerMap.debitKey || headerMap.creditKey || headerMap.balanceKey;
      if (hasAny) {
        return { index: bestIdx, map: headerMap };
      }
    }
    return { index: -1, map: { dateKey: null, narrationKey: null, debitKey: null, creditKey: null, balanceKey: null } };
  }

  // ---------------------------------------------------------------------------
  // Account metadata
  // ---------------------------------------------------------------------------

  // Splits "Label: value" / "Label - value" at the first separator found
  // (separators are tried in the listed order).
  function extractKVFromString(str) {
    const s = toStr(str);
    if (!s) return null;
    const separators = [':-', ':', '-', '—', '–'];
    for (const sep of separators) {
      const idx = s.indexOf(sep);
      if (idx > 0 && idx < s.length - 1) {
        const key = normalize(s.slice(0, idx));
        const val = normalize(s.slice(idx + sep.length));
        if (key && val) return { key, value: val };
      }
    }
    return null;
  }

  function extractIFSCFromString(str) {
    const m = toStr(str).toUpperCase().match(/\b([A-Z]{4}0[A-Z0-9]{6})\b/);
    return m ? m[1] : null;
  }

  // First run of at least six digits.
  function extractDigitsFromString(str) {
    const m = toStr(str).match(/(\d{6,})/);
    return m ? m[1] : null;
  }

  // For a string that mentions an opening balance, returns the last number in it.
  function extractOpeningBalanceFromString(str) {
    const s = toStr(str);
    if (!s) return null;
    if (includesAny(s, ['opening balance', 'opening bal', 'open bal', 'balance b f', 'balance b/f', 'balance brought forward'])) {
      const nums = s.match(/[-]?\d[\d,]*\.?\d*/g);
      if (nums && nums.length) {
        const n = toNumber(nums[nums.length - 1]);
        if (n !== null) return n;
      }
    }
    return null;
  }

  /**
   * Extracts account details from the rows above the transaction header.
   *
   * Only rows before headerIndex are scanned (all rows when no header was found,
   * subject to the per-scan caps below): header cells and transaction narrations are
   * full of ':' and '-' separated text that would otherwise be mistaken for
   * "label: value" metadata. Every field is first-wins.
   *
   * opening_balance is null here when the statement does not state one, so that the
   * caller can tell "not stated" from an explicit 0.
   */
  function extractBankDetails(data, headerIndex) {
    const details = {
      bank_name: null,
      opening_balance: null,
      ifsc: null,
      address: null,
      city: null,
      account_no: null,
      account_holder_name: null,
      branch_name: null,
      branch_code: null
    };

    const metaRowCount = headerIndex >= 0 ? headerIndex : data.length;
    const labelMap = [
      { field: 'bank_name', labels: ['bank name', 'bank'] },
      { field: 'account_holder_name', labels: ['account holder name', 'account holder', 'acc holder', 'account name', 'customer name', 'name'] },
      { field: 'account_no', labels: ['account number', 'account no', 'a c no', 'a/c no', 'account id', 'account #'] },
      { field: 'ifsc', labels: ['ifsc', 'ifsc code', 'ifs'] },
      { field: 'branch_name', labels: ['branch name'] },
      { field: 'branch_code', labels: ['branch code', 'sol id', 'sol code', 'sol'] },
      { field: 'address', labels: ['address'] },
      { field: 'city', labels: ['city', 'place', 'location'] },
      { field: 'opening_balance', labels: ['opening balance', 'opening bal', 'open bal', 'balance b f', 'balance b/f'] }
    ];

    // The longest matching label decides, so "Bank Name" resolves to bank_name only
    // and not additionally to account_holder_name through the generic 'name' label.
    function resolveField(keyText) {
      const n = normAlpha(keyText);
      let bestField = null;
      let bestLen = 0;
      for (const lm of labelMap) {
        for (const label of lm.labels) {
          if (label.length > bestLen && hasLabel(n, label, false)) {
            bestField = lm.field;
            bestLen = label.length;
          }
        }
      }
      return bestField;
    }

    function assignField(field, value) {
      if (details[field] !== null) return; // first-wins
      if (field === 'opening_balance') {
        const ob = toNumber(value);
        if (ob !== null) details.opening_balance = ob;
      } else if (field === 'ifsc') {
        details.ifsc = extractIFSCFromString(value) || value.toUpperCase();
      } else if (field === 'account_no' || field === 'branch_code') {
        details[field] = extractDigitsFromString(value) || value;
      } else {
        details[field] = value;
      }
    }

    const maxRowsToScan = Math.min(60, metaRowCount);
    for (let i = 0; i < maxRowsToScan; i++) {
      const row = data[i] || {};
      for (const [k, v] of entries(row)) {
        // Both the column key and the cell value may carry "label: value" text.
        const candidates = [extractKVFromString(k), extractKVFromString(v)].filter(Boolean);
        for (const kv of candidates) {
          const field = resolveField(kv.key);
          if (field) assignField(field, kv.value);
        }

        // Direct string scans for text without an explicit key:value structure.
        const strsToCheck = [k, v].map(toStr).filter(Boolean);
        for (const s of strsToCheck) {
          if (!details.ifsc) {
            const ifsc = extractIFSCFromString(s);
            if (ifsc) details.ifsc = ifsc;
          }
          if (!details.account_no && includesAny(s, ['account no', 'account number', 'a c no', 'a/c no'])) {
            const acc = extractDigitsFromString(s);
            if (acc) details.account_no = acc;
          }
          if (details.opening_balance === null) {
            const ob = extractOpeningBalanceFromString(s);
            if (ob !== null) details.opening_balance = ob;
          }
          if (!details.branch_code && includesAny(s, ['sol id', 'sol code', 'sol'])) {
            const bcode = extractDigitsFromString(s);
            if (bcode) details.branch_code = bcode;
          }
        }
      }
    }

    // Account number still missing: look a little further for
    // "Statement of Account No - XYZ" style lines.
    if (!details.account_no) {
      const upto = Math.min(80, metaRowCount);
      for (let i = 0; i < upto && !details.account_no; i++) {
        for (const [, v] of entries(data[i])) {
          const s = toStr(v);
          if (!s) continue;
          if (includesAny(s, ['statement of account no', 'account no'])) {
            const acc = extractDigitsFromString(s);
            if (acc) {
              details.account_no = acc;
              break;
            }
          }
        }
      }
    }

    // Unlabelled address: stitch the first free-text lines at the top of the sheet.
    if (!details.address) {
      const addrLines = [];
      for (let i = 0; i < Math.min(6, metaRowCount); i++) {
        const row = data[i] || {};
        for (const [, v] of entries(row)) {
          const s = normalize(v);
          if (!s) continue;
          // Skip lines that are recognisably something else.
          if (includesAny(s, ['ifsc', 'micr', 'customer id', 'email', 'mobile', 'pan', 'statement', 'account', 'branch', 'sol'])) continue;
          if (/[A-Za-z]/.test(s) && s.length > 3) addrLines.push(s);
        }
      }
      if (addrLines.length >= 2) {
        details.address = addrLines.slice(0, 3).join(', ');
        // "<city> - <pin>" inside the address gives the city.
        const cityMatch = details.address.match(/([A-Za-z ]+)\s*-\s*\d{3,6}/);
        if (!details.city && cityMatch) {
          details.city = normalize(cityMatch[1]);
        }
      }
    }

    return details;
  }

  // ---------------------------------------------------------------------------
  // Transactions
  // ---------------------------------------------------------------------------

  /**
   * Guesses the counterparty from a narration and files it under `from` or `to`.
   * Direction words in the narration decide; when they are absent or contradictory
   * the transaction type decides (deposit -> from, withdrawal -> to); with no type
   * the counterparty goes to `to`.
   */
  function inferFromTo(desc, type) {
    if (!desc) return { from: null, to: null };
    const s = toStr(desc);

    // UPI id (name@handle)
    const upiMatch = s.match(/\b([a-z0-9.\-_]+@[a-z]+)\b/i);
    let counterparty = upiMatch ? upiMatch[1] : null;

    // Otherwise the first '/' or '|' separated part that has letters and is not a
    // channel/bank keyword. Word-aware matching keeps names such as "Victor" or
    // "Paytm" from being rejected for merely containing "to" / "pay".
    if (!counterparty) {
      const parts = s.split(/[\/|]/).map((p) => normalize(p)).filter(Boolean);
      const blacklist = ['upi', 'imps', 'neft', 'rtgs', 'p2p', 'p2m', 'paid', 'payment', 'pay', 'transfer', 'to', 'from', 'axis bank', 'hdfc bank ltd', 'icici bank', 'yes bank', 'bank', 'upi ref'];
      const candidate = parts.find((p) => {
        if (!/[A-Za-z]/.test(p)) return false;
        const np = normAlpha(p);
        if (blacklist.some((w) => hasLabel(np, w, false))) return false;
        return p.length >= 3;
      });
      if (candidate) counterparty = candidate;
    }

    const res = { from: null, to: null };
    if (!counterparty) return res;

    const isTo = /\bto\b/i.test(s) || /\bdr\b/i.test(s) || /\bdebit/i.test(s) || /\bpaid\b/i.test(s);
    const isFrom = /\bfrom\b/i.test(s) || /\bcr\b/i.test(s) || /\bcredit/i.test(s) || /\breceived\b/i.test(s);

    let direction = 'to'; // default when nothing else decides
    if (isFrom && !isTo) direction = 'from';
    else if (isTo && !isFrom) direction = 'to';
    else if (type === 'deposit') direction = 'from';
    else if (type === 'withdrawal') direction = 'to';

    res[direction] = counterparty;
    return res;
  }

  // Opening/closing balance, totals and carried-forward lines are not transactions.
  function isSummaryRow(desc) {
    const d = normAlpha(desc || '');
    if (!d) return false;
    const terms = ['opening balance', 'closing balance', 'total', 'summary', 'grand total', 'balance carried forward', 'brought forward'];
    return terms.some((t) => hasLabel(d, t, true));
  }

  // ---------------------------------------------------------------------------
  // Begin processing
  // ---------------------------------------------------------------------------
  const safeData = Array.isArray(rawData) ? rawData : [];

  const { index: headerIndex, map: headerMap } = identifyHeader(safeData);
  const bank_details = extractBankDetails(safeData, headerIndex);

  const transactions = [];
  let voucherCounter = 1;

  if (headerIndex >= 0) {
    for (let i = headerIndex + 1; i < safeData.length; i++) {
      const row = safeData[i] || {};
      const dateVal = headerMap.dateKey ? row[headerMap.dateKey] : null;
      const narrVal = headerMap.narrationKey ? row[headerMap.narrationKey] : null;
      const debitVal = headerMap.debitKey ? row[headerMap.debitKey] : null;
      const creditVal = headerMap.creditKey ? row[headerMap.creditKey] : null;
      const balanceVal = headerMap.balanceKey ? row[headerMap.balanceKey] : null;

      const dAmt = toNumber(debitVal);
      const cAmt = toNumber(creditVal);
      const hasSomeContent = !isEmptyValue(dateVal) || !isEmptyValue(narrVal) ||
        dAmt !== null || cAmt !== null || !isEmptyValue(balanceVal);
      if (!hasSomeContent) continue;

      const dateISO = parseDate(dateVal, true);
      // Some statements repeat the header inside the body (page breaks); skip those rows.
      if (scoreHeaderRow(row) >= 2 && !dateISO) continue;

      const desc = normalize(narrVal || '');
      if (isSummaryRow(desc)) continue;

      let amount = null;
      let type = null;
      if (dAmt !== null && dAmt !== 0 && (cAmt === null || cAmt === 0 || Math.abs(dAmt) >= Math.abs(cAmt))) {
        amount = Math.abs(dAmt);
        type = 'withdrawal';
      } else if (cAmt !== null && cAmt !== 0) {
        amount = Math.abs(cAmt);
        type = 'deposit';
      }

      // No usable date cell: the narration may start with the date. The lenient native
      // parser is not used here, it would turn tokens such as "5" into a date.
      let finalDate = dateISO;
      if (!finalDate) {
        const leadingToken = desc.split(/\s+/)[0];
        finalDate = parseDate(leadingToken, false);
      }

      // A transaction needs at least a date and an amount.
      if (!finalDate || amount === null) continue;

      const { from, to } = inferFromTo(desc, type);

      transactions.push({
        date: finalDate,
        voucher_number: voucherCounter++,
        amount: amount,
        desc: desc,
        from: from || null,
        to: to || null,
        type: type,
        balance: toNumber(balanceVal)
      });
    }
  }

  // Opening balance not stated: derive it from the first transaction's running balance.
  if (bank_details.opening_balance === null && transactions.length > 0) {
    const first = transactions[0];
    if (typeof first.balance === 'number') {
      const delta = first.type === 'withdrawal' ? first.amount : -first.amount;
      bank_details.opening_balance = parseFloat((first.balance + delta).toFixed(2));
    }
  }

  // IFSC not found in the metadata: take the first IFSC-shaped token anywhere in the sheet.
  if (!bank_details.ifsc) {
    outer: for (let i = 0; i < safeData.length; i++) {
      for (const [, v] of entries(safeData[i])) {
        const code = extractIFSCFromString(v);
        if (code) {
          bank_details.ifsc = code;
          break outer;
        }
      }
    }
  }

  return {
    bank_details: {
      bank_name: bank_details.bank_name || null,
      opening_balance: typeof bank_details.opening_balance === 'number' ? bank_details.opening_balance : 0,
      ifsc: bank_details.ifsc || null,
      address: bank_details.address || null,
      city: bank_details.city || null,
      account_no: bank_details.account_no || null,
      account_holder_name: bank_details.account_holder_name || null,
      branch_name: bank_details.branch_name || null,
      branch_code: bank_details.branch_code || null
    },
    transactions: transactions
  };
}

// CommonJS export; the guard keeps the file loadable where `module` is not defined.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = processBankStatement;
}