/**
 * Converts raw bank-statement rows (objects produced by an Excel-to-JSON
 * conversion) into a normalized structure.
 *
 * The function works in three steps:
 *   1. Locate the transaction table: either a header row whose cell values
 *      (or whose object keys) look like column titles, or - when no header
 *      exists - the first row holding a parseable date plus an amount.
 *   2. Scan the first rows for bank details (IFSC, account number, holder
 *      name, bank/branch, opening balance, address). Transaction rows only
 *      contribute their keys to this scan, so narration text (counterparty
 *      banks, UPI reference numbers) is not mistaken for account metadata.
 *   3. Turn every table row that carries a positive debit or credit into a
 *      transaction record.
 *
 * @param {Array<Object>} rawData - Rows of the statement sheet; each row maps
 *   a column key to a cell value (string, number, Date, null or undefined).
 * @returns {{bank_details: {bank_name: ?string, opening_balance: number,
 *   ifsc: ?string, address: ?string, city: ?string, account_no: ?string,
 *   account_holder_name: ?string, branch_name: ?string, branch_code: ?string},
 *   transactions: Array<{date: ?string, voucher_number: number, amount: number,
 *   desc: string, from: ?string, to: ?string, type: string, balance: ?number}>}}
 *   Dates are formatted as YYYY-MM-DD. When `rawData` is not an array the
 *   empty skeleton is returned.
 */
function processBankStatement(rawData) {
  const MONTH_NAMES = [
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
  ];
  const MONTH_WORD_RE = /\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\b/i;
  // Trailing time of day, e.g. " 10:15", " 10:15:00.000", " 9:05 PM".
  const TIME_SUFFIX_RE = /[ T]\d{1,2}:\d{2}(?::\d{2}(?:\.\d+)?)?(?:\s*[AP]M)?$/i;
  const SUMMARY_ROW_RE = /(total|closing|summary|balance brought|balance forward|b\/f|c\/f|page \d+ of \d+)/i;

  // Longer names come before their abbreviations so the canonical name wins.
  const KNOWN_BANKS = [
    'HDFC BANK', 'ICICI BANK', 'AXIS BANK', 'STATE BANK OF INDIA', 'SBI',
    'YES BANK', 'KOTAK MAHINDRA BANK', 'INDUSIND BANK', 'IDFC FIRST BANK',
    'BANK OF BARODA', 'CANARA BANK', 'UNION BANK', 'PUNJAB NATIONAL BANK', 'PNB',
  ].map((name) => ({
    name,
    pattern: new RegExp(`\\b${name.replace(/ /g, '\\s+')}\\b`, 'i'),
  }));

  const COUNTERPARTY_STOPWORDS = new Set([
    'upi', 'p2p', 'p2m', 'p2a', 'p2v', 'imps', 'neft', 'rtgs', 'pay', 'paid',
    'payment', 'transfer', 'ach', 'ecs', 'bank', 'ltd', 'axis bank',
    'hdfc bank', 'icici bank', 'sbi', 'idfc', 'yes bank', 'kotak', 'bank ltd',
    'state bank of india', 'debit', 'credit',
  ]);

  /** Stringifies a cell, collapsing whitespace; null/undefined become ''. */
  function safeTrim(val) {
    if (val === null || val === undefined) return '';
    return String(val).replace(/\s+/g, ' ').trim();
  }

  /**
   * Lenient amount parser: strips currency symbols and thousands separators
   * and returns the LAST number found in the text (so "1,000.00 Cr" works).
   * Parenthesised values are negative. Returns NaN when nothing is found.
   */
  function toNumber(x) {
    if (x === null || x === undefined) return NaN;
    if (typeof x === 'number') return Number.isFinite(x) ? x : NaN;
    let s = String(x).trim();
    if (!s || s === '-' || s === '--') return NaN;
    const negative = /\(.*\)/.test(s);
    s = s.replace(/[₹$,]/g, '').replace(/[()]/g, '').replace(/\s+/g, ' ');
    const found = s.match(/-?\d+(?:\.\d+)?/g);
    if (!found) return NaN;
    let num = parseFloat(found[found.length - 1]);
    if (negative) num = -Math.abs(num);
    return Number.isFinite(num) ? num : NaN;
  }

  /**
   * Strict check used when GUESSING which columns hold amounts: the whole
   * cell must be a number (optionally with currency symbol, separators,
   * parentheses or a Dr/Cr suffix). Unlike toNumber, free text that merely
   * contains digits (dates, reference numbers) does not qualify.
   */
  function isAmountLike(x) {
    if (typeof x === 'number') return Number.isFinite(x);
    if (typeof x !== 'string') return false;
    const compact = x.replace(/[₹$,\s]/g, '');
    return /^\(?-?\d+(?:\.\d+)?\)?(?:dr|cr)?$/i.test(compact);
  }

  function pad(n) {
    return String(n).padStart(2, '0');
  }

  function formatDate(y, m, d) {
    return `${y}-${pad(m)}-${pad(d)}`;
  }

  /** True when y-m-d (1-based month) is a real calendar day. */
  function isValidYmd(y, m, d) {
    if (!Number.isInteger(y) || m < 1 || m > 12 || d < 1) return false;
    // Day 0 of the following month is the last day of month m.
    return d <= new Date(Date.UTC(y, m, 0)).getUTCDate();
  }

  /** Expands a two-digit year: 00-49 -> 2000s, 50-99 -> 1900s. */
  function expandYear(y) {
    if (y >= 100) return y;
    return y + (y < 50 ? 2000 : 1900);
  }

  /** Formats a Date using its local calendar fields; null when invalid. */
  function fromLocalDate(dt) {
    if (Number.isNaN(dt.getTime())) return null;
    return formatDate(dt.getFullYear(), dt.getMonth() + 1, dt.getDate());
  }

  /** Converts an Excel serial day number using the 25569-day offset to the Unix epoch. */
  function parseExcelSerial(num) {
    const ms = Math.round((num - 25569) * 86400 * 1000);
    const dt = new Date(ms);
    if (Number.isNaN(dt.getTime())) return null;
    return formatDate(dt.getUTCFullYear(), dt.getUTCMonth() + 1, dt.getUTCDate());
  }

  /** Maps a month name or abbreviation (3+ letters) to 1-12, or 0 if unknown. */
  function monthFromName(token) {
    const t = token.toLowerCase();
    if (t === 'sept') return 9;
    return MONTH_NAMES.findIndex((name) => name.startsWith(t)) + 1;
  }

  /**
   * Parses a cell into YYYY-MM-DD, or null when it is not a date.
   * Numeric day-first formats are assumed (DD-MM-YYYY), as used on Indian
   * statements. Native Date parsing is only used for ISO timestamps and for
   * text that names a month and a 4-digit year, because its handling of any
   * other input is implementation-defined.
   */
  function parseDate(val) {
    if (val === null || val === undefined || val === '') return null;
    if (val instanceof Date) return fromLocalDate(val);
    if (typeof val === 'number') {
      if (!Number.isFinite(val)) return null;
      if (val > 30000 && val < 90000) return parseExcelSerial(val);
      // Unix timestamps in milliseconds, then seconds (not typical for banks).
      if (val > 1e11) return fromLocalDate(new Date(val));
      if (val > 1e9) return fromLocalDate(new Date(val * 1000));
      return null; // any other number is an amount or reference, not a date
    }

    const raw = String(val).trim();
    if (!raw) return null;
    if (/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}/.test(raw)) return fromLocalDate(new Date(raw));

    // Drop a trailing time of day before matching, then treat '.' as '-'.
    const s = raw
      .replace(/\s+/g, ' ')
      .replace(TIME_SUFFIX_RE, '')
      .replace(/\./g, '-')
      .trim();

    // YYYY-MM-DD or YYYY/MM/DD
    let m = s.match(/^(\d{4})[\/\-](\d{1,2})[\/\-](\d{1,2})$/);
    if (m) {
      const y = parseInt(m[1], 10);
      const mo = parseInt(m[2], 10);
      const d = parseInt(m[3], 10);
      if (isValidYmd(y, mo, d)) return formatDate(y, mo, d);
    }

    // DD-MM-YYYY or DD/MM/YYYY
    m = s.match(/^(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{2,4})$/);
    if (m) {
      const d = parseInt(m[1], 10);
      const mo = parseInt(m[2], 10);
      const y = expandYear(parseInt(m[3], 10));
      if (isValidYmd(y, mo, d)) return formatDate(y, mo, d);
    }

    // DD-Mon-YYYY, DD Mon YYYY, DD/Month/YYYY
    m = s.match(/^(\d{1,2})[ \-\/]([A-Za-z]{3,9})[ \-\/,]+(\d{2,4})$/);
    if (m) {
      const d = parseInt(m[1], 10);
      const mo = monthFromName(m[2]);
      const y = expandYear(parseInt(m[3], 10));
      if (isValidYmd(y, mo, d)) return formatDate(y, mo, d);
    }

    // Mon DD YYYY, Month DD, YYYY
    m = s.match(/^([A-Za-z]{3,9})[ \-\/](\d{1,2})(?:,? |[\-\/])(\d{2,4})$/);
    if (m) {
      const mo = monthFromName(m[1]);
      const d = parseInt(m[2], 10);
      const y = expandYear(parseInt(m[3], 10));
      if (isValidYmd(y, mo, d)) return formatDate(y, mo, d);
    }

    // Verbose dates such as a stringified Date object.
    if (MONTH_WORD_RE.test(raw) && /\b\d{4}\b/.test(raw)) {
      return fromLocalDate(new Date(raw));
    }
    return null;
  }

  /**
   * Classifies a header label as 'date' | 'narration' | 'debit' | 'credit' |
   * 'balance', or null. Short tokens (dr, cr, bal, wdl, info) must stand
   * alone so that words like "address" are not read as a debit column.
   */
  function classifyHeader(text) {
    const t = text.toLowerCase().replace(/\s+/g, ' ').trim();
    if (!t) return null;
    if (/(^|[^a-z])(date|txn date|tran date|transaction date|value date|posting date)([^a-z]|$)/.test(t)) return 'date';
    if (/(narration|particulars|details|description|remark)|(^|[^a-z])info(rmation)?([^a-z]|$)/.test(t)) return 'narration';
    if (/(debit|withdraw|payment|paid|outflow)|(^|[^a-z])(dr|wdl)([^a-z]|$)/.test(t)) return 'debit';
    if (/(credit|deposit|received|receipt|inflow)|(^|[^a-z])cr([^a-z]|$)/.test(t)) return 'credit';
    if (/balance|(^|[^a-z])bal([^a-z]|$)/.test(t)) return 'balance';
    return null;
  }

  /**
   * Splits "Label : Value" / "Label :- Value" / "Label - Value" at the first
   * separator. The whole run of separator characters is consumed, so ":-"
   * does not leave a stray "-" at the start of the value.
   */
  function extractLabelValue(rawText) {
    const text = safeTrim(rawText);
    if (!text) return null;
    const sep = text.match(/\s*[:\-–][:\-–\s]*/);
    if (!sep) return null;
    const label = safeTrim(text.slice(0, sep.index));
    const value = safeTrim(text.slice(sep.index + sep[0].length));
    return label && value ? { label, value } : null;
  }

  /** Returns a known bank's canonical name, else an upper-case "... BANK" phrase, else null. */
  function guessBankName(str) {
    const s = (str || '').toString();
    for (const bank of KNOWN_BANKS) {
      if (bank.pattern.test(s)) return bank.name;
    }
    const m = s.match(/\b([A-Z][A-Z &.-]{2,}BANK(?: OF [A-Z]+)?(?: LIMITED| LTD)?)\b/);
    return m ? m[1].trim() : null;
  }

  /** Finds an IFSC-shaped token: 4 letters, a zero, then 6 alphanumerics. */
  function findIfsc(s) {
    if (!s) return null;
    const m = s.toUpperCase().match(/\b([A-Z]{4}0[A-Z0-9]{6})\b/);
    return m ? m[1] : null;
  }

  /**
   * Extracts an account number. The labelled form ("Account No - 9180...")
   * must be followed by a token containing a digit, optionally continued by
   * digit groups separated by a space or hyphen; trailing words are not
   * captured. Falls back to any standalone 9-20 digit sequence.
   */
  function extractAccountNo(s) {
    if (!s) return null;
    const labelled = s.match(/account\s*(?:no\.?|number|num|#|id)?\s*[-:.]*\s*([A-Z0-9]*\d[A-Z0-9]*(?:[- ]\d+)*)/i);
    if (labelled) {
      const compact = labelled[1].replace(/[^A-Za-z0-9]/g, '');
      if (compact.length >= 6) return compact;
    }
    const digits = s.match(/\b(\d{9,20})\b/);
    return digits ? digits[1] : null;
  }

  /** Reads the holder name from "Name : X" style cells, ignoring other "... Name" labels. */
  function extractName(s) {
    if (!s) return null;
    const lv = extractLabelValue(s);
    if (!lv || !/name/i.test(lv.label)) return null;
    if (/(branch|bank|nominee|scheme|father|guardian)/i.test(lv.label)) return null;
    return lv.value;
  }

  /** Reads a branch code from a "Branch Code"/"SOL"/"SOL ID" label. */
  function extractBranchCode(cell) {
    const lv = extractLabelValue(cell);
    if (lv && /\b(?:branch\s*code|sol(?:\s*id)?)\b/i.test(lv.label)) {
      return lv.value.replace(/\s+/g, '');
    }
    // Unlabelled form such as "SOL 248"; the code must contain a digit so
    // that a bare "SOL ID" header is not read as the code "ID".
    const m = cell.match(/\bSOL(?:\s*ID)?\b\s*[:\-]*\s*([A-Za-z0-9\-]*\d[A-Za-z0-9\-]*)/i);
    return m ? safeTrim(m[1]) : null;
  }

  /** Reads a branch name from a "Branch Name : X" cell. */
  function extractBranchName(cell) {
    const lv = extractLabelValue(cell);
    return lv && /branch\s*name/i.test(lv.label) ? lv.value : null;
  }

  /**
   * Reads the amount from an "Opening Balance ..." cell. Dates are removed
   * first so "as on 01-04-2023" is not read as an amount; the first amount
   * after the label is preferred over any other number in the cell.
   */
  function extractOpeningBalanceFromString(s) {
    if (!s) return null;
    const withoutDates = s
      .replace(/\b\d{1,4}[\/.\-]\d{1,2}[\/.\-]\d{1,4}\b/g, ' ')
      .replace(/\b\d{1,2}[ \-][A-Za-z]{3,9}[ \-]\d{2,4}\b/g, ' ');
    const label = /opening\s*balance/i.exec(withoutDates);
    if (!label) return null;
    const tail = withoutDates.slice(label.index + label[0].length);
    const amount = tail.match(/\(?\s*-?\s*\d[\d,]*(?:\.\d+)?\s*\)?/);
    const n = toNumber(amount ? amount[0] : withoutDates);
    return Number.isNaN(n) ? null : n;
  }

  /** Non-empty key texts of a row. */
  function keyStrings(row) {
    return Object.keys(row).map((k) => safeTrim(k)).filter(Boolean);
  }

  /** Non-empty key and value texts of a row, key before value for each column. */
  function rowStrings(row) {
    const out = [];
    for (const k of Object.keys(row)) {
      const keyStr = safeTrim(k);
      if (keyStr) out.push(keyStr);
      const valStr = safeTrim(row[k]);
      if (valStr) out.push(valStr);
    }
    return out;
  }

  /** First non-null result of `extractor` over `cells`, or null. */
  function firstResult(cells, extractor) {
    for (const cell of cells) {
      const hit = extractor(cell);
      if (hit !== null && hit !== undefined) return hit;
    }
    return null;
  }

  /**
   * Guesses the counterparty from a narration: a UPI VPA if present, else
   * the first name-like segment (letters, may contain spaces, not a bank),
   * else the first single-token segment, else the first run of words.
   */
  function extractCounterparty(desc) {
    const s = (desc || '').toString();
    if (!s) return null;
    const vpa = s.match(/\b([a-z0-9.\-_]{2,}@[a-z]{2,})\b/i);
    if (vpa) return vpa[1];

    const usable = s
      .split(/[\/|,]/)
      .map((part) => safeTrim(part))
      .filter((part) => {
        const lower = part.toLowerCase();
        return lower.length >= 3 && !COUNTERPARTY_STOPWORDS.has(lower) && !/^\d+$/.test(lower);
      });
    const named = usable.find(
      (part) => /^[A-Za-z][A-Za-z .&'-]{2,}$/.test(part) && !/\bbank\b/i.test(part)
    );
    if (named) return named;
    const token = usable.find((part) => /^\w{2,}$/.test(part));
    if (token) return token;

    const words = s.match(/\b([A-Za-z][A-Za-z .'-]{2,})\b/);
    if (words) {
      const val = words[1].trim();
      if (!COUNTERPARTY_STOPWORDS.has(val.toLowerCase())) return val;
    }
    return null;
  }

  function emptyMapping() {
    return { dateKey: null, narrationKey: null, debitKey: null, creditKey: null, balanceKey: null };
  }

  /** Records `key` as the column for `cls` unless that column is already mapped. */
  function assignColumn(map, cls, key) {
    const field = `${cls}Key`;
    if (map[field] === null) map[field] = key;
  }

  /** A usable table needs a date or narration column plus a debit or credit column. */
  function hasCoreColumns(map) {
    const hasAnchor = map.dateKey !== null || map.narrationKey !== null;
    const hasAmount = map.debitKey !== null || map.creditKey !== null;
    return hasAnchor && hasAmount;
  }

  /**
   * Finds the header. Column titles may sit in a row's VALUES (the row is
   * then the header and data starts on the next row) or in the row's KEYS
   * (the converter already consumed the header, so this row is data).
   * Returns { mapping, dataStart, tableStart } or null.
   */
  function detectHeaderRow(rows) {
    for (let i = 0; i < rows.length; i++) {
      const row = rows[i];
      const byValue = emptyMapping();
      const byKey = emptyMapping();
      const combined = emptyMapping();
      for (const k of Object.keys(row)) {
        const valStr = safeTrim(row[k]);
        const keyStr = safeTrim(k);
        const valCls = valStr ? classifyHeader(valStr) : null;
        const keyCls = keyStr && keyStr !== valStr ? classifyHeader(keyStr) : null;
        if (valCls) {
          assignColumn(byValue, valCls, k);
          assignColumn(combined, valCls, k);
        }
        if (keyCls) {
          assignColumn(byKey, keyCls, k);
          assignColumn(combined, keyCls, k);
        }
      }
      if (hasCoreColumns(byValue)) return { mapping: byValue, dataStart: i + 1, tableStart: i };
      if (hasCoreColumns(byKey)) return { mapping: byKey, dataStart: i, tableStart: i };
      if (hasCoreColumns(combined)) return { mapping: combined, dataStart: i + 1, tableStart: i };
    }
    return null;
  }

  /**
   * Header-less fallback: the first row with a parseable date and at least
   * one other amount-like cell defines the layout. Amount columns are
   * assigned positionally (debit, credit, balance), so a blank amount cell
   * on that row shifts the assignment; the first other text cell becomes
   * the narration.
   */
  function inferLayoutFromData(rows) {
    for (let i = 0; i < rows.length; i++) {
      const row = rows[i];
      const keys = Object.keys(row);
      for (const k of keys) {
        if (parseDate(row[k]) === null) continue;
        const others = keys.filter((kk) => kk !== k);
        if (!others.some((kk) => isAmountLike(row[kk]))) continue;
        const mapping = emptyMapping();
        mapping.dateKey = k;
        for (const kk of others) {
          const val = row[kk];
          if (isAmountLike(val)) {
            if (mapping.debitKey === null) mapping.debitKey = kk;
            else if (mapping.creditKey === null) mapping.creditKey = kk;
            else if (mapping.balanceKey === null) mapping.balanceKey = kk;
          } else if (mapping.narrationKey === null && typeof val === 'string' && val.trim()) {
            mapping.narrationKey = kk;
          }
        }
        return { mapping, dataStart: i, tableStart: i };
      }
    }
    return null;
  }

  const result = {
    bank_details: {
      bank_name: null,
      opening_balance: 0,
      ifsc: null,
      address: null,
      city: null,
      account_no: null,
      account_holder_name: null,
      branch_name: null,
      branch_code: null,
    },
    transactions: [],
  };

  if (!Array.isArray(rawData)) return result;

  // Non-object rows (null, holes, primitives) are treated as empty rows.
  const rows = Array.from(rawData, (r) => (r !== null && typeof r === 'object' ? r : {}));

  // Step 1: locate the transaction table.
  const layout = detectHeaderRow(rows) || inferLayoutFromData(rows);
  const mapping = layout ? layout.mapping : emptyMapping();
  const dataStart = layout ? layout.dataStart : rows.length;
  const tableStart = layout ? layout.tableStart : rows.length;

  function isTransactionRow(index) {
    return (
      layout !== null &&
      index >= dataStart &&
      mapping.dateKey !== null &&
      parseDate(rows[index][mapping.dateKey]) !== null
    );
  }

  // Step 2: bank details from the first rows.
  const details = result.bank_details;
  // Explicit flag: 0 is a legitimate opening balance, not "not found".
  let openingFound = false;
  const addressCandidates = [];
  const scanLimit = Math.min(rows.length, 40);
  // Address lines are only expected above the table.
  const addressLimit = Math.min(tableStart, 15);

  for (let i = 0; i < scanLimit; i++) {
    const row = rows[i];
    const cells = isTransactionRow(i) ? keyStrings(row) : rowStrings(row);

    if (!details.ifsc) details.ifsc = firstResult(cells, findIfsc);
    if (!details.account_no) details.account_no = firstResult(cells, extractAccountNo);
    if (!details.account_holder_name) details.account_holder_name = firstResult(cells, extractName);
    if (!details.bank_name) details.bank_name = firstResult(cells, guessBankName);
    if (!details.branch_name) details.branch_name = firstResult(cells, extractBranchName);
    if (!details.branch_code) details.branch_code = firstResult(cells, extractBranchCode);
    if (!openingFound) {
      const ob = firstResult(cells, extractOpeningBalanceFromString);
      if (ob !== null) {
        details.opening_balance = ob;
        openingFound = true;
      }
    }

    if (i >= addressLimit) continue;
    for (const c of cells) {
      if (/^name\b/i.test(c)) continue;
      if (/ifsc/i.test(c)) continue;
      if (/customer id/i.test(c)) continue;
      if (/micr/i.test(c)) continue;
      if (/email|mobile|phone|nominee|\bpan\b|ckyc/i.test(c)) continue;
      if (/statement of account/i.test(c)) continue;
      if (/[A-Za-z]/.test(c) && c.length >= 6) addressCandidates.push(c);
    }
  }

  // City: "<name> CITY" or "<name> - <5/6 digit PIN>" in any address line.
  for (const line of addressCandidates) {
    if (details.city) break;
    const byWord = line.match(/([A-Za-z ]+)\s+CITY\b/i);
    const byPin = line.match(/\b([A-Za-z ]+)\s*-\s*\d{5,6}\b/);
    const hit = byWord || byPin;
    if (hit) details.city = safeTrim(hit[1]);
  }
  if (addressCandidates.length) {
    details.address = addressCandidates.slice(0, 3).join(', ');
  }

  // Step 3: transactions.
  let voucher = 1;
  for (let i = dataStart; i < rows.length; i++) {
    const row = rows[i];
    const cell = (key) => (key !== null ? row[key] : null);

    const date = parseDate(cell(mapping.dateKey));
    const narration = safeTrim(cell(mapping.narrationKey));
    const debit = toNumber(cell(mapping.debitKey));
    const credit = toNumber(cell(mapping.creditKey));
    const balanceNum = toNumber(cell(mapping.balanceKey));
    const balance = Number.isNaN(balanceNum) ? null : balanceNum;
    const hasDebit = debit > 0; // false for NaN
    const hasCredit = credit > 0;

    // An "OPENING BALANCE" line inside the table carries the figure in the
    // balance column rather than in its own text.
    if (!openingFound && !hasDebit && !hasCredit && balance !== null && /opening\s*balance/i.test(narration)) {
      details.opening_balance = balance;
      openingFound = true;
      continue;
    }
    // Totals / page footers have no date; a dated row is kept even if its
    // narration happens to contain one of these words.
    if (narration && !date && SUMMARY_ROW_RE.test(narration)) continue;
    // Blank lines, repeated headers and any row without a positive amount.
    if (!hasDebit && !hasCredit) continue;

    // When both columns are populated the debit wins.
    const type = hasDebit ? 'withdrawal' : 'deposit';
    const amount = hasDebit ? debit : credit;
    const counterparty = extractCounterparty(narration);

    result.transactions.push({
      date,
      voucher_number: voucher++,
      amount,
      desc: narration,
      from: type === 'deposit' ? counterparty : null,
      to: type === 'withdrawal' ? counterparty : null,
      type,
      balance,
    });
  }

  // No explicit opening balance: undo the first transaction from its running balance.
  if (!openingFound && result.transactions.length > 0) {
    const first = result.transactions[0];
    if (first.balance !== null) {
      const ob = first.type === 'deposit'
        ? first.balance - first.amount
        : first.balance + first.amount;
      if (Number.isFinite(ob)) details.opening_balance = Number(ob.toFixed(2));
    }
  }

  return result;
}

// CommonJS export; guarded so the file can also be loaded as an ES module,
// where `module` is not defined.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = processBankStatement;
}