Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 2025/example_single.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

# Extra items for schedule A
'F1040sa' : {
'11' : 500, # charitable contributions
'11' : 500, # charitable contributions
},
}

Expand Down
346 changes: 346 additions & 0 deletions 2025/taxEngine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,346 @@
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import cv2
import os
import re
import numpy as np
import pytesseract
import ast
from pathlib import Path
from pdf2image import convert_from_path
# The tax-engine classes used below (F1040, CA540, FilingStatus) come from
# project modules; nothing in this file can work without them.
try:
    from f1040 import F1040
    from ca540 import CA540
    from form import FilingStatus
except ImportError as e:
    print(f"CRITICAL: Engine components missing: {e}")
    # ``exit()`` is only injected by the ``site`` module and reports success
    # (status 0); raise SystemExit directly with a failing status instead.
    raise SystemExit(1) from e
# To auto-provision, uncomment the call to bootstrap_environment()
#def bootstrap_environment():
# import platform
# import subprocess
# import shutil

# os_type = platform.system()
# print(f"Detecting Host OS: {os_type}")
# commands = {
# "Linux": "sudo apt update && sudo apt install -y tesseract-ocr poppler-utils",
# "Darwin": "brew install tesseract poppler", # macOS
# "Windows": "echo 'Download Tesseract: https://github.com and Poppler: https://github.com'"
# }
#
# cmd = commands.get(os_type)
#
# if os_type == "Windows":
# print(f"[MANUAL INTERVENTION REQUIRED]: {cmd}")
# elif cmd:
# try:
# print(f"Executing: {cmd}")
# subprocess.run(cmd, shell=True, check=True)
# print("Ground stack synchronized.")
# except Exception as e:
# print(f"[CRITICAL] Bootstrap failure: {e}")
# else:
# print("[ERROR] Unknown OS architecture. Manual setup required.")

# --- PAYLOAD SYNC: AUTO-PROVISION FROM REQUIREMENTS.TXT ---
# def sync_payload_from_manifest():
# import sys
# import subprocess
# from pathlib import Path

# req_file = Path(__file__).parent / "requirements.txt"
# if not req_file.exists():
# print("Manifest (requirements.txt) not found. Skipping auto-sync.")
# return
# try:
# print(f"Synchronizing environment via {req_file.name}...")
# subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", str(req_file)])
# print("Environment parity achieved.")
# except Exception as e:
# print(f"[CRITICAL] Sync failed: {e}")

# Directory that contains this file (a filesystem path, despite the name).
BASE_URL = Path(__file__).resolve().parent

# Document routing: worksheet field -> label of the form it is associated with.
# Insertion order matters: the UI lays its worksheet rows out in this order.
FORM_ROUTING = {
    'wages': 'Form W-2',
    'wages_ss': 'Form W-2',
    'ss_withheld': 'Form W-2',
    'withholding': 'Form W-2',
    'taxable_interest': 'Form 1099-INT',
    'tax_exempt_interest': 'Form 1099-INT',
    'dividends': 'Form 1099-DIV',
    'qualified_dividends': 'Form 1099-DIV',
    'business_income': 'Schedule C',
    'business_expenses': 'Schedule C',
    'status': '1040 Identity Section',
    'charitable_contributions': 'Schedule A',
}

# Phrases the OCR reader searches for, mapped to the engine field they feed.
# Insertion order matters: anchors are tried top to bottom.
# Very rudimentary for now; can be expanded to become more robust.
DATA_ANCHORS = {
    "wages, tips": "wages",
    "wages,": "wages",
    "federal income tax": "withholding",
    "social security wages": "wages_ss",
    "social security tax withheld": "ss_withheld",
    "social security tax": "ss_withheld",
    "interest income": "taxable_interest",
    "exempt interest": "tax_exempt_interest",
    "ordinary dividends": "dividends",
    "qualified dividends": "qualified_dividends",
    "gross receipts": "business_income",
    "total expenses": "business_expenses",
    "charitable": "charitable_contributions",
}

#parsing tool used to acquire expected fields of input based on the example
class TaxMission:
    """Collect the input fields for the tax engine and launch it.

    The set of expected input keys is discovered by statically scanning the
    ``f*.py`` modules in ``directory`` for references to a variable named
    ``inputs``. Every key in ``FORM_ROUTING`` is added on top of that.

    Attributes:
        directory: ``Path`` that is scanned for ``f*.py`` modules.
        required_keys: list of input keys (as strings) the payload carries.
        field_types: mapping of key -> ``'list'`` / ``'enum'`` hints inferred
            from the scanned modules (not read anywhere else in this class).
        payload: the raw values collected so far, keyed by input key.
    """

    def __init__(self, directory):
        self.directory = Path(directory)
        self.required_keys = self._discover_schema()
        self.field_types = self._infer_field_types()
        for key in FORM_ROUTING:
            if key not in self.required_keys:
                self.required_keys.append(key)
        self.payload = {str(key): None for key in self.required_keys}
        self.payload['F1040sa'] = {'11': 0}
        self.payload['exemptions'] = 1

    def _iter_form_trees(self):
        """Yield the parsed AST of every readable ``f*.py`` file in the directory.

        Files that cannot be read, decoded or parsed are skipped.
        """
        for path in self.directory.glob('f*.py'):
            try:
                tree = ast.parse(path.read_text(encoding='utf-8'))
            except (OSError, SyntaxError, ValueError):
                # ValueError also covers UnicodeDecodeError.
                continue
            yield tree

    @staticmethod
    def _is_inputs(node):
        """Return True when ``node`` is the bare name ``inputs``."""
        return getattr(node, 'id', '') == 'inputs'

    @staticmethod
    def _inputs_key(node):
        """Return the constant key of ``inputs[key]`` / ``inputs.x(key, ...)``.

        Returns None when ``node`` is not such an expression.
        """
        if isinstance(node, ast.Subscript):
            if TaxMission._is_inputs(node.value) and isinstance(node.slice, ast.Constant):
                return node.slice.value
        elif isinstance(node, ast.Call) and isinstance(node.func, ast.Attribute):
            if (TaxMission._is_inputs(node.func.value) and node.args
                    and isinstance(node.args[0], ast.Constant)):
                return node.args[0].value
        return None

    def _discover_schema(self):
        """Return the sorted input keys referenced by the scanned modules.

        A key is any constant used as ``inputs[key]``, as the first argument
        of ``inputs.get(key, ...)``, or as the second argument of a
        ``<obj>.spouseSum(..., key)`` call. Keys are returned as strings.
        """
        keys = set()
        for tree in self._iter_form_trees():
            for node in ast.walk(tree):
                if isinstance(node, ast.Subscript):
                    if self._is_inputs(node.value) and isinstance(node.slice, ast.Constant):
                        keys.add(node.slice.value)
                elif isinstance(node, ast.Call):
                    attr = getattr(node.func, 'attr', '')
                    if attr == 'get':
                        if (self._is_inputs(node.func.value) and node.args
                                and isinstance(node.args[0], ast.Constant)):
                            keys.add(node.args[0].value)
                    elif attr == 'spouseSum':
                        if len(node.args) >= 2 and isinstance(node.args[1], ast.Constant):
                            keys.add(node.args[1].value)
        return sorted({str(k) for k in keys})

    def _infer_field_types(self):
        """Return ``{key: 'list' | 'enum'}`` hints from the scanned modules.

        * ``sum(inputs[key])`` / ``sum(inputs.get(key, ...))`` marks ``key``
          as ``'list'``.
        * ``something[inputs[key]]`` marks ``key`` as ``'enum'``.
        """
        types = {}
        for tree in self._iter_form_trees():
            for node in ast.walk(tree):
                if isinstance(node, ast.Call) and getattr(node.func, 'id', '') == 'sum':
                    # Inspect the AST structure itself: str() of a node is
                    # only its object repr and never contains 'inputs'.
                    key = self._inputs_key(node.args[0]) if node.args else None
                    if key is not None:
                        types[key] = 'list'
                elif isinstance(node, ast.Subscript) and isinstance(node.slice, ast.Subscript):
                    inner = node.slice
                    if self._is_inputs(inner.value) and isinstance(inner.slice, ast.Constant):
                        types[inner.slice.value] = 'enum'
        return types

    def add_field(self, key, value, source="Sensor"):
        """Store ``value`` under ``key`` if the payload knows that key.

        Unknown keys are ignored silently; accepted values are logged.
        """
        if key in self.payload:
            self.payload[key] = value
            print(f"📡 [DATA] {key} -> {value} ({source})")

    @staticmethod
    def _to_number(text):
        """Parse a string as a float, ignoring everything but digits, '-' and '.'.

        Returns 0.0 when nothing parseable is left.
        """
        try:
            return float(re.sub(r'[^-0-9.]', '', text))
        except ValueError:
            return 0.0

    @staticmethod
    def _parse_status(raw):
        """Map a raw status value onto a ``FilingStatus`` member.

        Numeric values use the worksheet codes (1 joint, 2 separate, 3 head of
        household, 4 widow(er)); text is matched by keyword. Anything else,
        including None, yields ``FilingStatus.SINGLE``.
        """
        text = '' if raw is None else str(raw).strip().lower()
        try:
            code = int(float(text))
        except (ValueError, OverflowError):
            code = None
        if code is not None:
            by_code = {
                1: FilingStatus.JOINT,
                2: FilingStatus.SEPARATE,
                3: FilingStatus.HEAD,
                4: FilingStatus.WIDOW,
            }
            return by_code.get(code, FilingStatus.SINGLE)
        keywords = (
            ('joint', FilingStatus.JOINT),
            ('separat', FilingStatus.SEPARATE),
            ('seperat', FilingStatus.SEPARATE),  # common misspelling
            ('head', FilingStatus.HEAD),
            ('widow', FilingStatus.WIDOW),
        )
        for keyword, status in keywords:
            if keyword in text:
                return status
        return FilingStatus.SINGLE

    def _sanitize(self):
        """Return a cleaned copy of the payload ready for the engine.

        * Missing/empty values become 0.0 and numeric strings become floats.
        * ``status`` becomes a ``FilingStatus`` member.
        * ``F1040sa['11']`` is filled from ``charitable_contributions``.
        * ``estimated_payments`` is set to four zero payments.
        """
        data = self.payload.copy()
        for key, value in self.payload.items():
            if key == 'status':
                # Interpreted from the raw value below; converting it to a
                # number first would destroy textual statuses.
                continue
            if value is None or value == "":
                data[key] = 0.0
            elif isinstance(value, str):
                data[key] = self._to_number(value)
        data['status'] = self._parse_status(self.payload.get('status'))

        charity = data.get('charitable_contributions', 0.0)
        data['F1040sa'] = {'11': float(charity or 0.0)}
        data['estimated_payments'] = [0.0, 0.0, 0.0, 0.0]

        structured = {'status', 'F1040sa', 'estimated_payments', 'F1040sa_items'}
        for key in data:
            if key in structured:
                continue
            try:
                data[key] = float(data[key])
            except (TypeError, ValueError):
                # Leave values that are not scalar numbers untouched.
                pass

        return data

    # The push to the legacy engine.
    def ignite(self):
        """Run the federal and California forms on the sanitized payload.

        Returns:
            ``(F1040, CA540)`` on success, or None if the engine raised; the
            error is printed rather than propagated.
        """
        clean_data = self._sanitize()
        try:
            print("🚀 Launching Engine...")
            f = F1040(clean_data)
            ca = CA540(clean_data, f)
            return f, ca
        except Exception as e:
            print(f"💥 ENGINE BUST: {e}")
            return None

# Grabs all candidate bounding boxes after some image manipulation (grayscale -> threshold); dilation is not used because it tends to increase noise.
def putTheMoneyInTheBox(image):
    """Locate box-shaped regions on a form image.

    The image is padded with a 20-pixel black border, converted to grayscale
    and adaptively thresholded (inverted). A contour is kept when it has a
    parent contour, its bounding box is 40 < w < 800 and 15 < h < 110, and
    the box is wider than tall (w / h > 1.1).

    Returns:
        ``(rects, image_padded)`` where ``rects`` is a list of ``(x, y, w, h)``
        tuples in padded-image coordinates, ordered by 30-pixel row band and
        then by x; ties keep the larger box first.
    """
    border = 20
    image_padded = cv2.copyMakeBorder(
        image, border, border, border, border, cv2.BORDER_CONSTANT, value=(0, 0, 0)
    )
    gray = cv2.cvtColor(image_padded, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    if hierarchy is not None:
        links = hierarchy[0]
        for idx, contour in enumerate(contours):
            if links[idx][3] == -1:
                # Top-level contour (no parent): not a box inside the form.
                continue
            x, y, w, h = cv2.boundingRect(contour)
            if not (40 < w < 800 and 15 < h < 110):
                continue
            if (w / h) > 1.1:
                boxes.append((x, y, w, h))

    # One stable sort with a composite key gives the same order as sorting by
    # area (descending) first and then by (row band, x).
    boxes.sort(key=lambda b: (b[1] // 30, b[0], -(b[2] * b[3])))
    return boxes, image_padded

# Separates the bounding boxes inside the form into separate images, then converts each one to a string.
def _harvest_amounts(raw_text):
    """Return the numbers in OCR text that look like amounts, not box labels."""
    amounts = []
    for token in re.findall(r"[\d,.]+", raw_text):
        cleaned = token.replace(',', '').rstrip('.')
        try:
            amount = float(cleaned)
        except ValueError:
            # Bare punctuation or something like "1.2.3".
            continue
        # Small whole numbers are taken to be box numbers. Test the cleaned
        # token so a label such as "2." is not mistaken for a decimal amount.
        if amount < 20 and "." not in cleaned:
            continue
        amounts.append(amount)
    return amounts


def contextual_harvest(padded_image, sorted_rects):
    """OCR each candidate box and pull out the amount for known anchors.

    Args:
        padded_image: BGR image the rectangles refer to.
        sorted_rects: iterable of ``(x, y, w, h)`` boxes.

    Returns:
        dict mapping engine field name -> largest amount found in a box whose
        text contains one of the ``DATA_ANCHORS`` phrases. For each box only
        the first matching anchor (in ``DATA_ANCHORS`` order) is used; a later
        box overwrites an earlier one for the same field.
    """
    gray = cv2.cvtColor(padded_image, cv2.COLOR_BGR2GRAY)
    img_h, img_w = gray.shape
    captured_data = {}
    norm_anchors = {k.lower(): v for k, v in DATA_ANCHORS.items()}

    for x, y, w, h in sorted_rects:
        # Grow the box by 5 px on every side, clamped to the image.
        y1, y2 = max(0, y - 5), min(img_h, y + h + 5)
        x1, x2 = max(0, x - 5), min(img_w, x + w + 5)
        roi = gray[y1:y2, x1:x2]
        if roi.size == 0:
            continue
        roi_up = cv2.resize(roi, None, fx=2, fy=2, interpolation=cv2.INTER_LANCZOS4)
        _, roi_bin = cv2.threshold(roi_up, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        raw_text = pytesseract.image_to_string(roi_bin, config='--psm 6').strip().lower()
        if not raw_text:
            continue

        engine_key = next(
            (key for anchor, key in norm_anchors.items() if anchor in raw_text),
            None,
        )
        if engine_key is None:
            continue

        # The amounts depend only on the box text, so parse them once.
        amounts = _harvest_amounts(raw_text)
        if not amounts:
            continue
        highest = max(amounts)
        captured_data[engine_key] = highest
        print(f"🎯 [PAYLOAD] Assigned {engine_key} -> {highest}")
    return captured_data

#takes file input and converts it from pdf can be expanded to convert more file types
def ingest_payload(file_path):
    """Load a document as a BGR image array.

    PDFs are rasterised at 300 dpi and only the first page is used; any other
    file is handed to ``cv2.imread``.

    Returns:
        The image array, or None when nothing could be loaded (``cv2.imread``
        failed or the PDF produced no pages).
    """
    if str(file_path).lower().endswith('.pdf'):
        # Only page 1 is consumed, so do not rasterise the rest of the file.
        pages = convert_from_path(file_path, 300, first_page=1, last_page=1)
        if not pages:
            return None
        # Force three channels, then flip RGB -> BGR for OpenCV.
        return np.array(pages[0].convert('RGB'))[:, :, ::-1].copy()
    return cv2.imread(str(file_path))

#UI interface
class AlgoFriendUI:
    """Tkinter worksheet for reviewing scanned values and running the engine.

    One input widget is created per ``FORM_ROUTING`` key: a read-only combobox
    for ``status`` and a text entry for everything else. Scanned documents add
    their amounts to the entries; "IGNITE ENGINE" pushes the worksheet into a
    ``TaxMission`` and runs it.
    """

    def __init__(self, root):
        self.root = root
        self.mission = TaxMission(BASE_URL)
        self.root.title("🚀 AlgoFriend | Mission Control")
        self.root.geometry("850x750")
        self.root.configure(bg="#1e1e1e")
        self.entries = {}
        # Combobox label -> status code handed to the mission.
        self.status_options = {
            "Single": "0",
            "Married Joint": "1",
            "Married Separate": "2",
            "Head of Household": "3",
            "Widow(er)": "4"
        }
        self.status_label = tk.Label(root, text="SYSTEM INITIALIZED", bg="#1e1e1e", fg="#00ff00", font=("Courier", 18, "bold"))
        self.status_label.pack(pady=15)
        self.worksheet_frame = tk.LabelFrame(root, text=" LIVE TELEMETRY WORKSHEET ", bg="#1e1e1e", fg="white", font=("Courier", 10))
        self.worksheet_frame.pack(pady=10, padx=20, fill="both")
        for idx, key in enumerate(FORM_ROUTING):
            # Two label/widget pairs per grid row.
            r, c = idx // 2, (idx % 2) * 2
            tk.Label(self.worksheet_frame, text=f"{key.upper()}:", bg="#1e1e1e", fg="#aaa", font=("Courier", 8)).grid(row=r, column=c, sticky="e", padx=5, pady=2)
            if key == 'status':
                self.entries[key] = ttk.Combobox(self.worksheet_frame, values=list(self.status_options.keys()), state="readonly", width=16)
                self.entries[key].set("Single")
                self.entries[key].grid(row=r, column=c+1, padx=5, pady=2)
            else:
                entry = tk.Entry(self.worksheet_frame, width=18, bg="#000", fg="#00ff00", insertbackground="white", borderwidth=0)
                entry.insert(0, "0.0")
                entry.grid(row=r, column=c+1, padx=5, pady=2)
                self.entries[key] = entry
        self.btn_frame = tk.Frame(root, bg="#1e1e1e")
        self.btn_frame.pack(pady=20)
        tk.Button(self.btn_frame, text="📁 SELECT DOCUMENT", command=self.select_file,
                  width=25, bg="#333", fg="white", font=("Courier", 10)).pack(pady=5)
        tk.Button(self.btn_frame, text="🏁 IGNITE ENGINE", command=self.launch_engine,
                  width=25, bg="#005500", fg="white", font=("Courier", 12, "bold")).pack(pady=15)

    def select_file(self):
        """Ask for a document, scan it and add the amounts to the worksheet.

        Each scanned amount is added to the value already in its entry. A file
        that cannot be read or scanned is reported to the user and leaves the
        worksheet unchanged.
        """
        file_path = filedialog.askopenfilename(filetypes=[("Tax Forms", "*.png *.jpg *.pdf")])
        if not file_path:
            return
        self.status_label.config(text="SCANNING PAYLOAD...", fg="#ffcc00")
        # Repaint now; the scan below blocks the event loop.
        self.root.update()
        try:
            raw_img = ingest_payload(file_path)
            if raw_img is None:
                raise ValueError("the file could not be read as an image or PDF")
            rects, padded = putTheMoneyInTheBox(raw_img)
            vision_data = contextual_harvest(padded, rects)
        except Exception as exc:
            # UI boundary: report the failure instead of leaving the label
            # stuck on "SCANNING PAYLOAD...".
            self.status_label.config(text="SCAN FAILED", fg="#ff3333")
            messagebox.showerror("Scan Failed", f"Could not scan {file_path}:\n{exc}")
            return

        for key, val in vision_data.items():
            if key not in self.entries or key == 'status':
                continue
            widget = self.entries[key]
            try:
                current_val = float(widget.get() or 0.0)
            except ValueError:
                current_val = 0.0
            widget.delete(0, tk.END)
            widget.insert(0, str(current_val + float(val)))
            widget.config(bg="#003300")
        self.status_label.config(text="DATA VERIFIED", fg="#00ff00")

    def launch_engine(self):
        """Push every worksheet value into the mission and run the engine."""
        for key, widget in self.entries.items():
            val = self.status_options.get(widget.get(), "0") if key == 'status' else widget.get()
            self.mission.add_field(key, val, source="Worksheet")
        results = self.mission.ignite()
        if not results:
            # ignite() prints the error and returns None on failure.
            self.status_label.config(text="ENGINE FAILURE", fg="#ff3333")
            messagebox.showerror("Mission Failure", "The engine could not run. Check terminal for details.")
            return
        fed, _state = results
        messagebox.showinfo("Mission Success", "Check terminal for 1040 results.")
        fed.printAllForms()

def verify_ground_stack():
    """Check that the ``tesseract`` and ``pdftoppm`` executables are on PATH.

    Returns:
        True when both are found; otherwise prints a warning and returns
        False.
    """
    import shutil

    missing = [tool for tool in ("tesseract", "pdftoppm") if not shutil.which(tool)]
    if missing:
        print("⚠️ [SENSORS OFFLINE] Tesseract or Poppler not found in $PATH.")
        print("💡 [INTEL] See 'ENVIRONMENT BOOTSTRAP' section in source to auto-repair.")
        return False
    return True


if __name__ == "__main__":
    # Optional provisioning helpers; both are commented out earlier in this
    # file and must be uncommented there before they can be called here.
    # bootstrap_environment()
    # sync_payload_from_manifest()
    # Only start the UI when the external OCR/PDF tools are available.
    if verify_ground_stack():
        root = tk.Tk()
        app = AlgoFriendUI(root)
        root.mainloop()
Loading