From 7ee26cb2b9754d5f84a36e2c5c3041102203eec1 Mon Sep 17 00:00:00 2001 From: CPR Date: Tue, 5 May 2026 11:26:00 -0400 Subject: [PATCH 1/2] feat: integrated AST-driven UI for dynamic telemetry ingestion --- 2025/example_single.py | 8 +- 2025/taxEngine.py | 346 +++++++++++++++++++++++++++++++++++++++++ requirements.txt | 53 +++++++ 3 files changed, 406 insertions(+), 1 deletion(-) create mode 100644 2025/taxEngine.py create mode 100644 requirements.txt diff --git a/2025/example_single.py b/2025/example_single.py index f46baed..8aa38b2 100755 --- a/2025/example_single.py +++ b/2025/example_single.py @@ -7,6 +7,12 @@ from f1040 import F1040 from ca540 import CA540 from form import FilingStatus +from pathlib import Path +import numpy as np +import pandas as pd +import os +import ast + inputs = { 'status': FilingStatus.SINGLE, @@ -33,7 +39,7 @@ # Extra items for schedule A 'F1040sa' : { - '11' : 500, # charitable contributions + '11' : 500, # charitable contributions }, } diff --git a/2025/taxEngine.py b/2025/taxEngine.py new file mode 100644 index 0000000..1fb3155 --- /dev/null +++ b/2025/taxEngine.py @@ -0,0 +1,346 @@ +import tkinter as tk +from tkinter import filedialog, messagebox, ttk +import cv2 +import os +import re +import numpy as np +import pytesseract +import ast +from pathlib import Path +from pdf2image import convert_from_path +try: + from f1040 import F1040 + from ca540 import CA540 + from form import FilingStatus +except ImportError as e: + print(f"CRITICAL: Engine components missing: {e}") + exit() +# To auto-provision, uncomment the call to bootstrap_environment() +#def bootstrap_environment(): +# import platform +# import subprocess +# import shutil + +# os_type = platform.system() +# print(f"Detecting Host OS: {os_type}") +# commands = { +# "Linux": "sudo apt update && sudo apt install -y tesseract-ocr poppler-utils", +# "Darwin": "brew install tesseract poppler", # macOS +# "Windows": "echo 'Download Tesseract: https://github.com and Poppler: https://github.com'" +# } +# +# cmd = commands.get(os_type) +# +# if os_type == "Windows": +# print(f"[MANUAL INTERVENTION REQUIRED]: {cmd}") +# elif cmd: +# try: +# print(f"Executing: {cmd}") +# subprocess.run(cmd, shell=True, check=True) +# print("Ground stack synchronized.") +# except Exception as e: +# print(f"[CRITICAL] Bootstrap failure: {e}") +# else: +# print("[ERROR] Unknown OS architecture. Manual setup required.") + +# --- PAYLOAD SYNC: AUTO-PROVISION FROM REQUIREMENTS.TXT --- +# def sync_payload_from_manifest(): +# import sys +# import subprocess +# from pathlib import Path + +# req_file = Path(__file__).parent / "requirements.txt" +# if not req_file.exists(): +# print("Manifest (requirements.txt) not found. Skipping auto-sync.") +# return +# try: +# print(f"Synchronizing environment via {req_file.name}...") +# subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", str(req_file)]) +# print("Environment parity achieved.") +# except Exception as e: +# print(f"[CRITICAL] Sync failed: {e}") + +BASE_URL = Path(__file__).resolve().parent + +#document routing +FORM_ROUTING = { + 'wages': 'Form W-2', + 'wages_ss': 'Form W-2', + 'ss_withheld': 'Form W-2', + 'withholding': 'Form W-2', + 'taxable_interest': 'Form 1099-INT', + 'tax_exempt_interest': 'Form 1099-INT', + 'dividends': 'Form 1099-DIV', + 'qualified_dividends': 'Form 1099-DIV', + 'business_income': 'Schedule C', + 'business_expenses': 'Schedule C', + 'status': '1040 Identity Section', + 'charitable_contributions': 'Schedule A' +} +#keys the OCR reader searches for with associated legacy values ***can be expanded to become more robust very rudementary*** +DATA_ANCHORS = { + "wages, tips": "wages", + "wages,": "wages", + "federal income tax": "withholding", + "social security wages": "wages_ss", + "social security tax withheld": "ss_withheld", + "social security tax": "ss_withheld", + "interest income": "taxable_interest", + "exempt interest": "tax_exempt_interest", + "ordinary dividends": "dividends", + "qualified dividends": "qualified_dividends", + "gross receipts": "business_income", + "total expenses": "business_expenses", + "charitable": "charitable_contributions" +} + +#parsing tool used to acquire expected fields of input based on the example +class TaxMission: + def __init__(self, directory): + self.directory = Path(directory) + self.required_keys = self._discover_schema() + self.field_types = self._infer_field_types() + for k in FORM_ROUTING.keys(): + if k not in self.required_keys: self.required_keys.append(k) + self.payload = {str(key): None for key in self.required_keys} + self.payload['F1040sa'] = {'11': 0} + self.payload['exemptions'] = 1 + + def _discover_schema(self): + keys = set() + for path in self.directory.glob('f*.py'): + try: + with open(path, 'r') as f: + tree = ast.parse(f.read()) + for node in ast.walk(tree): + if isinstance(node, ast.Subscript) and getattr(node.value, 'id', '') == 'inputs': + if isinstance(node.slice, ast.Constant): keys.add(node.slice.value) + elif isinstance(node, ast.Call) and getattr(node.func, 'attr', '') == 'get': + if getattr(node.func.value, 'id', '') == 'inputs' and node.args: + if isinstance(node.args[0], ast.Constant): keys.add(node.args[0].value) + elif isinstance(node, ast.Call) and getattr(node.func, 'attr', '') == 'spouseSum': + if len(node.args) >= 2 and isinstance(node.args[1], ast.Constant): + keys.add(node.args[1].value) + except: continue + return sorted([str(k) for k in keys]) + + def _infer_field_types(self): + types = {} + for path in self.directory.glob('f*.py'): + try: + with open(path, 'r') as f: + tree = ast.parse(f.read()) + for node in ast.walk(tree): + if isinstance(node, ast.Call) and getattr(node.func, 'id', '') == 'sum': + if 'inputs' in str(node.args[0]): + if hasattr(node.args[0], 'args') and node.args[0].args: + types[node.args[0].args[0].value] = 'list' + elif isinstance(node, ast.Subscript) and isinstance(node.slice, ast.Subscript): + if getattr(node.slice.value, 'id', '') == 'inputs': + types[node.slice.slice.value] = 'enum' + except: continue + return types + + def add_field(self, key, value, source="Sensor"): + if key in self.payload: + self.payload[key] = value + print(f"📡 [DATA] {key} -> {value} ({source})") + + def _sanitize(self): + data = self.payload.copy() + for k, v in data.items(): + if v is None or v == "": + data[k] = 0.0 + if isinstance(v, str): + clean_v = re.sub(r'[^-0-9.]', '', v) + try: + data[k] = float(clean_v) + except: + data[k] = 0.0 + try: + status_input = str(data.get('status', '1')) + if '1' in status_input or 'joint' in status_input.lower(): + data['status'] = FilingStatus.JOINT + elif '2' in status_input or 'seperated' in status_input.lower(): + data['status'] = FilingStatus.SEPARATE + elif '3' in status_input or 'head' in status_input.lower(): + data['status'] = FilingStatus.HEAD + elif '4' in status_input or 'widow' in status_input.lower(): + data['status'] = FilingStatus.WIDOW + else: + data['status'] = FilingStatus.SINGLE + except: + data['status'] = FilingStatus.SINGLE + charity = data.get('charitable_contributions', 0.0) + data['F1040sa'] = {'11': float(charity or 0.0)} + data['estimated_payments'] = [0.0, 0.0, 0.0, 0.0] + for k in data: + if k not in ['status', 'F1040sa', 'estimated_payments', 'F1040sa_items']: + try: + data[k] = float(data[k]) + except: + pass + + return data + + #the push to legacy + def ignite(self): + clean_data = self._sanitize() + try: + print("🚀 Launching Engine...") + f = F1040(clean_data) + ca = CA540(clean_data, f) + return f, ca + except Exception as e: + print(f"💥 ENGINE BUST: {e}") + return None + +#grabs all available border boxes after some image manipulation grayscale -> threshold dilation tends to increase noise +def putTheMoneyInTheBox(image): + pad = 20 + image_padded = cv2.copyMakeBorder(image, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=(0,0,0)) + gray = cv2.cvtColor(image_padded, cv2.COLOR_BGR2GRAY) + thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2) + contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + candidates = [] + if hierarchy is not None: + for i, c in enumerate(contours): + x, y, w, h = cv2.boundingRect(c) + if hierarchy[0][i][3] != -1 and 40 < w < 800 and 15 < h < 110: + if (w / h) > 1.1: candidates.append({'rect': (x, y, w, h), 'area': w * h}) + candidates.sort(key=lambda x: x['area'], reverse=True) + rects = [item['rect'] for item in candidates] + rects.sort(key=lambda b: (b[1] // 30, b[0])) + return rects, image_padded + +#seperates the border boxes inside the form into seperate images then converts to strings +def contextual_harvest(padded_image, sorted_rects): + gray = cv2.cvtColor(padded_image, cv2.COLOR_BGR2GRAY) + img_h, img_w = gray.shape + captured_data = {} + norm_anchors = {k.lower(): v for k, v in DATA_ANCHORS.items()} + + for i, (x, y, w, h) in enumerate(sorted_rects): + y1, y2, x1, x2 = max(0, y-5), min(img_h, y+h+5), max(0, x-5), min(img_w, x+w+5) + roi = gray[y1:y2, x1:x2] + if roi.size == 0: continue + roi_up = cv2.resize(roi, None, fx=2, fy=2, interpolation=cv2.INTER_LANCZOS4) + _, roi_bin = cv2.threshold(roi_up, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + raw_text = pytesseract.image_to_string(roi_bin, config='--psm 6').strip().lower() + if raw_text: + for anchor, engine_key in norm_anchors.items(): + if anchor in raw_text: + matches = re.findall(r"[\d,.]+", raw_text) + valid_values = [] + for m in matches: + clean_val = m.replace(',', '').rstrip('.') + try: + val = float(clean_val) + if val < 20 and "." not in m: + continue + valid_values.append(val) + except: continue + + if valid_values: + highest = max(valid_values) + captured_data[engine_key] = highest + print(f"🎯 [PAYLOAD] Assigned {engine_key} -> {highest}") + break + return captured_data + +#takes file input and converts it from pdf can be expanded to convert more file types +def ingest_payload(file_path): + if str(file_path).lower().endswith('.pdf'): + pages = convert_from_path(file_path, 300) + return np.array(pages[0])[:, :, ::-1].copy() + return cv2.imread(str(file_path)) + +#UI interface +class AlgoFriendUI: + def __init__(self, root): + self.root = root + self.mission = TaxMission(BASE_URL) + self.root.title("🚀 AlgoFriend | Mission Control") + self.root.geometry("850x750") + self.root.configure(bg="#1e1e1e") + self.entries = {} + self.status_options = { + "Single": "0", + "Married Joint": "1", + "Married Separate": "2", + "Head of Household": "3", + "Widow(er)": "4" + } + self.status_label = tk.Label(root, text="SYSTEM INITIALIZED", bg="#1e1e1e", fg="#00ff00", font=("Courier", 18, "bold")) + self.status_label.pack(pady=15) + self.worksheet_frame = tk.LabelFrame(root, text=" LIVE TELEMETRY WORKSHEET ", bg="#1e1e1e", fg="white", font=("Courier", 10)) + self.worksheet_frame.pack(pady=10, padx=20, fill="both") + for idx, (key, form) in enumerate(FORM_ROUTING.items()): + r, c = idx // 2, (idx % 2) * 2 + tk.Label(self.worksheet_frame, text=f"{key.upper()}:", bg="#1e1e1e", fg="#aaa", font=("Courier", 8)).grid(row=r, column=c, sticky="e", padx=5, pady=2) + if key == 'status': + self.entries[key] = ttk.Combobox(self.worksheet_frame, values=list(self.status_options.keys()), state="readonly", width=16) + self.entries[key].set("Single") + self.entries[key].grid(row=r, column=c+1, padx=5, pady=2) + else: + entry = tk.Entry(self.worksheet_frame, width=18, bg="#000", fg="#00ff00", insertbackground="white", borderwidth=0) + entry.insert(0, "0.0") + entry.grid(row=r, column=c+1, padx=5, pady=2) + self.entries[key] = entry + self.btn_frame = tk.Frame(root, bg="#1e1e1e") + self.btn_frame.pack(pady=20) + tk.Button(self.btn_frame, text="📁 SELECT DOCUMENT", command=self.select_file, + width=25, bg="#333", fg="white", font=("Courier", 10)).pack(pady=5) + tk.Button(self.btn_frame, text="🏁 IGNITE ENGINE", command=self.launch_engine, + width=25, bg="#005500", fg="white", font=("Courier", 12, "bold")).pack(pady=15) + + def select_file(self): + file_path = filedialog.askopenfilename(filetypes=[("Tax Forms", "*.png *.jpg *.pdf")]) + if file_path: + self.status_label.config(text="SCANNING PAYLOAD...", fg="#ffcc00") + self.root.update() + raw_img = ingest_payload(file_path) + if raw_img is not None: + rects, padded = putTheMoneyInTheBox(raw_img) + vision_data = contextual_harvest(padded, rects) + for key, val in vision_data.items(): + if key in self.entries: + if key == 'status': + continue + try: + current_val = float(self.entries[key].get() or 0.0) + except ValueError: + current_val = 0.0 + self.entries[key].delete(0, tk.END) + self.entries[key].insert(0, str(current_val + float(val))) + self.entries[key].config(bg="#003300") + self.status_label.config(text="DATA VERIFIED", fg="#00ff00") + + def launch_engine(self): + for key, widget in self.entries.items(): + val = self.status_options.get(widget.get(), "0") if key == 'status' else widget.get() + self.mission.add_field(key, val, source="Worksheet") + results = self.mission.ignite() + if results: + fed, state = results + messagebox.showinfo("Mission Success", "Check terminal for 1040 results.") + fed.printAllForms() + +def verify_ground_stack(): + import shutil + tess = shutil.which("tesseract") + pdf = shutil.which("pdftoppm") + if not tess or not pdf: + print("⚠️ [SENSORS OFFLINE] Tesseract or Poppler not found in $PATH.") + print("💡 [INTEL] See 'ENVIRONMENT BOOTSTRAP' section in source to auto-repair.") + return False + return True + + +if __name__ == "__main__": +#bootstrap_enviroment() +#sync_payload_from_manifest() + if verify_ground_stack(): + root = tk.Tk() + app = AlgoFriendUI(root) + root.mainloop() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e83ac8f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,53 @@ +asgiref==3.11.1 +beautifulsoup4==4.14.3 +blinker==1.9.0 +certifi==2026.4.22 +cffi==2.0.0 +charset-normalizer==3.4.5 +click==8.3.3 +cmake==4.3.1 +curl_cffi==0.13.0 +Django==5.2.13 +django-cors-headers==4.9.0 +django-database-url==1.0.3 +djangorestframework==3.17.1 +djangorestframework_simplejwt==5.5.1 +Flask==3.1.3 +flask-cors==6.0.2 +frozendict==2.4.7 +gunicorn==25.3.0 +idna==3.11 +itsdangerous==2.2.0 +Jinja2==3.1.6 +MarkupSafe==3.0.3 +multitasking==0.0.12 +numpy==2.4.3 +opencv-python==4.13.0.92 +osmium==4.3.1 +packaging==26.1 +pandas==3.0.1 +pdf2image==1.17.0 +peewee==4.0.2 +pillow==12.2.0 +platformdirs==4.9.4 +protobuf==7.34.0 +psycopg2-binary==2.9.12 +pycparser==3.0 +PyJWT==2.12.1 +PyQt6==6.11.0 +PyQt6-Qt6==6.11.0 +PyQt6_sip==13.11.1 +pytesseract==0.3.13 +python-dateutil==2.9.0.post0 +pytz==2026.1.post1 +requests==2.32.5 +six==1.17.0 +soupsieve==2.8.3 +sqlparse==0.5.5 +stripe==15.1.0 +tkinterdnd2-universal==1.7.3 +typing_extensions==4.15.0 +urllib3==2.6.3 +websockets==16.0 +Werkzeug==3.1.8 +yfinance==1.2.0 From ba0554226515e5829cc44459fc63bd79d3689f0d Mon Sep 17 00:00:00 2001 From: AlgoFriend Date: Tue, 5 May 2026 12:39:09 -0400 Subject: [PATCH 2/2] feat: integrated AST-driven UI for dynamic telemetry ingestion-base-file-revert --- 2025/example_single.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/2025/example_single.py b/2025/example_single.py index 8aa38b2..e730036 100755 --- a/2025/example_single.py +++ b/2025/example_single.py @@ -7,12 +7,6 @@ from f1040 import F1040 from ca540 import CA540 from form import FilingStatus -from pathlib import Path -import numpy as np -import pandas as pd -import os -import ast - inputs = { 'status': FilingStatus.SINGLE,