diff --git a/README.md b/README.md index 6fa7287..39686e4 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,6 @@ This repository is organized into a core framework, a registry of skills, and do ```text Skillware/ -Skillware/ ├── skillware/ # Core Framework Package │ └── core/ │ ├── base_skill.py # Abstract Base Class for skills @@ -55,18 +54,27 @@ Skillware/ │ └── env.py # Environment Management ├── skills/ # Skill Registry (Domain-driven) │ └── finance/ -│ └── wallet_screening/ +│ └── wallet_screening/ │ ├── skill.py # Logic │ ├── manifest.yaml # Metadata & Constitution │ ├── instructions.md # Cognitive Map │ ├── card.json # UI Presentation │ ├── data/ # Integrated Knowledge Base │ └── maintenance/ # Maintenance Tools +│ └── office/ +│ └── pdf_form_filler/ +│ ├── skill.py # Logic +│ ├── manifest.yaml # Metadata +│ ├── instructions.md # Cognitive Map +│ ├── utils.py # PDF Processing +│ └── card.json # UI Presentation ├── templates/ # New Skill Templates │ └── python_skill/ # Standard Python Skill Template ├── examples/ # Reference Implementations │ ├── gemini_wallet_check.py # Google Gemini Integration -│ └── claude_wallet_check.py # Anthropic Claude Integration +│ ├── claude_wallet_check.py # Anthropic Claude Integration +│ ├── gemini_pdf_form_filler.py +│ └── claude_pdf_form_filler.py ├── docs/ # Comprehensive Documentation │ ├── introduction.md # Philosophy & Design │ ├── usage/ # Integration Guides @@ -152,12 +160,12 @@ Skillware differs from the Model Context Protocol (MCP) or Anthropic's Skills re For questions, suggestions, or contributions, please open an issue or reach out to us: * **Email**: [skillware-os@arpacorp.net](mailto:skillware-os@arpacorp.net) -* **Issues**: [GitHub Issues](https://github.com/arpa/skillware/issues) +* **Issues**: [GitHub Issues](https://github.com/arpahls/skillware/issues) ---
ARPA Logo
- Built & Maintained by ARPA Hellenic Logical Systems + Built & Maintained by ARPA Hellenic Logical Systems & the Community
diff --git a/docs/skills/README.md b/docs/skills/README.md index c23ddbf..baf148b 100644 --- a/docs/skills/README.md +++ b/docs/skills/README.md @@ -2,7 +2,14 @@ Welcome to the official catalog of Skillware capabilities. -## 💳 Finance & Compliance +## Office +Skills for document processing, email automation, and productivity. + +| Skill | ID | Description | +| :--- | :--- | :--- | +| **[PDF Form Filler](pdf_form_filler.md)** | `office/pdf_form_filler` | Fills AcroForm-based PDFs by mapping user instructions to detected form fields using LLM-based semantic understanding. | + +## Finance Tools for financial analysis, blockchain interaction, and regulatory compliance. | Skill | ID | Description | diff --git a/docs/skills/pdf_form_filler.md b/docs/skills/pdf_form_filler.md new file mode 100644 index 0000000..91bb7e3 --- /dev/null +++ b/docs/skills/pdf_form_filler.md @@ -0,0 +1,79 @@ +# PDF Form Filler Skill + +**ID**: `office/pdf_form_filler` + +A productivity skill that fills AcroForm-based PDFs by mapping natural language instructions to detected form fields using semantic understanding. + +## 📋 Capabilities + +* **Smart Field Detection**: Automatically identifies text fields, checkboxes, radio buttons, and dropdowns in standard PDFs. +* **Semantic Mapping**: Uses an internal LLM (Claude) to understand user instructions (e.g., "Sign me up for the newsletter") and map them to the correct field (e.g., `checkbox_subscribe_newsletter`). +* **Context Awareness**: Extracts nearby text labels to ensure accurate mapping, even if field names are obscure (e.g., `field_123` vs label "First Name"). +* **Type Safety**: Automatically converts values to the correct format (booleans for checkboxes, specific options for dropdowns). + +## 📂 Internal Architecture + +The skill is self-contained in `skillware/skills/office/pdf_form_filler/`. + +### 1. The Mind (`instructions.md`) +The system prompt teaches the internal mapping engine to: +* Analyze the provided "User Instructions". 
+* Review the list of "Detected Fields" (ID, Type, Context, Options). +* Output a strict JSON mapping of `Field ID -> Value`. +* Handle ambiguities by preferring precision over guessing. + +### 2. The Body (`skill.py` & `utils.py`) +* **PDF Processing**: Uses `PyMuPDF` (fitz) for high-fidelity rendering and widget manipulation. +* **LLM Integration**: Wraps the Anthropic SDK to perform the semantic reasoning step. +* **Validation**: Ensures values match the field type (e.g., selecting a valid option from a dropdown). + +## 💻 Integration Guide + +### Environment Variables +You must provide an Anthropic API key for the semantic mapping engine. + +```bash +ANTHROPIC_API_KEY="sk-ant-..." +``` + +### Usage (Skillware Loader) + +```python +from skillware.core.loader import SkillLoader + +# 1. Load the Skill +skill_bundle = SkillLoader.load_skill("office/pdf_form_filler") +PDFFormFillerSkill = skill_bundle['module'].PDFFormFillerSkill + +# 2. Initialize +filler = PDFFormFillerSkill() + +# 3. Execute +result = filler.execute({ + "pdf_path": "/absolute/path/to/form.pdf", + "instructions": "Name: John Doe. Check the terms of service box." +}) + +print(f"Filled PDF saved to: {result['output_path']}") +``` + +## 📊 Data Schema + +The skill returns a JSON object with the result of the operation. + +```json +{ + "status": "success", + "output_path": "/path/to/form_filled.pdf", + "filled_fields": [ + "page0_full_name", + "page0_terms_check" + ], + "message": "Successfully filled 2 fields." +} +``` + +## ⚠️ Limitations + +* **AcroForms Only**: Does not support XFA forms or non-interactive "flat" PDFs. +* **LLM Dependency**: Requires an active internet connection and valid API key for the semantic mapping step. 
diff --git a/examples/claude_pdf_form_filler.py b/examples/claude_pdf_form_filler.py new file mode 100644 index 0000000..3022d0b --- /dev/null +++ b/examples/claude_pdf_form_filler.py @@ -0,0 +1,91 @@ +import os +import sys +import json +# Add repo root to path +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +import anthropic +from skillware.core.loader import SkillLoader +from skillware.core.env import load_env_file + +# Load Env (Requires ANTHROPIC_API_KEY for both Agent and Skill) +load_env_file() + +# 1. Load the Skill +skill_bundle = SkillLoader.load_skill("office/pdf_form_filler") +print(f"Loaded Skill: {skill_bundle['manifest']['name']}") + +# 2. Instantiate Skill +PDFFormFillerSkill = skill_bundle['module'].PDFFormFillerSkill +pdf_skill = PDFFormFillerSkill() + +# 3. Setup Claude Client +client = anthropic.Anthropic( + api_key=os.environ.get("ANTHROPIC_API_KEY"), +) + +tools = [SkillLoader.to_claude_tool(skill_bundle)] + +# 4. Run Agent Loop +pdf_path = os.path.abspath("test_form.pdf") +user_query = f"Please fill out the form at {pdf_path}. My name is John Smith and I want to enable notifications." + +print(f"User: {user_query}") + +message = client.messages.create( + model="claude-3-opus-20240229", + max_tokens=1024, + system=skill_bundle['instructions'], + messages=[ + {"role": "user", "content": user_query} + ], + tools=tools, +) + +if message.stop_reason == "tool_use": + tool_use = next(block for block in message.content if block.type == "tool_use") + tool_name = tool_use.name + tool_input = tool_use.input + + print(f"\nClaude requested tool: {tool_name}") + print(f"Input: {tool_input}") + + if tool_name == "pdf_form_filler": + # Check file + if not os.path.exists(tool_input.get('pdf_path', '')): + print(f"⚠️ Warning: File {tool_input.get('pdf_path')} does not exist. 
Execution might fail.") + + # Execute + print("⚙️ Executing skill...") + try: + result = pdf_skill.execute(tool_input) + print("✅ Skill Execution Result:") + print(json.dumps(result, indent=2)) + except Exception as e: + result = {"error": str(e)} + print(f"❌ Error: {e}") + + # Feed back to Claude + response = client.messages.create( + model="claude-3-opus-20240229", + max_tokens=1024, + system=skill_bundle['instructions'], + tools=tools, + messages=[ + {"role": "user", "content": user_query}, + {"role": "assistant", "content": message.content}, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": tool_use.id, + "content": json.dumps(result) + } + ], + }, + ], + ) + + print("\nAgent Final Response:") + print(response.content[0].text) diff --git a/examples/gemini_pdf_form_filler.py b/examples/gemini_pdf_form_filler.py new file mode 100644 index 0000000..a8597dd --- /dev/null +++ b/examples/gemini_pdf_form_filler.py @@ -0,0 +1,93 @@ +import os +import sys +# Add repo root to path +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +import google.generativeai as genai +from skillware.core.loader import SkillLoader +from skillware.core.env import load_env_file + +# Load Env (Requires GOOGLE_API_KEY for the Agent, and ANTHROPIC_API_KEY for the Skill's internal logic) +load_env_file() + +# 1. Load the Skill +skill_bundle = SkillLoader.load_skill("office/pdf_form_filler") +print(f"Loaded Skill: {skill_bundle['manifest']['name']}") + +# 2. Instantiate the Skill +# The skill needs ANTHROPIC_API_KEY in env to perform semantic mapping +PDFFormFillerSkill = skill_bundle['module'].PDFFormFillerSkill +pdf_skill = PDFFormFillerSkill() + +# 3. 
Setup Gemini Agent +genai.configure(api_key=os.environ.get("GOOGLE_API_KEY")) + +# Define tool for Gemini +tools = [SkillLoader.to_gemini_tool(skill_bundle)] + +model = genai.GenerativeModel( + 'gemini-2.0-flash-exp', + tools=tools, + system_instruction=skill_bundle['instructions'] # Inject skill's cognitive map +) + +chat = model.start_chat(enable_automatic_function_calling=True) + +# 4. Run the Agent Loop +# Note: You need a real PDF file for this to work. +pdf_path = os.path.abspath("test_form.pdf") +user_query = f"Fill out the form at {pdf_path}. Set the name to 'Jane Doe' and check the 'Subscribe' box." + +print(f"User: {user_query}") + +# Create a function map for manual execution if needed (Python SDK handles this automatically usually) +# But for completeness: +function_map = { + 'pdf_form_filler': pdf_skill.execute +} + +response = chat.send_message(user_query) + +# Simple manual tool execution loop (if auto-calling isn't fully handled or we want to inspect) +# Note: Recent genai SDKs handle this better, but explicit loops are safer for demos. 
+while response.candidates and response.candidates[0].content.parts: + part = response.candidates[0].content.parts[0] + + if part.function_call: + fn_name = part.function_call.name + fn_args = dict(part.function_call.args) + + print(f"🤖 Agent wants to call: {fn_name}") + print(f" Args: {fn_args}") + + if fn_name == 'pdf_form_filler': + try: + # Check if file exists before running + if not os.path.exists(fn_args.get('pdf_path', '')): + print(f"⚠️ Error: PDF file not found at {fn_args.get('pdf_path')}") + result = {"error": "PDF file not found."} + else: + print("⚙️ Executing skill...") + result = pdf_skill.execute(fn_args) + print(f"✅ Result: {result}") + except Exception as e: + result = {"error": str(e)} + + # Send result back + response = chat.send_message( + [ + { + "function_response": { + "name": fn_name, + "response": {'result': result} + } + } + ] + ) + else: + break + else: + break + +print("\n💬 Agent Final Response:") +print(response.text) diff --git a/skills/office/pdf_form_filler/card.json b/skills/office/pdf_form_filler/card.json new file mode 100644 index 0000000..1ee8a75 --- /dev/null +++ b/skills/office/pdf_form_filler/card.json @@ -0,0 +1,25 @@ +{ + "name": "PDF Form Filler", + "description": "Smartly fills PDF forms from natural language instructions.", + "icon": "document-text", + "color": "rose", + "ui_schema": { + "type": "card", + "fields": [ + { + "key": "message", + "label": "Status" + }, + { + "key": "filled_fields", + "label": "Filled Fields", + "type": "tags" + }, + { + "key": "output_path", + "label": "Download", + "type": "file_path" + } + ] + } +} \ No newline at end of file diff --git a/skills/office/pdf_form_filler/instructions.md b/skills/office/pdf_form_filler/instructions.md new file mode 100644 index 0000000..56b0527 --- /dev/null +++ b/skills/office/pdf_form_filler/instructions.md @@ -0,0 +1,21 @@ +You are an expert form-filling assistant. Your goal is to map user instructions to specific form fields in a PDF. 
+ +You will be given: +1. A list of detected form fields from a PDF, including their ID, type, and nearby text context. +2. User instructions describing what values to fill in. + +Your Task: +- Analyze the user instructions and match them to the correct form fields based on the field context. +- Output a JSON object where potential keys are the `field_id`s and values are the content to fill. +- Only include fields that the user has provided information for. +- For Checkboxes: Use boolean `true` or `false`. +- For Dropdowns: Use the exact string from the options list if available. +- If a user instruction is ambiguous or doesn't match a field, ignore it. + +Output Format: +```json +{ + "page0_field_name": "Value", + "page0_checkbox_1": true +} +``` diff --git a/skills/office/pdf_form_filler/manifest.yaml b/skills/office/pdf_form_filler/manifest.yaml new file mode 100644 index 0000000..c693c34 --- /dev/null +++ b/skills/office/pdf_form_filler/manifest.yaml @@ -0,0 +1,29 @@ +name: pdf_form_filler +version: 0.1.0 +description: Fills PDF forms based on natural language instructions. +parameters: + type: object + properties: + pdf_path: + type: string + description: Absolute path to the PDF form to fill. + instructions: + type: string + description: User instructions for filling the form (e.g. "Name: Alice, Age: 30"). + output_path: + type: string + description: Optional path to save the filled PDF. + required: + - pdf_path + - instructions +requirements: + - pymupdf + - anthropic +constitution: | + 1. USER PRIVACY: Do not store the PDF content externally; process it in-memory or locally. + 2. ACCURACY: If instructions are ambiguous, default to not filling the field rather than guessing. + 3. SECURITY: Do not execute any scripts embedded within the PDF. +env_vars: + ANTHROPIC_API_KEY: + description: "Required for the internal semantic engine (Claude) to map instructions to fields." 
+ required: true diff --git a/skills/office/pdf_form_filler/skill.py b/skills/office/pdf_form_filler/skill.py new file mode 100644 index 0000000..be9f3ad --- /dev/null +++ b/skills/office/pdf_form_filler/skill.py @@ -0,0 +1,123 @@ +from typing import Dict, Any +import os +import json +import anthropic +import yaml +from skillware.core.base_skill import BaseSkill + + +class PDFFormFillerSkill(BaseSkill): + """ + A skill that fills PDF forms based on natural language instructions. + """ + + def __init__(self, config: Dict[str, Any] = None): + super().__init__(config) + # Initialize Anthropic client - expects ANTHROPIC_API_KEY in env + self.client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY")) + + @property + def manifest(self) -> Dict[str, Any]: + # Helper to load manifest from this directory + manifest_path = os.path.join(os.path.dirname(__file__), 'manifest.yaml') + if os.path.exists(manifest_path): + with open(manifest_path, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) + return {} + + def execute(self, params: Dict[str, Any]) -> Any: + # Import here to avoid top-level linter issues with relative imports + try: + from .utils import detect_form_fields, apply_edits + except ImportError: + import sys + sys.path.append(os.path.dirname(__file__)) + from utils import detect_form_fields, apply_edits + + # 1. Parse Inputs + pdf_path = params.get('pdf_path') + instructions = params.get('instructions') + output_path = params.get('output_path') + + if not pdf_path or not os.path.exists(pdf_path): + return {"error": f"PDF file not found: {pdf_path}"} + + if not instructions: + return {"error": "No instructions provided."} + + # 2. Analyze PDF + with open(pdf_path, 'rb') as f: + pdf_bytes = f.read() + + fields = detect_form_fields(pdf_bytes) + if not fields: + return {"status": "warning", "message": "No fillable fields found in PDF."} + + # 3. 
Construct LLM Prompt + # Load system instructions + inst_path = os.path.join(os.path.dirname(__file__), 'instructions.md') + system_prompt = "You are a form filling assistant." + if os.path.exists(inst_path): + with open(inst_path, 'r', encoding='utf-8') as f: + system_prompt = f.read() + + # Prepare field context for LLM + fields_context = [f.to_dict() for f in fields] + + user_message = f""" + User Instructions: {instructions} + + Detected Fields: + {json.dumps(fields_context, indent=2)} + + Return a JSON object mapping field_ids to values. + """ + + # 4. Call LLM to map instructions -> fields + try: + message = self.client.messages.create( + model="claude-3-haiku-20240307", + max_tokens=4096, + system=system_prompt, + messages=[ + {"role": "user", "content": user_message} + ] + ) + response_text = message.content[0].text + + # Extract JSON from response + json_str = response_text.strip() + if "```json" in json_str: + json_str = json_str.split("```json")[1].split("```")[0] + elif "```" in json_str: + json_str = json_str.split("```")[1].split("```")[0] + + edits = json.loads(json_str) + + except Exception as e: + return {"error": f"LLM processing failed: {str(e)}"} + + # 5. Apply Edits + if not edits: + return {"status": "no_change", "message": "LLM determined no fields needed to be changed."} + + try: + filled_pdf_bytes = apply_edits(pdf_bytes, edits) + + # Determine output location + if not output_path: + base, ext = os.path.splitext(pdf_path) + output_path = f"{base}_filled{ext}" + + with open(output_path, 'wb') as f: + f.write(filled_pdf_bytes) + + return { + "status": "success", + "output_path": output_path, + "filled_fields": list(edits.keys()), + "message": f"Successfully filled {len(edits)} fields." 
+            }
+
+        except Exception as e:
+            return {"error": f"Failed to apply edits to PDF: {str(e)}"}
diff --git a/skills/office/pdf_form_filler/utils.py b/skills/office/pdf_form_filler/utils.py
new file mode 100644
index 0000000..78ce0b1
--- /dev/null
+++ b/skills/office/pdf_form_filler/utils.py
@@ -0,0 +1,156 @@
+import fitz  # PyMuPDF
+from dataclasses import dataclass, asdict
+from enum import Enum
+from typing import List, Optional, Dict, Any, Tuple, Union
+
+class FieldType(Enum):
+    TEXT = "text"
+    CHECKBOX = "checkbox"
+    DROPDOWN = "dropdown"
+    RADIO = "radio"
+
+@dataclass
+class DetectedField:
+    """Represents a detected form field in the PDF."""
+    field_id: str
+    field_type: FieldType
+    bbox: Tuple[float, float, float, float]  # (x0, y0, x1, y1)
+    page: int
+    label_context: str  # nearby text for semantic understanding
+    current_value: Optional[str] = None
+    options: Optional[List[str]] = None
+    native_field_name: Optional[str] = None
+    friendly_label: Optional[str] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the field as a plain dict, with the enum replaced by its string value."""
+        d = asdict(self)
+        d['field_type'] = self.field_type.value
+        return d
+
+def _widget_type_to_field_type(widget_type: int) -> FieldType:
+    """Map a PyMuPDF widget type code to a FieldType; unknown codes fall back to TEXT."""
+    mapping = {
+        7: FieldType.TEXT,      # PDF_WIDGET_TYPE_TEXT
+        2: FieldType.CHECKBOX,  # PDF_WIDGET_TYPE_CHECKBOX
+        3: FieldType.DROPDOWN,  # PDF_WIDGET_TYPE_COMBOBOX
+        4: FieldType.DROPDOWN,  # PDF_WIDGET_TYPE_LISTBOX
+        5: FieldType.RADIO,     # PDF_WIDGET_TYPE_RADIOBUTTON
+    }
+    return mapping.get(widget_type, FieldType.TEXT)
+
+def _extract_nearby_text(page: "fitz.Page", rect: "fitz.Rect", radius: int = 100) -> str:
+    """Extract text near a bounding box to understand field context."""
+    search_rect = fitz.Rect(rect)
+    search_rect.x0 -= radius
+    search_rect.y0 -= radius
+    search_rect.x1 += radius
+    search_rect.y1 += radius
+
+    # Clip to page bounds
+    page_rect = page.rect
+    search_rect.intersect(page_rect)
+
+    text = page.get_text("text", clip=search_rect).strip()
+
+    # Clean up whitespace
+    lines = [line.strip() for line in text.split('\n') if line.strip()]
+    return ' | '.join(lines)
+
+def detect_form_fields(pdf_bytes: bytes) -> List[DetectedField]:
+    """Detect all fillable AcroForm fields in the PDF."""
+    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+    fields = []
+
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+
+        # PyMuPDF 1.22+ uses page.widgets(), older versions use page.load_widgets() or similar
+        # Assuming modern PyMuPDF based on target repo
+        for widget in page.widgets():
+            if not widget.field_name:
+                continue
+
+            field_type = _widget_type_to_field_type(widget.field_type)
+
+            options = None
+            # Using integer constants to avoid linter errors
+            # 3 = COMBOBOX, 4 = LISTBOX
+            if widget.field_type in (3, 4):
+                # Retrieve options for dropdown/listbox.
+                # choice_values may be None; guard before list() to avoid a TypeError.
+                if hasattr(widget, "choice_values"):
+                    options = list(widget.choice_values or [])
+
+            current_value = widget.field_value
+            if isinstance(current_value, bool):
+                current_value = str(current_value).lower()
+
+            # Context extraction
+            label = _extract_nearby_text(page, widget.rect)
+
+            fields.append(DetectedField(
+                field_id=f"page{page_num}_{widget.field_name}",
+                field_type=field_type,
+                bbox=tuple(widget.rect),
+                page=page_num,
+                label_context=label,
+                current_value=str(current_value) if current_value is not None else None,
+                options=options,
+                native_field_name=widget.field_name
+            ))
+
+    doc.close()
+    return fields
+
+# Structured alternative to a plain {field_id: value} dict for apply_edits
+@dataclass
+class FieldEdit:
+    """A single requested change: the target field_id and the value to write."""
+    field_id: str
+    value: Any
+
+def apply_edits(pdf_bytes: bytes, edits: Union[Dict[str, Any], List[FieldEdit]]) -> bytes:
+    """Apply edits to the PDF and return the modified document as bytes.
+
+    `edits` is either a {field_id: value} dict (the shape skill.py passes after
+    parsing the LLM's JSON reply) or a list of FieldEdit objects. Field ids that
+    match no widget are ignored.
+    """
+    # Normalize both accepted shapes to a field_id -> value lookup before opening the PDF.
+    if isinstance(edits, dict):
+        edit_map = dict(edits)
+    else:
+        edit_map = {e.field_id: e.value for e in edits}
+
+    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+    for page_num in range(len(doc)):
+        page = doc[page_num]
+
+        for widget in page.widgets():
+            if not widget.field_name:
+                continue
+
+            field_id = f"page{page_num}_{widget.field_name}"
+
+            if field_id in edit_map:
+                _apply_widget_edit(widget, edit_map[field_id])
+
+    result = doc.tobytes()
+    doc.close()
+    return result
+
+def _apply_widget_edit(widget: "fitz.Widget", value: Any):
+    """Write `value` to one widget (bool for checkboxes, str otherwise) and refresh it."""
+    # Checkbox logic: 2 = CHECKBOX
+    if widget.field_type == 2:
+        # PyMuPDF expects boolean for checkbox state
+        if isinstance(value, str):
+            value = value.lower() in ('true', 'yes', '1', 'on', 'checked')
+        widget.field_value = bool(value)
+    else:
+        # Text/Choice logic
+        widget.field_value = str(value)
+
+    # Update appearance
+    widget.update()