coding-agent/papershift2diff_edit.txt at main · voxmenthe/coding-agent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
diff --git a/src/main.py b/src/main.py
index 9c1ff4c..e451830 100644
--- a/src/main.py
+++ b/src/main.py

@@ -226,6 +391,16 @@ class CodeAgent:
                         'active_files': [getattr(f, 'name', str(f)) for f in self.active_files],
                         'thinking_budget': self.thinking_budget,
                     }
+                    # Convert Content objects to serializable dictionaries
+                    serializable_history = []
+                    for content in self.conversation_history:
+                        # Ensure parts is a list of strings
+                        parts_text = [part.text for part in content.parts if hasattr(part, 'text') and part.text is not None]
+                        serializable_history.append({
+                            'role': content.role,
+                            'parts': parts_text
+                        })
+                    save_state['conversation_history'] = serializable_history
                     try:
                         with open(save_path, 'w') as f:
                             json.dump(save_state, f, indent=2)
@@ -243,7 +418,7 @@ class CodeAgent:
                     else:
                         print("\n⚠️ Usage: /load <filename>")
                         continue
-                    load_path = self.saved_conversations_dir / filename
+                    load_path = Path(__file__).parent.parent / self.config.get('SAVED_CONVERSATIONS_DIRECTORY', 'SAVED_CONVERSATIONS/') / filename
                     if not load_path.is_file():
                         print(f"\n❌ File not found: {filename}")
                         continue
@@ -251,7 +426,21 @@ class CodeAgent:
                         with open(load_path, 'r') as f:
                             load_state = json.load(f)
                         # Restore state
-                        self.conversation_history = load_state.get('conversation_history', [])
+                        reconstructed_history = []
+                        if 'conversation_history' in load_state and isinstance(load_state['conversation_history'], list):
+                            for item in load_state['conversation_history']:
+                                if isinstance(item, dict) and 'role' in item and 'parts' in item and isinstance(item['parts'], list):
+                                    # Recreate Parts from the list of text strings
+                                    parts = [glm.Part(text=part_text) for part_text in item['parts'] if isinstance(part_text, str)]
+                                    # Recreate Content object
+                                    content = glm.Content(role=item['role'], parts=parts)
+                                    reconstructed_history.append(content)
+                                else:
+                                    logger.warning(f"Skipping invalid item in loaded history: {item}")
+                        else:
+                             logger.warning(f"'conversation_history' key missing or not a list in {filename}")
+
+                        self.conversation_history = reconstructed_history
                         self.current_token_count = load_state.get('current_token_count', 0)
                         self.prompt_time_counts = load_state.get('prompt_time_counts', [0])
                         self.messages_per_interval = load_state.get('messages_per_interval', [0])
@@ -386,11 +575,32 @@ class CodeAgent:
                         print(f"🚨 An unexpected error occurred during /clear: {e}")
                     continue # Skip sending this command to the model

+                # --- /prompt command ---
+                elif user_input.lower().startswith("/prompt "):
+                    if len(user_input.split()) == 2:
+                        prompt_name = user_input.split()[1]
+                        prompt_content = self._load_prompt(prompt_name)
+                        if prompt_content:
+                             self.pending_prompt = prompt_content
+                             print(f"\n✅ Prompt '{prompt_name}' loaded. It will be included in your next message.")
+                             # Optionally print truncated prompt:
+                             # print(f"   Content: {prompt_content[:100]}...")
+                        else:
+                             available = self._list_available_prompts()
+                             print(f"\n❌ Prompt '{prompt_name}' not found.")
+                             if available:
+                                 print(f"   Available prompts: {', '.join(available)}")
+                             else:
+                                 print("   No prompts found in the prompts directory.")
+                    else:
+                        print("\nUsage: /prompt <prompt_name>")
+                    continue # Skip sending command to model
+
                 # --- Prepare message content (Text + Files) ---
-                message_content = [user_input] # Start with user text
+                message_content = [message_to_send] # Start with user text
                 if self.active_files:
                     message_content.extend(self.active_files) # Add file objects
-                    if self.verbose:
+                    if self.config.get('verbose', DEFAULT_CONFIG['verbose']):
                         print(f"\n📎 Attaching {len(self.active_files)} files to the prompt:")
                         for f in self.active_files:
                             print(f"   - {f.display_name} ({f.name})")
@@ -398,7 +608,7 @@ class CodeAgent:
                 # --- Update manual history (for token counting ONLY - Use Text Only) ---
                 # Add user message BEFORE sending to model
                 # Store only the text part for history counting simplicity
-                new_user_content = types.Content(parts=[types.Part(text=user_input)], role="user")
+                new_user_content = glm.Content(parts=[glm.Part(text=message_to_send)], role="user")
                 self.conversation_history.append(new_user_content)

                 # --- Send Message ---
@@ -413,34 +623,66 @@ class CodeAgent:
                     config=tool_config
                 )

-                # --- Update manual history and calculate new token count AFTER response ---
-                agent_response_content = None
-                response_text = "" # Initialize empty response text
+                agent_response_text = ""
                 if response.candidates and response.candidates[0].content:
                     agent_response_content = response.candidates[0].content
-                    # Ensure we extract text even if other parts exist (e.g., tool calls)
-                    if agent_response_content.parts:
-                         # Simple concatenation of text parts for history
-                         response_text = " ".join(p.text for p in agent_response_content.parts if hasattr(p, 'text'))
-                    self.conversation_history.append(agent_response_content)
+                    # Extract text from all parts for printing
+                    agent_response_text = " ".join(p.text for p in agent_response_content.parts if hasattr(p, 'text'))
+
+                if agent_response_text: # Only append if there's text
+                    history_agent_content = glm.Content(role="model", parts=[glm.Part(text=agent_response_text)])
+                    self.conversation_history.append(history_agent_content)
+                    logger.debug(f"Appended model text response to history: Role={history_agent_content.role}")
                 else:
-                    print("\n⚠️ Agent response did not contain content for history/counting.")
+                    # Log if the response had candidates/content but no text could be extracted
+                    logger.warning("Agent response content found, but no text parts to add to history.")

                 # Print agent's response text to user
-                # Use the extracted response_text or response.text as fallback
-                print(f"\n🟢 \x1b[92mAgent:\x1b[0m {response_text or response.text}")
+                print(f"\n🟢 \x1b[92mAgent:\x1b[0m {agent_response_text or '[No response text]'}")
+
+                # --- Detailed History Logging Before Token Count ---
+                logger.debug(f"Inspecting conversation_history (length: {len(self.conversation_history)}) before count_tokens:")
+                history_seems_ok = True
+                for i, content in enumerate(self.conversation_history):
+                    logger.debug(f"  [{i}] Role: {getattr(content, 'role', 'N/A')}")
+                    if hasattr(content, 'parts'):
+                        for j, part in enumerate(content.parts):
+                            part_type = type(part)
+                            part_info = f"Part {j}: Type={part_type.__name__}"
+                            if hasattr(part, 'text'):
+                                part_info += f", Text='{part.text[:50]}...'"
+                            elif hasattr(part, 'file_data'):
+                                part_info += f", FileData URI='{getattr(part.file_data, 'file_uri', 'N/A')}'"
+                                history_seems_ok = False # Found a file part!
+                                logger.error(f"    🚨 ERROR: Found unexpected file_data part in history for token counting: {part_info}")
+                            elif hasattr(part, 'function_call'):
+                                part_info += f", FunctionCall Name='{getattr(part.function_call, 'name', 'N/A')}'"
+                                history_seems_ok = False # Found a function call part!
+                                logger.error(f"    🚨 ERROR: Found unexpected function_call part in history for token counting: {part_info}")
+                            else:
+                                # Log other unexpected part types
+                                history_seems_ok = False
+                                logger.error(f"    🚨 ERROR: Found unexpected part type in history for token counting: {part_info}")
+                            logger.debug(f"    {part_info}")
+                    else:
+                        logger.warning(f"  [{i}] Content object has no 'parts' attribute.")
+                if history_seems_ok:
+                    logger.debug("History inspection passed: Only text parts found.")
+                else:
+                    logger.error("History inspection FAILED: Non-text parts found. Token counting will likely fail.")
+                # --- End Detailed History Logging ---

-                # Calculate and store token count for the *next* prompt
-                try:
-                    # Get token count via the models endpoint
+                # Calculate and display token count using client.models
+                try: # Inner try specifically for token counting
                     token_count_response = self.client.models.count_tokens(
                         model=self.model_name,
-                        contents=self.chat.history
+                        contents=self.conversation_history
                     )
                     self.current_token_count = token_count_response.total_tokens
-                except Exception as count_error:
-                    # Don't block interaction if counting fails, just report it and keep old count
-                    print(f"\n⚠️ \x1b[93mCould not update token count: {count_error}\x1b[0m")
+                    print(f"\n[Token Count: {self.current_token_count}]" )
+                except Exception as count_err:
+                    logger.error(f"Error calculating token count: {count_err}", exc_info=True)
+                    print("🚨 Error: Failed to calculate token count.")

             except KeyboardInterrupt:
                 print("\n👋 Goodbye!")
@@ -468,20 +710,22 @@ class CodeAgent:
         if not self.pdfs_dir_abs_path:
             print("\n⚠️ PDF directory not configured. Cannot process PDFs.")
             return None
-        # Ensure db_path and blob_dir are Path objects if loaded from config
-        db_path = self.db_path if isinstance(self.db_path, Path) else Path(self.db_path)
-        blob_dir = self.blob_dir if isinstance(self.blob_dir, Path) else Path(self.blob_dir)
+
+        # Ensure blob_dir is a Path object (already resolved in __init__)
+        blob_dir = self.blob_dir

-        if not db_path:
-             print("\n⚠️ Database path not configured. Cannot save PDF metadata.")
+        # Use the connection established during initialization
+        if not self.conn:
+             print("\n⚠️ Database connection not available. Cannot save PDF metadata.")
              return None
         if not blob_dir:
              print("\n⚠️ Blob directory not configured. Cannot save extracted text.")
              return None
-        # Gemini client check might depend on whether processing happens here or elsewhere
-        # if not self.client:
-        #     print("Gemini client not initialized. Cannot process PDF.")
-        #     return
+
+        # Gemini client check - ensure it's ready if needed
+        if self.pdf_processing_method == 'Gemini' and not self.client:
+            print("\n⚠️ Gemini client not initialized, but required for Gemini processing. Cannot process PDF.")
+            return None

         if len(args) < 1:
             print("\n⚠️ Usage: /pdf <filename> [optional: arxiv_id]")
@@ -508,145 +752,148 @@ class CodeAgent:
         print(f"\n⏳ Processing PDF: {filename}...")

         paper_id = None
-        conn = None
+        # Remove the local try/except for connection, use self.conn directly
         try:
-            conn = database.get_db_connection(db_path)
-            if not conn:
-                print("\n⚠️ Error: Failed to connect to the database.")
-                return None
-
-            paper_id = database.add_minimal_paper(conn, filename)
+            # --- Database Operations using self.conn ---
+            paper_id = database.add_minimal_paper(self.conn, filename)
             if not paper_id:
                 print("\n⚠️ Error: Failed to create initial database record.")
+                # No connection to close here
                 return None

             print(f"  📄 Created database record with ID: {paper_id}")

             if arxiv_id_arg:
                 if isinstance(arxiv_id_arg, str) and len(arxiv_id_arg) > 5:
-                    database.update_paper_field(conn, paper_id, 'arxiv_id', arxiv_id_arg)
+                    database.update_paper_field(self.conn, paper_id, 'arxiv_id', arxiv_id_arg)
                     print(f"     Updated record with provided arXiv ID: {arxiv_id_arg}")
                 else:
                     print(f"  ⚠️ Warning: Provided arXiv ID '{arxiv_id_arg}' seems invalid. Skipping update.")
+            # --- End Database Operations ---

-            print(f"  ⚙️  Extracting text from '{filename}' (using placeholder)...")
+            print(f"  ⚙️  Extracting text from '{filename}' (using {self.pdf_processing_method})...")
             extracted_text = None
             try:
-                try:
-                    from pypdf import PdfReader
-                    reader = PdfReader(pdf_path)
-                    extracted_text = ""
-                    for page in reader.pages:
-                        extracted_text += page.extract_text() + "\n"
-                    if not extracted_text:
-                        print("  ⚠️ Warning: PyPDF fallback extracted no text.")
-                        extracted_text = f"Placeholder: No text extracted via PyPDF for {filename}"
-                except Exception as pypdf_err:
-                    print(f"  ⚠️ PyPDF fallback failed: {pypdf_err}. Using basic placeholder.")
-                    extracted_text = f"Placeholder: Error during PyPDF extraction for {filename}"
-
-                if not extracted_text:
-                    raise ValueError("Text extraction resulted in empty content.")
+                # TODO: Implement fallback to pypdf if configured or if gemini fails?
+                if self.pdf_processing_method == 'Gemini':
+                     extracted_text = tools.extract_text_from_pdf_gemini(pdf_path, self.client, self.model_name)
+                # Add other methods like pypdf here if needed
+                # elif self.pdf_processing_method == 'pypdf':
+                #    extracted_text = tools.extract_text_from_pdf_pypdf(pdf_path) # Assuming this exists
+                else:
+                    raise ValueError(f"Unsupported pdf_processing_method: {self.pdf_processing_method}")
+
+                if not extracted_text: # Check if extraction failed
+                    raise ValueError("Text extraction resulted in empty or failed content.")

             except Exception as extract_err:
-                print(f"\n⚠️ Error during text extraction: {extract_err}")
-                database.update_paper_field(conn, paper_id, 'status', 'error_process')
-                conn.close()
+                print(f"\n⚠️ Error during text extraction ({self.pdf_processing_method}): {extract_err}")
+                database.update_paper_field(self.conn, paper_id, 'status', 'error_process')
+                # No conn.close() needed here
                 return None

-            print(f"  💬 Successfully extracted text ({len(extracted_text)} chars) [Placeholder/Fallback].")
+            print(f"  💬 Successfully extracted text ({len(extracted_text)} chars) [{self.pdf_processing_method}].")

             blob_filename = f"paper_{paper_id}_text.txt"
             blob_full_path = blob_dir / blob_filename
-            blob_rel_path = blob_filename
+            blob_rel_path = blob_filename # Store relative path in DB

             print(f"  💾 Saving extracted text to {blob_full_path}...")
             save_success = tools.save_text_blob(blob_full_path, extracted_text)

             if not save_success:
                 print("\n⚠️ Error: Failed to save text blob.")
-                database.update_paper_field(conn, paper_id, 'status', 'error_blob')
-                conn.close()
+                database.update_paper_field(self.conn, paper_id, 'status', 'error_blob')
+                # No conn.close() needed here
                 return None
             else:
                 print(f"     Successfully saved text blob.")
-                update_success = database.update_paper_field(conn, paper_id, 'blob_path', blob_rel_path)
+                update_success = database.update_paper_field(self.conn, paper_id, 'blob_path', blob_rel_path)
                 if not update_success:
                     print(f"  ⚠️ Warning: Failed to update blob_path in database for ID {paper_id}.")
                     # Log and continue, as text is saved locally

-            update_success = database.update_paper_field(conn, paper_id, 'status', 'complete')
-            if update_success:
-                print(f"\n✅ Processing complete for '{filename}' (ID: {paper_id}).")
+            update_success = database.update_paper_field(self.conn, paper_id, 'status', 'processed') # Mark as processed before adding to history
+            if not update_success:
+                 logger.warning(f"Failed to update status to processed for paper ID {paper_id}")
+                 # Continue anyway, try to add to history
+
+            # Store context to be prepended next turn
+            context_header = f"CONTEXT FROM PDF ('{filename}', ID: {paper_id}):\n---"
+            if MAX_PDF_CONTEXT_LENGTH:
+                max_text_len = MAX_PDF_CONTEXT_LENGTH - len(context_header) - 50 # Reserve space for header and separators
+                truncated_text = extracted_text[:max_text_len]
+                if len(extracted_text) > max_text_len:
+                    truncated_text += "\n... [TRUNCATED]"
             else:
-                print(f"\n⚠️ Warning: Failed to update final status to 'complete' for ID {paper_id}.")
-
-            conn.close()
-            return paper_id
-
-        except Exception as e:
-            logger.error(f"An error occurred during PDF processing for '{filename}' (ID: {paper_id}): {e}", exc_info=True)
-            print(f"\n❌ An unexpected error occurred during processing: {e}")
-            if conn and paper_id:
+                truncated_text = extracted_text
+
+            self.pending_pdf_context = f"{context_header}\n{truncated_text}\n---"
+            logger.info(f"Stored context from {filename} (ID: {paper_id}) to be prepended next turn ({len(self.pending_pdf_context)} chars).")
+            print(f"\n✅ PDF processed. Content will be added to context on your next message. (ID: {paper_id}) ")
+            database.update_paper_field(self.conn, paper_id, 'status', 'processed_pending_context')
+
+            return paper_id
+        except Exception as e: # Reinstate the main exception handler for the function's try block
+            logger.error(f"An error occurred during PDF processing for '{filename}': {e}", exc_info=True)
+            if paper_id and self.conn:
                 try:
-                    database.update_paper_field(conn, paper_id, 'status', 'error_generic')
+                    # Attempt to mark as error if possible
+                    database.update_paper_field(self.conn, paper_id, 'status', 'error_process')
                 except Exception as db_err:
-                    logger.error(f"Failed to update error status for {paper_id}: {db_err}", exc_info=True)
-            if conn:
-                conn.close()
-            return None
-
-        finally:
-            if conn:
-                conn.close()
+                    logger.error(f"Additionally failed to update status to error for paper {paper_id}: {db_err}")
+            # No conn.close() needed here
+            print(f"\n❌ An unexpected error occurred: {e}")
+            return None # Return None on major processing error

-    def _handle_list_command(self, args: list):
-        """Handles the /list command to show papers in the database."""
-        # ... (rest of the code remains the same)
-
-# --- Main Execution ---
 def main():
-    parser = argparse.ArgumentParser(description="Run the Code Agent")
-    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose tool logging')
-    args = parser.parse_args()
-    config = load_config()
-    print("🚀 Starting Code Agent...")
-    api_key = os.getenv('GEMINI_API_KEY')
-    if not api_key:
-        print("\n⚠️ No API key found in env, trying config.yaml.")
-        api_key = config.get('api_key', DEFAULT_CONFIG['api_key'])
-    if not api_key:
-        print("\n❌ No API key found. Please set the GEMINI_API_KEY environment variable.")
+    config_path = Path('src/config.yaml')
+    config = load_config(config_path)
+    if not config:
         sys.exit(1)

-    # Make project_root available to the tools module if needed indirectly
-    # (Though direct definition in tools.py is preferred)
-    # import src.tools
-    # src.tools.project_root = project_root
-
-    # Configure logging level based on verbose flag
-    level = logging.DEBUG if args.verbose else logging.WARNING
-    logging.basicConfig(format="%(levelname)s:%(name)s:%(message)s", level=level)
-    # Suppress verbose logs from external libraries
-    logging.getLogger('google_genai').setLevel(level)
-    logging.getLogger('browser_use').setLevel(level)
-    logging.getLogger('agent').setLevel(level)
-    logging.getLogger('controller').setLevel(level)
-
-    # Resolve PDF directory relative to project root
-    project_root = Path(__file__).parent.parent # Assuming script is in src/
-    pdf_dir_path = project_root / config.get('PDFS_TO_CHAT_WITH_DIRECTORY', DEFAULT_CONFIG['PDFS_TO_CHAT_WITH_DIRECTORY'])
-
-    agent = CodeAgent(
-        model_name=config.get('model_name', DEFAULT_CONFIG['model_name']),
-        verbose=args.verbose or config.get('verbose', DEFAULT_CONFIG['verbose']),
-        api_key=api_key,
-        default_thinking_budget=config.get('default_thinking_budget', DEFAULT_CONFIG['default_thinking_budget']),
-        pdf_dir=str(pdf_dir_path), # Pass absolute path to agent
-        db_path=config.get('PAPER_DB_PATH', DEFAULT_CONFIG['PAPER_DB_PATH']),
-        blob_dir=config.get('PAPER_BLOBS_DIR', DEFAULT_CONFIG['PAPER_BLOBS_DIR'])
-    )
-    agent.start_interaction()
+    print_welcome_message(config)
+
+    # --- Database Setup ---
+    db_path_str = config.get('PAPER_DB_PATH')
+    conn = None
+    if db_path_str:
+        db_path = Path(db_path_str).resolve()
+        logger.info(f"Attempting to connect to database: {db_path}")
+        conn = database.get_db_connection(db_path)
+        if conn:
+            try:
+                 # Ensure tables exist
+                 database.create_tables(conn)
+                 logger.info("Database tables checked/created successfully.")
+            except Exception as db_init_err:
+                 logger.error(f"Failed to initialize database tables: {db_init_err}", exc_info=True)
+                 database.close_db_connection(conn)
+                 conn = None # Prevent agent from using bad connection
+                 print("\n⚠️ CRITICAL: Failed to initialize database. Exiting.")
+                 sys.exit(1)
+        else:
+            print("\n⚠️ Warning: Failed to establish database connection. Proceeding without database features.")
+    else:
+        print("\n⚠️ Warning: 'PAPER_DB_PATH' not specified in config.yaml. Proceeding without database features.")
+    # --- End Database Setup ---
+
+    try:
+        # Pass the established connection (or None) to the agent
+        agent = CodeAgent(config=config, conn=conn)
+        agent.start_interaction()
+    except KeyboardInterrupt:
+        print("\nExiting...")
+    except Exception as e:
+        logger.error(f"An unexpected error occurred in the main loop: {e}", exc_info=True)
+        print(f"\n❌ An unexpected error occurred: {e}")
+    finally:
+        # Ensure database connection is closed on exit
+        if conn:
+             logger.info("Closing database connection...")
+             database.close_db_connection(conn)
+             logger.info("Database connection closed.")
+        print("\nGoodbye!")

 if __name__ == "__main__":
     main()
\ No newline at end of file