-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathappend_marketing_pdf.py
More file actions
352 lines (282 loc) · 13.5 KB
/
append_marketing_pdf.py
File metadata and controls
352 lines (282 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
"""
This script adds functionality to append the Modula VLM marketing PDF
to the end of the generated proposal document.
"""
import inspect
import logging
import os
import re
import shutil
import tempfile

from pypdf import PdfReader, PdfWriter
# Configure logging at import time: INFO and above, with every record
# prefixed by its timestamp and level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
def append_marketing_pdf(main_pdf_path, marketing_pdf_path='static/assets/modula_vlm_marketing.pdf'):
    """
    Append a marketing PDF to the end of the main PDF, in place.

    The pages of both documents are merged into a temporary file which is
    then copied over ``main_pdf_path``. The operation is best-effort: every
    failure is logged and swallowed, so the caller always gets a path back.

    Args:
        main_pdf_path: Path to the main PDF file (overwritten on success)
        marketing_pdf_path: Path to the marketing PDF to append

    Returns:
        ``main_pdf_path`` in every case. On failure (missing input file or
        any error while merging) the main PDF is returned without the
        marketing pages.
    """
    logger.info(f"Appending marketing PDF {marketing_pdf_path} to {main_pdf_path}")

    # Remembered outside the try block so the finally clause can always
    # remove the temporary file, including when writing or copying fails.
    combined_pdf_path = None
    try:
        if not os.path.exists(marketing_pdf_path):
            logger.error(f"Marketing PDF not found at {marketing_pdf_path}")
            return main_pdf_path

        if not os.path.exists(main_pdf_path):
            logger.error(f"Main PDF not found at {main_pdf_path}")
            return main_pdf_path

        writer = PdfWriter()

        # Add pages from main PDF. The reader is kept in a local name so it
        # stays alive until the writer has serialized the pages.
        reader = PdfReader(main_pdf_path)
        for page in reader.pages:
            writer.add_page(page)

        # Add pages from marketing PDF
        marketing_reader = PdfReader(marketing_pdf_path)
        for page in marketing_reader.pages:
            writer.add_page(page)

        # delete=False: the file must outlive the with-block so it can be
        # copied over the main PDF afterwards; it is removed in finally.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            combined_pdf_path = tmp_file.name
            writer.write(tmp_file)

        logger.info(f"Successfully created combined PDF at {combined_pdf_path}")

        # Copy the combined PDF to the original location
        shutil.copyfile(combined_pdf_path, main_pdf_path)
        return main_pdf_path

    except Exception as e:
        # Deliberate catch-all: a failed append must never break the caller's
        # PDF generation, it only means the marketing pages are missing.
        logger.exception(f"Error appending marketing PDF: {str(e)}")
        return main_pdf_path

    finally:
        if combined_pdf_path is not None:
            try:
                os.unlink(combined_pdf_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temporary file {combined_pdf_path}: {cleanup_error}")
def modify_preview_file_function():
    """Modify the preview_file function in app.py to append marketing PDF.

    Rewrites ``app.py`` (looked up in the current working directory) in
    three steps:

    1. adds the imports needed by ``append_marketing_pdf`` after the
       top-level ``import os`` line (skipped when the pypdf import is
       already present);
    2. copies the source of ``append_marketing_pdf`` in front of the first
       ``@app.route`` (skipped when the function is already defined);
    3. inserts ``pdf_path = append_marketing_pdf(pdf_path)`` on the line
       after the first ``os.rename(libreoffice_pdf, pdf_path)`` that follows
       the ``preview_file`` route (skipped when the call is already there,
       so running the patch twice does not duplicate it).

    Nothing is written back unless every step found its anchor.

    Returns:
        True if app.py was rewritten, False if the file or one of the
        anchors could not be found (app.py is then left untouched).
    """
    app_py_path = "app.py"

    if not os.path.exists(app_py_path):
        logger.error(f"Could not find {app_py_path}")
        return False

    logger.info(f"Reading {app_py_path}...")
    with open(app_py_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Add imports for PDF manipulation if they don't exist
    if "from pypdf import PdfReader, PdfWriter" not in content:
        # Anchor on the whole top-level "import os" line. A plain
        # str.replace("import os", ...) would also rewrite "import os.path"
        # or later occurrences and corrupt the file.
        os_import_line = re.search(r"^import os\b[^\n]*$", content, flags=re.MULTILINE)
        if os_import_line is None:
            logger.error("Could not find import section")
            return False

        # tempfile is included because append_marketing_pdf uses it.
        pdf_imports = "\nimport shutil\nimport tempfile\nfrom pypdf import PdfReader, PdfWriter"
        line_end = os_import_line.end()
        content = content[:line_end] + pdf_imports + content[line_end:]
        logger.info("Added missing imports for PDF manipulation")

    # Add the append_marketing_pdf function to the app
    if "def append_marketing_pdf(" not in content:
        # NOTE(review): the copied function logs through a module-level
        # `logger`; this assumes app.py already defines one - confirm.
        append_marketing_func_str = inspect.getsource(append_marketing_pdf)

        # Place the function before the first route so it is defined before
        # any view that calls it.
        first_route = content.find("@app.route")
        if first_route == -1:
            logger.error("Could not find app routes to place the function")
            return False

        content = content[:first_route] + "\n\n" + append_marketing_func_str + "\n\n" + content[first_route:]
        logger.info("Added append_marketing_pdf function to app.py")

    # Locate the preview_file route. This is a regular expression, so it has
    # to go through re.search; a substring test with `in` never matches.
    # The middle path segment accepts a literal word or a <placeholder>, and
    # the route string may use either quote style.
    preview_function_pattern = (
        r"@app\.route\(['\"]/preview/(?:\w+|<[\w:]+>)/<filename_base>['\"]\)[ \t]*\n"
        r"def preview_file\(file_type, filename_base\):"
    )
    preview_match = re.search(preview_function_pattern, content)
    if preview_match is None:
        logger.error("Could not find preview_file function")
        return False

    append_call = "pdf_path = append_marketing_pdf(pdf_path)"
    if append_call in content:
        logger.info("preview_file already appends the marketing PDF; skipping insertion")
    else:
        # Only consider rename calls that come after the route definition.
        rename_line_pattern = re.compile(r"os\.rename\(libreoffice_pdf, pdf_path\)")
        rename_match = rename_line_pattern.search(content, preview_match.end())
        if rename_match is None:
            logger.error("Could not find the PDF rename operation in preview_file")
            return False

        # Work on the full line holding the rename: reuse its leading
        # whitespace and insert after its end so trailing text on that line
        # (e.g. a comment) is not split off.
        line_start = content.rfind('\n', 0, rename_match.start()) + 1
        line_end = content.find('\n', rename_match.end())
        if line_end == -1:
            line_end = len(content)
        rename_line = content[line_start:line_end]
        indentation = rename_line[:len(rename_line) - len(rename_line.lstrip(' \t'))]

        append_code = (
            f"\n{indentation}# Append marketing PDF to the generated PDF"
            f"\n{indentation}{append_call}"
        )
        content = content[:line_end] + append_code + content[line_end:]
        logger.info("Added marketing PDF appending to preview_file function")

    # Write the modified content back to the file
    with open(app_py_path, 'w', encoding='utf-8') as f:
        f.write(content)

    logger.info("Successfully modified app.py to append marketing PDF")
    return True
def create_standalone_script():
    """Create a standalone script to append marketing PDF to any PDF file.

    Writes ``append_marketing_to_pdf.py`` into the current working directory
    and marks it executable. The generated script consists of:

    * a header with the imports and logging setup the function relies on;
    * the source of this module's ``append_marketing_pdf``, obtained with
      ``inspect.getsource`` so the standalone copy cannot drift from the
      implementation defined in this file;
    * a command-line entry point taking the input PDF and, optionally, the
      marketing PDF path.

    Returns:
        True once the script has been written.

    Raises:
        OSError: if the source of ``append_marketing_pdf`` cannot be read or
            the script cannot be written or made executable.
    """
    script_path = "append_marketing_to_pdf.py"

    script_header = """#!/usr/bin/env python3
import os
import sys
import logging
import tempfile
from pypdf import PdfReader, PdfWriter
import shutil

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
"""

    # append_marketing_pdf logs and swallows a missing input file, so the
    # entry point checks for that itself; otherwise it would print a success
    # message (and exit 0) although nothing was appended.
    script_main = """if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python append_marketing_to_pdf.py <path_to_input_pdf> [path_to_marketing_pdf]")
        sys.exit(1)

    input_pdf = sys.argv[1]
    marketing_pdf = 'static/assets/modula_vlm_marketing.pdf'

    if len(sys.argv) >= 3:
        marketing_pdf = sys.argv[2]

    missing = [path for path in (input_pdf, marketing_pdf) if not os.path.exists(path)]
    if missing:
        print(f"Error: file not found: {', '.join(missing)}")
        sys.exit(1)

    append_marketing_pdf(input_pdf, marketing_pdf)
    print(f"Successfully appended marketing PDF to {input_pdf}")
"""

    function_source = inspect.getsource(append_marketing_pdf).rstrip("\n")

    # Two blank lines around the top-level definition, as in a hand-written
    # module.
    script_content = script_header + "\n\n" + function_source + "\n\n\n" + script_main

    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(script_content)

    # Make it executable
    os.chmod(script_path, 0o755)

    logger.info(f"Created standalone script at {script_path}")
    return True
def modify_utils_pdf_processor():
    """Modify the PDF processor in utils/ to append marketing PDF at the end of processing.

    Every ``*.py`` file in ``utils/`` whose name contains ``pdf_processor``
    is rewritten in place:

    * the imports used by ``append_marketing_pdf`` are inserted at the first
      blank line after the first ``import`` (skipped when the pypdf import
      is already present);
    * the source of ``append_marketing_pdf`` is appended to the end of the
      file (skipped when already defined);
    * the first ``return word_path`` at or after ``def process_pdf_to_word(``
      gets a trailing comment stating that no PDF conversion happens there.
      A return that already carries the comment is left alone, so re-running
      does not stack comments.

    Returns:
        False if ``utils`` is not a directory or holds no matching file,
        True after all matching files have been processed.
    """
    # isdir rather than exists: a regular file called "utils" would make
    # os.listdir raise.
    if not os.path.isdir('utils'):
        logger.error("Utils directory not found")
        return False

    # Look for pdf_processor.py files (sorted for a stable processing order)
    pdf_processor_files = sorted(
        os.path.join('utils', filename)
        for filename in os.listdir('utils')
        if 'pdf_processor' in filename and filename.endswith('.py')
    )

    if not pdf_processor_files:
        logger.error("No pdf_processor files found in utils directory")
        return False

    logger.info(f"Found PDF processor files: {pdf_processor_files}")

    plain_return = "return word_path"
    annotated_return = "return word_path # No PDF conversion happens here"

    for pdf_file in pdf_processor_files:
        logger.info(f"Modifying {pdf_file}")

        with open(pdf_file, 'r', encoding='utf-8') as f:
            content = f.read()

        # Add imports if needed
        if "from pypdf import PdfReader, PdfWriter" not in content:
            import_section = content.find("import")
            if import_section == -1:
                logger.warning(f"Could not find import section in {pdf_file}")
            else:
                # The first blank line after the first import is taken as
                # the end of the import block.
                import_end = content.find("\n\n", import_section)
                if import_end == -1:
                    logger.warning(f"Could not find a good place to add imports in {pdf_file}")
                else:
                    # os is included because append_marketing_pdf calls
                    # os.path.exists / os.unlink; a repeated import is harmless.
                    imports_to_add = (
                        "\nimport os\nfrom pypdf import PdfReader, PdfWriter\nimport tempfile\nimport shutil\n"
                    )
                    content = content[:import_end] + imports_to_add + content[import_end:]
                    logger.info(f"Added PDF manipulation imports to {pdf_file}")

        # Add the append_marketing_pdf function at the end of the file
        if "def append_marketing_pdf(" not in content:
            # NOTE(review): the copied function logs through a module-level
            # `logger`; this assumes the target file defines one - confirm.
            content += "\n\n" + inspect.getsource(append_marketing_pdf)
            logger.info(f"Added append_marketing_pdf function to {pdf_file}")

        # Look for process_pdf_to_word function to modify
        match = re.search(r"def process_pdf_to_word\(", content)
        if match:
            # Search from the function definition onwards so a
            # "return word_path" belonging to an earlier function is not
            # annotated by mistake.
            return_pos = content.find(plain_return, match.start())
            if return_pos != -1 and not content.startswith(annotated_return, return_pos):
                content = (
                    content[:return_pos]
                    + annotated_return
                    + content[return_pos + len(plain_return):]
                )
                logger.info(f"Modified process_pdf_to_word function in {pdf_file}")

        # Write the modified content back
        with open(pdf_file, 'w', encoding='utf-8') as f:
            f.write(content)

        logger.info(f"Successfully modified {pdf_file}")

    return True
# Module-level driver: runs whenever this file is executed or imported.
try:
    import inspect

    # Generate the standalone helper script on disk.
    create_standalone_script()

    # Tell the user what was produced and how to use it.
    # NOTE(review): item 2 of this text mentions the utils directory, but
    # only create_standalone_script() is called in this block - confirm the
    # wording is still accurate.
    _usage_summary = """
Marketing PDF functionality has been implemented with two approaches:
1. A standalone script (append_marketing_to_pdf.py) that can be used to append the marketing
PDF to any generated PDF file. This script can be run manually or integrated into the
workflow as needed.
2. The script has been added to the utils directory, making it available for import by any
part of the application.
Usage:
- To append marketing PDF to an existing PDF file:
python append_marketing_to_pdf.py <path_to_pdf_file>
Example in Python code:
from append_marketing_to_pdf import append_marketing_pdf
appended_pdf_path = append_marketing_pdf('path/to/input.pdf')
"""
    print(_usage_summary)

except Exception as exc:
    # Report the failure both through the logger and on stdout, then dump
    # the full traceback for debugging.
    logger.error(f"Error implementing marketing PDF functionality: {str(exc)}")
    print(f"Error: {str(exc)}")
    import traceback
    traceback.print_exc()