-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfix_image_wrapping.py
More file actions
155 lines (128 loc) · 6.18 KB
/
fix_image_wrapping.py
File metadata and controls
155 lines (128 loc) · 6.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
This script patches utils/pdf_processor.py so that inserted images use
"In Front of Text" wrapping, maximizing image size while avoiding covering
important text.
"""
import os
import logging
import re
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Source text of the helper that gets injected into utils/pdf_processor.py.
# It is self-contained (imports what it needs locally) so it works no matter
# which names the target module already has in scope.
_HELPER_FUNCTION = '''
def set_image_in_front_of_text(run):
    """
    Set the image in a run to be positioned in front of text
    (allows for larger images that can extend into margins)

    Args:
        run: The run containing the image

    Returns:
        True if the inline image was converted to a floating anchor,
        False if the run holds no inline image or the conversion failed
    """
    import logging

    log = logging.getLogger(__name__)
    try:
        from docx.oxml import OxmlElement

        # Get the drawing element (only exists if the run contains an image)
        drawing_element = None
        for child in run._element:
            if child.tag.endswith('drawing'):
                drawing_element = child
                break
        if drawing_element is None:
            return False

        inline = drawing_element.find(
            './/wp:inline',
            namespaces={'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'},
        )
        if inline is None:
            return False

        # Change inline to anchor to enable text wrapping
        anchor = OxmlElement('wp:anchor')
        anchor.set('distT', '0')
        anchor.set('distB', '0')
        anchor.set('distL', '0')
        anchor.set('distR', '0')
        anchor.set('simplePos', '0')
        anchor.set('relativeHeight', '1')
        anchor.set('behindDoc', '0')
        anchor.set('locked', '0')
        anchor.set('layoutInCell', '1')
        anchor.set('allowOverlap', '1')

        # Copy the remaining attributes from inline to anchor
        for key, value in inline.attrib.items():
            if key not in ('distT', 'distB', 'distL', 'distR'):
                anchor.set(key, value)

        # wp:anchor children must follow the schema order:
        # simplePos, positionH, positionV, extent, [effectExtent],
        # wrap*, docPr, cNvGraphicFramePr, graphic
        simple_pos = OxmlElement('wp:simplePos')
        simple_pos.set('x', '0')
        simple_pos.set('y', '0')
        anchor.append(simple_pos)

        # Position relative to page (not margin)
        pos_h = OxmlElement('wp:positionH')
        pos_h.set('relativeFrom', 'page')
        pos_h_align = OxmlElement('wp:align')
        pos_h_align.text = 'center'
        pos_h.append(pos_h_align)
        anchor.append(pos_h)

        pos_v = OxmlElement('wp:positionV')
        pos_v.set('relativeFrom', 'page')
        pos_v_offset = OxmlElement('wp:posOffset')
        # Position slightly down from the top of the page to avoid headers
        pos_v_offset.text = '1250000'  # EMUs (English Metric Units)
        pos_v.append(pos_v_offset)
        anchor.append(pos_v)

        # Move the inline's children over, slotting wrapNone ("in front of
        # text") right after extent/effectExtent as the schema requires.
        wrap_added = False
        for child in list(inline):
            local_name = child.tag.rsplit('}', 1)[-1]
            if not wrap_added and local_name not in ('extent', 'effectExtent'):
                anchor.append(OxmlElement('wp:wrapNone'))
                wrap_added = True
            anchor.append(child)
        if not wrap_added:
            anchor.append(OxmlElement('wp:wrapNone'))

        # Replace the inline element with our new anchor element
        drawing_element.remove(inline)
        drawing_element.append(anchor)
        log.debug("Successfully set image to 'In Front of Text'")
        return True
    except Exception as e:
        log.warning("Error setting image wrapping: %s", e)
        return False
'''

# First top-level definition (or decorator) in the target module.
_TOP_LEVEL_DEF = re.compile(r"^(?:@|(?:async[ \t]+)?def[ \t]|class[ \t])", re.MULTILINE)

# A line containing `<receiver>.add_picture(` where the receiver ends in `doc`
# (e.g. `doc` or `self.doc`). Group 1 is the line's real indentation, group 2
# the receiver expression. Commented-out calls are not matched.
_ADD_PICTURE_CALL = re.compile(
    r"^([ \t]*)[^\n#]*?\b((?:\w+\.)*doc)\.add_picture\(", re.MULTILINE
)

# Optional blank/comment-only lines followed by the wrapping call.
_ALREADY_WRAPPED = re.compile(
    r"(?:[ \t]*(?:#[^\n]*)?\n)*[ \t]*set_image_in_front_of_text\("
)


def _find_call_end(source, open_paren):
    """Return the index just past the bracket closing source[open_paren], or -1.

    Nested brackets are balanced; brackets inside string literals and
    comments are ignored.
    """
    depth = 0
    quote = None
    length = len(source)
    i = open_paren
    while length > i:
        ch = source[i]
        if quote is not None:
            if ch == "\\":
                i += 1  # skip the escaped character
            elif ch == quote:
                quote = None
        elif ch in "'\"":
            quote = ch
        elif ch == "#":
            # Comment inside a multi-line call: ignore up to end of line.
            newline = source.find("\n", i)
            if newline == -1:
                return -1
            i = newline
        elif ch in "([{":
            depth += 1
        elif ch in ")]}":
            depth -= 1
            if depth == 0:
                return i + 1
        i += 1
    return -1


def implement_image_wrapping():
    """Implement 'In Front of Text' wrapping for images in the Word document.

    Patches ``utils/pdf_processor.py`` (relative to the current working
    directory) in two steps:

    1. Inserts the ``set_image_in_front_of_text`` helper before the first
       top-level definition, unless the helper is already present.
    2. Adds a ``set_image_in_front_of_text(<doc>.paragraphs[-1].runs[-1])``
       call after every ``<doc>.add_picture(...)`` statement that does not
       already have one.

    The operation is idempotent: running it again leaves the file unchanged.

    Returns:
        True when the file is patched (or already up to date), False when the
        target file is missing or no insertion point for the helper exists.
    """
    pdf_processor_path = "utils/pdf_processor.py"

    if not os.path.exists(pdf_processor_path):
        logger.error(f"Could not find {pdf_processor_path}")
        return False

    logger.info(f"Found pdf_processor.py at {pdf_processor_path}")

    # Read the file content
    with open(pdf_processor_path, 'r', encoding='utf-8') as f:
        original = f.read()
    content = original

    # Step 1: make sure the helper function exists in the target module.
    if "def set_image_in_front_of_text" not in content:
        first_def = _TOP_LEVEL_DEF.search(content)
        if first_def is None:
            logger.error("Could not find an appropriate location to insert the helper function")
            return False

        # Prefer the blank line preceding the first definition so comments
        # attached to it stay attached; fall back to the definition itself
        # when there is no blank line (rfind returns -1).
        blank_line = content.rfind("\n\n", 0, first_def.start())
        insert_pos = blank_line if blank_line != -1 else first_def.start()
        sections = (
            content[:insert_pos].rstrip("\n"),
            _HELPER_FUNCTION.strip("\n"),
            content[insert_pos:].lstrip("\n"),
        )
        content = "\n\n\n".join(section for section in sections if section)
        logger.info("Added set_image_in_front_of_text helper function")
    else:
        logger.info("Helper function already exists")

    # Step 2: add the wrapping call after each image insertion. The new text
    # is assembled from slices of `content`, so match offsets stay valid.
    pieces = []
    cursor = 0
    for match in _ADD_PICTURE_CALL.finditer(content):
        if cursor > match.start():
            # Nested inside a statement that was already handled.
            continue

        indentation, receiver = match.group(1), match.group(2)
        call_end = _find_call_end(content, match.end() - 1)
        if call_end == -1:
            logger.warning(
                f"Skipping unbalanced add_picture call: {match.group(0).strip()}"
            )
            continue

        line_end = content.find("\n", call_end)
        if line_end == -1:
            line_end = len(content)
        statement = content[match.start():line_end].strip()

        # Skip if we've already added wrapping after this statement
        if _ALREADY_WRAPPED.match(content, line_end + 1):
            logger.info(f"Wrapping already applied to: {statement}")
            continue

        pieces.append(content[cursor:line_end])
        pieces.append(
            f"\n{indentation}# Apply 'In Front of Text' wrapping to maximize image size\n"
            f"{indentation}set_image_in_front_of_text({receiver}.paragraphs[-1].runs[-1])"
        )
        cursor = line_end
        logger.info(f"Added wrapping to: {statement}")
    pieces.append(content[cursor:])
    content = "".join(pieces)

    # Write the modified content back to the file (only when it changed)
    if content != original:
        with open(pdf_processor_path, 'w', encoding='utf-8') as f:
            f.write(content)

    logger.info("Successfully implemented 'In Front of Text' wrapping for images")
    return True
if __name__ == "__main__":
    # implement_image_wrapping() returns False when the target file is missing
    # or cannot be patched; surface that as a non-zero exit status so shells
    # and CI jobs can detect the failure.
    raise SystemExit(0 if implement_image_wrapping() else 1)