From 8ca8f56ff1df15ba124a3ab511508399ab3344f1 Mon Sep 17 00:00:00 2001 From: Diogo Fernandes Date: Sun, 22 Mar 2026 12:21:36 -0300 Subject: [PATCH] [Bug] Fix color parsing --- HISTORY.rst | 3 ++- html4docx/constants.py | 2 ++ html4docx/h4d.py | 33 ++++++++++++++++++++++++++++----- tests/assets/htmls/tables3.html | 10 +++++----- tests/test_h4d.py | 1 - 5 files changed, 37 insertions(+), 12 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index f595038..c824cdb 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -12,7 +12,8 @@ Release History **Fixes** -- None +- Fixes `#73 `_: Error parsing styles with spaces. | `dfop02 `_ +- Fixes `#71 `_: Error applying color to table cells. | `vvalchev `_ **New Features** diff --git a/html4docx/constants.py b/html4docx/constants.py index 3ef9de5..de06b3a 100644 --- a/html4docx/constants.py +++ b/html4docx/constants.py @@ -161,3 +161,5 @@ def default_borders(): re.compile(r'page-break-after\s*:\s*always\s*(?:!important)?\s*(?:;|$)'), re.compile(r'break-after\s*:\s*page\s*(?:!important)?\s*(?:;|$)'), ) + +RGB_SPACES_REGEX = re.compile(r'(rgba?\()([^)]+)(\))', re.IGNORECASE) diff --git a/html4docx/h4d.py b/html4docx/h4d.py index 8aaf471..a0d9276 100644 --- a/html4docx/h4d.py +++ b/html4docx/h4d.py @@ -325,6 +325,21 @@ def check_unit_keywords(value: str) -> str: lower_val = value.lower() return keywords.get(lower_val, value) + def normalize_rgb_spaces(value: str) -> str: + """ + Removes spaces inside rgb()/rgba() so it can be safely split. + Example: + rgb(222, 222, 222) -> rgb(222,222,222) + """ + + def _replace(match): + prefix, content, suffix = match.groups() + # remove spaces only inside the function + content = content.replace(' ', '') + return f"{prefix}{content}{suffix}" + + return constants.RGB_SPACES_REGEX.sub(_replace, value) + @lru_cache(maxsize=None) def border_unit_converter(unit_value: str): """Convert multiple units to pt that is used on Word table cell border""" @@ -360,10 +375,16 @@ def parse_border_value(value: str): Parses a border value like: '1px solid #000000', 'solid 1px red', or '#000000 medium dashed' in any order. """ - parts = value.split() + value = value.strip() + + # Return all default if there is only empty value + if not value or value == '': + return default_size, default_style, default_color + + parts = normalize_rgb_spaces(value).split() - # Return all default if there is only 'none' or empty - if (len(parts) == 1 and parts[0] == 'none') or (not value or value.strip() == ''): + # Return all default if there is only 'none' value + if len(parts) == 1 and parts[0].lower() == 'none': return default_size, default_style, default_color size = None @@ -1061,7 +1082,8 @@ def add_styles_to_table_cell(self, styles, doc_cell, cell_row): """Styles that must be applied specifically in a _Cell object""" # Set background color if 'background-color' in styles: - self.set_cell_background(doc_cell, styles['background-color']) + color = utils.parse_color(styles['background-color'], return_hex=True) + self.set_cell_background(doc_cell, color) # Set width (approximate, since DOCX uses different units) if 'width' in styles: @@ -1741,7 +1763,8 @@ def handle_comment(self, data): # Style: Green color to mimic HTML comment styling dark_ish_green = "#008000" - run.font.color.rgb = utils.parse_color(dark_ish_green) + dark_ish_green_color = utils.parse_color(dark_ish_green) + run.font.color.rgb = RGBColor(*dark_ish_green_color) run.italic = True # makes it feel more like a comment def ignore_nested_tables(self, tables_soup): diff --git a/tests/assets/htmls/tables3.html b/tests/assets/htmls/tables3.html index b7ff21d..4ec19c0 100644 --- a/tests/assets/htmls/tables3.html +++ b/tests/assets/htmls/tables3.html @@ -4,26 +4,26 @@ width="641"> -

CATEGORY

+ rgba(250, 195, 42, 1);border-left-style:none;border-right:1em solid #fac32a;border-top:1.0pt solid + rgb(250, 195, 42);height:23.75pt;padding:0in;width:222.2pt;" width="296">

OBSERVATIONS/COMMENTS

- NETHERLANDS COURTS   - GERMANY COURTS