def split_table_row(row_lines):
    """Split the lines of one list-table row into its cells.

    A row starts with a ``* -`` line (the first cell); every following cell
    starts with a ``-`` line. Any other line is treated as a continuation of
    the cell currently being collected and is stored verbatim, including its
    leading whitespace and trailing newline, so that the original layout can
    be reconstructed later.

    Args:
        row_lines: The lines of a single row, with or without trailing
            newlines.

    Returns:
        A list of cells. Each cell is a list of strings: the first entry is
        the text that follows the cell marker, the remaining entries are the
        untouched continuation lines.

    Note:
        Cell markers are recognised at any indentation, so a continuation
        line that itself starts with ``- `` (for example a nested bullet
        list) starts a new cell.
    """
    import re

    first_cell_marker = r'^\s*\*\s+-\s*'
    # Accept a bare "-" at end of line as well, so an empty cell on a line
    # without trailing whitespace is still recognised as a cell.
    next_cell_marker = r'^\s*-(?:\s+|$)'

    cells = []
    current_cell = []
    for line in row_lines:
        marker = re.match(first_cell_marker, line)
        if marker:
            # Keep only the cell content (text after "* - ").
            current_cell = [line[marker.end():]]
            continue

        marker = re.match(next_cell_marker, line)
        if marker:
            # Finish the previous cell and start a new one (text after "- ").
            cells.append(current_cell)
            current_cell = [line[marker.end():]]
            continue

        # Continuation line: keep exactly as-is.
        current_cell.append(line)

    cells.append(current_cell)
    return cells


def _realign_continuation(line, content_col):
    """Re-indent one continuation line to the cell's content column.

    Whitespace-only lines become empty strings. Otherwise the line is
    indented to ``content_col``, or to its own original indentation when that
    is deeper, so nested content keeps its extra indent.
    """
    if not line.strip():
        return ""

    text = line.rstrip('\r\n')
    body = text.lstrip(' \t')
    indent = text[:len(text) - len(body)]
    # Tabs aren't expected in RST, but count each as 4 columns if present.
    width = indent.count(' ') + 4 * indent.count('\t')
    return " " * max(content_col, width) + body


def join_cells(cells, base_indent):
    """Reconstruct the lines of a list-table row from its cells.

    The first cell is emitted with a ``* - `` marker and every other cell
    with a ``  - `` marker, both placed after ``base_indent``. Continuation
    lines are aligned to the column where the cell content starts; any
    original indentation beyond that column is preserved, which keeps the
    deeper indentation of nested content such as directive option lines.

    Args:
        cells: A list of cells as produced by ``split_table_row``; each cell
            is a list of strings (first line, then continuation lines).
        base_indent: The whitespace that precedes the row's ``*`` marker.

    Returns:
        The row as a list of lines without trailing newlines. An empty
        ``cells`` list yields an empty list, and an empty cell yields just
        its marker with no trailing whitespace.
    """
    if not cells:
        return []

    # Content starts 4 characters after the base indent for both the first
    # cell ("* - ") and the other cells ("  - ").
    content_col = len(base_indent) + 4

    out = []
    for index, cell in enumerate(cells):
        marker = "* - " if index == 0 else "  - "
        first_line_text = cell[0] if cell else ""
        # The outer rstrip drops the marker's trailing space for empty cells.
        out.append(f"{base_indent}{marker}{first_line_text.rstrip()}".rstrip())
        out.extend(
            _realign_continuation(line, content_col) for line in cell[1:]
        )
    return out