wb = xl.load_workbook(file, rich_text=True, data_only=True) # ignore all strikethrough texts ws = wb[sheet] for j in range(1, ws.max_column+1): for i in range(1, ws.max_row+1): col = get_column_letter(j) row = str(i) curval = [] cell = ws[col + row] if isinstance(cell.value, CellRichText): for text_block in cell.value: if isinstance(text_block, TextBlock): if text_block.font.strike is None or text_block.font.strike is False: curval.append(text_block) else: curval.append(text_block) ws[col + row] = CellRichText(curval) else: if cell.value: if cell.font.strike is True: ws[col + row].value = ''
On 9 May 2025, at 3:33, 'Feng, Youyang' via openpyxl-users wrote:
Hi Youyang,
Similar situation happened around cells with different format and content, although I cannot generalize their share pattern. So far, I have come up with some possible cases where strikethrough does not work on a TextBlock:
I'm sorry, but the AI generated code is very poor. I suggest you change your agent. I'm not going to debug such code but will make the following observations:
I just tried Mistral for the task and the code looks a lot better – both more likely to be what you want and easier to read – but ChatGPT also seems okay.
Charlie
--
Charlie Clark
Managing Director
Clark Consulting & Research
German Office
Sengelsweg 34
Düsseldorf
D- 40489
Tel: +49-203-3925-0390
Mobile: +49-178-782-6226
from openpyxl import load_workbook
from openpyxl.styles import Font, colors
from openpyxl.cell.rich_text import TextBlock, CellRichText
from pathlib import Path
def remove_strikethrough_text(file_path, sheet_name):
# Load the workbook
workbook = load_workbook(file_path)
sheet = workbook[sheet_name]
# Traverse all cells in the sheet
for row in sheet.iter_rows():
for cell in row:
if isinstance(cell.value, CellRichText):
# Handle rich text formatting
new_rich_text = []
for part in cell.value:
if not part.font.strikethrough:
new_rich_text.append(part)
else:
# Create a new TextBlock without strikethrough
new_part = TextBlock(part.font, part.text)
new_part.font = Font(strikethrough=False)
new_rich_text.append(new_part)
cell.value = CellRichText(new_rich_text)
else:
# Check if the entire cell content is strikethrough
print(cell.value)
if cell.font and cell.font.strikethrough:
cell.value = ''
# Save the modified workbook
workbook.save(Path(file_path).stem + '_stripped.xlsx')
# Example usage
file_path = 'strike4.xlsx'
sheet_name = 'Sheet1'
remove_strikethrough_text(file_path, sheet_name)
Hello,
I have read the document you referenced but not sure if I have a full grasp of it. Below are my understandings:
However, this does not answer my question that why some RichText are recognized as str.
For cell A1, it appears that openpyxl reads the first chunk “underline and strike” as str, losing all formatting it contains, i.e., “u=True” and “strike=True”.
Like A3, where “plain text with strikethrough” is recognized as str.
How does openpyxl determine if a part in a CellRichText is a TextBlock or not?
Also, when I tried to process the processed file again, it removes A3 cell but still keeps A1 untouched.
Any thoughts?
Below is my updated code.
from openpyxl import load_workbook
from openpyxl.styles import Font, colors
from openpyxl.cell.rich_text import TextBlock, CellRichText
from openpyxl.cell.text import InlineFont
from pathlib import Path
def remove_strikethrough_text(file_path, sheet_name):
# Load the workbook
workbook = load_workbook(file_path, rich_text=True)
sheet = workbook[sheet_name]
# Traverse all cells in the sheet
for row in sheet.iter_rows():
for cell in row:
if isinstance(cell.value, CellRichText):
# Handle rich text formatting
new_rich_text = []
for part in cell.value:
if isinstance(part, TextBlock):
if not part.font.strike or part.font.strike == False or part.font.strike is None:
new_rich_text.append(part)
# str does not have font, add directly.
else: # isinstance(part, str):
new_rich_text.append(part)
# else:
# print(part)
# # Create a new TextBlock without strikethrough
# new_part = TextBlock(part.font, part.text)
# new_part.font = InlineFont(strike=False)
# new_rich_text.append(new_part)
cell.value = CellRichText(new_rich_text)
else:
# Check if the entire cell content is strikethrough
if cell.font and cell.font.strikethrough:
cell.value = ''
# Save the modified workbook
workbook.save(Path(file_path).stem + '_stripped.xlsx')
# Example usage
file_path = 'strike4.xlsx'
sheet_name = 'Sheet1'
remove_strikethrough_text(file_path, sheet_name)
On 23 May 2025, at 2:43, 'Main QQ' via openpyxl-users wrote:
Hi Youyang,
I'm confused here, are you indicating that openpyxl may read some formatted text as string?
The main issue is that in a command line, it's difficult to have both the formatting and the text itself. Openpyxl tries make this easier but the actual OOXML is essentially unreadable.
I have read through the documentation regrding RichText, tried a lot of methods and codes to test around, but unfortunately not much luck so far, which is why I end up asking for help here. So far, the primary issue is openpyxl reads a plain text with format as regular string instead of a RichText, which is exactly what I am confused about.
This is the problem with OOXML. Rich text was added later, and was a mistake in my view. If you have text in a cell and format the cell this is completely different to using rich text, which has to be added explicitly. So, if you have both in your files then you'll have to check whether you have rich text or not.
For example:
c1 = ws["A1"]
c1.value = "Simple text"
c1.font = Font(strikethrough=True)
Is completely different to a cell with rich text.
strike = InlineFont(strike=True)
bold = InlineFont(b=True)
italic = InlineFont(i=True)
rich = CellRichText()
for word, style in zip(["bold", "italic", "strike"], [bold, italic, strike]):
rich.append(TextBlock(style, word + " "))
c2 = ws["B1"]
c2.value = rich
Any formatting applied to c2 will be ignored.
If you notice any issues in my code and would love to help me out here, I would be really appreaciative.
I don't have your files so your code is not really usable for me. In any case, as I said, you should really work with RichText objects to get familiar with them.