"""Helpers for editing a .docx template with python-docx.

Provides :class:`DOCFileBuilder` (text, date and table-row replacement on a
document) plus a few free functions working on python-docx tables.
"""
import re
import time

from docx.api import Document
from docx.document import Document as _Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table, _Row
from docx.text.paragraph import Paragraph

path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder\Templates\templeta.docx'
doc_output_path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder' + r'\doc_test_{}.docx'.format(
    time.strftime("%Y-%m-%d_H%H-M%M"))

# Two digits, a dot, two digits, a dot, one or more digits (e.g. 03.11.2022).
# Compiled once because it is applied to every run of the document.
_DATE_PATTERN = re.compile(r'\d\d\.\d\d\.\d+')


class DOCFileBuilder:
    """Wrap a python-docx document and offer replacement helpers on it.

    All replacements work run by run: a search string that is split over
    several runs of a paragraph is not found.
    """

    def __init__(self, docx_input_path='', docx_save_path=''):
        """Open *docx_input_path*, or start from the default template.

        :param docx_input_path: path of the .docx file to load; when empty,
            ``Document()`` is called without arguments.
        :param docx_save_path: path used by :meth:`save_docx`.
        """
        if docx_input_path:
            self.document = Document(docx_input_path)
        else:
            self.document = Document()
        self.docx_save_path = docx_save_path

    # Low Level Functions
    def save_docx(self):
        """Save the document.

        Writes to ``self.docx_save_path`` when it is set, otherwise to the
        module-level default ``doc_output_path``.  (The previous version
        had the condition inverted and called ``save()`` without a path,
        so it could never succeed.)
        """
        target_path = self.docx_save_path or doc_output_path
        self.document.save(target_path)

    def get_header_footer(self):
        """Store header and footer of the first section on the instance.

        Sets ``self.header_`` and ``self.footer_``.

        :return: ``(header, footer)`` of ``self.document.sections[0]``.
        """
        first_section = self.document.sections[0]
        self.header_ = first_section.header
        self.footer_ = first_section.footer
        return self.header_, self.footer_

    def replace_text_in_paragraph(self, paragraph, search_str, replace_str):
        """Replace *search_str* with *replace_str* in every run of *paragraph*.

        Runs that do not contain *search_str* are left untouched (their
        text is not reassigned).
        """
        for run in paragraph.runs:
            if search_str in run.text:
                run.text = run.text.replace(search_str, replace_str)

    def paragraphs_in_table(self):
        """Yield every paragraph of every cell of every table in the document."""
        for table in self.document.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cell.paragraphs

    def replace_string_in_standard_text(self, search_str, replace_str):
        """Replace *search_str* in the paragraphs of ``self.document.paragraphs``."""
        for paragraph in self.document.paragraphs:
            self.replace_text_in_paragraph(paragraph, search_str, replace_str)

    def replace_string_in_all_tables(self, search_str, replace_str):
        """Replace *search_str* in all table-cell paragraphs."""
        for paragraph in self.paragraphs_in_table():
            self.replace_text_in_paragraph(paragraph, search_str, replace_str)

    @staticmethod
    def _replace_first_date_in_paragraph(paragraph, replace_date_str):
        """Replace the first date found in each run with *replace_date_str*.

        Per run, the first text matching ``_DATE_PATTERN`` is looked up and
        every occurrence of that exact text in the run is replaced.  Other,
        different dates in the same run are left as they are.
        """
        for run in paragraph.runs:
            match = _DATE_PATTERN.search(run.text)
            if match:
                run.text = run.text.replace(match.group(), replace_date_str)

    def replace_dates_string_in_standard_text(self, replace_date_str):
        """Replace dates in the paragraphs of ``self.document.paragraphs``.

        See :meth:`_replace_first_date_in_paragraph` for the exact rule.
        """
        for paragraph in self.document.paragraphs:
            self._replace_first_date_in_paragraph(paragraph, replace_date_str)

    def delete_empty_rows_in_table(self, table, column_count):
        """Remove the rows of *table* that hold exactly *column_count* empty runs.

        :param table: python-docx table to clean up.
        :param column_count: number of empty-text runs a row must contain
            (summed over all its cells) to be removed.
        """
        # Iterate over a snapshot because rows are removed while looping.
        for row in list(table.rows):
            empty_runs = sum(
                1
                for cell in row.cells
                for paragraph in cell.paragraphs
                for run in paragraph.runs
                if run.text == ''
            )
            if empty_runs == column_count:
                remove_row(table, row)

    def replace_dates_string_in_all_tables(self, replace_date_str):
        """Replace dates in all table-cell paragraphs.

        See :meth:`_replace_first_date_in_paragraph` for the exact rule.
        """
        for paragraph in self.paragraphs_in_table():
            self._replace_first_date_in_paragraph(paragraph, replace_date_str)

    def add_heading(self, text, level, alignment=0):
        """Append a heading and keep it in ``self.title``.

        :param text: heading text.
        :param level: heading level passed to ``document.add_heading``.
        :param alignment: value assigned to the heading's ``alignment``.
        """
        self.title = self.document.add_heading(text, level)
        self.title.alignment = alignment

    def search_table_with_key_columns(self, keys):
        """Return the first table in which every key of *keys* occurs.

        Each key is used as a regular expression and searched with
        ``re.search`` in the text of every run of the table.  A table
        qualifies once each distinct key matched at least one run.  (The
        previous version counted matches, so one key occurring several
        times could stand in for keys that were missing.)

        :param keys: iterable of regex strings.
        :return: the matching table, or ``None`` when *keys* is empty or no
            table contains all keys.
        """
        wanted = set(keys)
        if not wanted:
            return None
        for table in self.document.tables:
            missing = set(wanted)
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            run_text = run.text
                            missing -= {
                                key for key in missing
                                if re.search(key, run_text)
                            }
                            if not missing:
                                return table
        return None

    def replace_rows(self, table, list_tuple):
        """Write the tuples of *list_tuple* into the empty rows of *table*.

        Each tuple builds one row and must have as many elements as the row
        has cells.  Rows are visited top to bottom; in the current row every
        run whose text is ``''`` in cell *i* receives ``str(tuple[i])``.
        When the number of runs filled equals the tuple length, the row
        counts as written and the next tuple is used for the following
        rows.  Stops once all tuples are written or the rows run out.

        (The previous version never advanced to the next tuple after
        filling a row, so the first tuple was written into every empty
        row.)

        :param table: python-docx table to fill.
        :param list_tuple: list of tuples, one per row to fill.
        :raises IndexError: if a tuple has more elements than a row has cells.
        """
        pending = iter(list_tuple)
        values = next(pending, None)
        if values is None:
            return
        for row in table.rows:
            # ``row.cells`` is evaluated once per row instead of per element.
            cells = row.cells
            filled = 0
            for index, value in enumerate(values):
                for paragraph in cells[index].paragraphs:
                    for run in paragraph.runs:
                        if run.text == '':
                            run.text = str(value)
                            filled += 1
            if filled == len(values):
                values = next(pending, None)
                if values is None:
                    return

    # Custom Format functions
    def replace_Leistungszeitraum(self, start_date, end_date):
        """Rewrite every table run containing ``Leistungszeitraum``.

        The whole text of such a run is replaced by
        ``'Leistungszeitraum: <start_date> bis zum <end_date>'``.
        """
        new_text = 'Leistungszeitraum: {} bis zum {}'.format(start_date, end_date)
        for paragraph in self.paragraphs_in_table():
            for run in paragraph.runs:
                if 'Leistungszeitraum' in run.text:
                    run.text = new_text

    # High Tier functions
    def replace_all_dates_custom_wise(self, start_date, end_date):
        """Set all dates to *end_date*, then rewrite the Leistungszeitraum line.

        The order matters: the date replacement runs first, afterwards the
        ``Leistungszeitraum`` runs are rewritten with both dates.
        """
        self.replace_dates_string_in_standard_text(end_date)
        self.replace_dates_string_in_all_tables(end_date)
        self.replace_Leistungszeitraum(start_date, end_date)

    def replace_custom_placeholder_in_doc(self, placeholder, replace_str):
        """Replace *placeholder* in body paragraphs and in all tables."""
        self.replace_string_in_standard_text(placeholder, replace_str)
        self.replace_string_in_all_tables(placeholder, replace_str)


def remove_row(table, row):
    """Remove *row* from *table* by detaching its XML element."""
    table._tbl.remove(row._tr)


def iter_block_items(parent):
    """
    Yield each paragraph and table child within *parent*, in document order.
    Each returned value is an instance of either Table or Paragraph.

    *parent* must be a document object, a ``_Cell`` or a ``_Row``;
    anything else raises ``ValueError``.
    """
    # ``docx.api.Document`` is a factory function, not a class, so the
    # isinstance check has to use the ``docx.document.Document`` class.
    if isinstance(parent, _Document):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    elif isinstance(parent, _Row):
        parent_elm = parent._tr
    else:
        raise ValueError("something's not right")
    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        if isinstance(child, CT_Tbl):
            yield Table(child, parent)


def find_table_text_in_standard(doc):
    """Return the text of every table-cell paragraph of *doc* as a list.

    Only ``doc.tables`` is read (not header or footer).
    """
    return [
        paragraph.text
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
        for paragraph in cell.paragraphs
    ]


def create_custome_doc_file():
    '''
    Build a new document containing only the heading 'EnD-GbR'.

    :return: Bill as Doc file
    '''
    builder = DOCFileBuilder()
    builder.add_heading('EnD-GbR', 0, 2)
    return builder.document


if __name__ == '__main__':
    builder_obj = DOCFileBuilder(path)
    builder_obj.replace_dates_string_in_standard_text('03.11.2022')
    builder_obj.replace_dates_string_in_all_tables('03.11.2022')
    keys = ['KW', 'Leistung', 'Zeit in Stunden', 'Betrag in Euro']
    example_tuple = [(123123, 213123, 12312312, 445235)]
    key_table = builder_obj.search_table_with_key_columns(keys)
    # The search returns None when no table contains all keys.
    if key_table is not None:
        builder_obj.replace_rows(key_table, example_tuple)
        builder_obj.delete_empty_rows_in_table(key_table, 4)
    builder_obj.document.add_page_break()
    key_table = builder_obj.search_table_with_key_columns(['KW', 'Leistungsübersicht'])
    if key_table is not None:
        builder_obj.delete_empty_rows_in_table(key_table, 2)
    doc = builder_obj.document