|
- from docx.api import Document
- from docx.oxml.table import CT_Tbl
- from docx.oxml.text.paragraph import CT_P
- from docx.table import _Cell, Table, _Row
- from docx.text.paragraph import Paragraph
- import time
- import re
# Template document that is opened when this module is run as a script.
path = (
    r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder\Templates\templeta.docx'
)
# Output file name; the time stamp is evaluated once, when the module is
# imported, so every use of this constant within one run names the same file.
doc_output_path = (
    r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder'
    + r'\doc_test_{}.docx'.format(time.strftime("%Y-%m-%d_H%H-M%M"))
)
class DOCFileBuilder:
    """Edit a Word document through python-docx.

    The builder wraps one document object (``self.document``) and offers
    helpers that replace text, dates and placeholder rows in the body
    paragraphs and in the tables of that document.

    All text helpers work run by run: a search string that is split across
    two runs of the same paragraph is not found.
    """

    # Two digits, a dot, two digits, a dot, one or more digits (e.g. 03.11.2022).
    _DATE_PATTERN = re.compile(r'\d\d\.\d\d\.\d+')

    def __init__(self, docx_input_path='', docx_save_path=''):
        """Open ``docx_input_path``, or start a new document when it is empty.

        :param docx_input_path: path of an existing .docx file; '' creates a
            document from the library's default template.
        :param docx_save_path: path used later by :meth:`save_docx`.
        """
        if docx_input_path:
            self.document = Document(docx_input_path)
        else:
            self.document = Document()
        self.docx_save_path = docx_save_path

    # Low Level Functions
    def save_docx(self):
        """Write the document to ``self.docx_save_path``.

        :raises ValueError: if no save path was configured.
        """
        # The original test was inverted: it called save() without a path
        # when a path was set and save('') when none was set.
        if not self.docx_save_path:
            raise ValueError('docx_save_path is empty; cannot save the document')
        self.document.save(self.docx_save_path)

    def get_header_footer(self):
        """Store header and footer of the first section on the instance.

        The results are kept in ``self.header_`` and ``self.footer_``.
        """
        first_section = self.document.sections[0]
        self.header_ = first_section.header
        self.footer_ = first_section.footer

    def replace_text_in_paragraph(self, paragraph, search_str, replace_str):
        """Replace ``search_str`` with ``replace_str`` in every run of ``paragraph``."""
        for run in paragraph.runs:
            if search_str in run.text:
                run.text = run.text.replace(search_str, replace_str)

    @staticmethod
    def _iter_table_paragraphs(table):
        """Yield the paragraphs of every cell of one table, row by row."""
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    yield paragraph

    def paragraphs_in_table(self):
        """Yield every cell paragraph of every table in ``self.document.tables``."""
        for table in self.document.tables:
            for paragraph in self._iter_table_paragraphs(table):
                yield paragraph

    def replace_string_in_standard_text(self, search_str, replace_str):
        """Replace ``search_str`` in the body paragraphs (not in tables)."""
        for paragraph in self.document.paragraphs:
            self.replace_text_in_paragraph(paragraph, search_str, replace_str)

    def replace_string_in_all_tables(self, search_str, replace_str):
        """Replace ``search_str`` in the cell paragraphs of all tables."""
        for paragraph in self.paragraphs_in_table():
            self.replace_text_in_paragraph(paragraph, search_str, replace_str)

    def _replace_date_in_paragraph(self, paragraph, replace_date_str):
        """Swap the first date found in each run for ``replace_date_str``.

        Only the first date matched in a run is looked up; every occurrence
        of that exact date string inside the run is replaced.
        """
        for run in paragraph.runs:
            match = self._DATE_PATTERN.search(run.text)
            if match:
                run.text = run.text.replace(match.group(), replace_date_str)

    def replace_dates_string_in_standard_text(self, replace_date_str):
        """Replace dates in the body paragraphs with ``replace_date_str``."""
        for paragraph in self.document.paragraphs:
            self._replace_date_in_paragraph(paragraph, replace_date_str)

    def delete_empty_rows_in_table(self, table, column_count):
        """Remove every row of ``table`` that has ``column_count`` empty runs.

        A row qualifies when the number of runs with empty text, summed over
        all of its cells, is exactly ``column_count``.
        """
        # Iterate over a copy so that removing rows cannot disturb the loop.
        for row in list(table.rows):
            empty_runs = sum(
                1
                for cell in row.cells
                for paragraph in cell.paragraphs
                for run in paragraph.runs
                if run.text == ''
            )
            if empty_runs == column_count:
                remove_row(table, row)

    def replace_dates_string_in_all_tables(self, replace_date_str):
        """Replace dates in the cell paragraphs of all tables."""
        for paragraph in self.paragraphs_in_table():
            self._replace_date_in_paragraph(paragraph, replace_date_str)

    def add_heading(self, text, level, alignment=0):
        """Append a heading and keep it in ``self.title``.

        :param text: heading text.
        :param level: heading level passed on to ``document.add_heading``.
        :param alignment: value assigned to the heading's ``alignment``.
        """
        self.title = self.document.add_heading(text, level)
        self.title.alignment = alignment

    def search_table_with_key_columns(self, keys):
        """Return the first table that contains every entry of ``keys``.

        Each key is used as a regular expression and searched in the text of
        every run of the table. A table matches once each distinct key has
        been found at least once; repeated hits of the same key do not count
        twice.

        :param keys: iterable of regular-expression strings.
        :return: the matching table, or ``None`` when ``keys`` is empty or no
            table contains all keys.
        """
        wanted = set(keys)
        if not wanted:
            return None
        for table in self.document.tables:
            found = set()
            for paragraph in self._iter_table_paragraphs(table):
                for run in paragraph.runs:
                    for key in wanted - found:
                        if re.search(key, run.text):
                            found.add(key)
                    if found == wanted:
                        return table
        return None

    @staticmethod
    def _fill_placeholder_row(row, values):
        """Write ``values`` into ``row`` if it is a complete placeholder row.

        The empty runs in the first ``len(values)`` cells are collected; the
        row is filled only when exactly ``len(values)`` of them exist, so a
        row that does not qualify is left unmodified.

        :return: ``True`` when the row was filled, otherwise ``False``.
        """
        cells = row.cells
        if len(cells) < len(values):
            return False
        slots = [
            (run, value)
            for cell, value in zip(cells, values)
            for paragraph in cell.paragraphs
            for run in paragraph.runs
            if run.text == ''
        ]
        if len(slots) != len(values):
            return False
        for run, value in slots:
            run.text = str(value)
        return True

    def replace_rows(self, table, list_tuple):
        """Fill empty rows of ``table`` with the tuples of ``list_tuple``.

        Each tuple builds one row and should have as many entries as the row
        has cells. Rows are visited top to bottom; every tuple is written
        into the next row that qualifies as a placeholder row. Filling stops
        when all tuples are placed or the table has no rows left. Rows that
        stay empty are not removed here (see
        :meth:`delete_empty_rows_in_table`).
        """
        # One shared iterator: after a tuple is placed, the next tuple
        # continues with the following row instead of restarting at the top.
        rows = iter(table.rows)
        for values in list_tuple:
            for row in rows:
                if self._fill_placeholder_row(row, values):
                    break
            else:
                # Ran out of rows before all tuples were placed.
                return

    # Custom Format functions
    def replace_Leistungszeitraum(self, start_date, end_date):
        """Overwrite table runs containing 'Leistungszeitraum' with the period text."""
        period_text = 'Leistungszeitraum: {} bis zum {}'.format(start_date, end_date)
        for paragraph in self.paragraphs_in_table():
            for run in paragraph.runs:
                if 'Leistungszeitraum' in run.text:
                    # The whole run is replaced, not just the keyword.
                    run.text = period_text

    # High Tier functions
    def replace_all_dates_custom_wise(self, start_date, end_date):
        """Set all dates to ``end_date``, then rewrite the 'Leistungszeitraum' runs.

        The order matters: the period text is written last so that its start
        date is not overwritten by the generic date replacement.
        """
        self.replace_dates_string_in_standard_text(end_date)
        self.replace_dates_string_in_all_tables(end_date)
        self.replace_Leistungszeitraum(start_date, end_date)

    def replace_custom_placeholder_in_doc(self, placeholder, replace_str):
        """Replace ``placeholder`` in the body paragraphs and in all tables."""
        self.replace_string_in_standard_text(placeholder, replace_str)
        self.replace_string_in_all_tables(placeholder, replace_str)
def remove_row(table, row):
    """Detach the ``_tr`` element of *row* from the ``_tbl`` element of *table*."""
    table._tbl.remove(row._tr)
def iter_block_items(parent):
    """
    Yield each paragraph and table child within *parent*, in document order.
    Each returned value is an instance of either Table or Paragraph.

    *parent* may be a document object, a ``_Cell`` or a ``_Row``. Because
    this is a generator, the ``ValueError`` for any other type is raised when
    iteration starts, not when the function is called.
    """
    # ``docx.api.Document`` is a factory function, not a class, so it cannot
    # be used with isinstance(); the class lives in ``docx.document``.
    from docx.document import Document as _DocumentClass

    if isinstance(parent, _DocumentClass):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    elif isinstance(parent, _Row):
        parent_elm = parent._tr
    else:
        raise ValueError("something's not right")
    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)
def find_table_text_in_standard(doc):  # not header or footer
    """Return the text of every cell paragraph of ``doc.tables`` as a flat list.

    The list follows table, row, cell and paragraph order.
    """
    return [
        paragraph.text
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
        for paragraph in cell.paragraphs
    ]
def create_custome_doc_file():
    """Build a new document that contains only the 'EnD-GbR' heading.

    :return: the document object held by a freshly created DOCFileBuilder
        (the bill as a doc file).
    """
    builder = DOCFileBuilder()
    builder.add_heading(text='EnD-GbR', level=0, alignment=2)
    return builder.document
if __name__ == '__main__':
    # Manual smoke run against the template document; nothing is saved here.
    builder_obj = DOCFileBuilder(path)

    # Set every date in the body text and in the tables to the same value.
    for replace_dates in (
        builder_obj.replace_dates_string_in_standard_text,
        builder_obj.replace_dates_string_in_all_tables,
    ):
        replace_dates('03.11.2022')

    # Fill the four-column table with one example row, then drop the
    # placeholder rows that are still empty.
    keys = ['KW', 'Leistung', 'Zeit in Stunden', 'Betrag in Euro']
    example_tuple = [(123123, 213123, 12312312, 445235)]
    key_table = builder_obj.search_table_with_key_columns(keys)
    builder_obj.replace_rows(key_table, example_tuple)
    builder_obj.delete_empty_rows_in_table(key_table, 4)

    builder_obj.document.add_page_break()

    # Same clean-up for the two-column overview table.
    key_table = builder_obj.search_table_with_key_columns(
        ['KW', 'Leistungsübersicht']
    )
    builder_obj.delete_empty_rows_in_table(key_table, 2)

    doc = builder_obj.document
|