|
@@ -0,0 +1,221 @@
|
|
|
|
+from docx.api import Document
|
|
|
|
+from docx.oxml.table import CT_Tbl
|
|
|
|
+from docx.oxml.text.paragraph import CT_P
|
|
|
|
+from docx.table import _Cell, Table, _Row
|
|
|
|
+from docx.text.paragraph import Paragraph
|
|
|
|
+import time
|
|
|
|
+import re
|
|
|
|
+
|
|
|
|
# Word template that is opened when this module is run as a script.
path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder\Templates\templeta.docx'

# Output file name inside the test folder; the suffix is the current local
# time formatted as e.g. ``2022-11-03_H14-M05`` so repeated runs within
# different minutes do not overwrite each other.
doc_output_path = ''.join((
    r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder',
    r'\doc_test_{}.docx'.format(time.strftime("%Y-%m-%d_H%H-M%M")),
))
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class DOCFileBuilder:
    """Open (or create) a Word document and fill it in by rewriting run texts.

    All replacement helpers work on the ``runs`` of paragraphs, either the
    paragraphs directly on ``self.document`` or the paragraphs found inside
    the cells of ``self.document.tables``.  Row removal is delegated to the
    module-level ``remove_row`` helper defined in the same file.
    """

    # Dates written as two digits, dot, two digits, dot, one or more digits
    # (e.g. ``03.11.2022``).
    _DATE_PATTERN = re.compile(r'\d\d\.\d\d\.\d+')

    def __init__(self, docx_input_path='', docx_save_path=''):
        """Load ``docx_input_path`` if given, otherwise start a new document.

        :param docx_input_path: path of an existing .docx file; empty string
            means "create a blank document".
        :param docx_save_path: target path used by :meth:`save_docx`.
        """
        if docx_input_path:
            self.document = Document(docx_input_path)
        else:
            self.document = Document()
        self.docx_save_path = docx_save_path
        # Filled lazily by get_header_footer() / add_heading().
        self.header_ = None
        self.footer_ = None
        self.title = None

    # Low Level Functions
    def save_docx(self):
        """Write the document to ``self.docx_save_path``.

        :raises ValueError: if no save path was configured.  (The previous
            implementation had the condition inverted and could never save
            successfully.)
        """
        if not self.docx_save_path:
            raise ValueError('docx_save_path is empty; cannot save the document')
        self.document.save(self.docx_save_path)

    def get_header_footer(self):
        """Store header and footer of the first section on ``self``."""
        first_section = self.document.sections[0]
        self.header_ = first_section.header
        self.footer_ = first_section.footer

    def replace_text_in_paragraph(self, paragraph, search_str, replace_str):
        """Replace ``search_str`` by ``replace_str`` in every run of ``paragraph``.

        The search is done run by run, so a ``search_str`` that is split over
        two runs is not found.
        """
        for run in paragraph.runs:
            if search_str in run.text:
                run.text = run.text.replace(search_str, replace_str)

    def paragraphs_in_table(self):
        """Yield every paragraph of every cell of every table in the document."""
        for table in self.document.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        yield paragraph

    def replace_string_in_standard_text(self, search_str, replace_str):
        """Run :meth:`replace_text_in_paragraph` on all body paragraphs."""
        for paragraph in self.document.paragraphs:
            self.replace_text_in_paragraph(paragraph, search_str, replace_str)

    def replace_string_in_all_tables(self, search_str, replace_str):
        """Run :meth:`replace_text_in_paragraph` on all table paragraphs."""
        for paragraph in self.paragraphs_in_table():
            self.replace_text_in_paragraph(paragraph, search_str, replace_str)

    def _replace_dates_in_paragraphs(self, paragraphs, replace_date_str):
        """Replace every date in the runs of ``paragraphs`` by ``replace_date_str``.

        Runs that contain no date are left untouched (their text is not
        reassigned).
        """
        pattern = self._DATE_PATTERN
        for paragraph in paragraphs:
            for run in paragraph.runs:
                if pattern.search(run.text):
                    # A function replacement keeps ``replace_date_str`` literal
                    # (no backslash / group-reference interpretation).
                    run.text = pattern.sub(lambda _match: replace_date_str, run.text)

    def replace_dates_string_in_standard_text(self, replace_date_str):
        """Replace all dates in the body paragraphs by ``replace_date_str``."""
        self._replace_dates_in_paragraphs(self.document.paragraphs, replace_date_str)

    def delete_empty_rows_in_table(self, table, column_count):
        """Remove every row of ``table`` that has ``column_count`` empty cells.

        A cell counts as empty when none of its runs carries text (a cell
        without any run is empty as well).

        :param table: table object exposing ``rows``; rows are removed via
            the module-level ``remove_row`` helper.
        :param column_count: number of empty cells a row must have to be
            deleted.
        """
        # Snapshot the rows so removing one cannot disturb the iteration.
        for row in list(table.rows):
            empty_cells = sum(
                1
                for cell in row.cells
                if all(run.text == ''
                       for paragraph in cell.paragraphs
                       for run in paragraph.runs)
            )
            if empty_cells == column_count:
                remove_row(table, row)

    def replace_dates_string_in_all_tables(self, replace_date_str):
        """Replace all dates in the table paragraphs by ``replace_date_str``."""
        self._replace_dates_in_paragraphs(self.paragraphs_in_table(), replace_date_str)

    def add_heading(self, text, level, alignment=0):
        """Append a heading and remember it as ``self.title``.

        :param alignment: value assigned to the heading's ``alignment``
            attribute (presumably 0/1/2 = left/center/right; verify against
            the python-docx alignment enumeration).
        """
        self.title = self.document.add_heading(text, level)
        self.title.alignment = alignment

    def search_table_with_key_columns(self, keys):
        """Return the first table whose text contains every entry of ``keys``.

        Each key is used as a regular expression (``re.search``) against the
        concatenated run texts of each paragraph in the table.  A key is
        counted once per table no matter how often it occurs.

        :param keys: iterable of patterns that must all occur in the table.
        :return: the matching table, or ``None`` if no table contains all
            keys (also when ``keys`` is empty).
        """
        wanted = set(keys)
        if not wanted:
            return None
        for table in self.document.tables:
            missing = set(wanted)
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        text = ''.join(run.text for run in paragraph.runs)
                        missing -= {key for key in missing if re.search(key, text)}
                        if not missing:
                            return table
        return None

    @staticmethod
    def _first_empty_runs(row, width):
        """Return the first empty run of each of the first ``width`` cells.

        Returns ``None`` when the row has fewer than ``width`` cells or when
        one of those cells has no run with empty text, i.e. when the row
        cannot take a full set of values.
        """
        cells = row.cells
        if len(cells) < width:
            return None
        targets = []
        for cell in cells[:width]:
            empty_run = next(
                (run
                 for paragraph in cell.paragraphs
                 for run in paragraph.runs
                 if run.text == ''),
                None,
            )
            if empty_run is None:
                return None
            targets.append(empty_run)
        return targets

    def replace_rows(self, table, list_tuple):
        """Write each tuple of ``list_tuple`` into the next free row of ``table``.

        A row is free when each of its first ``len(tuple)`` cells has a run
        with empty text; the values are written (as ``str``) into the first
        such run of each cell.  Rows that are not free (e.g. the header row)
        are skipped unchanged.  Rows left over after all tuples are placed
        stay empty; remove them with :meth:`delete_empty_rows_in_table`.

        :param table: table object exposing ``rows``.
        :param list_tuple: sequence of tuples, one tuple per row to fill.
        :return: number of rows that were filled (may be smaller than
            ``len(list_tuple)`` if the table runs out of free rows).
        """
        pending = list(list_tuple)
        filled_rows = 0
        for row in table.rows:
            if filled_rows == len(pending):
                break
            values = pending[filled_rows]
            targets = self._first_empty_runs(row, len(values))
            if targets is None:
                continue
            for run, value in zip(targets, values):
                run.text = str(value)
            # Move on to the next tuple; one tuple fills exactly one row.
            filled_rows += 1
        return filled_rows

    # Custom Format functions
    def replace_Leistungszeitraum(self, start_date, end_date):
        """Overwrite every table run mentioning 'Leistungszeitraum'.

        The whole run text is replaced by
        ``'Leistungszeitraum: <start_date> bis zum <end_date>'``.
        """
        replacement = 'Leistungszeitraum: {} bis zum {}'.format(start_date, end_date)
        for paragraph in self.paragraphs_in_table():
            for run in paragraph.runs:
                if 'Leistungszeitraum' in run.text:
                    run.text = replacement

    # High Tier functions
    def replace_all_dates_custom_wise(self, start_date, end_date):
        """Set all dates to ``end_date`` and rewrite the Leistungszeitraum line.

        The order matters: the generic date replacement runs first, then the
        Leistungszeitraum run is rewritten with both dates.
        """
        self.replace_dates_string_in_standard_text(end_date)
        self.replace_dates_string_in_all_tables(end_date)
        self.replace_Leistungszeitraum(start_date, end_date)

    def replace_custom_placeholder_in_doc(self, placeholder, replace_str):
        """Replace ``placeholder`` in body paragraphs and in all tables."""
        self.replace_string_in_standard_text(placeholder, replace_str)
        self.replace_string_in_all_tables(placeholder, replace_str)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def remove_row(table, row):
    """Detach ``row`` from ``table`` by removing its ``_tr`` element from the table's ``_tbl``."""
    table._tbl.remove(row._tr)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def iter_block_items(parent):
    """
    Yield each paragraph and table child within *parent*, in document order.
    Each returned value is an instance of either Table or Paragraph.

    *parent* may be a document object, a ``_Cell`` or a ``_Row``; any other
    type raises ``ValueError`` once the generator is first advanced.
    """
    # ``Document`` as imported at file level (``docx.api.Document``) is the
    # factory function, which ``isinstance`` cannot take as its second
    # argument; the class of the objects it returns lives in ``docx.document``.
    from docx.document import Document as _Document

    if isinstance(parent, _Document):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    elif isinstance(parent, _Row):
        parent_elm = parent._tr
    else:
        raise ValueError("something's not right")

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def find_table_text_in_standard(doc):  # not header or footer
    """Return the ``text`` of every paragraph in every cell of ``doc.tables``.

    The texts are listed table by table, row by row, cell by cell.
    """
    return [
        paragraph.text
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
        for paragraph in cell.paragraphs
    ]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def create_custome_doc_file():
    """Build a new document that contains only the 'EnD-GbR' heading.

    :return: the document object held by a fresh ``DOCFileBuilder``
        (the bill as a doc file); nothing is written to disk here.
    """
    builder = DOCFileBuilder()
    # Heading level 0 with alignment value 2.
    builder.add_heading(text='EnD-GbR', level=0, alignment=2)
    return builder.document
|
|
|
|
+
|
|
|
|
+
|
|
|
|
if __name__ == '__main__':
    # Manual smoke run against the local template configured in ``path``.
    builder_obj = DOCFileBuilder(path)
    builder_obj.replace_dates_string_in_standard_text('03.11.2022')
    builder_obj.replace_dates_string_in_all_tables('03.11.2022')

    # Fill the billing table with one example row, then drop unused rows.
    keys = ['KW', 'Leistung', 'Zeit in Stunden', 'Betrag in Euro']
    example_tuple = [(123123, 213123, 12312312, 445235)]
    key_table = builder_obj.search_table_with_key_columns(keys)
    if key_table is None:
        # The search yields None when no table matches; fail with a clear
        # message instead of an AttributeError further down.
        raise SystemExit('No table containing all of {} found in {}'.format(keys, path))
    builder_obj.replace_rows(key_table, example_tuple)
    builder_obj.delete_empty_rows_in_table(key_table, 4)
    builder_obj.document.add_page_break()

    # Clean up the two-column overview table as well.
    overview_keys = ['KW', 'Leistungsübersicht']
    key_table = builder_obj.search_table_with_key_columns(overview_keys)
    if key_table is None:
        raise SystemExit('No table containing all of {} found in {}'.format(overview_keys, path))
    builder_obj.delete_empty_rows_in_table(key_table, 2)
    doc = builder_obj.document
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|