# Daniel / Tools
							from docx.api import Document
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.table import _Cell, Table, _Row
from docx.text.paragraph import Paragraph
import time
import re

# Template document opened by the script section at the bottom of this file.
path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder\Templates\templeta.docx'
# The timestamp is evaluated once, when the module is loaded, and baked into
# the output file name.
_timestamp = time.strftime("%Y-%m-%d_H%H-M%M")
doc_output_path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder' + r'\doc_test_{}.docx'.format(
    _timestamp)


class DOCFileBuilder:
    """Helper around a python-docx document for search/replace style template filling.

    The instance wraps ``self.document`` (opened from ``docx_input_path`` or
    created empty) and offers run-level text replacement in body paragraphs
    and in table cells, plus a few table helpers.

    All replacements work on individual runs: a search string or date that is
    split across several runs of a paragraph is not found.
    """

    # Dates written as two digits, dot, two digits, dot, one or more digits.
    _DATE_PATTERN = re.compile(r'\d\d\.\d\d\.\d+')

    def __init__(self, docx_input_path='', docx_save_path=''):
        """
        :param docx_input_path: document to open; a new empty document is
            created when this is empty.
        :param docx_save_path: target used by :meth:`save_docx`.
        """
        if docx_input_path:
            self.document = Document(docx_input_path)
        else:
            self.document = Document()
        self.docx_save_path = docx_save_path

    # Low Level Functions
    def save_docx(self):
        """Save the document to ``self.docx_save_path``.

        :raises ValueError: if no save path was configured. The document's
            ``save`` needs a target, so calling it without one can only fail
            with an unhelpful error.
        """
        if not self.docx_save_path:
            raise ValueError('docx_save_path is not set; cannot save the document')
        self.document.save(self.docx_save_path)

    def get_header_footer(self):
        """Store header and footer of the first section as ``header_`` / ``footer_``."""
        first_section = self.document.sections[0]
        self.header_ = first_section.header
        self.footer_ = first_section.footer

    def replace_text_in_paragraph(self, paragraph, search_str, replace_str):
        """Replace ``search_str`` by ``replace_str`` in every run of ``paragraph``."""
        for run in paragraph.runs:
            if search_str in run.text:
                run.text = run.text.replace(search_str, replace_str)

    def paragraphs_in_table(self):
        """Yield every paragraph of every cell of every table in ``document.tables``."""
        for table in self.document.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        yield paragraph

    def _replace_in_paragraphs(self, paragraphs, search_str, replace_str):
        """Apply :meth:`replace_text_in_paragraph` to each of ``paragraphs``."""
        for paragraph in paragraphs:
            self.replace_text_in_paragraph(paragraph, search_str, replace_str)

    def _replace_dates_in_paragraphs(self, paragraphs, replace_date_str):
        """Replace every date in every run of ``paragraphs`` by ``replace_date_str``.

        Each regex match is substituted on its own, so several different
        dates in one run are all replaced, and a short date such as
        ``01.01.20`` cannot clobber the prefix of a longer one.
        """
        for paragraph in paragraphs:
            for run in paragraph.runs:
                # A callable replacement keeps backslashes in the replacement literal.
                new_text, hits = self._DATE_PATTERN.subn(
                    lambda _match: replace_date_str, run.text)
                # Only touch runs that actually contained a date.
                if hits:
                    run.text = new_text

    def replace_string_in_standard_text(self, search_str, replace_str):
        """Replace ``search_str`` in the document's body paragraphs."""
        self._replace_in_paragraphs(self.document.paragraphs, search_str, replace_str)

    def replace_string_in_all_tables(self, search_str, replace_str):
        """Replace ``search_str`` in all table cell paragraphs."""
        self._replace_in_paragraphs(self.paragraphs_in_table(), search_str, replace_str)

    def replace_dates_string_in_standard_text(self, replace_date_str):
        """Replace all dates in the document's body paragraphs."""
        self._replace_dates_in_paragraphs(self.document.paragraphs, replace_date_str)

    def delete_empty_rows_in_table(self, table, column_count):
        """Remove each row of ``table`` that holds exactly ``column_count`` empty runs.

        "Empty" means a run whose text is ``''``. The count is taken over all
        runs of all cells of the row, not per cell.
        """
        # Work on a snapshot so removing a row cannot disturb the iteration.
        for row in list(table.rows):
            empty_runs = sum(
                1
                for cell in row.cells
                for paragraph in cell.paragraphs
                for run in paragraph.runs
                if run.text == ''
            )
            if empty_runs == column_count:
                remove_row(table, row)

    def replace_dates_string_in_all_tables(self, replace_date_str):
        """Replace all dates in all table cell paragraphs."""
        self._replace_dates_in_paragraphs(self.paragraphs_in_table(), replace_date_str)

    def add_heading(self, text, level, alignment=0):
        """Append a heading, keep it as ``self.title`` and set its ``alignment``."""
        self.title = self.document.add_heading(text, level)
        self.title.alignment = alignment

    def search_table_with_key_columns(self, keys):
        """Return the first table in which every key is found, else ``None``.

        Each key is used as a regular expression and matched with
        ``re.search`` against the text of single runs. A key counts once, no
        matter how often it occurs, so a table only qualifies when *all*
        distinct keys were seen in it.

        :param keys: iterable of regex patterns (strings).
        :return: the matching table, or ``None`` if there is none or ``keys``
            is empty.
        """
        wanted = set(keys)
        if not wanted:
            return None
        for table in self.document.tables:
            missing = set(wanted)
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            text = run.text
                            missing = {key for key in missing
                                       if re.search(key, text) is None}
                            if not missing:
                                return table
        return None

    @staticmethod
    def _first_empty_run(cell):
        """Return the first run of ``cell`` whose text is ``''``, or ``None``."""
        for paragraph in cell.paragraphs:
            for run in paragraph.runs:
                if run.text == '':
                    return run
        return None

    def replace_rows(self, table, list_tuple):
        """Write each tuple of ``list_tuple`` into the next free row of ``table``.

        A row is free when each of its first ``len(values)`` cells contains an
        empty run (text ``''``); the values are written, as ``str``, into the
        first empty run of each of those cells. Rows that cannot take the
        whole tuple (for example a header row) are left untouched. Filling
        stops when all tuples are placed or the table has no more rows.
        Leftover empty rows are not removed here, see
        :meth:`delete_empty_rows_in_table`.

        :param table: table whose rows are filled in order.
        :param list_tuple: one tuple of cell values per row to fill.
        """
        # One shared iterator: every row is offered to exactly one tuple, so a
        # filled row is never reused and the next tuple continues below it.
        remaining_rows = iter(table.rows)
        for values in list_tuple:
            for row in remaining_rows:
                cells = row.cells
                if len(cells) < len(values):
                    continue
                targets = [self._first_empty_run(cell)
                           for cell in cells[:len(values)]]
                if any(run is None for run in targets):
                    continue
                for run, value in zip(targets, values):
                    run.text = str(value)
                break
            else:
                # Ran out of rows before all tuples were placed.
                return

    # Custom Format functions
    def replace_Leistungszeitraum(self, start_date, end_date):
        """Overwrite every table run containing 'Leistungszeitraum' with the new period text."""
        period_text = 'Leistungszeitraum: {} bis zum {}'.format(start_date, end_date)
        for paragraph in self.paragraphs_in_table():
            for run in paragraph.runs:
                if 'Leistungszeitraum' in run.text:
                    run.text = period_text

    # High Tier functions
    def replace_all_dates_custom_wise(self, start_date, end_date):
        """Set all dates to ``end_date``, then rewrite the 'Leistungszeitraum' line.

        The order matters: the period line is written last so that its start
        date is not replaced by the generic date pass.
        """
        self.replace_dates_string_in_standard_text(end_date)
        self.replace_dates_string_in_all_tables(end_date)
        self.replace_Leistungszeitraum(start_date, end_date)

    def replace_custom_placeholder_in_doc(self, placeholder, replace_str):
        """Replace ``placeholder`` in body paragraphs and in all tables."""
        self.replace_string_in_standard_text(placeholder, replace_str)
        self.replace_string_in_all_tables(placeholder, replace_str)


def remove_row(table, row):
    """Detach *row* from *table* by removing the row's ``_tr`` element from the table's ``_tbl``."""
    table._tbl.remove(row._tr)


def iter_block_items(parent):
    """
    Yield each paragraph and table child within *parent*, in document order.
    Each returned value is an instance of either Table or Paragraph.

    :param parent: a document object, a table cell (``_Cell``) or a table
        row (``_Row``).
    :raises ValueError: if *parent* is none of the supported types.
    """
    # ``docx.api.Document`` (imported at module level) is a factory function
    # in the python-docx layout this file targets, so it cannot be used with
    # ``isinstance``. The class of the objects it returns lives in
    # ``docx.document``; import it locally under a distinct name.
    from docx.document import Document as _Document

    if isinstance(parent, _Document):
        # The body element is the container of the top-level block items.
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    elif isinstance(parent, _Row):
        parent_elm = parent._tr
    else:
        raise ValueError("something's not right")

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)


def find_table_text_in_standard(doc):  # not header or footer
    """Return the text of every paragraph found in the cells of ``doc.tables``.

    The texts are listed in iteration order: table by table, row by row,
    cell by cell, paragraph by paragraph.
    """
    return [
        paragraph.text
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
        for paragraph in cell.paragraphs
    ]


def create_custome_doc_file():
    '''
    Build a new, empty document that only carries the 'EnD-GbR' heading.

    :return: the underlying document object of the builder
    '''
    builder = DOCFileBuilder()
    # Level 0 heading with alignment value 2, passed through to add_heading.
    builder.add_heading('EnD-GbR', 0, 2)
    return builder.document


if __name__ == '__main__':
    # Manual run against the hard-coded template at `path`.
    builder_obj = DOCFileBuilder(path)
    builder_obj.replace_dates_string_in_standard_text('03.11.2022')
    builder_obj.replace_dates_string_in_all_tables('03.11.2022')

    # Fill the detail table and drop the rows that stayed empty.
    keys = ['KW', 'Leistung', 'Zeit in Stunden', 'Betrag in Euro']
    example_tuple = [(123123, 213123, 12312312, 445235)]
    key_table = builder_obj.search_table_with_key_columns(keys)
    if key_table is None:
        # The search returns None when no table matches; fail with a clear
        # message instead of an AttributeError further down.
        raise SystemExit('No table with the key columns {} found in {}'.format(keys, path))
    builder_obj.replace_rows(key_table, example_tuple)
    builder_obj.delete_empty_rows_in_table(key_table, 4)
    builder_obj.document.add_page_break()

    # Clean up the overview table as well.
    overview_keys = ['KW', 'Leistungsübersicht']
    key_table = builder_obj.search_table_with_key_columns(overview_keys)
    if key_table is None:
        raise SystemExit('No table with the key columns {} found in {}'.format(overview_keys, path))
    builder_obj.delete_empty_rows_in_table(key_table, 2)

    # NOTE(review): the modified document is only kept in memory here; nothing
    # in this section writes it to disk - confirm whether a save is intended.
    doc = builder_obj.document