123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250 |
- # from docx.api import Document
- from docx.oxml.table import CT_Tbl
- from docx.oxml.text.paragraph import CT_P
- from docx.table import _Cell, Table, _Row
- from docx.text.paragraph import Paragraph
- from docx import Document
- from docx.shared import Inches
- from docx.oxml import OxmlElement
- from docx.oxml.ns import qn
- import time
- import re
# Template document loaded by the demo run at the bottom of this file.
path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder\Templates\templeta.docx'
# Output file name inside the test folder; the suffix is the time at which
# this module is loaded, formatted as "<year>-<month>-<day>_H<hour>-M<minute>".
doc_output_path = ''.join((
    r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder',
    r'\doc_test_{}.docx'.format(time.strftime("%Y-%m-%d_H%H-M%M")),
))
class DOCFileBuilder:
    """Helper around a python-docx document for filling in a bill template.

    The instance owns ``self.document`` (created by ``Document(...)``) and
    offers text/date replacement in body paragraphs and in table cells, plus
    a few table-row utilities. All replacements work run by run, so a search
    string that is split across several runs is not found.
    """

    # Date as used by the templates: two digits, dot, two digits, dot, digits.
    _DATE_PATTERN = re.compile(r'\d\d\.\d\d\.\d+')

    def __init__(self, docx_input_path='', docx_save_path=''):
        """Open *docx_input_path*, or start from ``Document()`` when it is empty.

        :param docx_input_path: path of an existing .docx file; falsy means
            "create a new document".
        :param docx_save_path: target used later by :meth:`save_docx`.
        """
        if docx_input_path:
            self.document = Document(docx_input_path)
        else:
            self.document = Document()
        self.docx_save_path = docx_save_path

    # Low Level Functions
    def save_docx(self):
        """Write the document to ``self.docx_save_path``.

        :raises ValueError: when no save path was configured. (Previously this
            case called ``document.save()`` without its required argument and
            failed with an unhelpful ``TypeError``.)
        """
        if not self.docx_save_path:
            raise ValueError('docx_save_path is empty; cannot save the document')
        self.document.save(self.docx_save_path)

    def get_header_footer(self):
        """Store header and footer of the first section on ``header_`` / ``footer_``."""
        first_section = self.document.sections[0]
        self.header_ = first_section.header
        self.footer_ = first_section.footer

    def replace_text_in_paragraph(self, paragraph, search_str, replace_str):
        """Replace every occurrence of *search_str* inside each run of *paragraph*.

        An empty *search_str* is ignored: ``str.replace('', x)`` would insert
        *x* between all characters of every run.
        """
        if not search_str:
            return
        for run in paragraph.runs:
            if search_str in run.text:
                run.text = run.text.replace(search_str, replace_str)

    def paragraphs_in_table(self):
        """Yield every paragraph of every cell of every table in ``self.document.tables``."""
        for table in self.document.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        yield paragraph

    def replace_string_in_standard_text(self, search_str, replace_str):
        """Run :meth:`replace_text_in_paragraph` over ``self.document.paragraphs``."""
        for paragraph in self.document.paragraphs:
            self.replace_text_in_paragraph(paragraph, search_str, replace_str)

    def replace_string_in_all_tables(self, search_str, replace_str):
        """Run :meth:`replace_text_in_paragraph` over all table paragraphs."""
        for paragraph in self.paragraphs_in_table():
            self.replace_text_in_paragraph(paragraph, search_str, replace_str)

    def _replace_dates_in_paragraphs(self, paragraphs, replace_date_str):
        """Shared worker of the two ``replace_dates_string_*`` methods.

        Per run, the first date found is looked up and every occurrence of
        that exact date string in the run is replaced by *replace_date_str*.
        Runs without a date are left untouched.
        """
        for paragraph in paragraphs:
            for run in paragraph.runs:
                match = self._DATE_PATTERN.search(run.text)
                if match is not None:
                    run.text = run.text.replace(match.group(), replace_date_str)

    def replace_dates_string_in_standard_text(self, replace_date_str):
        """Replace dates in the runs of ``self.document.paragraphs``."""
        self._replace_dates_in_paragraphs(self.document.paragraphs, replace_date_str)

    def delete_empty_rows_in_table(self, table, column_count):
        """Remove each row of *table* whose number of empty runs equals *column_count*.

        The count is over runs (text ``''``) in all paragraphs of all cells of
        the row, not over cells; cells without any run contribute nothing.
        """
        # Iterate over a copy so removing a row cannot disturb the iteration.
        for row in list(table.rows):
            empty_runs = sum(
                1
                for cell in row.cells
                for paragraph in cell.paragraphs
                for run in paragraph.runs
                if run.text == ''
            )
            if empty_runs == column_count:
                remove_row(table, row)

    def replace_dates_string_in_all_tables(self, replace_date_str):
        """Replace dates in the runs of all table paragraphs."""
        self._replace_dates_in_paragraphs(self.paragraphs_in_table(), replace_date_str)

    def add_heading(self, text, level, alignment=0):
        """Append a heading, keep it on ``self.title`` and set its alignment."""
        self.title = self.document.add_heading(text, level)
        self.title.alignment = alignment

    def search_table_with_key_columns(self, keys):
        """Return the first table whose runs contain a match for every key.

        Each entry of *keys* is used as a regular expression (``re.search``)
        against the text of individual runs. A table qualifies once every
        distinct key has matched at least one of its runs; repeated hits of
        the same key no longer count as several keys.

        :return: the matching table, or ``None`` when *keys* is empty or no
            table contains all keys.
        """
        patterns = {key: re.compile(key) for key in keys}
        if not patterns:
            return None
        for table in self.document.tables:
            found = set()
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            for key, pattern in patterns.items():
                                if key not in found and pattern.search(run.text):
                                    found.add(key)
                            if len(found) == len(patterns):
                                return table
        return None

    def replace_rows(self, table, list_tuple):
        """Write the entries of *list_tuple* into empty runs of *table*.

        Each tuple describes one row and must not be longer than the number of
        cells in a row. For every tuple the rows are scanned from the top;
        element ``k`` is written into each empty run (text ``''``) of cell
        ``k``. A list element puts its first item into the empty run and
        appends the remaining items to the paragraph as extra runs, each
        preceded by a break, with the paragraph style set to ``'List'``.
        When the number of filled runs equals the tuple length the row counts
        as inserted and the scan moves on to the next tuple. Rows left over
        are not deleted here.
        """
        len_insert_rows = 0
        for values in list_tuple:
            for row in table.rows:
                count_columns = 0
                for count, elem in enumerate(values):
                    for p in row.cells[count].paragraphs:
                        # p.runs is a fresh list, so runs added below are not revisited.
                        for run in p.runs:
                            if run.text != '':
                                continue
                            if isinstance(elem, list):
                                for list_idx, list_elem in enumerate(elem):
                                    if list_idx == 0:
                                        run.text = str(elem[0])
                                    else:
                                        p.style = 'List'
                                        r = p.add_run()
                                        r.add_break()
                                        r.add_text(str(list_elem))
                            else:
                                run.text = str(elem)
                            count_columns += 1
                if count_columns == len(values):
                    len_insert_rows += 1
                    break
            if len_insert_rows == len(list_tuple):
                return

    # Custom Format functions
    def replace_Leistungszeitraum(self, start_date, end_date):
        """Overwrite every table run containing 'Leistungszeitraum' with the new period text."""
        replacement = 'Leistungszeitraum: {} bis zum {}'.format(start_date, end_date)
        for paragraph in self.paragraphs_in_table():
            for run in paragraph.runs:
                if 'Leistungszeitraum' in run.text:
                    run.text = replacement

    def replace_Rechnungsnummer(self, month, year):
        """Rewrite each line containing 'Rechnung Nr' in table runs to the new bill number.

        Only the affected lines (split on ``'\\n'``) of a run are replaced;
        other lines of the same run are kept.
        """
        new_line = 'Rechnung Nr.: {}-{}'.format(year, month)
        for paragraph in self.paragraphs_in_table():
            for run in paragraph.runs:
                if 'Rechnung Nr' not in run.text:
                    continue
                run.text = '\n'.join(
                    new_line if 'Rechnung Nr' in line else line
                    for line in run.text.split('\n')
                )

    # High Tier functions
    def replace_all_dates_custom_wise(self, start_date, end_date):
        """Set all dates to *end_date*, then rewrite the 'Leistungszeitraum' runs."""
        self.replace_dates_string_in_standard_text(end_date)
        self.replace_dates_string_in_all_tables(end_date)
        self.replace_Leistungszeitraum(start_date, end_date)

    def replace_custom_placeholder_in_doc(self, placeholder, replace_str):
        """Replace *placeholder* in body paragraphs first, then in all tables."""
        self.replace_string_in_standard_text(placeholder, replace_str)
        self.replace_string_in_all_tables(placeholder, replace_str)
def remove_row(table, row):
    """Detach *row* from *table* by removing the row's ``_tr`` from the table's ``_tbl``."""
    table._tbl.remove(row._tr)
def iter_block_items(parent):
    """
    Yield each paragraph and table child within *parent*, in document order.
    Each returned value is an instance of either Table or Paragraph.

    *parent* must be a document object, a ``_Cell`` or a ``_Row``; anything
    else raises ``ValueError`` when the generator is first advanced.
    """
    # The module-level name ``Document`` (``from docx import Document``) is the
    # factory function, not a class, so it cannot be used with isinstance().
    # The class of the objects it returns lives in ``docx.document``.
    from docx.document import Document as _Document

    if isinstance(parent, _Document):
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    elif isinstance(parent, _Row):
        # NOTE(review): only direct CT_P / CT_Tbl children are yielded below;
        # confirm that a row element actually has such direct children.
        parent_elm = parent._tr
    else:
        raise ValueError("something's not right")
    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)
def find_table_text_in_standard(doc):  # not header or footer
    """Return the ``text`` of every paragraph in every cell of ``doc.tables``.

    The list follows table, row, cell and paragraph order.
    """
    return [
        paragraph.text
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
        for paragraph in cell.paragraphs
    ]
def create_custome_doc_file():
    """Build a new document that contains only the 'EnD-GbR' heading.

    The heading is added with level 0 and alignment value 2.

    :return: the document object held by the builder
    """
    builder = DOCFileBuilder()
    builder.add_heading('EnD-GbR', level=0, alignment=2)
    return builder.document
if __name__ == '__main__':
    # Demo run against the template at ``path``: set all dates, fill one
    # example row into the bill table and drop the rows that stayed empty.
    builder_obj = DOCFileBuilder(path)
    builder_obj.replace_dates_string_in_standard_text('03.11.2022')
    builder_obj.replace_dates_string_in_all_tables('03.11.2022')

    keys = ['KW', 'Leistung', 'Zeit in Stunden', 'Betrag in Euro']
    example_tuple = [(123123, 213123, 12312312, 445235)]
    key_table = builder_obj.search_table_with_key_columns(keys)
    # The search returns None when no table matches; stop with a readable
    # message instead of failing later with an AttributeError on None.
    if key_table is None:
        raise SystemExit('No table containing all of {} found in {}'.format(keys, path))
    builder_obj.replace_rows(key_table, example_tuple)
    builder_obj.delete_empty_rows_in_table(key_table, 4)
    builder_obj.document.add_page_break()

    overview_keys = ['KW', 'Leistungsübersicht']
    key_table = builder_obj.search_table_with_key_columns(overview_keys)
    if key_table is None:
        raise SystemExit('No table containing all of {} found in {}'.format(overview_keys, path))
    builder_obj.delete_empty_rows_in_table(key_table, 2)
    # NOTE(review): the result is only kept in ``doc``; nothing is saved here.
    doc = builder_obj.document
|