docx_builder.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. from docx.api import Document
  2. from docx.oxml.table import CT_Tbl
  3. from docx.oxml.text.paragraph import CT_P
  4. from docx.table import _Cell, Table, _Row
  5. from docx.text.paragraph import Paragraph
  6. import time
  7. import re
  8. path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder\Templates\templeta.docx'
  9. doc_output_path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder' + r'\doc_test_{}.docx'.format(
  10. time.strftime("%Y-%m-%d_H%H-M%M"))
  11. class DOCFileBuilder:
  12. def __init__(self, docx_input_path='', docx_save_path=''):
  13. if docx_input_path:
  14. self.document = Document(docx_input_path)
  15. else:
  16. self.document = Document()
  17. self.docx_save_path = docx_save_path
  18. # Low Level Functions
  19. def save_docx(self):
  20. if self.docx_save_path == '': # Default
  21. self.document.save()
  22. else:
  23. self.document.save(self.docx_save_path)
  24. def get_header_footer(self):
  25. self.header_ = self.document.sections[0].header
  26. self.footer_ = self.document.sections[0].footer
  27. def replace_text_in_paragraph(self, paragraph,search_str, replace_str):
  28. inline = paragraph.runs
  29. for i in range(len(inline)):
  30. if search_str in inline[i].text:
  31. text = inline[i].text.replace(search_str, replace_str)
  32. inline[i].text = text
  33. def paragraphs_in_table(self):
  34. for t in self.document.tables:
  35. for row in t.rows:
  36. for cell in row.cells:
  37. for p in cell.paragraphs:
  38. yield p
  39. def replace_string_in_standard_text(self, search_str, replace_str):
  40. for p in self.document.paragraphs:
  41. self.replace_text_in_paragraph(p,search_str,replace_str)
  42. def replace_string_in_all_tables(self, search_str, replace_str):
  43. for p in self.paragraphs_in_table():
  44. self.replace_text_in_paragraph(p,search_str, replace_str)
  45. def replace_dates_string_in_standard_text(self, replace_date_str):
  46. for p in self.document.paragraphs:
  47. inline = p.runs
  48. for i in range(len(inline)):
  49. try:
  50. found_date = re.search(r'\d\d\.\d\d\.\d+', inline[i].text).group()
  51. text = inline[i].text.replace(found_date, replace_date_str)
  52. inline[i].text = text
  53. except AttributeError:
  54. pass
  55. def delete_empty_rows_in_table(self, table, column_count):
  56. for row in table.rows:
  57. column_counter = 0
  58. for cell in row.cells:
  59. for p in cell.paragraphs:
  60. inline = p.runs
  61. for i in range(len(inline)):
  62. if inline[i].text == '':
  63. column_counter += 1
  64. if column_counter == column_count:
  65. remove_row(table, row)
  66. def replace_dates_string_in_all_tables(self, replace_date_str):
  67. for p in self.paragraphs_in_table():
  68. inline = p.runs
  69. for i in range(len(inline)):
  70. try:
  71. found_date = re.search(r'\d\d\.\d\d\.\d+', inline[i].text).group()
  72. text = inline[i].text.replace(found_date, replace_date_str)
  73. inline[i].text = text
  74. except AttributeError:
  75. pass
  76. def add_heading(self, text, level, alignment=0):
  77. self.title = self.document.add_heading(text, level)
  78. self.title.alignment = alignment
  79. def search_table_with_key_columns(self, keys):
  80. for t in self.document.tables:
  81. len_key_list = 0
  82. for row in t.rows:
  83. for cell in row.cells:
  84. for p in cell.paragraphs:
  85. inline = p.runs
  86. for i in range(len(inline)):
  87. for key in keys:
  88. try:
  89. re.search(key, inline[i].text).group()
  90. len_key_list += 1
  91. if len_key_list == len(keys):
  92. return t
  93. except AttributeError:
  94. pass
  95. def replace_rows(self, table, list_tuple):
  96. # each tuple builds a row, tuple must be the same size as cells in row
  97. # replacing till list of tuple is iterate through
  98. # then deleting the rest
  99. len_insert_rows = 0
  100. for tuple in list_tuple:
  101. for row in table.rows:
  102. count_columns = 0
  103. for count, elem in enumerate(tuple):
  104. for p in row.cells[count].paragraphs:
  105. inline = p.runs
  106. for i in range(len(inline)):
  107. if inline[i].text == '':
  108. inline[i].text = str(elem)
  109. count_columns += 1
  110. if count_columns == len(tuple):
  111. len_insert_rows += 1
  112. if len_insert_rows == len(list_tuple):
  113. return
  114. # Custom Format functions
  115. def replace_Leistungszeitraum(self, start_date, end_date):
  116. for p in self.paragraphs_in_table():
  117. inline = p.runs
  118. for i in range(len(inline)):
  119. try:
  120. re.search(r'Leistungszeitraum', inline[i].text).group()
  121. text = inline[i].text.replace(inline[i].text,
  122. 'Leistungszeitraum: {} bis zum {}'.format(start_date, end_date))
  123. inline[i].text = text
  124. except AttributeError:
  125. pass
  126. # High Tier functions
  127. def replace_all_dates_custom_wise(self, start_date, end_date):
  128. self.replace_dates_string_in_standard_text(end_date)
  129. self.replace_dates_string_in_all_tables(end_date)
  130. self.replace_Leistungszeitraum(start_date, end_date)
  131. def replace_custom_placeholder_in_doc(self, placeholder, replace_str):
  132. for p in self.document.paragraphs:
  133. self.replace_text_in_paragraph(p, placeholder, replace_str)
  134. for p in self.paragraphs_in_table():
  135. self.replace_text_in_paragraph(p, placeholder,replace_str)
  136. def remove_row(table, row):
  137. tbl = table._tbl
  138. tr = row._tr
  139. tbl.remove(tr)
  140. def iter_block_items(parent):
  141. """
  142. Yield each paragraph and table child within *parent*, in document order.
  143. Each returned value is an instance of either Table or Paragraph.
  144. """
  145. if isinstance(parent, Document):
  146. parent_elm = parent._document_part.body._body
  147. elif isinstance(parent, _Cell):
  148. parent_elm = parent._tc
  149. elif isinstance(parent, _Row):
  150. parent_elm = parent._tr
  151. else:
  152. raise ValueError("something's not right")
  153. for child in parent_elm.iterchildren():
  154. if isinstance(child, CT_P):
  155. yield Paragraph(child, parent)
  156. if isinstance(child, CT_Tbl):
  157. yield Table(child, parent)
  158. def find_table_text_in_standard(doc): # not header or footer
  159. table_text = []
  160. for table in doc.tables:
  161. for row in table.rows:
  162. for cell in row.cells:
  163. for paragraph in cell.paragraphs:
  164. table_text += [paragraph.text]
  165. return table_text
  166. def create_custome_doc_file():
  167. '''
  168. :return: Bill as Doc file
  169. '''
  170. doc = DOCFileBuilder()
  171. doc.add_heading('EnD-GbR', 0, 2)
  172. return doc.document
  173. if __name__ == '__main__':
  174. builder_obj = DOCFileBuilder(path)
  175. builder_obj.replace_dates_string_in_standard_text('03.11.2022')
  176. builder_obj.replace_dates_string_in_all_tables('03.11.2022')
  177. keys = ['KW', 'Leistung', 'Zeit in Stunden', 'Betrag in Euro']
  178. example_tuple = [(123123, 213123, 12312312, 445235)]
  179. key_table = builder_obj.search_table_with_key_columns(keys)
  180. builder_obj.replace_rows(key_table, example_tuple)
  181. builder_obj.delete_empty_rows_in_table(key_table, 4)
  182. builder_obj.document.add_page_break()
  183. key_table = builder_obj.search_table_with_key_columns(['KW', 'Leistungsübersicht'])
  184. builder_obj.delete_empty_rows_in_table(key_table, 2)
  185. doc = builder_obj.document