# docx_builder.py (9.7 KB)
  1. # from docx.api import Document
  2. from docx.oxml.table import CT_Tbl
  3. from docx.oxml.text.paragraph import CT_P
  4. from docx.table import _Cell, Table, _Row
  5. from docx.text.paragraph import Paragraph
  6. from docx import Document
  7. from docx.shared import Inches
  8. from docx.oxml import OxmlElement
  9. from docx.oxml.ns import qn
  10. import time
  11. import re
  12. path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder\Templates\templeta.docx'
  13. doc_output_path = r'C:\Users\Danny\Desktop\ssd wichtige dinge D\Tools\fast_excel_to_bill\test_folder' + r'\doc_test_{}.docx'.format(
  14. time.strftime("%Y-%m-%d_H%H-M%M"))
  15. class DOCFileBuilder:
  16. def __init__(self, docx_input_path='', docx_save_path=''):
  17. if docx_input_path:
  18. self.document = Document(docx_input_path)
  19. else:
  20. self.document = Document()
  21. self.docx_save_path = docx_save_path
  22. # Low Level Functions
  23. def save_docx(self):
  24. if self.docx_save_path == '': # Default
  25. self.document.save()
  26. else:
  27. self.document.save(self.docx_save_path)
  28. def get_header_footer(self):
  29. self.header_ = self.document.sections[0].header
  30. self.footer_ = self.document.sections[0].footer
  31. def replace_text_in_paragraph(self, paragraph, search_str, replace_str):
  32. inline = paragraph.runs
  33. for i in range(len(inline)):
  34. if search_str in inline[i].text:
  35. text = inline[i].text.replace(search_str, replace_str)
  36. inline[i].text = text
  37. def paragraphs_in_table(self):
  38. for t in self.document.tables:
  39. for row in t.rows:
  40. for cell in row.cells:
  41. for p in cell.paragraphs:
  42. yield p
  43. def replace_string_in_standard_text(self, search_str, replace_str):
  44. for p in self.document.paragraphs:
  45. self.replace_text_in_paragraph(p, search_str, replace_str)
  46. def replace_string_in_all_tables(self, search_str, replace_str):
  47. for p in self.paragraphs_in_table():
  48. self.replace_text_in_paragraph(p, search_str, replace_str)
  49. def replace_dates_string_in_standard_text(self, replace_date_str):
  50. for p in self.document.paragraphs:
  51. inline = p.runs
  52. for i in range(len(inline)):
  53. try:
  54. found_date = re.search(r'\d\d\.\d\d\.\d+', inline[i].text).group()
  55. text = inline[i].text.replace(found_date, replace_date_str)
  56. inline[i].text = text
  57. except AttributeError:
  58. pass
  59. def delete_empty_rows_in_table(self, table, column_count):
  60. for row in table.rows:
  61. column_counter = 0
  62. for cell in row.cells:
  63. for p in cell.paragraphs:
  64. inline = p.runs
  65. for i in range(len(inline)):
  66. if inline[i].text == '':
  67. column_counter += 1
  68. if column_counter == column_count:
  69. remove_row(table, row)
  70. def replace_dates_string_in_all_tables(self, replace_date_str):
  71. for p in self.paragraphs_in_table():
  72. inline = p.runs
  73. for i in range(len(inline)):
  74. try:
  75. found_date = re.search(r'\d\d\.\d\d\.\d+', inline[i].text).group()
  76. text = inline[i].text.replace(found_date, replace_date_str)
  77. inline[i].text = text
  78. except AttributeError:
  79. pass
  80. def add_heading(self, text, level, alignment=0):
  81. self.title = self.document.add_heading(text, level)
  82. self.title.alignment = alignment
  83. def search_table_with_key_columns(self, keys):
  84. for t in self.document.tables:
  85. len_key_list = 0
  86. for row in t.rows:
  87. for cell in row.cells:
  88. for p in cell.paragraphs:
  89. inline = p.runs
  90. for i in range(len(inline)):
  91. for key in keys:
  92. try:
  93. re.search(key, inline[i].text).group()
  94. len_key_list += 1
  95. if len_key_list == len(keys):
  96. return t
  97. except AttributeError:
  98. pass
  99. def replace_rows(self, table, list_tuple):
  100. # each tuple builds a row, tuple must be the same size as cells in row
  101. # replacing till list of tuple is iterate through
  102. # then deleting the rest
  103. len_insert_rows = 0
  104. for tuple in list_tuple:
  105. for row in table.rows:
  106. count_columns = 0
  107. for count, elem in enumerate(tuple):
  108. for p in row.cells[count].paragraphs:
  109. inline = p.runs
  110. for i in range(len(inline)):
  111. if inline[i].text == '':
  112. if isinstance(elem, list):
  113. for list_idx,list_elem in enumerate(elem):
  114. if list_idx == 0:
  115. inline[i].text = str(elem[0])
  116. else:
  117. p.style = 'List'
  118. r = p.add_run()
  119. r.add_break()
  120. r.add_text(str(list_elem))
  121. count_columns += 1
  122. else:
  123. inline[i].text = str(elem)
  124. count_columns += 1
  125. if count_columns == len(tuple):
  126. len_insert_rows += 1
  127. break
  128. if len_insert_rows == len(list_tuple):
  129. return
  130. # Custom Format functions
  131. def replace_Leistungszeitraum(self, start_date, end_date):
  132. for p in self.paragraphs_in_table():
  133. inline = p.runs
  134. for i in range(len(inline)):
  135. try:
  136. re.search(r'Leistungszeitraum', inline[i].text).group()
  137. text = inline[i].text.replace(inline[i].text,
  138. 'Leistungszeitraum: {} bis zum {}'.format(start_date, end_date))
  139. inline[i].text = text
  140. except AttributeError:
  141. pass
  142. def replace_Rechnungsnummer(self, month, year):
  143. for p in self.paragraphs_in_table():
  144. inline = p.runs
  145. for i in range(len(inline)):
  146. try:
  147. re.search(r'Rechnung Nr', inline[i].text).group()
  148. lines = inline[i].text.split('\n')
  149. updated_lines = []
  150. for line in lines:
  151. if r'Rechnung Nr' in line:
  152. updated_lines.append('Rechnung Nr.: {}-{}'.format(year, month))
  153. else:
  154. updated_lines.append(line)
  155. text = '\n'.join(updated_lines)
  156. inline[i].text = text
  157. except AttributeError:
  158. pass
  159. # High Tier functions
  160. def replace_all_dates_custom_wise(self, start_date, end_date):
  161. self.replace_dates_string_in_standard_text(end_date)
  162. self.replace_dates_string_in_all_tables(end_date)
  163. self.replace_Leistungszeitraum(start_date, end_date)
  164. def replace_custom_placeholder_in_doc(self, placeholder, replace_str):
  165. for p in self.document.paragraphs:
  166. self.replace_text_in_paragraph(p, placeholder, replace_str)
  167. for p in self.paragraphs_in_table():
  168. self.replace_text_in_paragraph(p, placeholder, replace_str)
  169. def remove_row(table, row):
  170. tbl = table._tbl
  171. tr = row._tr
  172. tbl.remove(tr)
  173. def iter_block_items(parent):
  174. """
  175. Yield each paragraph and table child within *parent*, in document order.
  176. Each returned value is an instance of either Table or Paragraph.
  177. """
  178. if isinstance(parent, Document):
  179. parent_elm = parent._document_part.body._body
  180. elif isinstance(parent, _Cell):
  181. parent_elm = parent._tc
  182. elif isinstance(parent, _Row):
  183. parent_elm = parent._tr
  184. else:
  185. raise ValueError("something's not right")
  186. for child in parent_elm.iterchildren():
  187. if isinstance(child, CT_P):
  188. yield Paragraph(child, parent)
  189. if isinstance(child, CT_Tbl):
  190. yield Table(child, parent)
  191. def find_table_text_in_standard(doc): # not header or footer
  192. table_text = []
  193. for table in doc.tables:
  194. for row in table.rows:
  195. for cell in row.cells:
  196. for paragraph in cell.paragraphs:
  197. table_text += [paragraph.text]
  198. return table_text
  199. def create_custome_doc_file():
  200. '''
  201. :return: Bill as Doc file
  202. '''
  203. doc = DOCFileBuilder()
  204. doc.add_heading('EnD-GbR', 0, 2)
  205. return doc.document
  206. if __name__ == '__main__':
  207. builder_obj = DOCFileBuilder(path)
  208. builder_obj.replace_dates_string_in_standard_text('03.11.2022')
  209. builder_obj.replace_dates_string_in_all_tables('03.11.2022')
  210. keys = ['KW', 'Leistung', 'Zeit in Stunden', 'Betrag in Euro']
  211. example_tuple = [(123123, 213123, 12312312, 445235)]
  212. key_table = builder_obj.search_table_with_key_columns(keys)
  213. builder_obj.replace_rows(key_table, example_tuple)
  214. builder_obj.delete_empty_rows_in_table(key_table, 4)
  215. builder_obj.document.add_page_break()
  216. key_table = builder_obj.search_table_with_key_columns(['KW', 'Leistungsübersicht'])
  217. builder_obj.delete_empty_rows_in_table(key_table, 2)
  218. doc = builder_obj.document