import pandas as pd


def add_zeros_zipcodes(list):
    """Normalise zip code strings to five characters, in place.

    Every entry is first cut at the first occurrence of ``'.0'`` (the
    artefact left behind when a numeric cell was read as a float and then
    converted to a string) and afterwards left-padded with ``'0'`` up to a
    length of five.

    :param list: mutable sequence of zip code strings; it is modified in
        place. The parameter name shadows the builtin and is kept only so
        that existing keyword callers keep working.
    :return: the same sequence object that was passed in.
    :raises AssertionError: if an entry is still longer than five characters
        after the ``'.0'`` suffix has been removed. Entries processed before
        the offending one have already been rewritten at that point.
    """
    zipcodes = list  # local alias so the body does not read like the builtin
    for position, zipcode in enumerate(zipcodes):
        # Strip the float artefact regardless of the length, so '501.0'
        # becomes '00501' instead of being passed through untouched.
        if '.0' in zipcode:
            zipcode = zipcode[:zipcode.index('.0')]
        if len(zipcode) > 5:
            # Raised explicitly (instead of `assert False`) so the check
            # survives `python -O`; the exception type is unchanged.
            raise AssertionError(
                "zipcode {} contains more than 5 digits".format(zipcode)
            )
        zipcodes[position] = zipcode.rjust(5, '0')
    return zipcodes


class Read_Table(object):
    """Thin wrapper around pandas for reading a table file.

    Only ``type='xlsx'`` is supported by the mid and high level helpers;
    for ``'ods'`` and ``'csv'`` they raise :class:`NotImplementedError`.
    """

    # pandas engine used by `pd.read_excel` for each spreadsheet type.
    _ENGINES = {'xlsx': 'openpyxl', 'ods': 'odf'}

    def __init__(self, path, axes=None, encoding='utf-8', seperation=';',
                 type='xlsx', name=''):
        """
        :param path: location of the table, handed to pandas unchanged.
        :param axes: stored on the instance as ``self.axes``; a fresh empty
            list is used when omitted so instances never share a default.
        :param encoding: text encoding used by :meth:`read_table_csv_`.
        :param seperation: column separator used by :meth:`read_table_csv_`.
        :param type: Currently Available csv, xlsx and ods
        :param name: stored on the instance as ``self.name``.
        """
        self.axes = [] if axes is None else axes
        self.path = path
        self.encoding = encoding
        self.seperation = seperation
        self.type = type
        self.name = name
        # None for types without a spreadsheet engine (e.g. csv), so the
        # attribute always exists.
        self.engine = self._ENGINES.get(type)

    # low Level
    def read_table_excel_or_ods(self):
        """Read ``self.path`` with ``pd.read_excel`` using ``self.engine``."""
        return pd.read_excel(self.path, engine=self.engine)

    def read_table_SQL(self):
        """Not implemented."""
        raise NotImplementedError

    def read_table_csv_(self):
        """Read ``self.path`` with ``pd.read_csv`` using the configured
        separator and encoding."""
        return pd.read_csv(self.path, sep=self.seperation,
                           encoding=self.encoding)

    # mid Level
    def get_column_titles(self):
        """Return the column labels of the table.

        :return: the ``columns.values`` array of the frame for ``'xlsx'``;
            ``None`` for a type that is none of xlsx, ods or csv.
        :raises NotImplementedError: for ``'ods'`` and ``'csv'``.
        """
        if self.type == 'xlsx':
            return self.read_table_excel_or_ods().columns.values
        if self.type in ('ods', 'csv'):
            raise NotImplementedError
        return None

    def get_values_from_columns(self, column_titles: list):
        """Return the non-missing values of the requested columns.

        :param column_titles: iterable of column labels to extract.
        :return: one list per requested column, in the requested order, with
            missing values dropped. An empty list when no titles are given
            or when the type is none of xlsx, ods or csv.
        :raises NotImplementedError: for ``'ods'`` and ``'csv'`` when at
            least one title is requested.
        """
        titles = list(column_titles)
        if not titles:
            return []
        if self.type == 'xlsx':
            # Read the file once instead of once per requested column.
            table = self.read_table_excel_or_ods()
            return [list(table[title].dropna()) for title in titles]
        if self.type in ('ods', 'csv'):
            raise NotImplementedError
        return []

    # high Level
    def table_to_dict(self):
        """Return the table as ``{column title: [non-missing values]}``.

        :return: the mapping for ``'xlsx'``; ``None`` for a type that is
            none of xlsx, ods or csv.
        :raises NotImplementedError: for ``'ods'`` and ``'csv'``.
        """
        if self.type == 'xlsx':
            column_titles = self.get_column_titles()
            values = self.get_values_from_columns(column_titles)
            return dict(zip(column_titles, values))
        if self.type in ('ods', 'csv'):
            raise NotImplementedError
        return None


class Read_unordered_Table(Read_Table):
    """
    This Class is for Tables which do not have a column to value Struct
    """

    def __init__(self, path, axes=None, encoding='utf-8', seperation=';',
                 type='xlsx', name=''):
        """Accept the same arguments as :class:`Read_Table` and forward them
        unchanged."""
        super().__init__(path, axes=axes, encoding=encoding,
                         seperation=seperation, type=type, name=name)

    # High Level
    def get_values_after_key_as_dict(self, list_of_keys):
        """Collect, for every key, the cells that follow it in its column.

        Each column list returned by :meth:`table_to_dict` is scanned for
        cells equal to a key; the remainder of that column after the match
        is stored under the key.

        :param list_of_keys: iterable of cell values to look for.
        :return: dict mapping each key that was found to the list of cells
            after it. When a key matches more than once, the last match
            (in column order, then row order) wins. Keys that never occur
            are absent from the result.
        """
        columns = self.table_to_dict()
        found = {}
        for wanted in list_of_keys:
            for cells in columns.values():
                for index, cell in enumerate(cells):
                    if cell == wanted:
                        found[cell] = cells[index + 1:]
        return found