123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
- import pandas as pd
def add_zeros_zipcodes(list):
    '''
    Normalise zipcode strings to five characters by left-padding with zeros.

    A float artefact such as ``'1234.0'`` is cut at the first ``'.0'`` before
    the length is checked, so ``'1234.0'`` becomes ``'01234'`` and ``'501.0'``
    becomes ``'00501'``.

    The sequence is modified in place and the very same object is returned.

    :param list: mutable sequence of zipcode strings (the name shadows the
        builtin; it is kept so keyword callers keep working)
    :return: the sequence that was passed in, with every entry normalised
    :raises AssertionError: if an entry still has more than 5 characters
        after the ``'.0'`` cut
    '''
    for position, raw_zipcode in enumerate(list):
        # partition() hands back the whole string when '.0' does not occur.
        zipcode = raw_zipcode.partition('.0')[0]
        if len(zipcode) > 5:
            # Raised explicitly: an ``assert`` statement is removed under
            # ``python -O``; the exception type is kept for existing callers.
            raise AssertionError(
                "zipcode {} contains more than 5 digits".format(raw_zipcode))
        # rjust() prepends exactly the missing number of '0' characters.
        list[position] = zipcode.rjust(5, '0')
    return list
class Read_Table:
    '''
    Thin pandas-based reader for a tabular file (xlsx, ods or csv).

    The mid- and high-level helpers are only implemented for ``xlsx``; they
    raise ``NotImplementedError`` for ``ods`` and ``csv``.
    '''

    def __init__(self, path, axes=None, encoding='utf-8', seperation=';', type='xlsx', name=''):
        '''
        :param path: location of the file, handed to pandas unchanged
        :param axes: stored on the instance; a fresh list is used when omitted
        :param encoding: text encoding used by the csv reader
        :param seperation: column delimiter used by the csv reader
        :param type: Currently Available csv, xlsx and ods
        :param name: stored on the instance
        '''
        # A literal ``[]`` default would be one list shared by all instances.
        self.axes = [] if axes is None else axes
        self.path = path
        self.encoding = encoding
        self.seperation = seperation
        self.type = type
        self.name = name
        if type == 'xlsx':
            self.engine = 'openpyxl'
        elif type == 'ods':
            self.engine = 'odf'
        else:
            # Always define the attribute; None lets pandas pick an engine.
            self.engine = None

    # low Level
    def read_table_excel_or_ods(self):
        '''Load the file with ``pandas.read_excel`` and the configured engine.'''
        return pd.read_excel(self.path, engine=self.engine)

    def read_table_SQL(self):
        '''Placeholder: reading from SQL is not implemented.'''
        raise NotImplementedError

    def read_table_csv_(self):
        '''Load the file with ``pandas.read_csv`` using the stored separator and encoding.'''
        return pd.read_csv(self.path, sep=self.seperation, encoding=self.encoding)

    # mid Level
    def get_column_titles(self):
        '''
        Return the column labels of an xlsx table.

        :return: the ``columns.values`` array of the loaded table, or ``None``
            for a ``type`` other than xlsx, ods and csv
        :raises NotImplementedError: for ``ods`` and ``csv``
        '''
        if self.type == 'xlsx':
            return self.read_table_excel_or_ods().columns.values
        if self.type == 'ods' or self.type == 'csv':
            raise NotImplementedError
        return None

    def get_values_from_columns(self, column_titles: list):
        '''
        Collect the non-missing values of the requested columns.

        :param column_titles: labels of the columns to extract
        :return: one list per requested column, in the requested order, with
            missing cells removed via ``dropna()``; an empty list when no
            titles are given or ``type`` is not xlsx, ods or csv
        :raises NotImplementedError: for ``ods`` and ``csv`` when at least one
            title is requested
        '''
        titles = list(column_titles)
        if not titles:
            return []
        if self.type == 'ods' or self.type == 'csv':
            raise NotImplementedError
        if self.type != 'xlsx':
            return []
        # Read the workbook once instead of once per requested column.
        table = self.read_table_excel_or_ods()
        return [list(table[title].dropna()) for title in titles]

    # high Level
    def table_to_dict(self):
        '''
        Map every column label of an xlsx table to its non-missing values.

        :return: ``{column label: list of values}``, or ``None`` for a
            ``type`` other than xlsx, ods and csv
        :raises NotImplementedError: for ``ods`` and ``csv``
        '''
        if self.type == 'xlsx':
            column_titles = self.get_column_titles()
            values = self.get_values_from_columns(column_titles)
            return dict(zip(column_titles, values))
        if self.type == 'ods' or self.type == 'csv':
            raise NotImplementedError
        return None
class Read_unordered_Table(Read_Table):
    '''
    This Class is for Tables which do not have a column to value Struct
    '''

    def __init__(self, path, axes=None, encoding='utf-8', seperation=';', type='xlsx', name=''):
        '''
        Forward all arguments to ``Read_Table.__init__``.

        :param axes: a fresh list is created when omitted
        '''
        # Create the list here: a literal ``[]`` default would be a single
        # object handed to the parent for every instance built without axes.
        if axes is None:
            axes = []
        super().__init__(path, axes=axes, encoding=encoding,
                         seperation=seperation, type=type, name=name)

    # High Level
    def get_values_after_key_as_dict(self, list_of_keys):
        '''
        Find each key among the table's cells and collect what follows it.

        Every column list returned by ``table_to_dict()`` is scanned; when a
        cell equals a key, the remaining entries of that column list (those
        after the matching cell) are stored under the matching cell value.
        If a key occurs more than once, the last occurrence wins.

        :param list_of_keys: cell values to look for
        :return: ``{matched cell value: list of following values}``; keys
            that are never found are absent
        '''
        columns = list(self.table_to_dict().values())
        found = {}
        for wanted in list_of_keys:
            for column in columns:
                for position, cell in enumerate(column):
                    if cell == wanted:
                        found[cell] = column[position + 1:]
        return found
|