本文共 1260 字,大约阅读时间需要 4 分钟。
def get_type(df_obj, col):
    """Build an integer code for every distinct value in ``df_obj[col]``.

    Codes follow the order of ``df_obj[col].value_counts()``: the first value
    listed there gets 0, the next 1, and so on. With pandas' defaults that
    order is most-frequent first and missing values are not listed.

    Args:
        df_obj: Table indexable by column label whose columns provide
            ``value_counts()`` (used in this script as a pandas DataFrame).
        col: Label of the column to encode.

    Returns:
        A dict mapping each distinct value to its 0-based position in the
        ``value_counts()`` result.
    """
    counts = df_obj[col].value_counts()
    return {value: code for code, value in enumerate(counts.index)}


# Different mapping rules can be defined as needed.
# Values missing from a mapping become NaN after ``Series.map``.
df_obj['education_num'] = df_obj['education'].map({
    'illiterate': 0,
    'basic.4y': 1,
    'basic.6y': 2,
    'basic.9y': 3,
    'high.school': 4,
    'professional.course': 5,
    'unknown': 6,
    'university.degree': 7,
})
df_obj['month_num'] = df_obj['month'].map({
    'jan': 1,
    'feb': 2,
    'mar': 3,
    'apr': 4,
    'may': 5,
    'jun': 6,
    'jul': 7,
    'aug': 8,
    'sep': 9,
    'oct': 10,
    'nov': 11,
    'dec': 12,
})
df_obj['loan_num'] = df_obj['loan'].map({
    'no': 0,
    'unknown': 1,
    'yes': 2,
})

# Fill in the remaining variables.
# NOTE(review): yCounts is not referenced again in the visible script; it is
# kept in case later code relies on it.
yCounts = df_obj['marital'].value_counts().to_dict()
df_obj['marital_num'] = df_obj['marital'].map(get_type(df_obj, 'marital'))
df_obj['housing_num'] = df_obj['housing'].map(get_type(df_obj, 'housing'))
df_obj['contact_num'] = df_obj['contact'].map(get_type(df_obj, 'contact'))
df_obj['day_of_week_num'] = df_obj['day_of_week'].map(
    get_type(df_obj, 'day_of_week')
)
df_obj['poutcome_num'] = df_obj['poutcome'].map(get_type(df_obj, 'poutcome'))

# catCols below selects 'default_num', but nothing in the visible script
# creates it, so the final selection would raise KeyError. Derive it the same
# way as the other frequency-coded columns unless an earlier step already did.
# NOTE(review): assumes df_obj has a 'default' column -- confirm.
if 'default_num' not in df_obj.columns:
    df_obj['default_num'] = df_obj['default'].map(get_type(df_obj, 'default'))

# Define the processed columns.
# NOTE(review): 'contact_num' is computed above but not listed here; confirm
# whether that omission is intentional.
catCols = [
    'default_num',
    'loan_num',
    'marital_num',
    'housing_num',
    'day_of_week_num',
    'education_num',
    'month_num',
    'poutcome_num',
    'y',
]
df_obj[catCols].head()
转载地址:http://xgufk.baihongyu.com/