Добавил:
darkwarius13@gmail.com Рад если помог :). Можешь на почту спасибо сказать Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:

лаб 1 / index.py
Скачиваний:
7
Добавлен:
27.06.2021
Размер:
3.01 Кб
Скачать
import numpy as np
import math
import copy as copy
import pandas as pd
import matplotlib.pyplot as plt

# Load the worksheet; header=1 takes the column names from the second row.
# The context manager closes the workbook handle once the sheet has been read.
with pd.ExcelFile('Dataset.xlsx') as file:
    df = pd.read_excel(file, sheet_name='Лист1', header=1)

print('DATA FRAME ORIGIN -----------------')
#print(df)

print('DATA FRAME DESCRIBE -----------------')
#print(df.describe())

# Replace every missing cell with the constant 0.
print('DATA FRAME FILLNA(0) -----------------')
df_1 = df.fillna(0)
#print(df_1)


# NOTE: fillna('backfill') would insert the literal string 'backfill' into
# every missing cell, because the first positional argument of fillna is the
# fill *value*.  The directional fills are done with bfill()/ffill() instead.
# 'backfill' and 'bfill' name the same strategy: take the next valid value.
print('DATA FRAME FILLNA(backfill) -----------------')
df_2 = df.bfill()
#print(df_2)


print('DATA FRAME FILLNA(bfill) -----------------')
df_3 = df.bfill()
#print(df_3)


# 'pad' and 'ffill' name the same strategy: carry the last valid value forward.
print('DATA FRAME FILLNA(pad) -----------------')
df_4 = df.ffill()
#print(df_4)


print('DATA FRAME FILLNA(ffill) -----------------')
df_5 = df.ffill()
#print(df_5)

# Drop every row that contains at least one missing cell.
print('DATA FRAME dropna() -----------------')
df_6 = df.dropna()
#print(df_6)

def isNa(value):
    """Return True when *value* is a floating-point NaN, otherwise False.

    Only ``float`` instances (including subclasses such as ``numpy.float64``)
    can be NaN, so every other type -- ints, strings, ``None`` -- is reported
    as not missing without being passed to ``math.isnan``.  This avoids the
    ``OverflowError`` that ``math.isnan`` raises for ints too large to convert
    to a float.

    The function is a pure predicate: it does not print the inspected value.
    """
    return isinstance(value, float) and math.isnan(value)

def drop_na_20(df, max_percent=20, min_percent=0.1):
    """Return a copy of *df* without rows that have too many or too few gaps.

    A row is removed when the percentage of its missing cells is greater than
    ``max_percent`` or smaller than ``min_percent``.  With the defaults this
    means rows with more than 20 % missing cells are dropped, and so are rows
    with no missing cells at all (0 % is below 0.1 %).

    Missing cells are detected with ``DataFrame.isna``, so ``None`` and ``NaT``
    count as missing in addition to float NaN.

    Args:
        df: frame to filter; it is not modified.
        max_percent: upper bound (inclusive) of the accepted percentage.
        min_percent: lower bound (inclusive) of the accepted percentage.

    Returns:
        A new DataFrame holding the surviving rows in their original order.
        A frame without columns is returned unchanged (as a copy), because the
        percentage is undefined there.

    Side effects:
        Prints ``deleting <index label>`` for every removed row.
    """
    n_cols = df.shape[1]
    if n_cols == 0:
        return df.copy()

    # Multiply before dividing: (1 / 5) * 100 evaluates to 20.000000000000004,
    # which would wrongly exceed a 20 % limit, whereas 1 * 100 / 5 is exactly 20.
    na_percent = df.isna().sum(axis=1) * 100 / n_cols

    # Positional (numpy) mask so that duplicate index labels are handled
    # row by row instead of label by label.
    drop_mask = ((na_percent > max_percent) | (na_percent < min_percent)).values

    for row_label in df.index[drop_mask]:
        print('deleting', row_label)

    return df.loc[~drop_mask].copy()

# Rows that passed the missing-cell percentage filter; handed to fillNa later.
df_9 = drop_na_20(df)

# Columns that contain no missing cell.
df_all_rows = df.dropna(axis=1)

# Rows that contain no missing cell, extended with two zeroed helper columns.
df_all_cols = df.dropna()
df_all_cols.loc[0:, 'sum_d'] = 0
df_all_cols.loc[0:, 'countOfCols'] = 0


# Accumulate, for each row of df_with_na, the absolute differences between its
# usable cells and the values of the same column in df_all_cols, keeping the
# running total in a 'sum_d' column.  The frame is modified in place and
# nothing is returned.
# NOTE(review): despite the name, no missing value is written back here.
# NOTE(review): the indentation of this block is not present in this copy of
# the file, so the nesting described below (in particular which statement the
# final `else` belongs to) is an assumption -- verify against the real script.
def fillNa (df_with_na):
# Helper columns: 'sum_d' starts at 0; 'countOfCols' starts at the number of
# columns the frame has at that moment (which already includes 'sum_d').
df_with_na.loc[0: , 'sum_d'] = 0
df_with_na.loc[0: , 'countOfCols'] = len(df_with_na.columns)
# items() yields (column label, column values); the two helper columns added
# above are part of the frame and are therefore visited as well.
for label, content in df_with_na.items():
# This local is assigned but never read in this block.
countOfCols = len(df_with_na.columns);
for x in content.index:
item = df_with_na.loc[x, label];
# `d` is assigned but never read in this block.
d = 0;
# Condition below: the cell is present (not NaN) and is not a string.
# isna() is evaluated on the whole frame for every single cell.
if not df_with_na.isna().loc[x, label] and not isinstance(item, str):
# df_all_cols is not a parameter; it is looked up in the enclosing scope.
# The enumerate index `y` is not used.
for y, item_1 in enumerate(df_all_cols.loc[:, label]):
# Data columns: add the absolute difference to this row's running total.
if label != 'sum_d' and label != 'countOfCols':
#print(label, '-----------' ,df_with_na.loc[x, label])
df_with_na.loc[x, 'sum_d'] += abs(item_1 - item)
#print(label, '-----------' ,df_with_na.loc[x, label])
# When the visited column is 'sum_d' itself, its value is divided by
# (countOfCols - 1).
# NOTE(review): if this sits inside the loop over df_all_cols, the division is
# repeated once per reference value -- confirm that this is intended.
elif label == 'sum_d':
df_with_na.loc[x, label] = df_with_na.loc[x, label] / (df_with_na.loc[x, 'countOfCols'] - 1)
# Decrement the row's column counter.
# NOTE(review): presumably pairs with the NaN/string check above, i.e. cells
# that are missing or strings are excluded from the count -- TODO confirm.
else:
df_with_na.loc[x, 'countOfCols'] = df_with_na.loc[x, 'countOfCols'] - 1;





# Process the pre-filtered rows in place, then report that the script finished.
fillNa(df_9)
print('succes')
Соседние файлы в папке лаб 1