Добавил:
darkwarius13@gmail.com Рад если помог :). Можешь на почту спасибо сказать Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:

лаб 1 / lb1

.py
Скачиваний:
7
Добавлен:
27.06.2021
Размер:
2.93 Кб
Скачать
import numpy as np
import math
import copy as copy
import pandas as pd
import matplotlib.pyplot as plt

def isNa(value):
    """Return True if *value* is a floating-point NaN, otherwise False.

    Only ``float`` instances (including subclasses such as ``numpy.float64``)
    can be NaN, so every other type -- ints, bools, strings, ``None`` --
    yields False without being passed to ``math.isnan``. This also avoids the
    ``OverflowError`` that ``math.isnan`` raises for ints too large to convert
    to a float.
    """
    return isinstance(value, float) and math.isnan(value)

def dropNaRowByPercent(df, maxNaPercents):
    """Drop every column whose percentage of NaN values exceeds a threshold.

    Despite the "Row" in its name, this function removes *columns*: for each
    column it counts the values for which ``isNa`` is true, converts that
    count to a percentage of the number of rows, and drops the column when
    the percentage is strictly greater than ``maxNaPercents``.

    Args:
        df: The pandas DataFrame to inspect. It is not modified.
        maxNaPercents: Largest tolerated NaN share per column, in percent
            (0-100).

    Returns:
        ``df`` itself when nothing has to be dropped, otherwise a new
        DataFrame without the offending columns.
    """
    rowCount = len(df)
    if rowCount == 0:
        # With no rows there is no NaN share to measure, and computing one
        # would divide by zero.
        return df

    columnsToDrop = []
    # DataFrame.items() is used because iteritems() was removed in pandas 2.0.
    for columnName, column in df.items():
        naCount = sum(1 for value in column if isNa(value))
        percentNa = (naCount / rowCount) * 100
        if percentNa > maxNaPercents:
            print('deleting', columnName)
            columnsToDrop.append(columnName)

    if not columnsToDrop:
        return df
    # Drop once at the end instead of copying the frame for every column.
    return df.drop(columns=columnsToDrop)

def generateValue(naPosX, naPosY, rowCount):
    """Estimate a replacement for one missing cell by inverse-distance weighting.

    The incomplete row is compared with every row of the module-level
    ``df_dropNa`` (the rows without any NaN). For each such donor row the
    distance is the sum of absolute differences over the incomplete row's
    known numeric attributes, divided by ``rowCount``. Each donor then
    contributes its value in column ``naPosY`` with a weight proportional to
    the inverse of its distance; the weights sum to 1.

    Reads the module-level ``df_res`` and ``df_dropNa``.

    Args:
        naPosX: Integer position of the incomplete row in ``df_res`` (it is
            used with ``iloc``).
            NOTE(review): the caller passes the index label from
            ``iterrows()``; that equals the position only for a default
            0..n-1 index -- confirm.
        naPosY: Label of the column whose value is missing.
        rowCount: Divisor applied to every distance.

    Returns:
        The weighted estimate. If one or more donor rows are at distance 0,
        the mean of their ``naPosY`` values is returned, since their inverse
        distance is undefined. Returns 0 when ``df_dropNa`` has no rows.
    """
    # The incomplete row does not change between donors, so look it up once.
    # Series.items() is used because iteritems() was removed in pandas 2.0.
    knownAttributes = [
        (column, value)
        for column, value in df_res.iloc[naPosX].items()
        if isinstance(value, (int, float)) and not isNa(value)
    ]

    distances = [
        sum(abs(donorRow[column] - value) for column, value in knownAttributes) / rowCount
        for _, donorRow in df_dropNa.iterrows()
    ]
    donorValues = list(df_dropNa[naPosY])

    # A zero distance would make 1/distance blow up (ZeroDivisionError or a
    # NaN result), so donors that match exactly decide the value themselves.
    exactMatches = [
        donorValue
        for distance, donorValue in zip(distances, donorValues)
        if distance == 0
    ]
    if exactMatches:
        return sum(exactMatches) / len(exactMatches)

    inverseDistances = [1 / distance for distance in distances]
    inverseDistancesSum = sum(inverseDistances)

    # Normalised inverse distances act as the weight of each donor row.
    return sum(
        donorValue * (inverseDistance / inverseDistancesSum)
        for donorValue, inverseDistance in zip(donorValues, inverseDistances)
    )


# Load the data set; the context manager closes the workbook once it is read.
with pd.ExcelFile('Dataset.xlsx') as file:
    # header=1: column names are taken from the second row of the sheet.
    df = pd.read_excel(file, sheet_name='Лист1', header=1)

# Remove columns with more than 20 % NaN, then keep three views of the result:
# df_res     - the frame that still contains NaN cells,
# df_dropNa  - only the complete rows (donors for generateValue),
# resultData - a copy that receives the generated values.
df_res = dropNaRowByPercent(df, 20)
df_dropNa = df_res.dropna()
resultData = df_res.copy()

print('DATA FRAME DESCRIBE -----------------')
df_desc = df.describe()

print('DATA FRAME FILLNA(0) -----------------')
df_1 = df.fillna(0)

# fillna('backfill') would write the literal string 'backfill' into every NaN
# cell; the fill *methods* are applied through bfill()/ffill() instead.
# 'backfill' and 'bfill' name the same method, as do 'pad' and 'ffill'.
print('DATA FRAME FILLNA(backfill) -----------------')
df_2 = df.bfill()

print('DATA FRAME FILLNA(bfill) -----------------')
df_3 = df.bfill()

print('DATA FRAME FILLNA(pad) -----------------')
df_4 = df.ffill()

print('DATA FRAME FILLNA(ffill) -----------------')
df_5 = df.ffill()

print('DATA FRAME dropna() -----------------')
df_6 = df.dropna()

# Replace every remaining NaN cell with a generated value. df_res is only
# read here; the writes go to resultData.
distanceDivisor = len(df_dropNa.index) + 1
for x, row in df_res.iterrows():
    # Series.items() is used because iteritems() was removed in pandas 2.0.
    for y, value in row.items():
        if isNa(value):
            resultData.loc[x, y] = generateValue(x, y, distanceDivisor)

df_describe_resultData = resultData.describe()
Соседние файлы в папке лаб 1