Добавил:
Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:

Лаб. 8 Python.docx
Скачиваний:
1
Добавлен:
31.08.2024
Размер:
259.27 Кб
Скачать

import pandas as pd
import numpy as np

# Survival tally per ticket class and sex, read from train.csv.
# Rows of `tally`: ticket class 1..3 (row index = Pclass - 1).
# Columns: 0 = men survived, 1 = men died, 2 = women survived, 3 = women died.
passengers = pd.read_csv('train.csv')
tally = np.zeros((3, 4))

for _, person in passengers.iterrows():
    # Any passenger whose Sex is not "male" is counted in the women's columns.
    sex_offset = 0 if person.Sex == "male" else 2
    fate_offset = 0 if person.Survived == 1 else 1
    tally[person.Pclass - 1][sex_offset + fate_offset] += 1

for class_number, (men_alive, men_dead, women_alive, women_dead) in enumerate(tally, start=1):
    print(f"Класс билета {class_number}:")
    print("Мужчины:")
    print(f"\tВыжило: {int(men_alive)}")
    print(f"\tУмерло: {int(men_dead)}")
    print("Женщины:")
    print(f"\tВыжило: {int(women_alive)}")
    print(f"\tУмерло: {int(women_dead)}")
    print("\n")

import pandas as pd

# Print summary statistics (DataFrame.describe) separately for male and
# female passengers of train.csv.
passengers = pd.read_csv('train.csv')
subset_by_sex = {
    sex: passengers[passengers.Sex == sex]
    for sex in ('male', 'female')
}

print("Мужчины:")
print(subset_by_sex['male'].describe())
print("\n")
print("Женщины:")
print(subset_by_sex['female'].describe())

import pandas as pd
import numpy as np

# Survival tally per embarkation port, read from train.csv.
# Rows of `outcome` follow the order of `port_codes`;
# column 0 = survived, column 1 = died.
passengers = pd.read_csv('train.csv')
port_codes = ["C", "Q", "S"]
outcome = np.zeros((3, 2))

for _, person in passengers.iterrows():
    if person.Embarked not in port_codes:
        # Missing or unlisted port: the passenger is not counted at all.
        continue
    fate_column = 0 if person.Survived == 1 else 1
    outcome[port_codes.index(person.Embarked)][fate_column] += 1

for code, (alive, dead) in zip(port_codes, outcome):
    boarded = alive + dead
    survival_share = round(float((alive * 100) / boarded), 2)
    print(f"Порт {code}:")
    print(f"\tВыжило: {int(alive)}")
    print(f"\tУмерло: {int(dead)}")
    print(f"\tПроцент выживших: {survival_share}%")
    print("\n")

import pandas as pd
import collections as cl


def split_passenger_name(full_name):
    """Split a passenger name into ``(surname, first_name)``.

    Names are expected in the form ``"Surname, Title. Given names"``.
    The surname is everything before the first comma; the first name is the
    first word after the title's dot, with surrounding dots, quotes and
    parentheses removed. Either part is an empty string when the name does
    not contain it, so malformed names never raise.
    """
    surname, _, remainder = full_name.partition(",")
    _title, _, given_names = remainder.partition(".")
    words = given_names.split()
    # Only the first given name is kept: counting the whole string would treat
    # "William Henry" and "William John" as two unrelated names.
    first_name = words[0].strip('."()') if words else ""
    return surname.strip(), first_name


def most_common_names(full_names, limit=10):
    """Return ``(top_first_names, top_surnames)`` for an iterable of names.

    Each element is a list of ``(value, count)`` pairs as produced by
    ``Counter.most_common(limit)``; the lists are shorter than ``limit`` when
    there are fewer distinct values. Empty name parts are not counted.
    """
    first_names = cl.Counter()
    surnames = cl.Counter()
    for full_name in full_names:
        surname, first_name = split_passenger_name(full_name)
        if surname:
            surnames[surname] += 1
        if first_name:
            first_names[first_name] += 1
    return first_names.most_common(limit), surnames.most_common(limit)


def main():
    """Print the ten most frequent first names and surnames in train.csv."""
    data = pd.read_csv('train.csv')
    # Rows with a missing Name cannot be split, so they are skipped.
    top_first_names, top_surnames = most_common_names(data["Name"].dropna())

    print("Топ 10 популярных имён:")
    # Iterate over what was actually found instead of a fixed range(10).
    for value, count in top_first_names:
        print(f"\t{value}: {count}")

    print("\nТоп 10 популярных фамилий:")
    for value, count in top_surnames:
        print(f"\t{value}: {count}")


if __name__ == "__main__":
    main()

import pandas as pd


def fill_numeric_gaps_with_median(frame):
    """Replace NaN in integer/float columns of ``frame`` with the column median.

    The frame is modified in place. Non-numeric columns are left untouched.

    Returns:
        list: names of the columns that contained NaN and were filled, in the
        frame's column order.
    """
    has_gap = frame.isnull().any()
    filled = []
    for column in frame.columns:
        # dtype.kind: 'i' signed int, 'u' unsigned int, 'f' float.
        if has_gap[column] and frame[column].dtype.kind in "iuf":
            # Assign the result back instead of fillna(inplace=True) on a
            # column selection: that chained form does not update the frame
            # when pandas Copy-on-Write is active.
            frame[column] = frame[column].fillna(frame[column].median())
            filled.append(column)
    return filled


def main():
    """Report NaN columns of train.csv before and after the median fill."""
    data = pd.read_csv('train.csv')

    print(data.isnull().any())
    print("\n")

    print(fill_numeric_gaps_with_median(data))
    print("\n")

    print(data.isnull().any())


if __name__ == "__main__":
    main()

import csv  # only needed by the disabled CSV export described at the bottom
import pandas as pd
import numpy as np

# Embarkation ports that take part in the estimate.
EMBARKED_PORTS = ("C", "Q", "S")


def pclass_group(passenger):
    """Group a passenger by ticket class (the raw Pclass value)."""
    return passenger.Pclass


def embarked_group(passenger):
    """Group a passenger by port; None when the port is missing or unlisted."""
    port = passenger.Embarked
    return port if port in EMBARKED_PORTS else None


def age_group(passenger):
    """Group a passenger by age band: 0 = (0, 20], 1 = fallback, 2 = (40, 60], 3 = 60+.

    Band 1 covers (20, 40] and, as a fallback, every age that matches no other
    band (missing or non-positive ages), because NaN fails every comparison.
    """
    age = passenger.Age
    if 0 < age <= 20:
        return 0
    if 40 < age <= 60:
        return 2
    if age > 60:
        return 3
    return 1


# One grouping function per feature that contributes to the estimate.
FEATURE_GROUPERS = (pclass_group, embarked_group, age_group)


def survival_rates(train, group_of):
    """Return the survival percentage for every (is_male, group) pair in ``train``.

    Args:
        train: DataFrame with ``Sex`` and ``Survived`` columns plus whatever
            column ``group_of`` reads.
        group_of: function mapping a row to a hashable group, or None to skip
            the row.

    Returns:
        dict mapping ``(is_male, group)`` to the percentage of survivors,
        rounded to two decimals. Any Sex other than "male" counts as female.
    """
    tallies = {}  # (is_male, group) -> [survived, died]
    for _, passenger in train.iterrows():
        group = group_of(passenger)
        if group is None:
            continue
        tally = tallies.setdefault((passenger.Sex == "male", group), [0, 0])
        tally[0 if passenger.Survived == 1 else 1] += 1
    # A pair only exists once it has been counted, so the total is never zero.
    return {
        key: round(survived * 100 / (survived + died), 2)
        for key, (survived, died) in tallies.items()
    }


def predict_survival(train, test):
    """Estimate a survival chance (percent) for each passenger in ``test``.

    The estimate is the mean of three survival rates measured on ``train`` for
    passengers of the same sex: same ticket class, same embarkation port and
    same age band. Each age band uses its own rate; in particular men older
    than 60 are scored with the 60+ rate.

    A passenger whose port is missing gets no port contribution, but the sum
    is still divided by the number of features. A group never seen in
    ``train`` contributes NaN.

    Returns:
        dict mapping ``str(PassengerId)`` to the chance rounded to two decimals.
    """
    tables = [(group_of, survival_rates(train, group_of)) for group_of in FEATURE_GROUPERS]
    predictions = {}
    for _, passenger in test.iterrows():
        is_male = passenger.Sex == "male"
        total = 0.0
        for group_of, rates in tables:
            group = group_of(passenger)
            if group is not None:
                total += rates.get((is_male, group), np.nan)
        predictions[str(passenger.PassengerId)] = round(total / len(tables), 2)
    return predictions


def main():
    """Print the estimated survival chance of every passenger in test.csv."""
    train = pd.read_csv('train.csv')
    test = pd.read_csv('test.csv')
    for passenger_id, chance in predict_survival(train, test).items():
        print(passenger_id + ": " + str(chance) + "%")

    # The original script carried a disabled export step: it turned each score
    # into 1 (score >= 50) or 0 and wrote PassengerId/Survived rows to
    # "titanic.csv" with csv.DictWriter using ";" as the delimiter.
    # It stays disabled here.


if __name__ == "__main__":
    main()

import numpy as np
import pandas as pd

# Inclusive upper bounds of the first seven age bins; anything above the last
# bound goes to the open-ended "70+" bin.
AGE_UPPER_BOUNDS = (10, 20, 30, 40, 50, 60, 70)
AGE_LABELS = ["0-10", "11-20", "21-30", "31-40", "41-50", "51-60", "61-70", "70+"]


def count_by_age_group(data):
    """Count survivors and casualties per age bin.

    Bins are (0, 10], (10, 20], ..., (60, 70], (70, inf), so every positive
    age belongs to exactly one bin. Rows whose Age is missing or not positive
    are left out instead of being added to the "21-30" bin.

    Args:
        data: DataFrame with ``Age`` and ``Survived`` columns.

    Returns:
        Array of shape (2, 8): row 0 holds survivors, row 1 holds the dead,
        columns follow ``AGE_LABELS``.
    """
    ages = data["Age"].to_numpy(dtype=float)
    survived = data["Survived"].to_numpy() == 1

    known = ages > 0  # NaN compares False, so missing ages drop out here
    # side="left" makes each upper bound inclusive: age 10 -> bin 0, 10.5 -> bin 1.
    bins = np.searchsorted(AGE_UPPER_BOUNDS, ages[known], side="left")
    survived = survived[known]

    counts = np.zeros((2, len(AGE_LABELS)))
    counts[0] = np.bincount(bins[survived], minlength=len(AGE_LABELS))
    counts[1] = np.bincount(bins[~survived], minlength=len(AGE_LABELS))
    return counts


def plot_age_histogram(counts):
    """Show a grouped bar chart of deaths (red) and survivors (green) per age bin."""
    # Imported here so the counting helper can be used without matplotlib.
    import matplotlib.pyplot as plt

    index = np.arange(len(AGE_LABELS))
    bw = 0.3  # bar width; the two bars of a bin sit side by side

    plt.bar(index, counts[1], bw, color='r')
    plt.bar(index + bw, counts[0], bw, color='g')
    plt.xticks(index + bw / 2, AGE_LABELS)
    plt.yticks(np.arange(0, 310, 10))

    plt.legend(["Умерло", "Выжило"], loc='upper right')
    plt.title("Гистограмма зависимости возраста от выживаемости")
    plt.xlabel("Возраст")
    plt.ylabel("Количество человек")
    plt.grid(axis="y")
    plt.show()


def main():
    """Plot the age/survival histogram for train.csv."""
    data = pd.read_csv('train.csv')
    plot_age_histogram(count_by_age_group(data))


if __name__ == "__main__":
    main()

Соседние файлы в предмете Программирование на Python