Введение в Pandas и Numpy: основные учебные пособия, часть 6

Pandas

Pandas — один из пакетов Python, ориентированных на работу с данными, который значительно упрощает их импорт и анализ. Pandas построен на основе NumPy и Matplotlib, что делает обработку и визуализацию данных более удобными.

Импорт Pandas

Прежде чем импортировать Pandas, его необходимо установить. Руководство по установке pandas можно найти здесь: https://pandas.pydata.org/pandas-docs/stable/install.html

import pandas as pd

Загрузка данных

# --- From a .csv file ---
rev = pd.read_csv("ifg.csv")

# --- From a dictionary ---
# Keys become column names and values become the column contents.
# (Sample values; the name `data` avoids shadowing the built-in `dict`.)
data = {"make": ["TOWER", "FORD"], "color": ["RED", "BLUE"]}
rev = pd.DataFrame(data)

# --- From a database ---
from pandas.io import sql
import sqlite3

# The path must be a string; point it at your own SQLite database file.
conm = sqlite3.connect("/Users/gjreda/Dropbox/gregreda.co")
que = "select * from towed where make='TOWER'"
res = sql.read_sql(que, con=conm)
conm.close()  # the query result is already loaded into `res`
res.head()

Команды head и tail

rev.head()  # first rows of `rev` (pandas returns 5 when no count is given)
rev.tail()  # last rows of `rev` (pandas returns 5 when no count is given)

Метод head возвращает первые N строк фрейма данных, а метод tail — последние N строк (по умолчанию N = 5).

Dataframe

DataFrame — это двумерная размеченная структура данных, столбцы которой могут иметь разные типы.

# Schematic only: y, x1_1, x2_1, ... are placeholders for your own values.
# Each inner list supplies one row of the frame.
df = pd.DataFrame([[y, x1_1, x2_1, ...], [y, x1_2, x2_2, ...], ... ])
# Assign column names; the list needs one name per column.
df.columns = ['class', 'x1', 'x2', ...]

Размер фрейма данных (число строк и столбцов): df.shape

Сводная статистика по фрейму данных: df.describe()

Типы данных столбцов: df.dtypes

Перечислить уникальные значения в столбце: df['columnName'].unique()

Numpy

NumPy — это основная библиотека для научных вычислений на Python, которая предоставляет высокопроизводительный объект многомерного массива и инструменты для работы с такими массивами.

Импорт

import numpy as np

a = np.array([1, 2, 3])   # Create a rank 1 array
print(type(a))            # Prints "<class 'numpy.ndarray'>"
print(a.shape)            # Prints "(3,)"
print(a[0], a[1], a[2])   # Prints "1 2 3"
a[0] = 5                  # Change an element of the array
print(a)                  # Prints "[5 2 3]"

b = np.array([[1,2,3],[4,5,6]])    # Create a rank 2 array
print(b.shape)                     # Prints "(2, 3)"
print(b[0, 0], b[0, 1], b[1, 0])   # Prints "1 2 4"

Создание массивов

import numpy as np

# Output is shown as printed by NumPy 1.14+ (older releases pad floats, e.g. "[[ 0.  0.]").
a = np.zeros((2,2))   # Create an array of all zeros
print(a)              # Prints "[[0. 0.]
                      #          [0. 0.]]"

b = np.ones((1,2))    # Create an array of all ones
print(b)              # Prints "[[1. 1.]]"

c = np.full((2,2), 7)  # Create a constant array (integer fill value gives an integer array)
print(c)               # Prints "[[7 7]
                       #          [7 7]]"

d = np.eye(2)         # Create a 2x2 identity matrix
print(d)              # Prints "[[1. 0.]
                      #          [0. 1.]]"

Индексирование массива

import numpy as np

# Slicing example: take a sub-block of `a` and show that it shares data with `a`.
# Create the following rank 2 array with shape (3, 4)
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])

# Use slicing to pull out the subarray consisting of the first 2 rows
# and columns 1 and 2; b is the following array of shape (2, 2):
# [[2 3]
#  [6 7]]
b = a[:2, 1:3]

# A slice of an array is a view into the same data, so modifying it
# will modify the original array.
print(a[0, 1])   # Prints "2"
b[0, 0] = 77     # b[0, 0] is the same piece of data as a[0, 1]
print(a[0, 1])   # Prints "77"
# Integer indexing vs. slicing: `a` is rebuilt below, so this example is self-contained.
import numpy as np

# Create the following rank 2 array with shape (3, 4)
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])

# Two ways of accessing the data in the middle row of the array.
# Mixing integer indexing with slices yields an array of lower rank,
# while using only slices yields an array of the same rank as the
# original array:
row_r1 = a[1, :]    # Rank 1 view of the second row of a
row_r2 = a[1:2, :]  # Rank 2 view of the second row of a
print(row_r1, row_r1.shape)  # Prints "[5 6 7 8] (4,)"
print(row_r2, row_r2.shape)  # Prints "[[5 6 7 8]] (1, 4)"

# We can make the same distinction when accessing columns of an array:
col_r1 = a[:, 1]    # Rank 1 view of the second column of a
col_r2 = a[:, 1:2]  # Rank 2 view of the second column of a
print(col_r1, col_r1.shape)  # Prints "[ 2  6 10] (3,)"
print(col_r2, col_r2.shape)  # Prints "[[ 2]
                             #          [ 6]
                             #          [10]] (3, 1)"

Типы данных

import numpy as np

x = np.array([1, 2])   # Let numpy choose the datatype
print(x.dtype)         # Prints "int64" (platform default integer; "int32" on Windows with NumPy < 2)

x = np.array([1.0, 2.0])   # Let numpy choose the datatype
print(x.dtype)             # Prints "float64"

x = np.array([1, 2], dtype=np.int64)   # Force a particular datatype
print(x.dtype)                         # Prints "int64"

Вы можете найти меня в Twitter, связаться со мной в Linkedin здесь

Подпишитесь на нашу рассылку, чтобы получать еженедельные тщательно отобранные статьи о глубоком обучении и компьютерном зрении

Помогите нам работать в позднюю ночную смену, купив нам чашку кофе

Введение в Pandas и Numpy: основные учебные пособия, часть 6

Pandas

Numpy

Вопросы по теме