import numpy as np
import pandas as pd
# Column names, then one record per person (the last record has no english value).
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Amy', 'Cooze', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]

# Turn the records into a table and display it.
df = pd.DataFrame(data, columns=labels)
print(df)
import numpy as np
import pandas as pd
# Column names, then one record per person (the last record has no english value).
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Amy', 'Cooze', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]

# A populated table and a table with no rows and no columns.
df = pd.DataFrame(data, columns=labels)
B = pd.DataFrame()

# Show the table followed by its basic properties, one blank line apart:
# emptiness of both tables, number of dimensions, number of cells,
# (rows, columns) shape and the dtype of every column.
print(df, df.empty, B.empty, df.ndim, df.size, df.shape, df.dtypes, sep="\n\n")
import numpy as np
import pandas as pd
# Column names, then one record per person (the last record has no english value).
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Amy', 'Cooze', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]
df = pd.DataFrame(data, columns=labels)

# Dealing with rows: four slices printed one blank line apart --
# [:2], [:-2], [2:4] and [-4:-2].
row_slices = (slice(None, 2), slice(None, -2), slice(2, 4), slice(-4, -2))
print(*(df[rows] for rows in row_slices), sep="\n\n")
import numpy as np
import pandas as pd
# Column names, then one record per person (the last record has no english value).
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Amy', 'Cooze', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]
df = pd.DataFrame(data, columns=labels)

# Dealing with columns: the whole 'last_name' column, then the slices
# [2:], [-2:], [2:4] and [-4:-2] of it, one blank line apart.
last_names = df['last_name']
print(
    last_names,
    last_names[2:],
    last_names[-2:],
    last_names[2:4],
    last_names[-4:-2],
    sep="\n\n",
)
import numpy as np
import pandas as pd
# Column names, then one record per person (the last record has no english value).
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Amy', 'Cooze', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]
df = pd.DataFrame(data, columns=labels)

# Dealing with items: two single cells of the 'age' column (labels 0 and 3),
# then the column's minimum, maximum, mean and sum, one blank line apart.
ages = df['age']
print(ages[0], ages[3], ages.min(), ages.max(), ages.mean(), ages.sum(), sep="\n\n")
import numpy as np
import pandas as pd
# Column names, records, and the table built from them.
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Amy', 'Cooze', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]
df = pd.DataFrame(data, columns=labels)

# Tables: the transpose swaps rows and columns.
B = df.T
# Printed one blank line apart: the table as an array, its first two rows,
# its last two rows, and the transposed table.
print(df.values, df.head(2), df.tail(2), B, sep="\n\n")
import numpy as np
import pandas as pd
# data and labels and pandas dataframes
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Amy', 'Cooze', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]
data2 = [
    ('koko', 'kaka', 27, 90, 15),
    ('bobo', 'baba', 25, 14, 97),
]
df = pd.DataFrame(data, columns=labels)
df2 = pd.DataFrame(data2, columns=labels)

# Tables Concatenation
print(df)
print()
print(df2)
# DataFrame.append was removed in pandas 2.0.  pd.concat stacks the rows of
# both tables in the same way and, like append did, keeps each table's own
# index labels (so labels 0 and 1 appear twice in the result).
df = pd.concat([df, df2])
print()
print(df)
import numpy as np
import pandas as pd
# Column names, records, and the table built from them.
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Amy', 'Cooze', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]
df = pd.DataFrame(data, columns=labels)

# Columns: one column as an array, followed by a blank line.
print(df['last_name'].values, end="\n\n")

# Columns: add a 'score' column holding the average of 'math' and 'english',
# show the table, then remove the column again and show the table once more.
df['score'] = df['math'].add(df['english']).div(2.0)
print(df, end="\n\n")
del df['score']  # same effect as df.pop('score') when the popped column is not used
print(df)
import numpy as np
import pandas as pd
# Column names, records (two people share the first name 'Simo'), and the table.
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Simo', 'Koko', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]
df = pd.DataFrame(data, columns=labels)

# Columns: sort by a single column, then by first name with last name as the
# tie-breaker; show the table and a separator after each sort.
for sort_keys in ('math', ['first_name', 'last_name']):
    df = df.sort_values(by=sort_keys)
    print(df)
    print("--------------")
import numpy as np
import pandas as pd
# Column names, records, and the table built from them.
labels = ['first_name', 'last_name', 'age', 'math', 'english']
data = [
    ('Simo', 'Sahli', 42, 4, 25),
    ('Amy', 'Cooze', 52, 24, 94),
    ('Ali', 'Baba', 36, 31, 57),
    ('Jiarong', 'Bao', 24, 70, 62),
    ('Jacob', 'Joseph', 73, 3, np.nan),
]
df = pd.DataFrame(data, columns=labels)

# Sorting by index: row labels descending, row labels ascending (axis=0),
# then column labels ascending (axis=1); table and separator after each step.
for options in ({'ascending': False}, {'axis': 0}, {'axis': 1}):
    df = df.sort_index(**options)
    print(df, "--------------", sep="\n")
import numpy as np
import pandas as pd
# data and labels and pandas dataframe
# .... (the construction of `df` is not repeated in this example)
# Rows, printed one blank line apart: the slice [-4:-2] as an array, the row
# at position 2, the row labelled 2, and that labelled row as an array.
print(df[-4:-2].values, df.iloc[2], df.loc[2], df.loc[2].values, sep="\n\n")
print()
# Remove the row labelled 2 and show what is left.
df = df.drop(2)
print(df)
import numpy as np
import pandas as pd
# data and labels and pandas dataframe
# .... (the construction of `df` is not repeated in this example)
# Loop for columns: `a` is the column label and `b` the column itself.
# DataFrame.iteritems() was removed in pandas 2.0; items() is its replacement.
for a, b in df.items():
    print(a)
    print()
    print(b)
# Loop for rows: `a` is the row's index label and `b` the row itself.
for a, b in df.iterrows():
    print(a)
    print()
    print(b)
import numpy as np
import pandas as pd
# Names to give the columns once the file is loaded.
labels = ['first_name', 'last_name', 'age', 'math', 'english']

# header=None: no line of the file is used as column names, so they are
# assigned from `labels` afterwards.
df = pd.read_csv("datas/test.csv", sep=',', header=None)
df.columns = labels
print(df)
import numpy as np
import pandas as pd
# header=0: the first line of the file supplies the column names.
df = pd.read_csv("datas/test.csv", sep=',', header=0)
print(df)
import numpy as np
import pandas as pd
# Load the table (first line = column names), drop the row labelled 2, and
# write the remainder to a new CSV file without the index column.
df = pd.read_csv("datas/test.csv", sep=',', header=0).drop(2)
df.to_csv("datas/test3.csv", encoding='utf-8', index=False)
def my_sum(a, b):
    """Return ``a + b``."""
    return a + b
# The same addition written as an anonymous function bound to a name.
s = lambda x, y: x + y
# Both callables are given the same arguments; results one blank line apart.
print(my_sum(5, 2), s(5, 2), sep="\n\n")
def myfunc(n):
    """Return a one-argument function that multiplies its argument by ``n``."""
    return lambda a: a * n
# Two multipliers produced by the same factory function.
mydoubler = myfunc(2)
mytripler = myfunc(3)
# Each multiplier applied to 5 and to 11; results one blank line apart.
print(mydoubler(5), mydoubler(11), mytripler(5), mytripler(11), sep="\n\n")
import numpy as np
import pandas as pd
# Space-separated file whose first line supplies the column names.
df = pd.read_csv("datas/ranks.csv", sep=' ', header=0)
print(df)
print("--------------------")
# Group the rows by one column and by a pair of columns.
a = df.groupby('Team')
b = df.groupby(['Team', 'Year'])
print(type(a.groups))
print(type(b.groups))
print("--------------------")
# One line per team: the group key, a tab, and the row labels in that group.
for x, y in a.groups.items():
    print(x, "\t", list(y))
import numpy as np
import pandas as pd
# Space-separated file whose first line supplies the column names.
df = pd.read_csv("datas/ranks.csv", sep=' ', header=0)
print(df, "--------------------", sep="\n")

# Rows of a single group when grouping by one column ...
a = df.groupby('Year')
print(a.get_group(2014), "--------------------", sep="\n")

# ... and when grouping by two columns (the key is then a tuple).
a = df.groupby(['Team', 'Year'])
print(a.get_group(('Riders', 2014)))
import numpy as np
import pandas as pd
# Space-separated file whose first line supplies the column names.
df = pd.read_csv("datas/ranks.csv", sep=' ', header=0)
print(df)
print("--------------------")
a = df.groupby('Year')
# Aggregations of 'Points' per year.  The reductions are named by string:
# recent pandas versions warn when np.mean / np.sum / np.std are passed to
# agg() and recommend the string names to keep the current results.
print(a['Points'].agg('mean'))
print()
print(a['Points'].agg('sum'))
print()
print(a['Points'].agg(np.size))
print()
# Several aggregations at once give one result column per aggregation.
print(a['Points'].agg(['sum', 'mean', 'std']))
import numpy as np
import pandas as pd
# Space-separated file whose first line supplies the column names.
df = pd.read_csv("datas/ranks.csv", sep=' ', header=0)
print(df, "--------------------", sep="\n")

# Keep only the rows whose team group has at least three rows.
a = df.groupby('Team')
f = lambda x: len(x) >= 3
print(a.filter(f))
import numpy as np
import pandas as pd
# Two semicolon-separated tables whose first line supplies the column names.
df1 = pd.read_csv("datas/A.csv", sep=';', header=0)
df2 = pd.read_csv("datas/B.csv", sep=';', header=0)
print(df1, "\n--------------------")
print(df2, "\n--------------------")

# Merge the two tables six ways -- on 'id', on 'id' and 'subject' together,
# then on 'subject' as a left, right, inner and outer join -- and show each
# result followed by a separator.
merge_variants = (
    {'on': 'id'},
    {'on': ['id', 'subject']},
    {'on': 'subject', 'how': 'left'},
    {'on': 'subject', 'how': 'right'},
    {'on': 'subject', 'how': 'inner'},
    {'on': 'subject', 'how': 'outer'},
)
for merge_options in merge_variants:
    df = pd.merge(df1, df2, **merge_options)
    print(df, "\n--------------------")
import numpy as np
import pandas as pd
# Two semicolon-separated tables whose first line supplies the column names.
df1 = pd.read_csv("datas/A.csv", sep=';', header=0)
df2 = pd.read_csv("datas/B.csv", sep=';', header=0)
print(df1, "\n--------------------")
print(df2, "\n--------------------")
# Stack the rows of both tables, keeping each table's own index labels.
df = pd.concat([df1, df2], axis=0)
print(df, "\n--------------------")
# Same, but renumber the rows of the result from 0.
df = pd.concat([df1, df2], axis=0, ignore_index=True)
print(df, "\n--------------------")
# Place the tables side by side instead (aligned on the index labels).
df = pd.concat([df1, df2], axis=1)
print(df, "\n--------------------")
# This step used df1.append(df2); DataFrame.append was removed in pandas 2.0
# and pd.concat with its default arguments gives the same stacked table.
df = pd.concat([df1, df2])
print(df, "\n--------------------")
# Read the file tips.csv and print it.
import pandas as pd
# Colon-separated file whose first line supplies the column names.
df = pd.read_csv("datas/tips.csv", sep=':', header=0)
print(df)
# Print only the first 3 rows in tips.csv.
import pandas as pd
# Colon-separated file whose first line supplies the column names.
df = pd.read_csv("datas/tips.csv", sep=':', header=0)
# The first three rows only.
print(df.head(3))
# Print only the first 3 rows in tips.csv and the columns bill, sex, size.
# Hint: you can use df[['bill','sex', 'size']]
import pandas as pd
# Colon-separated file whose first line supplies the column names.
df = pd.read_csv("datas/tips.csv", sep=':', header=0)
# The first three rows, restricted to three columns.
first_rows = df[:3]
print(first_rows[['bill', 'sex', 'size']])
# Print only the last 3 rows in tips.csv and the columns bill, sex, size.
import pandas as pd
# Colon-separated file whose first line supplies the column names.
df = pd.read_csv("datas/tips.csv", sep=':', header=0)
# The last three rows, restricted to three columns.
last_rows = df[-3:]
print(last_rows[['bill', 'sex', 'size']])
# Print only the rows whose time is 'Dinner'.
import pandas as pd
# Colon-separated file whose first line supplies the column names.
df = pd.read_csv("datas/tips.csv", sep=':', header=0)
# Group the rows by the 'time' column and show the 'Dinner' group.
a = df.groupby('time')
dinner_rows = a.get_group('Dinner')
print(dinner_rows)
# Print only the rows whose bill is less than 20.
import pandas as pd
# Colon-separated file whose first line supplies the column names.
df = pd.read_csv("datas/tips.csv", sep=':', header=0)
# Boolean mask: True for the rows whose 'bill' is below 20.
small_bill = df["bill"] < 20
print(df[small_bill])
# Group by 'sex' and print the result.
import pandas as pd
# Colon-separated file whose first line supplies the column names.
df = pd.read_csv("datas/tips.csv", sep=':', header=0)
a = df.groupby('sex')
# One line per group: the group key, a tab, and the row labels in that group.
for x, y in a.groups.items():
    print(x, "\t", list(y))
# Draw a graph using pandas and OpenCV for the file shown on the right (read below as datas/temperature.csv).
import sys
import cv2
import numpy as np
sys.path.append("..")
import pandas as pd
from matplotlib import pyplot as plt
def horizontal_line(zone, x, y, color=(255, 255, 255)):
    """Draw a horizontal line of thickness 1 on ``zone`` from (0, y) to (x, y).

    ``x`` and ``y`` are truncated to integers with ``int()``; ``color`` is
    passed to ``cv2.line`` unchanged and defaults to (255, 255, 255).
    Returns whatever ``cv2.line`` returns.
    """
    return cv2.line(zone, (0, int(y)), (int(x), int(y)), color, 1)
def vertical_line(zone, x, y, color=(255, 255, 255)):
    """Draw a vertical line of thickness 1 on ``zone`` from (x, 0) to (x, y).

    ``x`` and ``y`` are truncated to integers with ``int()``; ``color`` is
    passed to ``cv2.line`` unchanged and defaults to (255, 255, 255).
    Returns whatever ``cv2.line`` returns.
    """
    return cv2.line(zone, (int(x), 0), (int(x), int(y)), color, 1)
# make the background: a 512 x 1024 canvas filled with zeros (black)
img = np.zeros((512, 1024, 3), np.uint8)
x_max = img.shape[1]
y_max = img.shape[0]
# two axes crossing at the centre of the canvas (default colour of the helpers)
img1 = vertical_line(img, x_max / 2, y_max)
img2 = horizontal_line(img1, x_max, y_max / 2)

# put the Anomaly and Year values, ordered by year, into arrays
df = pd.read_csv("datas/temperature.csv", sep=';', header=0)
df = df.sort_values(by='Year')
A = df['Anomaly'].values.astype(float)
B = df['Year'].values
n = A.shape[0]

# calculate the points coordinates: one point every 30 px starting at the
# vertical axis; an anomaly of 1 moves the point 100 px up from the
# horizontal axis (image rows grow downwards, hence the subtraction)
pts = np.array(
    [[x_max / 2 + i * 30, y_max / 2 - value * 100] for i, value in enumerate(A)],
    np.int32,
)
print(pts)

# write each year at its point's x position, on the horizontal axis
# (coordinates are converted to plain ints before being handed to OpenCV)
for year, point in zip(B, pts):
    img = cv2.putText(img2, str(int(year)), (int(point[0]), int(y_max / 2)),
                      cv2.FONT_HERSHEY_SIMPLEX,
                      0.3, (255, 255, 255), 1, cv2.LINE_AA)
# draw the polyline through all points (open, not closed)
img = cv2.polylines(img, [pts], False, (0, 255, 255))
# keep only the part right of the vertical axis, down to 20 px below the
# horizontal axis
img = img[:int(y_max / 2 + 20), int(x_max / 2):, :]
plt.imshow(img)
plt.title('my picture')
plt.show()
# Draw a graph using pandas and OpenCV for the file shown on the right (read below as datas/temperature.csv).
import sys
import cv2
import numpy as np
sys.path.append("..")
import pandas as pd
from matplotlib import pyplot as plt
def horizontal_line(zone, x, y, color=(255, 255, 255)):
    """Draw a horizontal line of thickness 1 on ``zone`` from (0, y) to (x, y).

    ``x`` and ``y`` are truncated to integers with ``int()``; ``color`` is
    passed to ``cv2.line`` unchanged and defaults to (255, 255, 255).
    Returns whatever ``cv2.line`` returns.
    """
    return cv2.line(zone, (0, int(y)), (int(x), int(y)), color, 1)
def vertical_line(zone, x, y, color=(255, 255, 255)):
    """Draw a vertical line of thickness 1 on ``zone`` from (x, 0) to (x, y).

    ``x`` and ``y`` are truncated to integers with ``int()``; ``color`` is
    passed to ``cv2.line`` unchanged and defaults to (255, 255, 255).
    Returns whatever ``cv2.line`` returns.
    """
    return cv2.line(zone, (int(x), 0), (int(x), int(y)), color, 1)
# make the background: a 512 x 800 canvas with every channel at 255 (white)
img = np.zeros((512, 800, 3), np.uint8) + 255
x_max = img.shape[1]
y_max = img.shape[0]
# two axes crossing at the centre of the canvas; drawn in black because the
# helpers' default colour (255, 255, 255) is invisible on this background
img1 = vertical_line(img, x_max / 2, y_max, color=(0, 0, 0))
img2 = horizontal_line(img1, x_max, y_max / 2, color=(0, 0, 0))

# put the Anomaly and Year values, ordered by year, into arrays
df = pd.read_csv("datas/temperature.csv", sep=';', header=0)
df = df.sort_values(by='Year')
A = df['Anomaly'].values.astype(float)
B = df['Year'].values
n = A.shape[0]

# calculate the points coordinates: each bar occupies a 30 px slot right of
# the vertical axis.  `pts` holds one corner (10 px into the slot, at the
# anomaly height: 100 px per unit, measured up from the horizontal axis) and
# `pts2` the opposite corner (end of the slot, on the horizontal axis).
pts = np.array(
    [[x_max / 2 + i * 30 + 10, y_max / 2 - value * 100] for i, value in enumerate(A)],
    np.int32,
)
pts2 = np.array(
    [[x_max / 2 + i * 30 + 30, y_max / 2] for i in range(n)],
    np.int32,
)

# write each year under its bar (coordinates are converted to plain ints
# before being handed to OpenCV)
for year, corner in zip(B, pts2):
    img = cv2.putText(img2, str(int(year)), (int(corner[0]) - 20, int(corner[1]) + 10),
                      cv2.FONT_HERSHEY_SIMPLEX,
                      0.3, (0, 0, 0), 1, cv2.LINE_AA)
# draw the rectangles (thickness -1 = filled)
for first_corner, second_corner in zip(pts, pts2):
    img = cv2.rectangle(img, tuple(map(int, first_corner)),
                        tuple(map(int, second_corner)), (255, 0, 0), -1)
# keep only the part right of the vertical axis, down to 20 px below the
# horizontal axis
img = img[:int(y_max / 2 + 20), int(x_max / 2):, :]
plt.imshow(img)
plt.title('my picture')
plt.show()
# Draw a graph using pandas and OpenCV for the file shown on the right (read below as datas/energy.csv).
import sys
import cv2
import numpy as np
sys.path.append("..")
import pandas as pd
import math
from matplotlib import pyplot as plt
def horizontal_line(zone, x, y, color=(255, 255, 255)):
    """Draw a horizontal line of thickness 1 on ``zone`` from (0, y) to (x, y).

    ``x`` and ``y`` are truncated to integers with ``int()``; ``color`` is
    passed to ``cv2.line`` unchanged and defaults to (255, 255, 255).
    Returns whatever ``cv2.line`` returns.
    """
    return cv2.line(zone, (0, int(y)), (int(x), int(y)), color, 1)
def vertical_line(zone, x, y, color=(255, 255, 255)):
    """Draw a vertical line of thickness 1 on ``zone`` from (x, 0) to (x, y).

    ``x`` and ``y`` are truncated to integers with ``int()``; ``color`` is
    passed to ``cv2.line`` unchanged and defaults to (255, 255, 255).
    Returns whatever ``cv2.line`` returns.
    """
    return cv2.line(zone, (int(x), 0), (int(x), int(y)), color, 1)
# make the background: a 512 x 512 canvas filled with zeros (black)
img = np.zeros((512, 512, 3), np.uint8)
x_max = img.shape[1]
y_max = img.shape[0]
img1 = vertical_line(img, x_max / 2, y_max)
img2 = horizontal_line(img1, x_max, y_max / 2)
D = np.array([])      # middle angle of each sector, in degrees
E = np.array([])      # x coordinate of each sector's marker point
F = np.array([])      # y coordinate of each sector's marker point
angle = np.array([])  # angular size of each sector, in whole degrees
start = np.array([])  # start angle of each sector, plus a final 360
# centre and radius of the pie
x = int(x_max / 2)
y = int(y_max / 2)
r = 200

# put the Resource, Percentages and Rate columns into arrays, ordered by resource
df = pd.read_csv("datas/energy.csv", sep=':', header=0)
df = df.sort_values(by='Resource')
A = df['Resource'].values
B = df['Percentages'].values
C = df['Rate'].values
n = B.shape[0]

# calculate the angles: each sector spans int(percentage * 360) degrees and
# starts where the previous sectors end
# NOTE(review): this presumes 'Percentages' holds fractions of 1 -- confirm
for i in range(n):
    angle = np.append(angle, int(B[i] * 360))
    start = np.append(start, angle[:i].sum())
# the last sector is closed at exactly 360 degrees
start = np.append(start, 360)
m = start.shape[0]

# draw the ellipses: one filled sector per resource, colour varying with i
for i in range(m - 1):
    end = start[i + 1]
    color = (255 - 30 * i, 255 - i * 20, i * 40)
    img = cv2.ellipse(img2, (x, y), (r, r), 0, start[i], end, color, -1)

# get angle to calculate the middle points to put text
for i in range(m - 1):
    D = np.append(D, start[i] + (int(start[i + 1] - start[i]) / 2))

# mark points: the point on the circle of radius r at each middle angle
for i in range(n):
    a = x + int(np.cos(D[i] / 180 * np.pi) * r)  # X
    b = y + int(np.sin(D[i] / 180 * np.pi) * r)  # Y
    E = np.append(E, a)
    F = np.append(F, b)
    img = cv2.circle(img, (a, b), 1, (0, 0, 0), -1)

# draw lines and put texts: a horizontal leader line from the marker point
# towards the nearer side of the canvas, the resource written 10 px above
# the line's height and the rate 15 px below it
for i in range(n):
    row = int(F[i])
    if 90 <= D[i] <= 270:
        # middle angle between 90 and 270 degrees: label towards the left
        line_end = text_x = int(x - r - 50)
    else:
        line_end = int(x + r + 50)
        text_x = int(x + r + 25)
    img = cv2.line(img, (int(E[i]), row), (line_end, row), (255, 255, 255), 1)
    # str(): putText needs text; a no-op when the value is already a string
    img = cv2.putText(img, str(A[i]), (text_x, int(F[i] - 10)),
                      cv2.FONT_HERSHEY_SIMPLEX,
                      0.5, (255, 255, 255), 1, cv2.LINE_AA)
    img = cv2.putText(img, str(C[i]), (text_x, int(F[i] + 15)),
                      cv2.FONT_HERSHEY_SIMPLEX,
                      0.5, (255, 255, 255), 1, cv2.LINE_AA)
plt.imshow(img)
plt.title('my picture')
plt.show()
# Draw a graph using pandas and OpenCV for the file shown on the right (read below as datas/energy.csv).
import sys
import cv2
import numpy as np
sys.path.append("..")
import pandas as pd
import math
from matplotlib import pyplot as plt
def horizontal_line(zone, x, y, color=(255, 255, 255)):
    """Draw a horizontal line of thickness 1 on ``zone`` from (0, y) to (x, y).

    ``x`` and ``y`` are truncated to integers with ``int()``; ``color`` is
    passed to ``cv2.line`` unchanged and defaults to (255, 255, 255).
    Returns whatever ``cv2.line`` returns.
    """
    return cv2.line(zone, (0, int(y)), (int(x), int(y)), color, 1)
def vertical_line(zone, x, y, color=(255, 255, 255)):
    """Draw a vertical line of thickness 1 on ``zone`` from (x, 0) to (x, y).

    ``x`` and ``y`` are truncated to integers with ``int()``; ``color`` is
    passed to ``cv2.line`` unchanged and defaults to (255, 255, 255).
    Returns whatever ``cv2.line`` returns.
    """
    return cv2.line(zone, (int(x), 0), (int(x), int(y)), color, 1)
# make the background: a 512 x 512 canvas filled with zeros (black)
img = np.zeros((512, 512, 3), np.uint8)
x_max = img.shape[1]
y_max = img.shape[0]
img1 = vertical_line(img, x_max / 2, y_max)
img2 = horizontal_line(img1, x_max, y_max / 2)
D = np.array([])      # middle angle of each sector, in degrees
E = np.array([])      # x coordinate of each sector's marker point
F = np.array([])      # y coordinate of each sector's marker point
angle = np.array([])  # angular size of each sector, in whole degrees
start = np.array([])  # start angle of each sector, plus a final 360
# centre and radius of the pie
x = int(x_max / 2)
y = int(y_max / 2)
r = 200

# put the Resource, Percentages and Rate columns into arrays, ordered by resource
df = pd.read_csv("datas/energy.csv", sep=':', header=0)
df = df.sort_values(by='Resource')
A = df['Resource'].values
B = df['Percentages'].values
C = df['Rate'].values
n = B.shape[0]

# calculate the angles: each sector spans int(percentage * 360) degrees and
# starts where the previous sectors end
# NOTE(review): this presumes 'Percentages' holds fractions of 1 -- confirm
for i in range(n):
    angle = np.append(angle, int(B[i] * 360))
    start = np.append(start, angle[:i].sum())
# the last sector is closed at exactly 360 degrees
start = np.append(start, 360)
m = start.shape[0]

# draw the ellipses: one filled sector per resource, colour varying with i
for i in range(m - 1):
    end = start[i + 1]
    color = (255 - 30 * i, 255 - i * 20, i * 40)
    img = cv2.ellipse(img2, (x, y), (r, r), 0, start[i], end, color, -1)

# create another circle in the middle: a filled black disc of radius 100
# that covers the centre of the pie
img = cv2.circle(img, (x, y), 100, (0, 0, 0), -1)

# get angle to calculate the middle points to put text
for i in range(m - 1):
    D = np.append(D, start[i] + (int(start[i + 1] - start[i]) / 2))

# mark points: the point on the circle of radius r at each middle angle
for i in range(n):
    a = x + int(np.cos(D[i] / 180 * math.pi) * r)  # X
    b = y + int(np.sin(D[i] / 180 * math.pi) * r)  # Y
    E = np.append(E, a)
    F = np.append(F, b)
    img = cv2.circle(img, (a, b), 1, (0, 0, 0), -1)

# draw lines and put texts: a horizontal leader line from the marker point
# towards the nearer side of the canvas, the resource written 10 px above
# the line's height and the rate 15 px below it
for i in range(n):
    row = int(F[i])
    if 90 <= D[i] <= 270:
        # middle angle between 90 and 270 degrees: label towards the left
        line_end = text_x = int(x - r - 50)
    else:
        line_end = int(x + r + 50)
        text_x = int(x + r + 25)
    img = cv2.line(img, (int(E[i]), row), (line_end, row), (255, 255, 255), 1)
    # str(): putText needs text; a no-op when the value is already a string
    img = cv2.putText(img, str(A[i]), (text_x, int(F[i] - 10)),
                      cv2.FONT_HERSHEY_SIMPLEX,
                      0.5, (255, 255, 255), 1, cv2.LINE_AA)
    img = cv2.putText(img, str(C[i]), (text_x, int(F[i] + 15)),
                      cv2.FONT_HERSHEY_SIMPLEX,
                      0.5, (255, 255, 255), 1, cv2.LINE_AA)
plt.imshow(img)
plt.title('my picture')
plt.show()