import os
import numpy as np
import pandas as pd
from frovedis.exrpc.server import FrovedisServer
from frovedis.dataframe.df import FrovedisDataframe


                              FrovedisServer.initialize("mpirun -np 2 {}".format(os.environ['FROVEDIS_SERVER']))

'[ID: 1] FrovedisServer (Hostname: handson02, Port: 42383) has been initialized with 2 MPI processes.'


                              peopleDF = {
            'Ename' : ['Michael', 'Andy', 'Tanaka', 'Raul', 'Yuta'], 
            'Age' : [29, 30, 27, 19, 31],
            'Country' : ['USA', 'England', 'Japan', 'France', 'Japan']
           }


                              countryDF = {
             'Ccode' : [1, 2, 3, 4],
             'Country' : ['USA', 'England', 'Japan', 'France']
            }


                              pdf1 = pd.DataFrame(peopleDF)
pdf2 = pd.DataFrame(countryDF)
fdf1 = FrovedisDataframe(pdf1)
fdf2 = FrovedisDataframe(pdf2)


                              # display created frovedis dataframes
print ("* print Frovedis DataFrame")
print(fdf1.to_pandas_dataframe())
print(fdf2.to_pandas_dataframe())

* print Frovedis DataFrame
         Ename  Age  Country
index                       
0      Michael   29      USA
1         Andy   30  England
2       Tanaka   27    Japan
3         Raul   19   France
4         Yuta   31    Japan
       Ccode  Country
index                
0          1      USA
1          2  England
2          3    Japan
3          4   France


                              # select demo
print ("* select Ename and Age")
print(fdf1[["Ename","Age"]].to_pandas_dataframe())

* select Ename and Age
     Ename  Age
0  Michael   29
1     Andy   30
2   Tanaka   27
3     Raul   19
4     Yuta   31


                              # filter demo
print ("* filter by Age > 19 and Contry == 'Japan'")
print(fdf1[fdf1.Age > 19 and fdf1.Country == 'Japan'].to_pandas_dataframe())

* filter by Age > 19 and Contry == 'Japan'
    Ename  Age Country
0  Tanaka   27   Japan
1    Yuta   31   Japan


                              # sort demo 
print ("* sort by Age (descending order)")
print(fdf1.sort("Age",ascending=False).to_pandas_dataframe()) # single column, descending

* sort by Age (descending order)
     Ename  Age  Country
0     Yuta   31    Japan
1     Andy   30  England
2  Michael   29      USA
3   Tanaka   27    Japan
4     Raul   19   France


                              print ("* sort by Country and Age")
print(fdf1.sort(["Country", "Age"]).to_pandas_dataframe())    # multiple column

* sort by Country and Age
     Ename  Age  Country
0     Andy   30  England
1     Raul   19   France
2   Tanaka   27    Japan
3     Yuta   31    Japan
4  Michael   29      USA


                              # groupby demo
print ("* groupby Country and max/min/mean of Age and count of Ename")
out = fdf1.groupby('Country')
out = out.agg({'Age': ['max','min','mean']})
out2 = out[["Country","max_Age","min_Age","mean_Age"]]
print(out2.to_pandas_dataframe())

* groupby Country and max/min/mean of Age and count of Ename
   Country  max_Age  min_Age  mean_Age
0  England       30       30      30.0
1    Japan       31       27      29.0
2   France       19       19      19.0
3      USA       29       29      29.0


                              # renaming demo
print ("* rename Contry to Cname")
fdf3 = fdf2.rename({'Country' : 'Cname'})
print(fdf3.to_pandas_dataframe())

* rename Contry to Cname
       Ccode    Cname
index                
0          1      USA
1          2  England
2          3    Japan
3          4   France


                              # join after column renaming
print ("* merge (join) two tables")
out = fdf1.merge(fdf3, left_on="Country", right_on="Cname") # with defaults
print(out.to_pandas_dataframe())

* merge (join) two tables
         Ename  Age  Country  Ccode    Cname
index                                       
0      Michael   29      USA      1      USA
1         Andy   30  England      2  England
2       Tanaka   27    Japan      3    Japan
3         Raul   19   France      4   France
4         Yuta   31    Japan      3    Japan


                              # operation chaining: join -> sort -> select -> show
print ("* chaining: merge two tables, sort by Age, and select Age, Ename and Country")
out = fdf1.merge(fdf3, left_on="Country", right_on="Cname") \
    .sort("Age")[["Age", "Ename", "Country"]]
print(out.to_pandas_dataframe())

* chaining: merge two tables, sort by Age, and select Age, Ename and Country
   Age    Ename  Country
0   19     Raul   France
1   27   Tanaka    Japan
2   29  Michael      USA
3   30     Andy  England
4   31     Yuta    Japan


                              # column statistics
print ("describe: ")
print (fdf1.describe())
print ("\n")

describe: 
              Age
count    5.000000
mean    27.200000
std      4.816638
sum    136.000000
min     19.000000
max     31.000000


                              # merging with panda dataframe
print ("* merge with pandas table")
pdf2.rename(columns={'Country' : 'Cname'},inplace=True)
joined = fdf1.merge(pdf2, left_on="Country", right_on="Cname")
print(joined.to_pandas_dataframe())

* merge with pandas table
         Ename  Age  Country  Ccode    Cname
index                                       
0      Michael   29      USA      1      USA
1         Andy   30  England      2  England
2       Tanaka   27    Japan      3    Japan
3         Raul   19   France      4   France
4         Yuta   31    Japan      3    Japan


                              # conversion
print ("* convert Frovedis DataFrame to Pandas DataFrame")
print (fdf1.to_pandas_dataframe())
print ("\n")

* convert Frovedis DataFrame to Pandas DataFrame
         Ename  Age  Country
index                       
0      Michael   29      USA
1         Andy   30  England
2       Tanaka   27    Japan
3         Raul   19   France
4         Yuta   31    Japan


                              FrovedisServer.shut_down()


                              import pandas as pd
import numpy as np
from datetime import datetime
import time, os
import itertools
import matplotlib.pyplot as plt
from frovedis.exrpc.server import FrovedisServer
from frovedis.dataframe.df import FrovedisDataframe as fd


                              FrovedisServer.initialize("mpirun -np 2 {}".format(os.environ['FROVEDIS_SERVER']))

'[ID: 1] FrovedisServer (Hostname: handson02, Port: 39739) has been initialized with 2 MPI processes.'


                              manufacturers = pd.read_table('../data/country_vaccinations_by_manufacturer.csv', sep=',', engine='python').dropna()


                              manufacturers.head()


                              fd_manufacturers = fd(manufacturers)


                              location = fd_manufacturers['location']
location = location.to_pandas_dataframe().to_numpy()
x = set(list(itertools.chain.from_iterable(location)))
print(x)

{'Latvia', 'Germany', 'Iceland', 'Czechia', 'Chile', 'Lithuania', 'Romania', 'Italy', 'France', 'United States'}


                              fd_manufacturers.describe()


                              fd_US = fd_manufacturers[fd_manufacturers.location=='United States']
fd_Germany = fd_manufacturers[fd_manufacturers.location=='Germany']
fd_Chile = fd_manufacturers[fd_manufacturers.location=='Chile']
fd_Chile.to_pandas_dataframe().head()


                              fdUS_out = fd_US.groupby('vaccine').agg({'total_vaccinations': ['sum']})
fdGermany_out = fd_Germany.groupby('vaccine').agg({'total_vaccinations': ['sum']})
fdChile_out = fd_Chile.groupby('vaccine').agg({'total_vaccinations': ['sum']})


                              fdUS_out.sort("sum_total_vaccinations",ascending=False).to_pandas_dataframe().head()


                              fdGermany_out.sort("sum_total_vaccinations",ascending=False).to_pandas_dataframe().head()


                              fdChile_out.sort("sum_total_vaccinations",ascending=False).to_pandas_dataframe().head()


                              US_Pfizer = fd_US[fd_US.vaccine=='Pfizer/BioNTech']
US_Pfizer.to_pandas_dataframe().head()


                              Germany_Pfizer = fd_Germany[fd_Germany.vaccine=='Pfizer/BioNTech']
Germany_Pfizer.to_pandas_dataframe().head()


                              US_data = US_Pfizer.to_pandas_dataframe()
Germany_data = Germany_Pfizer.to_pandas_dataframe()
fig = plt.figure(figsize=(16,8))
ax1 = fig.add_subplot(2,2,1)
ax2 = fig.add_subplot(2,2,2)

x1 = range(0,len(US_data['date']),25)
ax1.plot(US_data['date'], US_data['total_vaccinations'])
ax1.set_xticks(x1)
ax1.set_title('United States: Pfizer', size=15)
ax1.set_xlabel('Date')
ax1.set_ylabel('Total vaccinations x100000000')

ax2.plot(Germany_data['date'], Germany_data['total_vaccinations'])
x2 = range(0,len(Germany_data['date']),25)
ax2.plot(Germany_data['date'], Germany_data['total_vaccinations'])
ax2.set_xticks(x2)
ax2.set_title('Germany: Pfizer', size=15)
ax2.set_xlabel('Date')
ax2.set_ylabel('Total vaccinations x100000000')

plt.show()

	vaccine	sum_total_vaccinations
0	Pfizer/BioNTech	6572931045
1	Moderna	5789377678
2	Johnson&Johnson	298233690

	vaccine	sum_total_vaccinations
0	Pfizer/BioNTech	1006759304
1	Oxford/AstraZeneca	227902088
2	Moderna	63056380
3	Johnson&Johnson	68781

Breadcrumb navigation

Operations with Frovedis DataFrame

Frovedis DataFrame operation example using sample data¶

Starting Frovedis server¶

Two types of sample data are prepared in dictionary format. Converting to Frovedis DataFrame via pandas DataFrame. Data is transferred to the Vector Engine's memory as it is converted from pandas to Frovedis DataFrame.¶

Showing Frovedis Data Frame.¶

Column is selected by specifying the column name.¶

Filtering by specifying "age" and "country name".¶

Sorting by "age".¶

Sorting by specifying multiple columns.¶

Grouping by "country name", and totaling the maximum, minimum, and average ages in each group.¶

Renaming the column.¶

Two DataFrames are integrated by specifying the column name.¶

Integration and sorting of two DataFrames. Displaying the selected result.¶

Displaying statistical information of data in DataFrame.¶

Creating a new Frovedis DataFrame "joined" by integrating pandas DataFrame and Frovedis DataFrame.¶

Converting Frovedis DataFrame to pandas DataFrame.¶

Frovedis DataFrame operation example using Covid-19 vaccine Data Set¶

Processing with the Kaggle COVID-19 World Vaccination Progress dataset¶

Starting Frovedis Server.¶

Loading Vaccine Data Set into pandas DataFrame.¶

Converting from pandas to Frovedis DataFrame. At this time, data is transferred to the memory of the vector engine.¶

Inoculating countries included in the Frovedis Data Frame are being extracted. After that, the extracted data is converted to NumPy, converted to a list, and displayed.¶

Displaying Frovedis DataFrame stats.¶

Getting Data by country from Frovedis DataFrame.¶

The total number of vaccinations is being calculated for each type of vaccine. Using Frovedis groupby and agg, the total is calculated and displayed for each vaccine. Here, the aggregated results of U.S., Germany, and Chile are displayed.¶

Extracting the number of vaccinations made by Pfizer for U.S. and Germany.¶

Converting time series data of Pfizer vaccine inoculation in U.S. and Germany to Pandas DataFrame. The graph of the converted data is shown below.¶

Related Links

	location	date	vaccine	total_vaccinations
0	Chile	2020-12-24	Pfizer/BioNTech	420
1	Chile	2020-12-25	Pfizer/BioNTech	5198
2	Chile	2020-12-26	Pfizer/BioNTech	8338
3	Chile	2020-12-27	Pfizer/BioNTech	8649
4	Chile	2020-12-28	Pfizer/BioNTech	8649

	total_vaccinations
count	3.491000e+03
mean	4.885988e+06
std	1.635967e+07
sum	1.705698e+10
min	0.000000e+00
max	1.341169e+08

	location	date	vaccine	total_vaccinations
0	United States	2021-01-12	Pfizer/BioNTech	5488697
1	United States	2021-01-13	Pfizer/BioNTech	6025872
2	United States	2021-01-15	Pfizer/BioNTech	7153268
3	United States	2021-01-19	Pfizer/BioNTech	8874811
4	United States	2021-01-20	Pfizer/BioNTech	9281063

	location	date	vaccine	total_vaccinations
0	Germany	2020-12-27	Pfizer/BioNTech	24473
1	Germany	2020-12-28	Pfizer/BioNTech	42813
2	Germany	2020-12-29	Pfizer/BioNTech	92363
3	Germany	2020-12-30	Pfizer/BioNTech	154903
4	Germany	2020-12-31	Pfizer/BioNTech	204951

	vaccine	sum_total_vaccinations
0	Sinovac	656309318
1	Pfizer/BioNTech	82842894