from IPython.display import IFrame
# CIA World Factbook page comparing countries by carbon dioxide emissions
ciaLink1 = "https://www.cia.gov/the-world-factbook/field/carbon-dioxide-emissions/country-comparison"
# Embed that page inside the notebook as a 900x900 frame
IFrame(src=ciaLink1, width=900, height=900)
# read web table into pandas DF
import pandas as pd
# Location of the CSV file with the downloaded carbon emissions table
linkToFile = 'https://github.com/CienciaDeDatosEspacial/code_and_data/raw/main/data/carbonEmi_downloaded.csv'
# Load the CSV into a DataFrame
carbon = pd.read_csv(filepath_or_buffer=linkToFile)
# Display the table
carbon
| name | slug | value | date_of_information | ranking | region | |
|---|---|---|---|---|---|---|
| 0 | China | china | 10,773,248,000.0 | 2019 est. | 1 | East and Southeast Asia |
| 1 | United States | united-states | 5,144,361,000.0 | 2019 est. | 2 | North America |
| 2 | India | india | 2,314,738,000.0 | 2019 est. | 3 | South Asia |
| 3 | Russia | russia | 1,848,070,000.0 | 2019 est. | 4 | Central Asia |
| 4 | Japan | japan | 1,103,234,000.0 | 2019 est. | 5 | East and Southeast Asia |
| ... | ... | ... | ... | ... | ... | ... |
| 213 | Antarctica | antarctica | 28,000.0 | 2019 est. | 214 | Antarctica |
| 214 | Saint Helena, Ascension, and Tristan da Cunha | saint-helena-ascension-and-tristan-da-cunha | 13,000.0 | 2019 est. | 215 | Africa |
| 215 | Niue | niue | 8,000.0 | 2019 est. | 216 | Australia and Oceania |
| 216 | Northern Mariana Islands | northern-mariana-islands | 0.0 | 2019 est. | 217 | Australia and Oceania |
| 217 | Tuvalu | tuvalu | 0.0 | 2019 est. | 218 | Australia and Oceania |
218 rows × 6 columns
Tip: use drop, loc, and iloc for the same purpose (three ways to accomplish the task).
# Goal: get rid of the slug and ranking columns.
# Work on a copy so the table as downloaded stays available in `carbon`.
byeColumns = ['slug', 'ranking']  # more than one column can be removed at once
carbon_new = carbon.copy()
# drop() hands back a new DataFrame without those columns; rebind the name to it
carbon_new = carbon_new.drop(columns=byeColumns)
# result
carbon_new
| name | value | date_of_information | region | |
|---|---|---|---|---|
| 0 | China | 10,773,248,000.0 | 2019 est. | East and Southeast Asia |
| 1 | United States | 5,144,361,000.0 | 2019 est. | North America |
| 2 | India | 2,314,738,000.0 | 2019 est. | South Asia |
| 3 | Russia | 1,848,070,000.0 | 2019 est. | Central Asia |
| 4 | Japan | 1,103,234,000.0 | 2019 est. | East and Southeast Asia |
| ... | ... | ... | ... | ... |
| 213 | Antarctica | 28,000.0 | 2019 est. | Antarctica |
| 214 | Saint Helena, Ascension, and Tristan da Cunha | 13,000.0 | 2019 est. | Africa |
| 215 | Niue | 8,000.0 | 2019 est. | Australia and Oceania |
| 216 | Northern Mariana Islands | 0.0 | 2019 est. | Australia and Oceania |
| 217 | Tuvalu | 0.0 | 2019 est. | Australia and Oceania |
218 rows × 4 columns
# Second way (.loc): keep every column whose label is NOT in the discard list.
# The selection starts from `carbon`, which still holds slug and ranking, so
# this cell really removes them instead of filtering a frame where they are
# already gone. .copy() makes carbon_new independent of `carbon`.
carbon_new = carbon.loc[:, ~carbon.columns.isin(['slug', 'ranking'])].copy()
carbon_new
| name | value | date_of_information | region | |
|---|---|---|---|---|
| 0 | China | 10,773,248,000.0 | 2019 est. | East and Southeast Asia |
| 1 | United States | 5,144,361,000.0 | 2019 est. | North America |
| 2 | India | 2,314,738,000.0 | 2019 est. | South Asia |
| 3 | Russia | 1,848,070,000.0 | 2019 est. | Central Asia |
| 4 | Japan | 1,103,234,000.0 | 2019 est. | East and Southeast Asia |
| ... | ... | ... | ... | ... |
| 213 | Antarctica | 28,000.0 | 2019 est. | Antarctica |
| 214 | Saint Helena, Ascension, and Tristan da Cunha | 13,000.0 | 2019 est. | Africa |
| 215 | Niue | 8,000.0 | 2019 est. | Australia and Oceania |
| 216 | Northern Mariana Islands | 0.0 | 2019 est. | Australia and Oceania |
| 217 | Tuvalu | 0.0 | 2019 est. | Australia and Oceania |
218 rows × 4 columns
# Third way (.iloc): select columns by position using a list comprehension.
# Positions 1 (slug) and 4 (ranking) are positions in the ORIGINAL table
# `carbon`. Applying them to the already reduced carbon_new would remove
# position 1 of that frame, which is `value`, so the selection is done on
# `carbon` itself.
dropPositions = [1, 4]
keepPositions = [j for j in range(carbon.shape[1]) if j not in dropPositions]
carbon_new = carbon.iloc[:, keepPositions].copy()
carbon_new
| name | value | date_of_information | region | |
|---|---|---|---|---|
| 0 | China | 10,773,248,000.0 | 2019 est. | East and Southeast Asia |
| 1 | United States | 5,144,361,000.0 | 2019 est. | North America |
| 2 | India | 2,314,738,000.0 | 2019 est. | South Asia |
| 3 | Russia | 1,848,070,000.0 | 2019 est. | Central Asia |
| 4 | Japan | 1,103,234,000.0 | 2019 est. | East and Southeast Asia |
| ... | ... | ... | ... | ... |
| 213 | Antarctica | 28,000.0 | 2019 est. | Antarctica |
| 214 | Saint Helena, Ascension, and Tristan da Cunha | 13,000.0 | 2019 est. | Africa |
| 215 | Niue | 8,000.0 | 2019 est. | Australia and Oceania |
| 216 | Northern Mariana Islands | 0.0 | 2019 est. | Australia and Oceania |
| 217 | Tuvalu | 0.0 | 2019 est. | Australia and Oceania |
218 rows × 4 columns
Tip: Use rename.
# Give the date column a name that identifies this table
newLabels = {'date_of_information': 'carbon_date'}
carbon_new = carbon_new.rename(columns=newLabels)
carbon_new
| name | value | carbon_date | region | |
|---|---|---|---|---|
| 0 | China | 10,773,248,000.0 | 2019 est. | East and Southeast Asia |
| 1 | United States | 5,144,361,000.0 | 2019 est. | North America |
| 2 | India | 2,314,738,000.0 | 2019 est. | South Asia |
| 3 | Russia | 1,848,070,000.0 | 2019 est. | Central Asia |
| 4 | Japan | 1,103,234,000.0 | 2019 est. | East and Southeast Asia |
| ... | ... | ... | ... | ... |
| 213 | Antarctica | 28,000.0 | 2019 est. | Antarctica |
| 214 | Saint Helena, Ascension, and Tristan da Cunha | 13,000.0 | 2019 est. | Africa |
| 215 | Niue | 8,000.0 | 2019 est. | Australia and Oceania |
| 216 | Northern Mariana Islands | 0.0 | 2019 est. | Australia and Oceania |
| 217 | Tuvalu | 0.0 | 2019 est. | Australia and Oceania |
218 rows × 4 columns
Tip: use strip.
# Print a summary of carbon_new: column names, non-null counts and dtypes
carbon_new.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 218 entries, 0 to 217 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 name 218 non-null object 1 value 218 non-null object 2 date_of_information 218 non-null object 3 region 218 non-null object dtypes: object(4) memory usage: 6.9+ KB
# Show the raw strings of one column as a Python list, where the quotes make
# any leading or trailing spaces visible (swap 'region' for another column)
carbon_new['region'].tolist()
['East and Southeast Asia', 'North America', 'South Asia', 'Central Asia', 'East and Southeast Asia', 'Europe', 'East and Southeast Asia', 'Middle East', 'North America', 'Middle East', 'East and Southeast Asia', 'Africa', 'North America', 'South America', 'Australia and Oceania', 'Europe', 'Middle East', 'Europe', 'Europe', 'East and Southeast Asia', 'Europe', 'Europe', 'East and Southeast Asia', 'Middle East', 'Central Asia', 'East and Southeast Asia', 'East and Southeast Asia', 'East and Southeast Asia', 'Africa', 'Europe', 'South Asia', 'South America', 'Europe', 'Africa', 'Middle East', 'East and Southeast Asia', 'Europe', 'Middle East', 'Central Asia', 'Africa', 'South America', 'Central Asia', 'Europe', 'South Asia', 'Middle East', 'East and Southeast Asia', 'South America', 'South America', 'Middle East', 'Europe', 'Europe', 'Europe', 'Middle East', 'Africa', 'South America', 'Europe', 'Europe', 'Europe', 'Europe', 'Europe', 'Middle East', 'Europe', 'East and Southeast Asia', 'Australia and Oceania', 'Central America and the Caribbean', 'Europe', 'Europe', 'Africa', 'Europe', 'South America', 'Europe', 'Middle East', 'Europe', 'Europe', 'East and Southeast Asia', 'Middle East', 'Central America and the Caribbean', 'Middle East', 'Central America and the Caribbean', 'South Asia', 'Africa', 'Middle East', 'East and Southeast Asia', 'Africa', 'Central America and the Caribbean', 'Central America and the Caribbean', 'East and Southeast Asia', 'Africa', 'South America', 'Africa', 'Africa', 'Africa', 'Europe', 'Central America and the Caribbean', 'Europe', 'Europe', 'Europe', 'East and Southeast Asia', 'Europe', 'Africa', 'Africa', 'Europe', 'Africa', 'Middle East', 'Middle East', 'Central America and the Caribbean', 'East and Southeast Asia', 'Europe', 'Central America and the Caribbean', 'Europe', 'Central America and the Caribbean', 'Europe', 'Europe', 'Europe', 'Africa', 'South America', 'South Asia', 'Central Asia', 'Africa', 'South Asia', 'Central Asia', 
'Central America and the Caribbean', 'Europe', 'Africa', 'Africa', 'Africa', 'Africa', 'East and Southeast Asia', 'South America', 'Middle East', 'Africa', 'Australia and Oceania', 'Africa', 'Africa', 'Europe', 'Central America and the Caribbean', 'Africa', 'Africa', 'Africa', 'Africa', 'Africa', 'Central America and the Caribbean', 'Africa', 'Europe', 'Middle East', 'Middle East', 'Europe', 'Central America and the Caribbean', 'Africa', 'South America', 'Africa', 'Africa', 'Europe', 'Central America and the Caribbean', 'Africa', 'South America', 'South Asia', 'Africa', 'East and Southeast Asia', 'Australia and Oceania', 'Africa', 'Africa', 'Central America and the Caribbean', 'Australia and Oceania', 'Africa', 'Africa', 'Australia and Oceania', 'Central America and the Caribbean', 'Africa', 'Africa', 'Africa', 'Australia and Oceania', 'Africa', 'South Asia', 'Africa', 'Africa', 'Europe', 'Central America and the Caribbean', 'Africa', 'North America', 'Africa', 'Central America and the Caribbean', 'Africa', 'Central America and the Caribbean', 'Africa', 'Africa', 'North America', 'Central America and the Caribbean', 'East and Southeast Asia', 'Europe', 'Australia and Oceania', 'Australia and Oceania', 'Africa', 'Australia and Oceania', 'Africa', 'Central America and the Caribbean', 'Australia and Oceania', 'Africa', 'Central America and the Caribbean', 'Central America and the Caribbean', 'Australia and Oceania', 'Central America and the Caribbean', 'Central America and the Caribbean', 'Australia and Oceania', 'Central America and the Caribbean', 'Africa', 'Australia and Oceania', 'Australia and Oceania', 'North America', 'Australia and Oceania', 'Australia and Oceania', 'South America', 'Central America and the Caribbean', 'Antarctica', 'Africa', 'Australia and Oceania', 'Australia and Oceania', 'Australia and Oceania']
# .str.strip() returns a NEW Series and leaves the DataFrame untouched, so
# each stripped column has to be stored back or the cleaning is lost.
# assign() builds a new DataFrame with the cleaned columns in place of the
# old ones.
textColumns = ['name', 'value', 'carbon_date', 'region']
carbon_new = carbon_new.assign(
    **{col: carbon_new[col].str.strip() for col in textColumns}
)
# show one of the cleaned columns
carbon_new.region
0 East and Southeast Asia
1 North America
2 South Asia
3 Central Asia
4 East and Southeast Asia
...
213 Antarctica
214 Africa
215 Australia and Oceania
216 Australia and Oceania
217 Australia and Oceania
Name: region, Length: 218, dtype: object
# Check the strings again, as a plain list
carbon_new['region'].tolist()
['East and Southeast Asia', 'North America', 'South Asia', 'Central Asia', 'East and Southeast Asia', 'Europe', 'East and Southeast Asia', 'Middle East', 'North America', 'Middle East', 'East and Southeast Asia', 'Africa', 'North America', 'South America', 'Australia and Oceania', 'Europe', 'Middle East', 'Europe', 'Europe', 'East and Southeast Asia', 'Europe', 'Europe', 'East and Southeast Asia', 'Middle East', 'Central Asia', 'East and Southeast Asia', 'East and Southeast Asia', 'East and Southeast Asia', 'Africa', 'Europe', 'South Asia', 'South America', 'Europe', 'Africa', 'Middle East', 'East and Southeast Asia', 'Europe', 'Middle East', 'Central Asia', 'Africa', 'South America', 'Central Asia', 'Europe', 'South Asia', 'Middle East', 'East and Southeast Asia', 'South America', 'South America', 'Middle East', 'Europe', 'Europe', 'Europe', 'Middle East', 'Africa', 'South America', 'Europe', 'Europe', 'Europe', 'Europe', 'Europe', 'Middle East', 'Europe', 'East and Southeast Asia', 'Australia and Oceania', 'Central America and the Caribbean', 'Europe', 'Europe', 'Africa', 'Europe', 'South America', 'Europe', 'Middle East', 'Europe', 'Europe', 'East and Southeast Asia', 'Middle East', 'Central America and the Caribbean', 'Middle East', 'Central America and the Caribbean', 'South Asia', 'Africa', 'Middle East', 'East and Southeast Asia', 'Africa', 'Central America and the Caribbean', 'Central America and the Caribbean', 'East and Southeast Asia', 'Africa', 'South America', 'Africa', 'Africa', 'Africa', 'Europe', 'Central America and the Caribbean', 'Europe', 'Europe', 'Europe', 'East and Southeast Asia', 'Europe', 'Africa', 'Africa', 'Europe', 'Africa', 'Middle East', 'Middle East', 'Central America and the Caribbean', 'East and Southeast Asia', 'Europe', 'Central America and the Caribbean', 'Europe', 'Central America and the Caribbean', 'Europe', 'Europe', 'Europe', 'Africa', 'South America', 'South Asia', 'Central Asia', 'Africa', 'South Asia', 'Central Asia', 
'Central America and the Caribbean', 'Europe', 'Africa', 'Africa', 'Africa', 'Africa', 'East and Southeast Asia', 'South America', 'Middle East', 'Africa', 'Australia and Oceania', 'Africa', 'Africa', 'Europe', 'Central America and the Caribbean', 'Africa', 'Africa', 'Africa', 'Africa', 'Africa', 'Central America and the Caribbean', 'Africa', 'Europe', 'Middle East', 'Middle East', 'Europe', 'Central America and the Caribbean', 'Africa', 'South America', 'Africa', 'Africa', 'Europe', 'Central America and the Caribbean', 'Africa', 'South America', 'South Asia', 'Africa', 'East and Southeast Asia', 'Australia and Oceania', 'Africa', 'Africa', 'Central America and the Caribbean', 'Australia and Oceania', 'Africa', 'Africa', 'Australia and Oceania', 'Central America and the Caribbean', 'Africa', 'Africa', 'Africa', 'Australia and Oceania', 'Africa', 'South Asia', 'Africa', 'Africa', 'Europe', 'Central America and the Caribbean', 'Africa', 'North America', 'Africa', 'Central America and the Caribbean', 'Africa', 'Central America and the Caribbean', 'Africa', 'Africa', 'North America', 'Central America and the Caribbean', 'East and Southeast Asia', 'Europe', 'Australia and Oceania', 'Australia and Oceania', 'Africa', 'Australia and Oceania', 'Africa', 'Central America and the Caribbean', 'Australia and Oceania', 'Africa', 'Central America and the Caribbean', 'Central America and the Caribbean', 'Australia and Oceania', 'Central America and the Caribbean', 'Central America and the Caribbean', 'Australia and Oceania', 'Central America and the Caribbean', 'Africa', 'Australia and Oceania', 'Australia and Oceania', 'North America', 'Australia and Oceania', 'Australia and Oceania', 'South America', 'Central America and the Caribbean', 'Antarctica', 'Africa', 'Australia and Oceania', 'Australia and Oceania', 'Australia and Oceania']
Tip: Use contains.
# Rebind carbon_new to an independent (deep) copy of itself
carbon_new = carbon_new.copy(deep=True)
carbon_new
| name | value | carbon_date | region | |
|---|---|---|---|---|
| 0 | China | 10,773,248,000.0 | 2019 est. | East and Southeast Asia |
| 1 | United States | 5,144,361,000.0 | 2019 est. | North America |
| 2 | India | 2,314,738,000.0 | 2019 est. | South Asia |
| 3 | Russia | 1,848,070,000.0 | 2019 est. | Central Asia |
| 4 | Japan | 1,103,234,000.0 | 2019 est. | East and Southeast Asia |
| ... | ... | ... | ... | ... |
| 213 | Antarctica | 28,000.0 | 2019 est. | Antarctica |
| 214 | Saint Helena, Ascension, and Tristan da Cunha | 13,000.0 | 2019 est. | Africa |
| 215 | Niue | 8,000.0 | 2019 est. | Australia and Oceania |
| 216 | Northern Mariana Islands | 0.0 | 2019 est. | Australia and Oceania |
| 217 | Tuvalu | 0.0 | 2019 est. | Australia and Oceania |
218 rows × 4 columns
# Which cells contain at least one character that is neither a word
# character (\w) nor a dot (\.)? A blank space counts as such a character.
carbon_new.loc[
    carbon_new['carbon_date'].str.contains(pat=r'[^\w\.]', regex=True),
    'carbon_date',
]
0 2019 est.
1 2019 est.
2 2019 est.
3 2019 est.
4 2019 est.
...
213 2019 est.
214 2019 est.
215 2019 est.
216 2019 est.
217 2019 est.
Name: carbon_date, Length: 218, dtype: object
# Same check on the value column: cells with a character that is neither a
# word character nor a dot
carbon_new.loc[
    carbon_new['value'].str.contains(pat=r'[^\w\.]', regex=True),
    'value',
]
0 10,773,248,000.0
1 5,144,361,000.0
2 2,314,738,000.0
3 1,848,070,000.0
4 1,103,234,000.0
...
211 46,000.0
212 33,000.0
213 28,000.0
214 13,000.0
215 8,000.0
Name: value, Length: 216, dtype: object
Tip: use replace.
# For each column name, flag whether it contains a blank space
carbon_new.columns.str.contains(' ')
array([False, False, False, False])
# List only the column names that contain a blank space
carbon_new.columns[carbon_new.columns.str.contains(' ')]
Index([], dtype='object')
Tip: use replace.
# Swap every blank space in the column names for an underscore
carbon_new.columns = [label.replace(' ', '_') for label in carbon_new.columns]
carbon_new
| name | value | carbon_date | region | |
|---|---|---|---|---|
| 0 | China | 10,773,248,000.0 | 2019 est. | East and Southeast Asia |
| 1 | United States | 5,144,361,000.0 | 2019 est. | North America |
| 2 | India | 2,314,738,000.0 | 2019 est. | South Asia |
| 3 | Russia | 1,848,070,000.0 | 2019 est. | Central Asia |
| 4 | Japan | 1,103,234,000.0 | 2019 est. | East and Southeast Asia |
| ... | ... | ... | ... | ... |
| 213 | Antarctica | 28,000.0 | 2019 est. | Antarctica |
| 214 | Saint Helena, Ascension, and Tristan da Cunha | 13,000.0 | 2019 est. | Africa |
| 215 | Niue | 8,000.0 | 2019 est. | Australia and Oceania |
| 216 | Northern Mariana Islands | 0.0 | 2019 est. | Australia and Oceania |
| 217 | Tuvalu | 0.0 | 2019 est. | Australia and Oceania |
218 rows × 4 columns
Tip: use extract.
# Keep only the numeric part of the date (the year) and discard the text.
# extract() captures the first run of four digits, e.g. '2019 est.' -> '2019'.
# Deleting every non-digit instead would glue digits together whenever a cell
# holds more than one number. The year is still stored as a string.
carbon_new['carbon_date'] = carbon_new['carbon_date'].str.extract(r'(\d{4})', expand=False)
carbon_new
| name | value | carbon_date | region | |
|---|---|---|---|---|
| 0 | China | 10,773,248,000.0 | 2019 | East and Southeast Asia |
| 1 | United States | 5,144,361,000.0 | 2019 | North America |
| 2 | India | 2,314,738,000.0 | 2019 | South Asia |
| 3 | Russia | 1,848,070,000.0 | 2019 | Central Asia |
| 4 | Japan | 1,103,234,000.0 | 2019 | East and Southeast Asia |
| ... | ... | ... | ... | ... |
| 213 | Antarctica | 28,000.0 | 2019 | Antarctica |
| 214 | Saint Helena, Ascension, and Tristan da Cunha | 13,000.0 | 2019 | Africa |
| 215 | Niue | 8,000.0 | 2019 | Australia and Oceania |
| 216 | Northern Mariana Islands | 0.0 | 2019 | Australia and Oceania |
| 217 | Tuvalu | 0.0 | 2019 | Australia and Oceania |
218 rows × 4 columns
from IPython.display import IFrame
# CIA World Factbook page comparing countries by revenue from forest resources
ciaLink2 = "https://www.cia.gov/the-world-factbook/field/revenue-from-forest-resources/country-comparison"
# Embed that page inside the notebook as a 900x900 frame
IFrame(src=ciaLink2, width=900, height=900)
# read web table into pandas DF
import pandas as pd
# read_html returns a list with one DataFrame per table found on the page
forestDFs = pd.read_html(
    ciaLink2,      # page to read
    header=0,      # the first row is used as the column names
    flavor='bs4',  # HTML parsing engine
)
# Work on a copy of the first table so the parsed original stays untouched
forest = forestDFs[0].copy()
forest
| Rank | Country | % of GDP | Date of Information | |
|---|---|---|---|---|
| 0 | 1 | Solomon Islands | 20.27 | 2018 est. |
| 1 | 2 | Liberia | 13.27 | 2018 est. |
| 2 | 3 | Burundi | 10.31 | 2018 est. |
| 3 | 4 | Guinea-Bissau | 9.24 | 2018 est. |
| 4 | 5 | Central African Republic | 8.99 | 2018 est. |
| ... | ... | ... | ... | ... |
| 199 | 200 | Guam | 0.00 | 2018 est. |
| 200 | 201 | Faroe Islands | 0.00 | 2017 est. |
| 201 | 202 | Aruba | 0.00 | 2017 est. |
| 202 | 203 | Virgin Islands | 0.00 | 2017 est. |
| 203 | 204 | Macau | 0.00 | 2018 est. |
204 rows × 4 columns
Tip: use replace.
# Remove the % symbol from the column name by renaming that column
pctLabels = {'% of GDP': 'pct of GDP'}
forest = forest.rename(columns=pctLabels)
forest
| Rank | Country | pct of GDP | Date of Information | |
|---|---|---|---|---|
| 0 | 1 | Solomon Islands | 20.27 | 2018 est. |
| 1 | 2 | Liberia | 13.27 | 2018 est. |
| 2 | 3 | Burundi | 10.31 | 2018 est. |
| 3 | 4 | Guinea-Bissau | 9.24 | 2018 est. |
| 4 | 5 | Central African Republic | 8.99 | 2018 est. |
| ... | ... | ... | ... | ... |
| 199 | 200 | Guam | 0.00 | 2018 est. |
| 200 | 201 | Faroe Islands | 0.00 | 2017 est. |
| 201 | 202 | Aruba | 0.00 | 2017 est. |
| 202 | 203 | Virgin Islands | 0.00 | 2017 est. |
| 203 | 204 | Macau | 0.00 | 2018 est. |
204 rows × 4 columns
# Same change through plain string replacement on every column name
forest.columns = [label.replace('% of GDP', 'pct of GDP') for label in forest.columns]
forest
| Rank | Country | pct of GDP | Date of Information | |
|---|---|---|---|---|
| 0 | 1 | Solomon Islands | 20.27 | 2018 est. |
| 1 | 2 | Liberia | 13.27 | 2018 est. |
| 2 | 3 | Burundi | 10.31 | 2018 est. |
| 3 | 4 | Guinea-Bissau | 9.24 | 2018 est. |
| 4 | 5 | Central African Republic | 8.99 | 2018 est. |
| ... | ... | ... | ... | ... |
| 199 | 200 | Guam | 0.00 | 2018 est. |
| 200 | 201 | Faroe Islands | 0.00 | 2017 est. |
| 201 | 202 | Aruba | 0.00 | 2017 est. |
| 202 | 203 | Virgin Islands | 0.00 | 2017 est. |
| 203 | 204 | Macau | 0.00 | 2018 est. |
204 rows × 4 columns
Tip: use drop, loc, and iloc for the same purpose (three ways to accomplish the task).
# Remove the Rank column: drop() returns a new DataFrame, so rebind `forest`
forest = forest.drop(columns=['Rank'])
# result
forest
| Country | pct of GDP | Date of Information | |
|---|---|---|---|
| 0 | Solomon Islands | 20.27 | 2018 est. |
| 1 | Liberia | 13.27 | 2018 est. |
| 2 | Burundi | 10.31 | 2018 est. |
| 3 | Guinea-Bissau | 9.24 | 2018 est. |
| 4 | Central African Republic | 8.99 | 2018 est. |
| ... | ... | ... | ... |
| 199 | Guam | 0.00 | 2018 est. |
| 200 | Faroe Islands | 0.00 | 2017 est. |
| 201 | Aruba | 0.00 | 2017 est. |
| 202 | Virgin Islands | 0.00 | 2017 est. |
| 203 | Macau | 0.00 | 2018 est. |
204 rows × 3 columns
# .loc version: build a boolean mask that is True for every column label
# other than 'Rank', then keep only those columns
keepMask = ~forest.columns.isin(['Rank'])
forest = forest.loc[:, keepMask]
forest
| Country | % of GDP | Date of Information | |
|---|---|---|---|
| 0 | Solomon Islands | 20.27 | 2018 est. |
| 1 | Liberia | 13.27 | 2018 est. |
| 2 | Burundi | 10.31 | 2018 est. |
| 3 | Guinea-Bissau | 9.24 | 2018 est. |
| 4 | Central African Republic | 8.99 | 2018 est. |
| ... | ... | ... | ... |
| 199 | Guam | 0.00 | 2018 est. |
| 200 | Faroe Islands | 0.00 | 2017 est. |
| 201 | Aruba | 0.00 | 2017 est. |
| 202 | Virgin Islands | 0.00 | 2017 est. |
| 203 | Macau | 0.00 | 2018 est. |
204 rows × 3 columns
# .iloc version: keep every column except the one at position 0 (Rank).
# The selection runs on the table as parsed and is only displayed, so
# `forest` keeps the cleaning already applied to it. Rebinding `forest` to a
# fresh copy of the parsed table here would bring back the Rank column and
# the '% of GDP' label.
rawForest = forestDFs[0]
rawForest.iloc[:, [j for j in range(rawForest.shape[1]) if j not in [0]]]
| Country | % of GDP | Date of Information | |
|---|---|---|---|
| 0 | Solomon Islands | 20.27 | 2018 est. |
| 1 | Liberia | 13.27 | 2018 est. |
| 2 | Burundi | 10.31 | 2018 est. |
| 3 | Guinea-Bissau | 9.24 | 2018 est. |
| 4 | Central African Republic | 8.99 | 2018 est. |
| ... | ... | ... | ... |
| 199 | Guam | 0.00 | 2018 est. |
| 200 | Faroe Islands | 0.00 | 2017 est. |
| 201 | Aruba | 0.00 | 2017 est. |
| 202 | Virgin Islands | 0.00 | 2017 est. |
| 203 | Macau | 0.00 | 2018 est. |
204 rows × 3 columns
Tip: Use rename.
# Give the date column a name that identifies this table
dateLabels = {'Date of Information': 'forest_date'}
forest = forest.rename(columns=dateLabels)
forest
| Rank | Country | % of GDP | forest_date | |
|---|---|---|---|---|
| 0 | 1 | Solomon Islands | 20.27 | 2018 est. |
| 1 | 2 | Liberia | 13.27 | 2018 est. |
| 2 | 3 | Burundi | 10.31 | 2018 est. |
| 3 | 4 | Guinea-Bissau | 9.24 | 2018 est. |
| 4 | 5 | Central African Republic | 8.99 | 2018 est. |
| ... | ... | ... | ... | ... |
| 199 | 200 | Guam | 0.00 | 2018 est. |
| 200 | 201 | Faroe Islands | 0.00 | 2017 est. |
| 201 | 202 | Aruba | 0.00 | 2017 est. |
| 202 | 203 | Virgin Islands | 0.00 | 2017 est. |
| 203 | 204 | Macau | 0.00 | 2018 est. |
204 rows × 4 columns
Tip: use replace.
# For each column name, flag whether it contains a blank space
forest.columns.str.contains(' ')
array([False, False, True, False])
Tip: use strip.
# str.strip() returns a new Index; store it back so the column names really
# lose their leading and trailing spaces, then display them
forest.columns = forest.columns.str.strip()
forest.columns
Index(['Rank', 'Country', '% of GDP', 'forest_date'], dtype='object')
# Column names as a plain Python list, where the quotes make stray spaces visible
forest.columns.tolist()
['Rank', 'Country', '% of GDP', 'forest_date']
Tip: use extract.
# Keep only the year from the date text, e.g. '2018 est.' -> '2018'.
# extract() captures the first run of four digits. Deleting every non-digit
# instead would glue digits together whenever a cell holds more than one
# number. The year is still stored as a string.
forest['forest_date'] = forest['forest_date'].str.extract(r'(\d{4})', expand=False)
forest
| Rank | Country | % of GDP | forest_date | |
|---|---|---|---|---|
| 0 | 1 | Solomon Islands | 20.27 | 2018 |
| 1 | 2 | Liberia | 13.27 | 2018 |
| 2 | 3 | Burundi | 10.31 | 2018 |
| 3 | 4 | Guinea-Bissau | 9.24 | 2018 |
| 4 | 5 | Central African Republic | 8.99 | 2018 |
| ... | ... | ... | ... | ... |
| 199 | 200 | Guam | 0.00 | 2018 |
| 200 | 201 | Faroe Islands | 0.00 | 2017 |
| 201 | 202 | Aruba | 0.00 | 2017 |
| 202 | 203 | Virgin Islands | 0.00 | 2017 |
| 203 | 204 | Macau | 0.00 | 2018 |
204 rows × 4 columns