from IPython.display import IFrame  
# Embed the CIA World Factbook CO2-emissions comparison page in the notebook.
ciaLink1 = "https://www.cia.gov/the-world-factbook/field/carbon-dioxide-emissions/country-comparison"
IFrame(src=ciaLink1, width=900, height=900)


# Load the previously downloaded carbon-emissions table (CSV hosted on GitHub)
# into a pandas DataFrame.
import pandas as pd

linkToFile = 'https://github.com/CienciaDeDatosEspacial/code_and_data/raw/main/data/carbonEmi_downloaded.csv'
carbon = pd.read_csv(filepath_or_buffer=linkToFile)


# Show the raw table.
carbon


# Goal: remove the 'slug' and 'ranking' columns.
# Work on a copy so the downloaded `carbon` table stays available as the
# first version.
carbon_new = carbon.copy()


byeColumns = ['slug', 'ranking']  # any number of columns can be listed here

# drop() returns a new frame; rebinding the name keeps only the result.
carbon_new = carbon_new.drop(columns=byeColumns)
carbon_new


# Same selection by label with .loc: build a boolean mask over the column
# names that is False for the unwanted ones, then keep the True columns.
keep_mask = ~carbon_new.columns.isin(['slug','ranking'])
carbon_new = carbon_new.loc[:, keep_mask]


carbon_new


# Selecting by position with a list comprehension.
# Positions 1 and 4 are 'slug' and 'ranking' in the ORIGINAL `carbon` layout
# (name, slug, value, date_of_information, ranking, region). `carbon_new` has
# already lost those two columns, so applying the same positions to it would
# remove 'value' instead. Select from `carbon`, and take a copy so later
# assignments act on an independent frame.
carbon_new = carbon.iloc[:, [j for j in range(len(carbon.columns)) if j not in [1, 4]]].copy()


carbon_new


# Rename the date column; rename() returns a new frame that is bound back to
# the same name.
carbon_new = carbon_new.rename(columns={'date_of_information': 'carbon_date'})


carbon_new


# Summary of columns, non-null counts and dtypes.
carbon_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 218 entries, 0 to 217
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   name                 218 non-null    object
 1   value                218 non-null    object
 2   date_of_information  218 non-null    object
 3   region               218 non-null    object
dtypes: object(4)
memory usage: 6.9+ KB


# Dump a column as a plain Python list: the quoted strings make any leading
# or trailing space visible. Swap 'region' for another column name as needed.
carbon_new['region'].to_list()

['East and Southeast Asia',
 'North America',
 'South Asia',
 'Central Asia',
 'East and Southeast Asia',
 'Europe',
 'East and Southeast Asia',
 'Middle East',
 'North America',
 'Middle East',
 'East and Southeast Asia',
 'Africa',
 'North America',
 'South America',
 'Australia and Oceania',
 'Europe',
 'Middle East',
 'Europe',
 'Europe',
 'East and Southeast Asia',
 'Europe',
 'Europe',
 'East and Southeast Asia',
 'Middle East',
 'Central Asia',
 'East and Southeast Asia',
 'East and Southeast Asia',
 'East and Southeast Asia',
 'Africa',
 'Europe',
 'South Asia',
 'South America',
 'Europe',
 'Africa',
 'Middle East',
 'East and Southeast Asia',
 'Europe',
 'Middle East',
 'Central Asia',
 'Africa',
 'South America',
 'Central Asia',
 'Europe',
 'South Asia',
 'Middle East',
 'East and Southeast Asia',
 'South America',
 'South America',
 'Middle East',
 'Europe',
 'Europe',
 'Europe',
 'Middle East',
 'Africa',
 'South America',
 'Europe',
 'Europe',
 'Europe',
 'Europe',
 'Europe',
 'Middle East',
 'Europe',
 'East and Southeast Asia',
 'Australia and Oceania',
 'Central America and the Caribbean',
 'Europe',
 'Europe',
 'Africa',
 'Europe',
 'South America',
 'Europe',
 'Middle East',
 'Europe',
 'Europe',
 'East and Southeast Asia',
 'Middle East',
 'Central America and the Caribbean',
 'Middle East',
 'Central America and the Caribbean',
 'South Asia',
 'Africa',
 'Middle East',
 'East and Southeast Asia',
 'Africa',
 'Central America and the Caribbean',
 'Central America and the Caribbean',
 'East and Southeast Asia',
 'Africa',
 'South America',
 'Africa',
 'Africa',
 'Africa',
 'Europe',
 'Central America and the Caribbean',
 'Europe',
 'Europe',
 'Europe',
 'East and Southeast Asia',
 'Europe',
 'Africa',
 'Africa',
 'Europe',
 'Africa',
 'Middle East',
 'Middle East',
 'Central America and the Caribbean',
 'East and Southeast Asia',
 'Europe',
 'Central America and the Caribbean',
 'Europe',
 'Central America and the Caribbean',
 'Europe',
 'Europe',
 'Europe',
 'Africa',
 'South America',
 'South Asia',
 'Central Asia',
 'Africa',
 'South Asia',
 'Central Asia',
 'Central America and the Caribbean',
 'Europe',
 'Africa',
 'Africa',
 'Africa',
 'Africa',
 'East and Southeast Asia',
 'South America',
 'Middle East',
 'Africa',
 'Australia and Oceania',
 'Africa',
 'Africa',
 'Europe',
 'Central America and the Caribbean',
 'Africa',
 'Africa',
 'Africa',
 'Africa',
 'Africa',
 'Central America and the Caribbean',
 'Africa',
 'Europe',
 'Middle East',
 'Middle East',
 'Europe',
 'Central America and the Caribbean',
 'Africa',
 'South America',
 'Africa',
 'Africa',
 'Europe',
 'Central America and the Caribbean',
 'Africa',
 'South America',
 'South Asia',
 'Africa',
 'East and Southeast Asia',
 'Australia and Oceania',
 'Africa',
 'Africa',
 'Central America and the Caribbean',
 'Australia and Oceania',
 'Africa',
 'Africa',
 'Australia and Oceania',
 'Central America and the Caribbean',
 'Africa',
 'Africa',
 'Africa',
 'Australia and Oceania',
 'Africa',
 'South Asia',
 'Africa',
 'Africa',
 'Europe',
 'Central America and the Caribbean',
 'Africa',
 'North America',
 'Africa',
 'Central America and the Caribbean',
 'Africa',
 'Central America and the Caribbean',
 'Africa',
 'Africa',
 'North America',
 'Central America and the Caribbean',
 'East and Southeast Asia',
 'Europe',
 'Australia and Oceania',
 'Australia and Oceania',
 'Africa',
 'Australia and Oceania',
 'Africa',
 'Central America and the Caribbean',
 'Australia and Oceania',
 'Africa',
 'Central America and the Caribbean',
 'Central America and the Caribbean',
 'Australia and Oceania',
 'Central America and the Caribbean',
 'Central America and the Caribbean',
 'Australia and Oceania',
 'Central America and the Caribbean',
 'Africa',
 'Australia and Oceania',
 'Australia and Oceania',
 'North America',
 'Australia and Oceania',
 'Australia and Oceania',
 'South America',
 'Central America and the Caribbean',
 'Antarctica',
 'Africa',
 'Australia and Oceania',
 'Australia and Oceania',
 'Australia and Oceania']


# Remove leading/trailing whitespace from every text column.
# Series.str.strip() returns a NEW Series, so the result has to be stored;
# calling it without assignment leaves the DataFrame unchanged.
# assign() builds a new frame, which avoids writing into a possible slice.
text_columns = ['name', 'value', 'carbon_date', 'region']
carbon_new = carbon_new.assign(
    **{col: carbon_new[col].str.strip() for col in text_columns}
)
carbon_new.region

0      East and Southeast Asia
1                North America
2                   South Asia
3                 Central Asia
4      East and Southeast Asia
                ...           
213                 Antarctica
214                     Africa
215      Australia and Oceania
216      Australia and Oceania
217      Australia and Oceania
Name: region, Length: 218, dtype: object


# Check the strings again by listing the column as plain Python values.
carbon_new['region'].to_list()

['East and Southeast Asia',
 'North America',
 'South Asia',
 'Central Asia',
 'East and Southeast Asia',
 'Europe',
 'East and Southeast Asia',
 'Middle East',
 'North America',
 'Middle East',
 'East and Southeast Asia',
 'Africa',
 'North America',
 'South America',
 'Australia and Oceania',
 'Europe',
 'Middle East',
 'Europe',
 'Europe',
 'East and Southeast Asia',
 'Europe',
 'Europe',
 'East and Southeast Asia',
 'Middle East',
 'Central Asia',
 'East and Southeast Asia',
 'East and Southeast Asia',
 'East and Southeast Asia',
 'Africa',
 'Europe',
 'South Asia',
 'South America',
 'Europe',
 'Africa',
 'Middle East',
 'East and Southeast Asia',
 'Europe',
 'Middle East',
 'Central Asia',
 'Africa',
 'South America',
 'Central Asia',
 'Europe',
 'South Asia',
 'Middle East',
 'East and Southeast Asia',
 'South America',
 'South America',
 'Middle East',
 'Europe',
 'Europe',
 'Europe',
 'Middle East',
 'Africa',
 'South America',
 'Europe',
 'Europe',
 'Europe',
 'Europe',
 'Europe',
 'Middle East',
 'Europe',
 'East and Southeast Asia',
 'Australia and Oceania',
 'Central America and the Caribbean',
 'Europe',
 'Europe',
 'Africa',
 'Europe',
 'South America',
 'Europe',
 'Middle East',
 'Europe',
 'Europe',
 'East and Southeast Asia',
 'Middle East',
 'Central America and the Caribbean',
 'Middle East',
 'Central America and the Caribbean',
 'South Asia',
 'Africa',
 'Middle East',
 'East and Southeast Asia',
 'Africa',
 'Central America and the Caribbean',
 'Central America and the Caribbean',
 'East and Southeast Asia',
 'Africa',
 'South America',
 'Africa',
 'Africa',
 'Africa',
 'Europe',
 'Central America and the Caribbean',
 'Europe',
 'Europe',
 'Europe',
 'East and Southeast Asia',
 'Europe',
 'Africa',
 'Africa',
 'Europe',
 'Africa',
 'Middle East',
 'Middle East',
 'Central America and the Caribbean',
 'East and Southeast Asia',
 'Europe',
 'Central America and the Caribbean',
 'Europe',
 'Central America and the Caribbean',
 'Europe',
 'Europe',
 'Europe',
 'Africa',
 'South America',
 'South Asia',
 'Central Asia',
 'Africa',
 'South Asia',
 'Central Asia',
 'Central America and the Caribbean',
 'Europe',
 'Africa',
 'Africa',
 'Africa',
 'Africa',
 'East and Southeast Asia',
 'South America',
 'Middle East',
 'Africa',
 'Australia and Oceania',
 'Africa',
 'Africa',
 'Europe',
 'Central America and the Caribbean',
 'Africa',
 'Africa',
 'Africa',
 'Africa',
 'Africa',
 'Central America and the Caribbean',
 'Africa',
 'Europe',
 'Middle East',
 'Middle East',
 'Europe',
 'Central America and the Caribbean',
 'Africa',
 'South America',
 'Africa',
 'Africa',
 'Europe',
 'Central America and the Caribbean',
 'Africa',
 'South America',
 'South Asia',
 'Africa',
 'East and Southeast Asia',
 'Australia and Oceania',
 'Africa',
 'Africa',
 'Central America and the Caribbean',
 'Australia and Oceania',
 'Africa',
 'Africa',
 'Australia and Oceania',
 'Central America and the Caribbean',
 'Africa',
 'Africa',
 'Africa',
 'Australia and Oceania',
 'Africa',
 'South Asia',
 'Africa',
 'Africa',
 'Europe',
 'Central America and the Caribbean',
 'Africa',
 'North America',
 'Africa',
 'Central America and the Caribbean',
 'Africa',
 'Central America and the Caribbean',
 'Africa',
 'Africa',
 'North America',
 'Central America and the Caribbean',
 'East and Southeast Asia',
 'Europe',
 'Australia and Oceania',
 'Australia and Oceania',
 'Africa',
 'Australia and Oceania',
 'Africa',
 'Central America and the Caribbean',
 'Australia and Oceania',
 'Africa',
 'Central America and the Caribbean',
 'Central America and the Caribbean',
 'Australia and Oceania',
 'Central America and the Caribbean',
 'Central America and the Caribbean',
 'Australia and Oceania',
 'Central America and the Caribbean',
 'Africa',
 'Australia and Oceania',
 'Australia and Oceania',
 'North America',
 'Australia and Oceania',
 'Australia and Oceania',
 'South America',
 'Central America and the Caribbean',
 'Antarctica',
 'Africa',
 'Australia and Oceania',
 'Australia and Oceania',
 'Australia and Oceania']


# Continue with an independent copy of the frame.
carbon_new = carbon_new.copy()


carbon_new


# Which cells hold a character that is neither a word character (\w) nor a
# point (\.)? Build the boolean mask first, then show the matching cells.
has_other_symbol = carbon_new['carbon_date'].str.contains(pat=r'[^\w\.]', regex=True)
carbon_new.loc[has_other_symbol, 'carbon_date']

0      2019 est.
1      2019 est.
2      2019 est.
3      2019 est.
4      2019 est.
         ...    
213    2019 est.
214    2019 est.
215    2019 est.
216    2019 est.
217    2019 est.
Name: carbon_date, Length: 218, dtype: object


# Same check for 'value': show the cells containing anything other than word
# characters or points.
carbon_new.loc[carbon_new['value'].str.contains(pat=r'[^\w\.]', regex=True), 'value']

0      10,773,248,000.0
1       5,144,361,000.0
2       2,314,738,000.0
3       1,848,070,000.0
4       1,103,234,000.0
             ...       
211            46,000.0
212            33,000.0
213            28,000.0
214            13,000.0
215             8,000.0
Name: value, Length: 216, dtype: object


# One boolean per column name: True where the name contains a blank.
carbon_new.columns.str.contains(' ')

array([False, False, False, False])


# List only the column names that contain a blank.
names_with_blank = carbon_new.columns.str.contains(' ')
carbon_new.columns[names_with_blank]

Index([], dtype='object')


# Replace every blank inside a column name with an underscore.
underscored_names = carbon_new.columns.str.replace(' ', '_')
carbon_new.columns = underscored_names


carbon_new


# Keep only the digits of 'carbon_date': every character that is not 0-9 is
# removed, so a cell such as '2019 est.' becomes '2019'.
digits_only = carbon_new['carbon_date'].str.replace(pat=r'[^0-9]', repl='', regex=True)
carbon_new['carbon_date'] = digits_only


carbon_new


from IPython.display import IFrame  
# Embed the CIA World Factbook forest-revenue comparison page in the notebook.
ciaLink2 = "https://www.cia.gov/the-world-factbook/field/revenue-from-forest-resources/country-comparison"
IFrame(src=ciaLink2, width=900, height=900)


# Read every HTML table on the page into a list of pandas DataFrames.
import pandas as pd

forestDFs = pd.read_html(
    io=ciaLink2,    # page to scrape
    header=0,       # the first row holds the column names
    flavor='bs4',   # parse with BeautifulSoup
)


# Work on a deep copy of the first scraped table so the original in
# `forestDFs` stays untouched.
forest = forestDFs[0].copy(deep=True)


forest


# Spell out the percent sign in the column header; rename() returns a new
# frame that is bound back to `forest`.
forest = forest.rename(columns={'% of GDP': 'pct of GDP'})
forest


# Alternative: do the same replacement through the string accessor of the
# column index.
renamed_columns = forest.columns.str.replace('% of GDP','pct of GDP')
forest.columns = renamed_columns


forest


# Remove the 'Rank' column; drop() returns a new frame that is bound back to
# `forest`.
forest = forest.drop(columns='Rank')
# then
forest


# Same selection by label with .loc: keep every column whose name is not
# 'Rank'.
not_rank = ~forest.columns.isin(['Rank'])
forest = forest.loc[:, not_rank]


forest


# Start again from the scraped table so that 'Rank' is back at position 0 for
# the positional (iloc) demonstration.
forest = forestDFs[0].copy()
# Resetting also brings back the original '% of GDP' header, so redo the
# '%' -> 'pct' rename here instead of silently losing it.
forest = forest.rename(columns={'% of GDP': 'pct of GDP'})


# Keep every column except position 0 and STORE the result; an unassigned
# iloc expression only displays the selection and leaves `forest` unchanged.
forest = forest.iloc[:, [j for j in range(len(forest.columns)) if j not in [0]]].copy()
forest


# Give the date column a short, dataset-specific name.
forest = forest.rename(columns={'Date of Information': 'forest_date'})
forest


# One boolean per column name: True where the name contains a blank.
forest.columns.str.contains(' ')

array([False, False,  True, False])


# Index.str.strip() returns a new Index; assign it back so the cleaned column
# names are actually kept, then show them.
forest.columns = forest.columns.str.strip()
forest.columns

Index(['Rank', 'Country', '% of GDP', 'forest_date'], dtype='object')


# Column names as a plain Python list (quotes make stray spaces visible).
list(forest.columns)

['Rank', 'Country', '% of GDP', 'forest_date']


# Keep only the digits of 'forest_date': every character that is not 0-9 is
# removed, so a cell such as '2018 est.' becomes '2018'.
year_digits = forest['forest_date'].str.replace(pat=r'[^0-9]', repl='', regex=True)
forest['forest_date'] = year_digits


forest

	Rank	Country	% of GDP	Date of Information
0	1	Solomon Islands	20.27	2018 est.
1	2	Liberia	13.27	2018 est.
2	3	Burundi	10.31	2018 est.
3	4	Guinea-Bissau	9.24	2018 est.
4	5	Central African Republic	8.99	2018 est.
...	...	...	...	...
199	200	Guam	0.00	2018 est.
200	201	Faroe Islands	0.00	2017 est.
201	202	Aruba	0.00	2017 est.
202	203	Virgin Islands	0.00	2017 est.
203	204	Macau	0.00	2018 est.

	Rank	Country	pct of GDP	Date of Information
0	1	Solomon Islands	20.27	2018 est.
1	2	Liberia	13.27	2018 est.
2	3	Burundi	10.31	2018 est.
3	4	Guinea-Bissau	9.24	2018 est.
4	5	Central African Republic	8.99	2018 est.
...	...	...	...	...
199	200	Guam	0.00	2018 est.
200	201	Faroe Islands	0.00	2017 est.
201	202	Aruba	0.00	2017 est.
202	203	Virgin Islands	0.00	2017 est.
203	204	Macau	0.00	2018 est.

	Rank	Country	pct of GDP	Date of Information
0	1	Solomon Islands	20.27	2018 est.
1	2	Liberia	13.27	2018 est.
2	3	Burundi	10.31	2018 est.
3	4	Guinea-Bissau	9.24	2018 est.
4	5	Central African Republic	8.99	2018 est.
...	...	...	...	...
199	200	Guam	0.00	2018 est.
200	201	Faroe Islands	0.00	2017 est.
201	202	Aruba	0.00	2017 est.
202	203	Virgin Islands	0.00	2017 est.
203	204	Macau	0.00	2018 est.

	Country	pct of GDP	Date of Information
0	Solomon Islands	20.27	2018 est.
1	Liberia	13.27	2018 est.
2	Burundi	10.31	2018 est.
3	Guinea-Bissau	9.24	2018 est.
4	Central African Republic	8.99	2018 est.
...	...	...	...
199	Guam	0.00	2018 est.
200	Faroe Islands	0.00	2017 est.
201	Aruba	0.00	2017 est.
202	Virgin Islands	0.00	2017 est.
203	Macau	0.00	2018 est.

	Country	% of GDP	Date of Information
0	Solomon Islands	20.27	2018 est.
1	Liberia	13.27	2018 est.
2	Burundi	10.31	2018 est.
3	Guinea-Bissau	9.24	2018 est.
4	Central African Republic	8.99	2018 est.
...	...	...	...
199	Guam	0.00	2018 est.
200	Faroe Islands	0.00	2017 est.
201	Aruba	0.00	2017 est.
202	Virgin Islands	0.00	2017 est.
203	Macau	0.00	2018 est.

Tarea - Ciencias de Datos espaciales¶

Estudiante: Pierina Milla¶

PARTE 1¶

1. Keep the columns name, value, date_of_information and region.¶

1.1 Using drop¶

1.2 Using loc¶

1.3 Using iloc¶

2. Change the column name date_of_information to carbon_date.¶

3. Make sure the cells with text have neither trailing nor leading spaces.¶

4. Detect the presence of symbols in the numeric data that are neither digits nor decimal points.¶

4.1 Solving carbon_date¶

4.2 Solving value¶

5. Make sure there are no spaces as part of the column names.¶

6. Get rid of any value detected in the previous step:¶

7. Keep only the year value in the column carbon_date.¶

PARTE 2¶

1. Replace '%' by 'pct'.¶

2. Keep the columns Country, pct of GDP, and Date of Information.¶

2.1 Using drop¶

2.2 Using loc¶

2.3 Using iloc¶

3. Change the column name Date of Information to forest_date.¶

4. Make sure there are no spaces as part of the column names.¶

5. Make sure the cells with text have neither trailing nor leading spaces.¶

6. Keep only the year value in the column forest_date.¶

	name	slug	value	date_of_information	ranking	region
0	China	china	10,773,248,000.0	2019 est.	1	East and Southeast Asia
1	United States	united-states	5,144,361,000.0	2019 est.	2	North America
2	India	india	2,314,738,000.0	2019 est.	3	South Asia
3	Russia	russia	1,848,070,000.0	2019 est.	4	Central Asia
4	Japan	japan	1,103,234,000.0	2019 est.	5	East and Southeast Asia
...	...	...	...	...	...	...
213	Antarctica	antarctica	28,000.0	2019 est.	214	Antarctica
214	Saint Helena, Ascension, and Tristan da Cunha	saint-helena-ascension-and-tristan-da-cunha	13,000.0	2019 est.	215	Africa
215	Niue	niue	8,000.0	2019 est.	216	Australia and Oceania
216	Northern Mariana Islands	northern-mariana-islands	0.0	2019 est.	217	Australia and Oceania
217	Tuvalu	tuvalu	0.0	2019 est.	218	Australia and Oceania

	name	value	carbon_date	region
0	China	10,773,248,000.0	2019	East and Southeast Asia
1	United States	5,144,361,000.0	2019	North America
2	India	2,314,738,000.0	2019	South Asia
3	Russia	1,848,070,000.0	2019	Central Asia
4	Japan	1,103,234,000.0	2019	East and Southeast Asia
...	...	...	...	...
213	Antarctica	28,000.0	2019	Antarctica
214	Saint Helena, Ascension, and Tristan da Cunha	13,000.0	2019	Africa
215	Niue	8,000.0	2019	Australia and Oceania
216	Northern Mariana Islands	0.0	2019	Australia and Oceania
217	Tuvalu	0.0	2019	Australia and Oceania

	Rank	Country	% of GDP	forest_date
0	1	Solomon Islands	20.27	2018
1	2	Liberia	13.27	2018
2	3	Burundi	10.31	2018
3	4	Guinea-Bissau	9.24	2018
4	5	Central African Republic	8.99	2018
...	...	...	...	...
199	200	Guam	0.00	2018
200	201	Faroe Islands	0.00	2017
201	202	Aruba	0.00	2017
202	203	Virgin Islands	0.00	2017
203	204	Macau	0.00	2018