0

I'm trying to organize my df by state alphabetically but when I sort by state using sort_values, nothing happens. I believe there is an issue with how the data is getting pulled because I get a KeyError that 'state' is not recognized. Should I use the rename function instead of renaming the columns the way I did?

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime

def load_data():

    # importing datasets
    df_2017=pd.read_excel('assets/US_States_Visited_2017.xlsx', skiprows=6,skipfooter=13)
    df_2018=pd.read_excel('assets/US_States_Visited_2018.xlsx', skiprows=7,skipfooter=7)
    df_2019=pd.read_excel('assets/US_States_Visited_2019.xlsx', skiprows=6,skipfooter=8)
    
    # renaming columns
    df_2017.columns = ['2017_rank','state','2016_market_share','visitation_2016','2017_market_share','visitation_2017','volume_change']
    df_2018.columns = ['2018_rank','state','2018_market_share','visitation_2018','volume_change','2017_market_share','visitation_2017']
    df_2019.columns = ['2019_rank','state','2019_market_share','visitation_2019','volume_change','2018_market_share','visitation_2018']
    
    # stripping state names
    df_2017['state'] = df_2017['state'].str.strip()
    df_2018['state'] = df_2018['state'].str.strip()
    df_2019['state'] = df_2019['state'].str.strip()
    
    # dropping all columns except for relevent state and visitation columns
    df_2017.drop(df_2017.columns[[0,2,4,6]], axis=1,inplace=True)
    df_2018.drop(df_2018.columns[[0,2,4,5,6]], axis=1,inplace=True)
    df_2019.drop(df_2019.columns[[0,2,4,5,6]], axis=1,inplace=True) 
    
    # multiplying visitation by 1000 to get accurate value
    df_2017['visitation_2016'] = df_2017['visitation_2016']*1000
    df_2017['visitation_2017'] = df_2017['visitation_2017']*1000
    df_2018['visitation_2018'] = df_2018['visitation_2018']*1000
    df_2019['visitation_2019'] = df_2019['visitation_2019']*1000
    
    # starting output at state column
    df_2017=df_2017.set_index('state')
    df_2018=df_2018.set_index('state')
    df_2019=df_2019.set_index('state')
    
    # merging all datasets by state variable
    merged_US_states_visitation = df_2017.merge(df_2018,on='state',how= 'left').merge(df_2019,on='state',how='right')
    
    #sorting by name
    merged_US_states_visitation.sort_values(by=['state'])
    
    return merged_US_states_visitation

load_data().head(25)

View of df

1 Answer 1

1

You are trying to i) sort_values when your target is the index; and ii) you are not assigning the sorted result. Go with:

merged_US_states_visitation.sort_index(inplace=True)
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.