I think the most straight forward is to use matplotlib.dates
to format the axis:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
def graph(dataframe):
fig, ax = plt.subplots()
xfmt = mdates.DateFormatter('%YM%m') #see https://strftime.org/
major = mdates.MonthLocator([1,7]) #label only Jan and Jul
graph = dataframe[["Profit"]].plot(ax=ax) #link plot to the existing axes
graph.set_title('Statistics')
graph.set_ylabel('Thousand $')
graph.set_xlabel('Time')
graph.xaxis.set_major_locator(major) #set major locator tick on x-axis
graph.xaxis.set_major_formatter(xfmt) #format xtick label
plt.grid(True)
plt.show()
But a key point is you need to have your dates as Python's built-in datetime.date
(not datetime.datetime
); thanks to this answer. If your dates are str
or a different type of datetime, you will need to convert, but there are many resources on SO and elsewhere for doing this like this or this:
In[0]:
dr = pd.date_range('01-01-2012', '01-01-2014', freq='1MS')
dr = [pd.to_datetime(date).date() for date in df.index] #explicitly converting to datetime with .date()
df = pd.DataFrame(index=dr, data={'Profit':np.random.rand(25)})
type(df.index.[0])
Out[0]:
datetime.date
Calling graph(df)
using the example above gets this plot:
Just to expand on this, here's what happens when the index is pandas.Timestamp
instead of datetime.date
:
In[0]:
dr = pd.date_range('01-01-2012', '01-01-2014', freq='1MS')
# dr = [pd.to_datetime(date).date() for date in df.index] #skipping date conversion
df = pd.DataFrame(index=dr, data={'Profit':np.random.rand(25)})
graph(df)
Out[0]:
The x-axis is improperly formatted:
However, if you are willing to just create the plot directly through matplotlib
, rather than pandas
(pandas
is using matplotlib
anyway), this can handle more types of dates:
In[0]:
dr = pd.date_range('01-01-2012', '01-01-2014', freq='1MS')
# dr = [pd.to_datetime(date).date() for date in df.index] #skipping date conversion
df = pd.DataFrame(index=dr, data={'Profit':np.random.rand(25)})
def graph_2(dataframe):
fig, ax = plt.subplots()
xfmt = mdates.DateFormatter('%YM%m')
major = mdates.MonthLocator([1,7])
ax.plot(dataframe.index,dataframe['Profit'], label='Profit')
ax.set_title('Statistics')
ax.set_ylabel('Thousand $')
ax.set_xlabel('Time')
ax.xaxis.set_major_locator(major)
ax.xaxis.set_major_formatter(xfmt)
ax.legend() #legend needs to be added
plt.grid(True)
plt.show()
graph_2(df)
type(df.index[0])
Out[0]:
pandas._libs.tslibs.timestamps.Timestamp
And here is the working graph: