Issue
I have the dataframes ready to plot, but when I use matplotlib to plot the data, the lines are not correct and do not show the trend.
For example, the first graph should be a curved line; however, I get a straight line plotted in the graph.
How can I plot these lines correctly and fix both axes?
import pandas as pd
import datetime as dt
import pandas_datareader as web
import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib.ticker as ticker
from bs4 import BeautifulSoup
import requests
import matplotlib.dates as mdates
url = 'https://www.federalreserve.gov/data.xml'
soup = BeautifulSoup(requests.get(url).content, 'html.parser')

# One DataFrame and one figure per <chart> element of the feed.
for chart in soup.select('chart'):
    series = {}
    longest_dates = []
    for series_tag in chart.select('series'):
        observations = series_tag.select('observation')
        dates = [obs['index'] for obs in observations]
        # Values are stored exactly as read from the attributes, i.e. as strings.
        series[series_tag['description']] = [obs['value'] for obs in observations]
        # The dates of the longest series become the index of the frame.
        if len(dates) > len(longest_dates):
            longest_dates = dates
    series['index'] = longest_dates

    # Pad every shorter column at its tail so all columns have equal length.
    max_len = max(len(column) for column in series.values())
    series = {
        name: column + ['No Data'] * (max_len - len(column))
        for name, column in series.items()
    }

    df = pd.DataFrame(series).set_index('index')
    print(df)
    print('-' * 80)

    # All series of the chart are drawn on one shared figure.
    plt.figure()
    for column_name in df:
        plt.plot(df.index, df[column_name], label=chart['title'])
    plt.show()
Solution
- The dates are not in a datetime format, so they are interpreted as strings; because they are all unique, this makes a mess of the axis.
  - `df.index = pd.to_datetime(df.index)` has been added.
- The values in the columns are also strings, not numbers.
  - `df = pd.DataFrame(series, dtype=float).set_index('index')` will catch most of the columns, but there are some columns that still have strings, so they can't be converted.
  - `print(df.info())` has been added. Review and fix any column that is an `object`. That means the column contains some strings and can't be converted to a float.
- Use `[np.nan]` instead of `['No Data']`, so the column can be set as a float, which will allow it to plot correctly.
- Add the following import so that `np.nan` is available:
import numpy as np
url = 'https://www.federalreserve.gov/data.xml'


def build_frame(series, index):
    """Build a numeric, date-indexed DataFrame from one chart's raw data.

    Parameters
    ----------
    series : dict
        Maps a series description to the list of its observation values,
        as read from the feed (strings).
    index : list
        Observation dates (strings); in this script, those of the longest
        series of the chart.

    Returns
    -------
    pandas.DataFrame
        One float64 column per series, indexed by a DatetimeIndex named
        ``'index'``.  Values that cannot be parsed as numbers become NaN.
    """
    max_len = max([len(index), *(len(values) for values in series.values())])

    # Pad with NaN rather than a placeholder string such as 'No Data', so the
    # padding does not stop a column from being numeric.
    # NOTE(review): padding at the tail assumes every shorter series starts on
    # the same date as the longest one -- confirm against the feed.
    padded = {
        name: list(values) + [np.nan] * (max_len - len(values))
        for name, values in series.items()
    }
    dates = list(index) + [None] * (max_len - len(index))

    # Convert the dates to a datetime format so the x-axis is a real time axis.
    df = pd.DataFrame(padded, index=pd.to_datetime(dates).rename('index'))

    # dtype=float is deliberately not passed to the DataFrame constructor:
    # recent pandas raises when a value cannot be cast instead of silently
    # skipping that column.  Converting column by column with errors='coerce'
    # turns unparseable strings into NaN and always yields float columns.
    return df.apply(pd.to_numeric, errors='coerce').astype(float)


def main():
    """Download the feed and show one figure per series of every chart."""
    # A timeout keeps the script from hanging on an unresponsive server, and
    # raise_for_status() stops an HTTP error page from being parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    for chart in soup.select('chart'):
        series = {}
        index = []
        for s in chart.select('series'):
            observations = s.select('observation')
            dates = [o['index'] for o in observations]
            series[s['description']] = [o['value'] for o in observations]
            # Keep the dates of the longest series as the frame's index.
            if len(dates) > len(index):
                index = dates

        df = build_frame(series, index)
        print(df)
        # info() prints its report itself and returns None, so it is not
        # wrapped in print().
        df.info()
        print('-' * 80)

        # One figure per column; no extra empty figure is opened beforehand.
        for column in df:
            plt.figure(figsize=(9, 5))
            plt.plot(df.index, df[column])
            plt.title(f'{chart["title"]}\n{column}')
            plt.show()


if __name__ == '__main__':
    main()
Answered By - Trenton McKinney
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.