Issue
I am working with some neural networks and I am struggling to plot a correlation heatmap for the Titanic dataset using seaborn. To be concise: there seems to be a problem with the 'n_siblings_spouses' feature during plotting. I don't know whether the problem is due to the feature name itself (the underscores, maybe?) or whether there is an intrinsic issue with seaborn.
Would it be possible to solve the issue without the need to remove the feature from the dataset?
Here is a MWE. And thanks in advance!
from __future__ import absolute_import,division,print_function,unicode_literals
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc, font_manager
%matplotlib inline
from IPython.display import clear_output
from six.moves import urllib
import tensorflow.compat.v2.feature_column as fc
import tensorflow as tf
import seaborn as sns
# Route all Matplotlib text through LaTeX (a `latex` executable is invoked at draw time).
rc('text', usetex=True)
# Add amsmath to the LaTeX preamble used for text rendering.
# NOTE(review): newer Matplotlib releases expect a plain string here, not a list — confirm for the installed version.
matplotlib.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']
# only if needed
#!apt install texlive-fonts-recommended texlive-fonts-extra cm-super dvipng
# Use a serif font family for figure text.
plt.rc('font', family='serif')
# URL address of data
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
# Download the CSV via Keras' get_file helper and keep the returned local path.
train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL)
# Configure NumPy print options (precision=3, suppress=True).
np.set_printoptions(precision=3, suppress=True)
# Load the downloaded CSV into a DataFrame and show its first rows.
data_train = pd.read_csv(train_file_path)
print("\n TRAIN DATA SET")
print(data_train.head(),"\n")
def heatMap(df):
    """Draw an annotated correlation heatmap for the columns of ``df``.

    The correlation matrix comes from ``df.corr()``; each cell is annotated
    with its value formatted to two decimals, and both axes are relabelled
    with the matrix's column names before the figure is shown.
    """
    # Correlation matrix and the labels/positions used for both axes.
    correlations = df.corr()
    labels = correlations.columns
    positions = range(len(labels))

    # 10x10 inch figure; the heatmap is drawn onto the current axes.
    plt.subplots(figsize=(10, 10))
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(correlations, annot=True, fmt=".2f", cmap=palette)

    # Put the column names on the x and y ticks, then display the figure.
    plt.xticks(positions, labels)
    plt.yticks(positions, labels)
    plt.show()
# Plot the correlation heatmap for the training data (this call raises the LaTeX error shown below).
heatMap(data_train)
Here is the error that is raised when executing the heatMap function (I am working in Colab; however, this also happens when running from a console):
---------------------------------------------------------------------------
CalledProcessError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/texmanager.py in _run_checked_subprocess(self, command, tex)
305 cwd=self.texcache,
--> 306 stderr=subprocess.STDOUT)
307 except FileNotFoundError as exc:
22 frames
CalledProcessError: Command '['latex', '-interaction=nonstopmode', '--halt-on-error', '/root/.cache/matplotlib/tex.cache/bf616eae1512bede263889c8e1d8fb21.tex']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/texmanager.py in _run_checked_subprocess(self, command, tex)
317 prog=command[0],
318 tex=tex.encode('unicode_escape'),
--> 319 exc=exc.output.decode('utf-8'))) from exc
320 _log.debug(report)
321 return report
RuntimeError: latex was not able to process the following string:
b'n_siblings_spouses'
Here is the full report generated by latex:
This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=latex)
restricted \write18 enabled.
entering extended mode
(/root/.cache/matplotlib/tex.cache/bf616eae1512bede263889c8e1d8fb21.tex
LaTeX2e <2017-04-15>
Babel <3.18> and hyphenation patterns for 3 language(s) loaded.
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/type1cm/type1cm.sty)
(/usr/share/texmf/tex/latex/cm-super/type1ec.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/t1cmr.fd))
(/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def))
(/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def
(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu)))
(/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty)
(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifpdf.sty)
(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifvtex.sty)
(/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty)
Package geometry Warning: Over-specification in `h'-direction.
`width' (5058.9pt) is ignored.
Package geometry Warning: Over-specification in `v'-direction.
`height' (5058.9pt) is ignored.
) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty
For additional information on amsmath, use the `?' option.
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty))
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty)
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty))
(./bf616eae1512bede263889c8e1d8fb21.aux)
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd)
*geometry* driver: auto-detecting
*geometry* detected driver: dvips
! Missing $ inserted.
<inserted text>
$
l.19 {\rmfamily n_
siblings_spouses}
No pages of output.
Transcript written on bf616eae1512bede263889c8e1d8fb21.log.
---------------------------------------------------------------------------
CalledProcessError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/texmanager.py in _run_checked_subprocess(self, command, tex)
305 cwd=self.texcache,
--> 306 stderr=subprocess.STDOUT)
307 except FileNotFoundError as exc:
21 frames
CalledProcessError: Command '['latex', '-interaction=nonstopmode', '--halt-on-error', '/root/.cache/matplotlib/tex.cache/bf616eae1512bede263889c8e1d8fb21.tex']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/texmanager.py in _run_checked_subprocess(self, command, tex)
317 prog=command[0],
318 tex=tex.encode('unicode_escape'),
--> 319 exc=exc.output.decode('utf-8'))) from exc
320 _log.debug(report)
321 return report
RuntimeError: latex was not able to process the following string:
b'n_siblings_spouses'
Here is the full report generated by latex:
This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) (preloaded format=latex)
restricted \write18 enabled.
entering extended mode
(/root/.cache/matplotlib/tex.cache/bf616eae1512bede263889c8e1d8fb21.tex
LaTeX2e <2017-04-15>
Babel <3.18> and hyphenation patterns for 3 language(s) loaded.
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2014/09/29 v1.4h Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/type1cm/type1cm.sty)
(/usr/share/texmf/tex/latex/cm-super/type1ec.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/t1cmr.fd))
(/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def))
(/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty
(/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def
(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu)
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.dfu)))
(/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty)
(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifpdf.sty)
(/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifvtex.sty)
(/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty)
Package geometry Warning: Over-specification in `h'-direction.
`width' (5058.9pt) is ignored.
Package geometry Warning: Over-specification in `v'-direction.
`height' (5058.9pt) is ignored.
) (/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsmath.sty
For additional information on amsmath, use the `?' option.
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amstext.sty
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsgen.sty))
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsbsy.sty)
(/usr/share/texlive/texmf-dist/tex/latex/amsmath/amsopn.sty))
(./bf616eae1512bede263889c8e1d8fb21.aux)
(/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd)
*geometry* driver: auto-detecting
*geometry* detected driver: dvips
! Missing $ inserted.
<inserted text>
$
l.19 {\rmfamily n_
siblings_spouses}
No pages of output.
Transcript written on bf616eae1512bede263889c8e1d8fb21.log.
<Figure size 720x720 with 2 Axes>
Solution
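While looking into this problem, I found that Colab needs additional TeX-related packages to be installed before Matplotlib can render text with LaTeX; there is also an excellent answer about this on Stack Overflow. Note that the `Missing $ inserted` error itself is caused by the underscores in the label `n_siblings_spouses`: with `usetex=True` the label is passed to LaTeX verbatim, and LaTeX only accepts a bare `_` in math mode. The code below avoids the error by leaving the `usetex` settings commented out.
If you do want LaTeX rendering in Colab, you will need to install the following: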
- ! sudo apt-get install texlive-latex-recommended
- ! sudo apt-get install dvipng texlive-fonts-recommended
- ! wget http://mirrors.ctan.org/macros/latex/contrib/type1cm.zip
- ! unzip type1cm.zip -d /tmp/type1cm
- ! cd /tmp/type1cm/type1cm/ && sudo latex type1cm.ins
- ! sudo mkdir /usr/share/texmf/tex/latex/type1cm
- ! sudo cp /tmp/type1cm/type1cm/type1cm.sty /usr/share/texmf/tex/latex/type1cm
- ! sudo texhash
- ! sudo apt install cm-super
from __future__ import absolute_import,division,print_function,unicode_literals
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# from matplotlib import rc, font_manager
%matplotlib inline
from IPython.display import clear_output
from six.moves import urllib
import tensorflow.compat.v2.feature_column as fc
import tensorflow as tf
import seaborn as sns
# LaTeX text rendering is deliberately left disabled below (the rc/usetex lines are commented out).
# rc('text', usetex=True)
# matplotlib.rcParams['text.latex.preamble'] = [r'\usepackage{amsmath}']
# only if needed
#!apt install texlive-fonts-recommended texlive-fonts-extra cm-super dvipng
# plt.rc('font', family='serif')
# URL address of data
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
# Download the CSV via Keras' get_file helper, passing an explicit path under /content/sample_data.
train_file_path = tf.keras.utils.get_file("/content/sample_data/train.csv", TRAIN_DATA_URL)
# Configure NumPy print options (precision=3, suppress=True).
np.set_printoptions(precision=3, suppress=True)
# Load the downloaded CSV into a DataFrame and show its first rows.
data_train = pd.read_csv(train_file_path)
print("\n TRAIN DATA SET")
print(data_train.head(),"\n")
# Characters LaTeX treats specially in text mode, mapped to their escaped forms.
# str.translate applies the table in a single pass, so the backslashes
# introduced by one replacement are never escaped a second time.
_TEX_ESCAPES = str.maketrans({
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
})


def _tex_safe(label):
    """Return ``label`` as a string, escaped for LaTeX if ``text.usetex`` is on."""
    text = str(label)
    if plt.rcParams["text.usetex"]:
        return text.translate(_TEX_ESCAPES)
    return text


def heatMap(df):
    """Plot an annotated correlation heatmap of the numeric columns of ``df``.

    Works with and without ``text.usetex``: column names such as
    ``n_siblings_spouses`` are escaped before being used as tick labels, so
    LaTeX no longer fails with ``Missing $ inserted`` on the underscores.

    Args:
        df: DataFrame whose numeric (and boolean) columns are correlated.
            Other columns are ignored.
    """
    # Select numeric/bool columns explicitly: recent pandas versions raise on
    # string columns in corr() instead of silently dropping them.
    corr = df.select_dtypes(include=["number", "bool"]).corr()
    print(corr)

    # Tick labels, made LaTeX-safe when LaTeX text rendering is active.
    labels = [_tex_safe(name) for name in corr.columns]

    fig, ax = plt.subplots(figsize=(10, 10))
    colormap = sns.diverging_palette(220, 10, as_cmap=True)

    # Hand the labels to seaborn so they stay centred on the cells; setting
    # them with plt.xticks(range(n), ...) would move the ticks to cell edges.
    sns.heatmap(
        corr,
        cmap=colormap,
        annot=True,
        fmt=".2f",
        xticklabels=labels,
        yticklabels=labels,
        ax=ax,
    )
    plt.show()
# Plot the correlation heatmap for the training data.
heatMap(data_train)
Answered By - r-beginners
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.