pandoc_tabulate¶
In [2]:
import sphinx
In [8]:
import pandas as pd
import seaborn as sns
In [37]:
titanic = sns.load_dataset("titanic")
titanic.tail()
Out[37]:
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
886 | 0 | 2 | male | 27.0 | 0 | 0 | 13.00 | S | Second | man | True | NaN | Southampton | no | True |
887 | 1 | 1 | female | 19.0 | 0 | 0 | 30.00 | S | First | woman | False | B | Southampton | yes | True |
888 | 0 | 3 | female | NaN | 1 | 2 | 23.45 | S | Third | woman | False | NaN | Southampton | no | False |
889 | 1 | 1 | male | 26.0 | 0 | 0 | 30.00 | C | First | man | True | C | Cherbourg | yes | True |
890 | 0 | 3 | male | 32.0 | 0 | 0 | 7.75 | Q | Third | man | True | NaN | Queenstown | no | True |
In [26]:
help(titanic_tail.to_html)
Help on method to_html in module pandas.core.frame:
to_html(buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, bold_rows=True, classes=None, escape=True, max_rows=None, max_cols=None, show_dimensions=False, notebook=False, decimal='.') method of pandas.core.frame.DataFrame instance
Render a DataFrame as an HTML table.
`to_html`-specific options:
bold_rows : boolean, default True
Make the row labels bold in the output
classes : str or list or tuple, default None
CSS class(es) to apply to the resulting html table
escape : boolean, default True
Convert the characters <, >, and & to HTML-safe sequences.=
max_rows : int, optional
Maximum number of rows to show before truncating. If None, show
all.
max_cols : int, optional
Maximum number of columns to show before truncating. If None, show
all.
decimal : string, default '.'
Character recognized as decimal separator, e.g. ',' in Europe
.. versionadded:: 0.18.0
Parameters
----------
buf : StringIO-like, optional
buffer to write to
columns : sequence, optional
the subset of columns to write; default None writes all columns
col_space : int, optional
the minimum width of each column
header : bool, optional
whether to print column labels, default True
index : bool, optional
whether to print index (row) labels, default True
na_rep : string, optional
string representation of NAN to use, default 'NaN'
formatters : list or dict of one-parameter functions, optional
formatter functions to apply to columns' elements by position or name,
default None. The result of each function must be a unicode string.
List must be of length equal to the number of columns.
float_format : one-parameter function, optional
formatter function to apply to columns' elements if they are floats,
default None. The result of this function must be a unicode string.
sparsify : bool, optional
Set to False for a DataFrame with a hierarchical index to print every
multiindex key at each row, default True
index_names : bool, optional
Prints the names of the indexes, default True
justify : {'left', 'right'}, default None
Left or right-justify the column labels. If None uses the option from
the print configuration (controlled by set_option), 'right' out
of the box.
Returns
-------
formatted : string (or unicode, depending on data and options)
In [25]:
print(titanic_tail.to_html())
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>survived</th>
<th>pclass</th>
<th>sex</th>
<th>age</th>
<th>sibsp</th>
<th>parch</th>
<th>fare</th>
<th>embarked</th>
<th>class</th>
<th>who</th>
<th>adult_male</th>
<th>deck</th>
<th>embark_town</th>
<th>alive</th>
<th>alone</th>
</tr>
</thead>
<tbody>
<tr>
<th>886</th>
<td>0</td>
<td>2</td>
<td>male</td>
<td>27.0</td>
<td>0</td>
<td>0</td>
<td>13.00</td>
<td>S</td>
<td>Second</td>
<td>man</td>
<td>True</td>
<td>NaN</td>
<td>Southampton</td>
<td>no</td>
<td>True</td>
</tr>
<tr>
<th>887</th>
<td>1</td>
<td>1</td>
<td>female</td>
<td>19.0</td>
<td>0</td>
<td>0</td>
<td>30.00</td>
<td>S</td>
<td>First</td>
<td>woman</td>
<td>False</td>
<td>B</td>
<td>Southampton</td>
<td>yes</td>
<td>True</td>
</tr>
<tr>
<th>888</th>
<td>0</td>
<td>3</td>
<td>female</td>
<td>NaN</td>
<td>1</td>
<td>2</td>
<td>23.45</td>
<td>S</td>
<td>Third</td>
<td>woman</td>
<td>False</td>
<td>NaN</td>
<td>Southampton</td>
<td>no</td>
<td>False</td>
</tr>
<tr>
<th>889</th>
<td>1</td>
<td>1</td>
<td>male</td>
<td>26.0</td>
<td>0</td>
<td>0</td>
<td>30.00</td>
<td>C</td>
<td>First</td>
<td>man</td>
<td>True</td>
<td>C</td>
<td>Cherbourg</td>
<td>yes</td>
<td>True</td>
</tr>
<tr>
<th>890</th>
<td>0</td>
<td>3</td>
<td>male</td>
<td>32.0</td>
<td>0</td>
<td>0</td>
<td>7.75</td>
<td>Q</td>
<td>Third</td>
<td>man</td>
<td>True</td>
<td>NaN</td>
<td>Queenstown</td>
<td>no</td>
<td>True</td>
</tr>
</tbody>
</table>
In [12]:
import pypandoc
In [16]:
help(pypandoc.convert)
Help on function convert in module pypandoc:
convert(source, to, format=None, extra_args=(), encoding='utf-8', outputfile=None, filters=None)
Converts given `source` from `format` `to` another.
:param str source: Unicode string or bytes or a file path (see encoding)
:param str to: format into which the input should be converted; can be one of
`pypandoc.get_pandoc_formats()[1]`
:param str format: the format of the inputs; will be inferred if input is a file with an
known filename extension; can be one of `pypandoc.get_pandoc_formats()[1]`
(Default value = None)
:param list extra_args: extra arguments (list of strings) to be passed to pandoc
(Default value = ())
:param str encoding: the encoding of the file or the input bytes (Default value = 'utf-8')
:param str outputfile: output will be written to outfilename or the converted content
returned if None (Default value = None)
:param list filters: pandoc filters e.g. filters=['pandoc-citeproc']
:returns: converted string (unicode) or an empty string if an outputfile was given
:rtype: unicode
:raises RuntimeError: if any of the inputs are not valid of if pandoc fails with an error
:raises OSError: if pandoc is not found; make sure it has been installed and is available at
path.
In [23]:
pypandoc.get_pandoc_formats()
Out[23]:
(['commonmark',
'docbook',
'docx',
'epub',
'haddock',
'html',
'json',
'latex',
'markdown',
'markdown_github',
'markdown_mmd',
'markdown_phpextra',
'markdown_strict',
'mediawiki',
'native',
'odt',
'opml',
'org',
'rst',
't2t',
'textile',
'twiki'],
['asciidoc',
'beamer',
'commonmark',
'context',
'docbook',
'docx',
'dokuwiki',
'dzslides',
'epub',
'epub3',
'fb2',
'haddock',
'html',
'html5',
'icml',
'json',
'latex',
'man',
'markdown',
'markdown_github',
'markdown_mmd',
'markdown_phpextra',
'markdown_strict',
'mediawiki',
'native',
'odt',
'opendocument',
'opml',
'org',
'pdf',
'plain',
'revealjs',
'rst',
'rtf',
's5',
'slideous',
'slidy',
'texinfo',
'textile'])
In [45]:
titanic_tail = titanic.tail()
pypandoc.convert(titanic_tail.to_html(), "markdown", format="html", outputfile="titanic_tail.md")
pypandoc.convert(titanic_tail.to_html(), "markdown_github", format="html", outputfile="titanic_tail_github.md")
pypandoc.convert(titanic_tail.to_html(), "markdown_mmd", format="html", outputfile="titanic_tail_mmd.md")
pypandoc.convert(titanic_tail.to_html(), "markdown_phpextra", format="html", outputfile="titanic_tail_phpex.md")
pypandoc.convert(titanic_tail.to_html(index=False), "markdown_github", format="html", outputfile="titanic_tail_github_noindex.md")
titanic_tail.index.names = ["index"]
pypandoc.convert(titanic_tail.to_html(), "markdown_github", format="html", outputfile="titanic_tail_github_index.md")
print(titanic_tail.to_html())
titanic_tail
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>survived</th>
<th>pclass</th>
<th>sex</th>
<th>age</th>
<th>sibsp</th>
<th>parch</th>
<th>fare</th>
<th>embarked</th>
<th>class</th>
<th>who</th>
<th>adult_male</th>
<th>deck</th>
<th>embark_town</th>
<th>alive</th>
<th>alone</th>
</tr>
<tr>
<th>index</th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>886</th>
<td>0</td>
<td>2</td>
<td>male</td>
<td>27.0</td>
<td>0</td>
<td>0</td>
<td>13.00</td>
<td>S</td>
<td>Second</td>
<td>man</td>
<td>True</td>
<td>NaN</td>
<td>Southampton</td>
<td>no</td>
<td>True</td>
</tr>
<tr>
<th>887</th>
<td>1</td>
<td>1</td>
<td>female</td>
<td>19.0</td>
<td>0</td>
<td>0</td>
<td>30.00</td>
<td>S</td>
<td>First</td>
<td>woman</td>
<td>False</td>
<td>B</td>
<td>Southampton</td>
<td>yes</td>
<td>True</td>
</tr>
<tr>
<th>888</th>
<td>0</td>
<td>3</td>
<td>female</td>
<td>NaN</td>
<td>1</td>
<td>2</td>
<td>23.45</td>
<td>S</td>
<td>Third</td>
<td>woman</td>
<td>False</td>
<td>NaN</td>
<td>Southampton</td>
<td>no</td>
<td>False</td>
</tr>
<tr>
<th>889</th>
<td>1</td>
<td>1</td>
<td>male</td>
<td>26.0</td>
<td>0</td>
<td>0</td>
<td>30.00</td>
<td>C</td>
<td>First</td>
<td>man</td>
<td>True</td>
<td>C</td>
<td>Cherbourg</td>
<td>yes</td>
<td>True</td>
</tr>
<tr>
<th>890</th>
<td>0</td>
<td>3</td>
<td>male</td>
<td>32.0</td>
<td>0</td>
<td>0</td>
<td>7.75</td>
<td>Q</td>
<td>Third</td>
<td>man</td>
<td>True</td>
<td>NaN</td>
<td>Queenstown</td>
<td>no</td>
<td>True</td>
</tr>
</tbody>
</table>
Out[45]:
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
index | |||||||||||||||
886 | 0 | 2 | male | 27.0 | 0 | 0 | 13.00 | S | Second | man | True | NaN | Southampton | no | True |
887 | 1 | 1 | female | 19.0 | 0 | 0 | 30.00 | S | First | woman | False | B | Southampton | yes | True |
888 | 0 | 3 | female | NaN | 1 | 2 | 23.45 | S | Third | woman | False | NaN | Southampton | no | False |
889 | 1 | 1 | male | 26.0 | 0 | 0 | 30.00 | C | First | man | True | C | Cherbourg | yes | True |
890 | 0 | 3 | male | 32.0 | 0 | 0 | 7.75 | Q | Third | man | True | NaN | Queenstown | no | True |
In [30]:
import tabulate
In [65]:
titanic_tail = titanic.tail()
print(tabulate.tabulate(titanic_tail, list(titanic_tail.columns), tablefmt="pipe"))
#titanic_tail.index.name = "index"
print(tabulate.tabulate(titanic_tail, ["index"] + list(titanic_tail.columns), tablefmt="pipe"))
titanic_group = titanic.sex.value_counts()
print(tabulate.tabulate(titanic_group.reset_index(), ["sex", "counts"], tablefmt="pipe"))
titanic_crosstab = pd.crosstab(titanic.sex, titanic["class"])
print(tabulate.tabulate(titanic_crosstab, list(titanic["class"].unique()), tablefmt="pipe"))
| | survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone |
|----:|-----------:|---------:|:-------|------:|--------:|--------:|-------:|:-----------|:--------|:------|-------------:|:-------|:--------------|:--------|--------:|
| 886 | 0 | 2 | male | 27 | 0 | 0 | 13 | S | Second | man | 1 | nan | Southampton | no | 1 |
| 887 | 1 | 1 | female | 19 | 0 | 0 | 30 | S | First | woman | 0 | B | Southampton | yes | 1 |
| 888 | 0 | 3 | female | nan | 1 | 2 | 23.45 | S | Third | woman | 0 | nan | Southampton | no | 0 |
| 889 | 1 | 1 | male | 26 | 0 | 0 | 30 | C | First | man | 1 | C | Cherbourg | yes | 1 |
| 890 | 0 | 3 | male | 32 | 0 | 0 | 7.75 | Q | Third | man | 1 | nan | Queenstown | no | 1 |
| index | survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone |
|--------:|-----------:|---------:|:-------|------:|--------:|--------:|-------:|:-----------|:--------|:------|-------------:|:-------|:--------------|:--------|--------:|
| 886 | 0 | 2 | male | 27 | 0 | 0 | 13 | S | Second | man | 1 | nan | Southampton | no | 1 |
| 887 | 1 | 1 | female | 19 | 0 | 0 | 30 | S | First | woman | 0 | B | Southampton | yes | 1 |
| 888 | 0 | 3 | female | nan | 1 | 2 | 23.45 | S | Third | woman | 0 | nan | Southampton | no | 0 |
| 889 | 1 | 1 | male | 26 | 0 | 0 | 30 | C | First | man | 1 | C | Cherbourg | yes | 1 |
| 890 | 0 | 3 | male | 32 | 0 | 0 | 7.75 | Q | Third | man | 1 | nan | Queenstown | no | 1 |
| | sex | counts |
|---:|:-------|---------:|
| 0 | male | 577 |
| 1 | female | 314 |
| | Third | First | Second |
|:-------|--------:|--------:|---------:|
| female | 94 | 76 | 144 |
| male | 122 | 108 | 347 |
In [ ]:
In [ ]:
In [ ]: