pandoc_tabulate

In [2]:
import sphinx
In [8]:
import pandas as pd
import seaborn as sns
In [37]:
titanic = sns.load_dataset("titanic")
titanic.tail()

Out[37]:
survived pclass sex age sibsp parch fare embarked class who adult_male deck embark_town alive alone
886 0 2 male 27.0 0 0 13.00 S Second man True NaN Southampton no True
887 1 1 female 19.0 0 0 30.00 S First woman False B Southampton yes True
888 0 3 female NaN 1 2 23.45 S Third woman False NaN Southampton no False
889 1 1 male 26.0 0 0 30.00 C First man True C Cherbourg yes True
890 0 3 male 32.0 0 0 7.75 Q Third man True NaN Queenstown no True
In [26]:
help(titanic_tail.to_html)
Help on method to_html in module pandas.core.frame:

to_html(buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, bold_rows=True, classes=None, escape=True, max_rows=None, max_cols=None, show_dimensions=False, notebook=False, decimal='.') method of pandas.core.frame.DataFrame instance
    Render a DataFrame as an HTML table.

    `to_html`-specific options:

    bold_rows : boolean, default True
        Make the row labels bold in the output
    classes : str or list or tuple, default None
        CSS class(es) to apply to the resulting html table
    escape : boolean, default True
        Convert the characters <, >, and & to HTML-safe sequences.=
    max_rows : int, optional
        Maximum number of rows to show before truncating. If None, show
        all.
    max_cols : int, optional
        Maximum number of columns to show before truncating. If None, show
        all.
    decimal : string, default '.'
        Character recognized as decimal separator, e.g. ',' in Europe

        .. versionadded:: 0.18.0

    Parameters
    ----------
    buf : StringIO-like, optional
        buffer to write to
    columns : sequence, optional
        the subset of columns to write; default None writes all columns
    col_space : int, optional
        the minimum width of each column
    header : bool, optional
        whether to print column labels, default True
    index : bool, optional
        whether to print index (row) labels, default True
    na_rep : string, optional
        string representation of NAN to use, default 'NaN'
    formatters : list or dict of one-parameter functions, optional
        formatter functions to apply to columns' elements by position or name,
        default None. The result of each function must be a unicode string.
        List must be of length equal to the number of columns.
    float_format : one-parameter function, optional
        formatter function to apply to columns' elements if they are floats,
        default None. The result of this function must be a unicode string.
    sparsify : bool, optional
        Set to False for a DataFrame with a hierarchical index to print every
        multiindex key at each row, default True
    index_names : bool, optional
        Prints the names of the indexes, default True
    justify : {'left', 'right'}, default None
        Left or right-justify the column labels. If None uses the option from
        the print configuration (controlled by set_option), 'right' out
        of the box.

    Returns
    -------
    formatted : string (or unicode, depending on data and options)

In [25]:
print(titanic_tail.to_html())
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>survived</th>
      <th>pclass</th>
      <th>sex</th>
      <th>age</th>
      <th>sibsp</th>
      <th>parch</th>
      <th>fare</th>
      <th>embarked</th>
      <th>class</th>
      <th>who</th>
      <th>adult_male</th>
      <th>deck</th>
      <th>embark_town</th>
      <th>alive</th>
      <th>alone</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>886</th>
      <td>0</td>
      <td>2</td>
      <td>male</td>
      <td>27.0</td>
      <td>0</td>
      <td>0</td>
      <td>13.00</td>
      <td>S</td>
      <td>Second</td>
      <td>man</td>
      <td>True</td>
      <td>NaN</td>
      <td>Southampton</td>
      <td>no</td>
      <td>True</td>
    </tr>
    <tr>
      <th>887</th>
      <td>1</td>
      <td>1</td>
      <td>female</td>
      <td>19.0</td>
      <td>0</td>
      <td>0</td>
      <td>30.00</td>
      <td>S</td>
      <td>First</td>
      <td>woman</td>
      <td>False</td>
      <td>B</td>
      <td>Southampton</td>
      <td>yes</td>
      <td>True</td>
    </tr>
    <tr>
      <th>888</th>
      <td>0</td>
      <td>3</td>
      <td>female</td>
      <td>NaN</td>
      <td>1</td>
      <td>2</td>
      <td>23.45</td>
      <td>S</td>
      <td>Third</td>
      <td>woman</td>
      <td>False</td>
      <td>NaN</td>
      <td>Southampton</td>
      <td>no</td>
      <td>False</td>
    </tr>
    <tr>
      <th>889</th>
      <td>1</td>
      <td>1</td>
      <td>male</td>
      <td>26.0</td>
      <td>0</td>
      <td>0</td>
      <td>30.00</td>
      <td>C</td>
      <td>First</td>
      <td>man</td>
      <td>True</td>
      <td>C</td>
      <td>Cherbourg</td>
      <td>yes</td>
      <td>True</td>
    </tr>
    <tr>
      <th>890</th>
      <td>0</td>
      <td>3</td>
      <td>male</td>
      <td>32.0</td>
      <td>0</td>
      <td>0</td>
      <td>7.75</td>
      <td>Q</td>
      <td>Third</td>
      <td>man</td>
      <td>True</td>
      <td>NaN</td>
      <td>Queenstown</td>
      <td>no</td>
      <td>True</td>
    </tr>
  </tbody>
</table>
In [12]:
import pypandoc
In [16]:
help(pypandoc.convert)
Help on function convert in module pypandoc:

convert(source, to, format=None, extra_args=(), encoding='utf-8', outputfile=None, filters=None)
    Converts given `source` from `format` `to` another.

    :param str source: Unicode string or bytes or a file path (see encoding)

    :param str to: format into which the input should be converted; can be one of
            `pypandoc.get_pandoc_formats()[1]`

    :param str format: the format of the inputs; will be inferred if input is a file with an
            known filename extension; can be one of `pypandoc.get_pandoc_formats()[1]`
            (Default value = None)

    :param list extra_args: extra arguments (list of strings) to be passed to pandoc
            (Default value = ())

    :param str encoding: the encoding of the file or the input bytes (Default value = 'utf-8')

    :param str outputfile: output will be written to outfilename or the converted content
            returned if None (Default value = None)

    :param list filters: pandoc filters e.g. filters=['pandoc-citeproc']

    :returns: converted string (unicode) or an empty string if an outputfile was given
    :rtype: unicode

    :raises RuntimeError: if any of the inputs are not valid of if pandoc fails with an error
    :raises OSError: if pandoc is not found; make sure it has been installed and is available at
            path.

In [23]:
pypandoc.get_pandoc_formats()
Out[23]:
(['commonmark',
  'docbook',
  'docx',
  'epub',
  'haddock',
  'html',
  'json',
  'latex',
  'markdown',
  'markdown_github',
  'markdown_mmd',
  'markdown_phpextra',
  'markdown_strict',
  'mediawiki',
  'native',
  'odt',
  'opml',
  'org',
  'rst',
  't2t',
  'textile',
  'twiki'],
 ['asciidoc',
  'beamer',
  'commonmark',
  'context',
  'docbook',
  'docx',
  'dokuwiki',
  'dzslides',
  'epub',
  'epub3',
  'fb2',
  'haddock',
  'html',
  'html5',
  'icml',
  'json',
  'latex',
  'man',
  'markdown',
  'markdown_github',
  'markdown_mmd',
  'markdown_phpextra',
  'markdown_strict',
  'mediawiki',
  'native',
  'odt',
  'opendocument',
  'opml',
  'org',
  'pdf',
  'plain',
  'revealjs',
  'rst',
  'rtf',
  's5',
  'slideous',
  'slidy',
  'texinfo',
  'textile'])
In [45]:
titanic_tail = titanic.tail()

pypandoc.convert(titanic_tail.to_html(), "markdown",             format="html", outputfile="titanic_tail.md")
pypandoc.convert(titanic_tail.to_html(), "markdown_github",    format="html", outputfile="titanic_tail_github.md")
pypandoc.convert(titanic_tail.to_html(), "markdown_mmd",      format="html", outputfile="titanic_tail_mmd.md")
pypandoc.convert(titanic_tail.to_html(), "markdown_phpextra", format="html", outputfile="titanic_tail_phpex.md")

pypandoc.convert(titanic_tail.to_html(index=False), "markdown_github", format="html", outputfile="titanic_tail_github_noindex.md")

titanic_tail.index.names = ["index"]
pypandoc.convert(titanic_tail.to_html(), "markdown_github", format="html", outputfile="titanic_tail_github_index.md")

print(titanic_tail.to_html())
titanic_tail

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>survived</th>
      <th>pclass</th>
      <th>sex</th>
      <th>age</th>
      <th>sibsp</th>
      <th>parch</th>
      <th>fare</th>
      <th>embarked</th>
      <th>class</th>
      <th>who</th>
      <th>adult_male</th>
      <th>deck</th>
      <th>embark_town</th>
      <th>alive</th>
      <th>alone</th>
    </tr>
    <tr>
      <th>index</th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>886</th>
      <td>0</td>
      <td>2</td>
      <td>male</td>
      <td>27.0</td>
      <td>0</td>
      <td>0</td>
      <td>13.00</td>
      <td>S</td>
      <td>Second</td>
      <td>man</td>
      <td>True</td>
      <td>NaN</td>
      <td>Southampton</td>
      <td>no</td>
      <td>True</td>
    </tr>
    <tr>
      <th>887</th>
      <td>1</td>
      <td>1</td>
      <td>female</td>
      <td>19.0</td>
      <td>0</td>
      <td>0</td>
      <td>30.00</td>
      <td>S</td>
      <td>First</td>
      <td>woman</td>
      <td>False</td>
      <td>B</td>
      <td>Southampton</td>
      <td>yes</td>
      <td>True</td>
    </tr>
    <tr>
      <th>888</th>
      <td>0</td>
      <td>3</td>
      <td>female</td>
      <td>NaN</td>
      <td>1</td>
      <td>2</td>
      <td>23.45</td>
      <td>S</td>
      <td>Third</td>
      <td>woman</td>
      <td>False</td>
      <td>NaN</td>
      <td>Southampton</td>
      <td>no</td>
      <td>False</td>
    </tr>
    <tr>
      <th>889</th>
      <td>1</td>
      <td>1</td>
      <td>male</td>
      <td>26.0</td>
      <td>0</td>
      <td>0</td>
      <td>30.00</td>
      <td>C</td>
      <td>First</td>
      <td>man</td>
      <td>True</td>
      <td>C</td>
      <td>Cherbourg</td>
      <td>yes</td>
      <td>True</td>
    </tr>
    <tr>
      <th>890</th>
      <td>0</td>
      <td>3</td>
      <td>male</td>
      <td>32.0</td>
      <td>0</td>
      <td>0</td>
      <td>7.75</td>
      <td>Q</td>
      <td>Third</td>
      <td>man</td>
      <td>True</td>
      <td>NaN</td>
      <td>Queenstown</td>
      <td>no</td>
      <td>True</td>
    </tr>
  </tbody>
</table>
Out[45]:
survived pclass sex age sibsp parch fare embarked class who adult_male deck embark_town alive alone
index
886 0 2 male 27.0 0 0 13.00 S Second man True NaN Southampton no True
887 1 1 female 19.0 0 0 30.00 S First woman False B Southampton yes True
888 0 3 female NaN 1 2 23.45 S Third woman False NaN Southampton no False
889 1 1 male 26.0 0 0 30.00 C First man True C Cherbourg yes True
890 0 3 male 32.0 0 0 7.75 Q Third man True NaN Queenstown no True
In [30]:
import tabulate
In [65]:
titanic_tail = titanic.tail()

print(tabulate.tabulate(titanic_tail, list(titanic_tail.columns), tablefmt="pipe"))

#titanic_tail.index.name = "index"
print(tabulate.tabulate(titanic_tail, ["index"] + list(titanic_tail.columns), tablefmt="pipe"))

titanic_group = titanic.sex.value_counts()
print(tabulate.tabulate(titanic_group.reset_index(), ["sex", "counts"], tablefmt="pipe"))

titanic_crosstab = pd.crosstab(titanic.sex, titanic["class"])
print(tabulate.tabulate(titanic_crosstab, list(titanic["class"].unique()), tablefmt="pipe"))

|     |   survived |   pclass | sex    |   age |   sibsp |   parch |   fare | embarked   | class   | who   |   adult_male | deck   | embark_town   | alive   |   alone |
|----:|-----------:|---------:|:-------|------:|--------:|--------:|-------:|:-----------|:--------|:------|-------------:|:-------|:--------------|:--------|--------:|
| 886 |          0 |        2 | male   |    27 |       0 |       0 |  13    | S          | Second  | man   |            1 | nan    | Southampton   | no      |       1 |
| 887 |          1 |        1 | female |    19 |       0 |       0 |  30    | S          | First   | woman |            0 | B      | Southampton   | yes     |       1 |
| 888 |          0 |        3 | female |   nan |       1 |       2 |  23.45 | S          | Third   | woman |            0 | nan    | Southampton   | no      |       0 |
| 889 |          1 |        1 | male   |    26 |       0 |       0 |  30    | C          | First   | man   |            1 | C      | Cherbourg     | yes     |       1 |
| 890 |          0 |        3 | male   |    32 |       0 |       0 |   7.75 | Q          | Third   | man   |            1 | nan    | Queenstown    | no      |       1 |
|   index |   survived |   pclass | sex    |   age |   sibsp |   parch |   fare | embarked   | class   | who   |   adult_male | deck   | embark_town   | alive   |   alone |
|--------:|-----------:|---------:|:-------|------:|--------:|--------:|-------:|:-----------|:--------|:------|-------------:|:-------|:--------------|:--------|--------:|
|     886 |          0 |        2 | male   |    27 |       0 |       0 |  13    | S          | Second  | man   |            1 | nan    | Southampton   | no      |       1 |
|     887 |          1 |        1 | female |    19 |       0 |       0 |  30    | S          | First   | woman |            0 | B      | Southampton   | yes     |       1 |
|     888 |          0 |        3 | female |   nan |       1 |       2 |  23.45 | S          | Third   | woman |            0 | nan    | Southampton   | no      |       0 |
|     889 |          1 |        1 | male   |    26 |       0 |       0 |  30    | C          | First   | man   |            1 | C      | Cherbourg     | yes     |       1 |
|     890 |          0 |        3 | male   |    32 |       0 |       0 |   7.75 | Q          | Third   | man   |            1 | nan    | Queenstown    | no      |       1 |
|    | sex    |   counts |
|---:|:-------|---------:|
|  0 | male   |      577 |
|  1 | female |      314 |
|        |   Third |   First |   Second |
|:-------|--------:|--------:|---------:|
| female |      94 |      76 |      144 |
| male   |     122 |     108 |      347 |
In [ ]:

In [ ]:

In [ ]: