Source code for camelot.plotting

"""Plotting functions usefull for visual debugging."""

from pdfminer.layout import LTTextLineVertical


try:
    import matplotlib.patches as patches
    import matplotlib.pyplot as plt
except ImportError:
    _HAS_MPL = False
else:
    _HAS_MPL = True

from .utils import bbox_from_str
from .utils import bbox_from_textlines
from .utils import get_textline_coords


def extend_axe_lim(ax, bbox, margin=10):
    """Widen the axes limits, where needed, so bbox plus a margin is visible.

    Limits are only ever pushed outwards: a limit that already covers the
    padded bbox is left at its current value.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes whose x and y limits are read and then set.
    bbox : sequence
        Bounding box; indices 0 and 2 are used as x bounds, indices 1 and 3
        as y bounds.
    margin : int, optional
        Padding added around the bbox on every side, by default 10
    """
    xmin, xmax = ax.get_xlim()
    ymin, ymax = ax.get_ylim()

    lower_x = min(xmin, bbox[0] - margin)
    upper_x = max(xmax, bbox[2] + margin)
    ax.set_xlim(lower_x, upper_x)

    lower_y = min(ymin, bbox[1] - margin)
    upper_y = max(ymax, bbox[3] + margin)
    ax.set_ylim(lower_y, upper_y)


def draw_labeled_bbox(
    ax,
    bbox,
    text,
    color="black",
    linewidth=3,
    linestyle="solid",
    label_pos="top,left",
    fontsize=12,
):
    """Draw an unfilled rectangle with a text label anchored on its border.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        The axes to draw on.
    bbox : sequence of 4 numbers
        Bounding box; indices 0 and 2 are the x bounds, indices 1 and 3 the
        y bounds.
    text : str
        The label text drawn at the anchor selected by ``label_pos``.
    color : str, optional
        The color of the box outline and of the (mostly transparent) label
        background, by default "black"
    linewidth : int, optional
        The linewidth of the box, by default 3
    linestyle : str, optional
        The matplotlib linestyle, by default "solid"
    label_pos : str, optional
        The label position as "<vertical>,<horizontal>", by default
        "top,left". Vertical is "top", "bottom" or "center"; horizontal is
        "left", "right" or "center". Whitespace around the comma is ignored
        and any missing or unrecognised keyword is treated as "center".
    fontsize : int, optional
        The fontsize of the label text, by default 12
    """
    ax.add_patch(
        patches.Rectangle(
            (bbox[0], bbox[1]),
            bbox[2] - bbox[0],
            bbox[3] - bbox[1],
            color=color,
            linewidth=linewidth,
            linestyle=linestyle,
            fill=False,
        )
    )

    # partition (rather than split) tolerates a label_pos without a comma,
    # and strip tolerates "top, left". Anything that is not a known keyword
    # falls back to "center", so the anchor coordinates and the alignment
    # passed to ax.text always agree.
    vlabel, _, hlabel = (part.strip() for part in label_pos.partition(","))
    if vlabel not in ("top", "bottom"):
        vlabel = "center"
    if hlabel not in ("left", "right"):
        hlabel = "center"

    if vlabel == "top":
        y = max(bbox[1], bbox[3])
    elif vlabel == "bottom":
        y = min(bbox[1], bbox[3])
    else:
        y = 0.5 * (bbox[1] + bbox[3])

    # We want to draw the label outside the box (above or below), so the
    # text's vertical alignment is the opposite of the chosen edge.
    label_align_swap = {"top": "bottom", "bottom": "top", "center": "center"}
    vlabel_out_of_box = label_align_swap[vlabel]

    if hlabel == "right":
        x = max(bbox[0], bbox[2])
    elif hlabel == "left":
        x = min(bbox[0], bbox[2])
    else:
        x = 0.5 * (bbox[0] + bbox[2])

    ax.text(
        x,
        y,
        text,
        fontsize=fontsize,
        color="black",
        verticalalignment=vlabel_out_of_box,
        horizontalalignment=hlabel,
        bbox={"facecolor": color, "alpha": 0.1},
    )


def draw_pdf(table, ax):
    """Render the image of the table's source pdf onto the given axes.

    Parameters
    ----------
    table : camelot.core.Table
        Provides the image through ``get_pdf_image()`` and the extent
        through ``pdf_size``.
    ax : matplotlib.axes.Axes
        The axes the image is shown on.
    """
    page_image = table.get_pdf_image()
    # Stretch the image over the rectangle from (0, 0) to
    # (pdf_size[0], pdf_size[1]).
    page_extent = (0, table.pdf_size[0], 0, table.pdf_size[1])
    ax.imshow(page_image, extent=page_extent)


def draw_parse_constraints(table, ax):
    """Outline the user provided zone constraints (table regions and areas).

    Nothing is drawn when ``table.parse_details`` is falsy. Otherwise every
    entry under the "table_regions" and "table_areas" keys gets a dotted
    purple box labelled with its kind and its original string.

    Parameters
    ----------
    table : camelot.core.Table
    ax : matplotlib.axes.Axes
        The axes the boxes are drawn on.
    """
    if not table.parse_details:
        return

    # (label prefix, key in table.parse_details), drawn in this order.
    zone_kinds = (("region", "table_regions"), ("area", "table_areas"))
    for zone_name, zone_id in zone_kinds:
        # A None (or otherwise falsy) entry means no zone of that kind.
        zone_strings = table.parse_details[zone_id] or []
        for zone_str in zone_strings:
            zone_bbox = bbox_from_str(zone_str)
            label = "{zone_name}: ({zone_str})".format(
                zone_name=zone_name, zone_str=zone_str
            )
            draw_labeled_bbox(
                ax,
                zone_bbox,
                label,
                color="purple",
                linestyle="dotted",
                linewidth=1,
                label_pos="bottom,right",
            )


def draw_text(table, ax):
    """Draw text, horizontal in blue, vertical in red.

    Each textline of the table is drawn as a translucent rectangle, then the
    axes limits are extended so that the box enclosing all textlines is
    visible.

    Parameters
    ----------
    table : camelot.core.Table
        Its ``textlines`` are drawn.
    ax : matplotlib.axes.Axes
        The axes the rectangles are added to.
    """
    bbox = bbox_from_textlines(table.textlines)
    for textline in table.textlines:
        color = "red" if isinstance(textline, LTTextLineVertical) else "blue"
        ax.add_patch(
            patches.Rectangle(
                (textline.x0, textline.y0),
                textline.x1 - textline.x0,
                textline.y1 - textline.y0,
                color=color,
                alpha=0.2,
            )
        )
    # extend_axe_lim subscripts the bbox, so a None bbox would raise a
    # TypeError. NOTE(review): bbox_from_textlines is defined in .utils and
    # presumably returns None when there are no textlines -- confirm.
    if bbox is not None:
        extend_axe_lim(ax, bbox)


def prepare_plot(table, ax=None):
    """Set up the axes and draw the layers shared by every plot kind.

    The pdf image is drawn first, then the user provided parse constraints
    on top of it.

    Parameters
    ----------
    table : camelot.core.Table
    ax : matplotlib.axes.Axes (optional)
        Axes to draw on. When omitted, a new figure holding a single
        equal-aspect subplot is created.

    Returns
    -------
    ax : matplotlib.axes.Axes
        The axes that were drawn on (the one passed in, or the new one).
    """
    target_ax = ax
    if target_ax is None:
        target_ax = plt.figure().add_subplot(111, aspect="equal")

    for draw_layer in (draw_pdf, draw_parse_constraints):
        draw_layer(table, target_ax)
    return target_ax


class PlotMethods:
    """Callable collection of plotting methods, one method per plot kind.

    Calling an instance dispatches to the method named by ``kind``.
    """

    def __call__(self, table, kind="text", filename=None, ax=None):
        """Plot elements found on PDF page based on kind specified.

        Useful for debugging and playing with different parameters to get
        the best output.

        Parameters
        ----------
        table : camelot.core.Table
            A Camelot Table.
        kind : str, optional (default: 'text')
            {'text', 'grid', 'contour', 'textedge', 'joint', 'line'}
            The element type for which a plot should be generated. It is
            looked up as an attribute of this object.
        filename : str, optional (default: None)
            Absolute path for saving the generated plot.
        ax : matplotlib.axes.Axes (optional)

        Returns
        -------
        fig : matplotlib.figure.Figure or None
            The generated figure, or None when ``filename`` is given (the
            figure is then saved to that path instead of being returned).

        Raises
        ------
        ImportError
            If matplotlib could not be imported.
        NotImplementedError
            If ``kind`` is 'textedge' for the lattice flavor, or 'line' for
            any other flavor.
        AttributeError
            If ``kind`` does not name an attribute of this object.
        """
        if not _HAS_MPL:
            raise ImportError("matplotlib is required for plotting.")

        if table.flavor == "lattice" and kind == "textedge":
            raise NotImplementedError(f"Lattice flavor does not support kind={kind!r}")
        if table.flavor != "lattice" and kind == "line":
            raise NotImplementedError(
                f"{table.flavor} flavor does not support kind={kind!r}"
            )

        # NOTE(review): the previous docstring also listed
        # 'network_table_search' as a kind, but no method of that name is
        # defined in this class as visible here -- confirm before re-adding.
        plot_method = getattr(self, kind)
        if filename is not None:
            fig = plot_method(table, ax)
            fig.savefig(filename)
            return None

        return plot_method(table, ax)

    def text(self, table, ax=None):
        """Generate a plot for all text elements present on the PDF page.

        Parameters
        ----------
        table : camelot.core.Table
        ax : matplotlib.axes.Axes (optional)

        Returns
        -------
        fig : matplotlib.figure.Figure
        """
        ax = prepare_plot(table, ax)
        draw_text(table, ax)
        return ax.get_figure()

    @staticmethod
    def grid(table, ax=None):
        """Generate a plot for the detected table grids on the PDF page.

        Only the cell edges flagged on each cell (left, right, top, bottom)
        are drawn.

        Parameters
        ----------
        table : camelot.core.Table
        ax : matplotlib.axes.Axes (optional)

        Returns
        -------
        fig : matplotlib.figure.Figure
        """
        ax = prepare_plot(table, ax)
        for row in table.cells:
            for cell in row:
                if cell.left:
                    ax.plot([cell.lb[0], cell.lt[0]], [cell.lb[1], cell.lt[1]])
                if cell.right:
                    ax.plot([cell.rb[0], cell.rt[0]], [cell.rb[1], cell.rt[1]])
                if cell.top:
                    ax.plot([cell.lt[0], cell.rt[0]], [cell.lt[1], cell.rt[1]])
                if cell.bottom:
                    ax.plot([cell.lb[0], cell.rb[0]], [cell.lb[1], cell.rb[1]])
        return ax.get_figure()

    @staticmethod
    def contour(table, ax=None):
        """Generate a plot for all table boundaries present on the PDF page.

        The table bounding box is outlined in red. For flavors other than
        lattice the text is drawn as well and the axes limits are extended
        to include the table bounding box.

        Parameters
        ----------
        table : camelot.core.Table
        ax : matplotlib.axes.Axes (optional)

        Returns
        -------
        fig : matplotlib.figure.Figure
        """
        _for_lattice = table.flavor == "lattice"
        ax = prepare_plot(table, ax)

        if not _for_lattice:
            draw_text(table, ax)

        ax.add_patch(
            patches.Rectangle(
                (table._bbox[0], table._bbox[1]),
                table._bbox[2] - table._bbox[0],
                table._bbox[3] - table._bbox[1],
                fill=False,
                color="red",
            )
        )

        if not _for_lattice:
            extend_axe_lim(ax, table._bbox)
        return ax.get_figure()

    @staticmethod
    def textedge(table, ax=None):
        """Generate a plot for relevant textedges.

        For the network flavor, each network search has its most connected
        textline highlighted in red, and every textline gets its alignments
        drawn in green (vertical run of points) and blue (horizontal run of
        points), each annotated with the number of aligned textlines. For
        other flavors, each entry of ``table._textedges`` is drawn as a
        vertical segment.

        Parameters
        ----------
        table : camelot.core.Table
        ax : matplotlib.axes.Axes (optional)

        Returns
        -------
        fig : matplotlib.figure.Figure
        """
        ax = prepare_plot(table, ax)
        draw_text(table, ax)

        if table.flavor == "network":
            for network in table.parse_details["network_searches"]:
                most_connected_tl = network.most_connected_textline()

                ax.add_patch(
                    patches.Rectangle(
                        (most_connected_tl.x0, most_connected_tl.y0),
                        most_connected_tl.x1 - most_connected_tl.x0,
                        most_connected_tl.y1 - most_connected_tl.y0,
                        color="red",
                        alpha=0.5,
                    )
                )
                # Visit textlines by decreasing y0, then increasing x0.
                for tl in sorted(
                    network._textline_to_alignments.keys(),
                    key=lambda textline: (-textline.y0, textline.x0),
                ):
                    alignments = network._textline_to_alignments[tl]
                    coords = get_textline_coords(tl)
                    # The "_h" names receive the result of max_v() and the
                    # "_v" names the result of max_h(), as in the original.
                    alignment_id_h, tls_h = alignments.max_v()
                    alignment_id_v, tls_v = alignments.max_h()
                    xs = [aligned.x0 for aligned in tls_v]
                    ys = [aligned.y1 for aligned in tls_h]

                    # Count label just above the topmost aligned textline.
                    top_h = max(ys)
                    ax.text(
                        coords[alignment_id_h],
                        top_h + 5,
                        f"{len(tls_h)}",
                        verticalalignment="bottom",
                        horizontalalignment="center",
                        fontsize=8,
                        color="green",
                    )
                    ax.plot(
                        [coords[alignment_id_h]] * len(ys),
                        ys,
                        color="green",
                        linestyle="solid",
                        linewidth=1,
                        marker="o",
                        markeredgecolor="green",
                        fillstyle=None,
                        markersize=4,
                        alpha=0.8,
                    )

                    # Count label just left of the leftmost aligned textline.
                    left_v = min(xs)
                    ax.text(
                        left_v - 5,
                        coords[alignment_id_v],
                        f"{len(tls_v)}",
                        verticalalignment="center",
                        horizontalalignment="right",
                        fontsize=8,
                        color="blue",
                    )
                    ax.plot(
                        xs,
                        [coords[alignment_id_v]] * len(xs),
                        color="blue",
                        linestyle="solid",
                        linewidth=1,
                        marker="o",
                        markeredgecolor="blue",
                        fillstyle="full",
                        markersize=3,
                        alpha=0.8,
                    )
        else:
            for te in table._textedges:
                ax.plot([te.coord, te.coord], [te.y0, te.y1])
        return ax.get_figure()

    @staticmethod
    def joint(table, ax=None):
        """Generate a plot for all line intersections present on the PDF page.

        Parameters
        ----------
        table : camelot.core.Table
        ax : matplotlib.axes.Axes (optional)

        Returns
        -------
        fig : matplotlib.figure.Figure
        """
        ax = prepare_plot(table, ax)
        # NOTE(review): this reads table.parse, whereas the other functions
        # in this module read table.parse_details -- confirm it is intended.
        joints = list(table.parse["joints"])
        x_coord = [coord[0] for coord in joints]
        y_coord = [coord[1] for coord in joints]
        ax.plot(x_coord, y_coord, "ro")
        return ax.get_figure()

    @staticmethod
    def line(table, ax=None):
        """Generate a plot for all line segments present on the PDF page.

        Parameters
        ----------
        table : camelot.core.Table
        ax : matplotlib.axes.Axes (optional)

        Returns
        -------
        fig : matplotlib.figure.Figure
        """
        ax = prepare_plot(table, ax)
        vertical, horizontal = table._segments
        # Each segment is indexed as (x_start, y_start, x_end, y_end).
        for v in vertical:
            ax.plot([v[0], v[2]], [v[1], v[3]])
        for h in horizontal:
            ax.plot([h[0], h[2]], [h[1], h[3]])
        return ax.get_figure()