Source code for moic.plugins.jira.utils.parser

"""
Module for a Jira Textile like to Markdown parser
"""
import random
import re
import string

from rich.syntax import Syntax


[docs]class JiraDocument:
    """
    Class wich represent a Jira Rich text field such as an Issue Description or comment
    It provides methods to convert it to Markdown and prepare it for rendering through Rich
    """

    HEAD_COLORS = [
        "dodger_blue3",
        "dodger_blue2",
        "dodger_blue1",
        "deep_sky_blue3",
        "deep_sky_blue2",
        "deep_sky_blue1",
        "bright_blue",
    ]

    def __init__(self, raw: str):
        """
        Init a JiraDocument object

        Args:
            raw (str): The Jira rich text content
        """
        # Convert \n into \r\n to fulfill Jira's behaviour
        if raw:
            if "\n" in raw and "\r\n" not in raw:
                raw = raw.replace("\n", "\r\n")

            self.raw = raw
            self.elements = self._parse()
        else:
            self.raw = ""
            self.elements = []
            self.markdown = ""
            self.rendered = ""

    def _parse(self):
        """
        Parse the raw content line by line into cutom JiraElement objects
        It will groups block lines into BlockElement and create a list of all elements
        presents into the document
        """
        elements = []

        in_block = False
        block = ""
        block_type = ""
        block_style = ""

        for line in self.raw.split("\r\n"):
            # if line starts with {code}, {quote}, {panel} or {color}
            if (
                line.startswith("{quote")
                or line.startswith("{code")
                or line.startswith("{panel")
                or line.startswith("{color")
            ) and not in_block:
                if self.isoneline(line):
                    search = re.search(r"{(.*?)(:(.*?))?}(.*){.*}", line)
                    elements.append(
                        BlockElement(
                            search.string,
                            content=search[4],
                            content_type=search[1],
                            content_style=search[3],
                            oneline=True,
                        )
                    )
                    continue
                else:
                    in_block = True
                    search = re.search(r"\{(.*?)(:(.*?))?\}", line)
                    block_type = search[1]
                    if search[3]:
                        block_style = search[3]
                # Get the content existing after the block element to put back into block
                if re.search(r"\{.*\}(.*)", line):
                    block = re.search(r"\{.*\}(.*)", line)[1] + "\n"
            elif ("{quote" in line or "{code" in line or "{panel" in line or "{color" in line) and in_block:
                in_block = False
                # Get content esisting before the block element and put it back into block
                if re.search(r"\{.*\}(.*)", line):
                    block = block + re.search(r"(.*)\{.*\}.*", line)[1]
                elements.append(BlockElement(block, content=block, content_type=block_type, content_style=block_style,))
                block = ""
                block_style = ""
            else:
                if in_block:
                    block = block + line + "\n"
                else:
                    # Head Element
                    search = re.search(r"^h([0-9])\. (.*)$", line)
                    if search:
                        elements.append(HeadElement(search[0], content=search[2], level=int(search[1])))
                        continue
                    # List Item Element
                    search = re.search(r"^(\*+) (.*)$", line)
                    if search:
                        elements.append(
                            ListItemElement(search[0], content=search[2], level=len(search[1]), item_type="*",)
                        )
                        continue
                    search = re.search(r"^(#+) (.*)$", line)
                    if search:
                        elements.append(
                            ListItemElement(search[0], content=search[2], level=len(search[1]), item_type="#",)
                        )
                        continue
                    # Quote Element
                    search = re.search(r"^bq. (.*)$", line)
                    if search:
                        elements.append(QuoteElement(search[0], content=search[1]))
                        continue
                    # Table Element
                    # Text line
                    # New Line
                    if line == "":
                        elements.append(NewLineElement())
                        continue
                    elements.append(TextElement(line))
        return elements

[docs]    def isoneline(self, line: str):
        """
        Check if the given line is a oneline block
        With a pair of {quote|code|etc...} tags

        Args:
            line (str): The line to check
        """
        search = re.search(r"^\{(.*?)(:(.*?))?\}(.*)\{.*$", line)
        if search:
            block_type = search[1]
            if re.findall(rf"{{{block_type}", line):
                return True
        return False


[docs]class JiraElement:
    """
    A root custom JiraElement
    It represent any kind of element
    """

    def __init__(self):
        """
        Init the object
        """
        pass

    def _parse(self):
        """
        Convert the element into markdown
        """
        pass


[docs]class TextElement:
    def __init__(self, raw: str):
        """
        Init the object

        Args:
            raw (str): The raw element
        """
        self.raw = raw
        self.content = raw
        self.rendered = raw
        self.markdown = raw
        self._parse()

    def _parse(self):
        """
        Convert the element into markdown
        It will parse and convert syntax such bold, italic, quotes etc...
        """

        # Users references
        matches = re.findall(r"(\[~(.*?)\])", self.markdown)
        for match in matches:
            self.markdown = self.markdown.replace(match[0], f"**@{match[1]}**")
            self.rendered = self.rendered.replace(match[0], f"[bold]@{match[1]}[/bold]")

        # Get links which should be protected
        links = []
        for match in re.findall(r"(\[(.*?)\])", self.markdown):
            link_anchor = f"[{''.join(random.choice(string.ascii_lowercase) for i in range(10))}]"
            title = ""
            url = match[1]
            rendered = f"[grey70]{url}[/grey70]"
            if "|" in match[1]:
                title = match[1].split("|")[0]
                url = match[1].split("|")[1]
                rendered = f"[blue]{title}[/blue] | [grey70]{url}[/grey70]"
            link_value = f"[{title}]({url})" if "|" in match[1] else f"[{url}]"

            links.append({"key": link_anchor, "value": link_value, "rendered": rendered})
            self.markdown = self.markdown.replace(match[0], link_anchor)
            self.rendered = self.rendered.replace(match[0], link_anchor)

        monospaceds = []
        for match in re.findall(r"({{(.*?)}})", self.markdown):
            monospaced_anchor = f"[{''.join(random.choice(string.ascii_lowercase) for i in range(10))}]"
            monospaceds.append(
                {"key": monospaced_anchor, "value": f"`{match[1]}`", "rendered": f"[dim]{match[1]}[/dim]"}
            )
            self.markdown = self.markdown.replace(match[0], monospaced_anchor)
            self.rendered = self.rendered.replace(match[0], monospaced_anchor)

        # *strong*
        matches = re.findall(r"( |^)(\*(.*?)\*)( |$)", self.markdown)
        for match in matches:
            self.markdown = self.markdown.replace(match[1], f"*{match[2]}*")
            self.rendered = self.rendered.replace(match[1], f"[bold]{match[2]}[/bold]")
        # _emphasis_
        matches = re.findall(r"( |^)(_(.*?)_)( |$)", self.markdown)
        for match in matches:
            self.markdown = self.markdown.replace(match[1], f"_{match[2]}_")
            self.rendered = self.rendered.replace(match[1], f"[italic]{match[2]}[/italic]")
        # ??citation??
        matches = re.findall(r"( |^)(\?\?(.*?)\?\?)( |$)", self.markdown)
        for match in matches:
            self.markdown = self.markdown.replace(match[1], f"*{match[2]}*")
            self.rendered = self.rendered.replace(match[1], f"[underline]{match[2]}[/underline]")
        # -deleted- / +inserted+ / ^superscript^ / ~subscript~
        matches = re.findall(r"( |^)(-(.*?)-)( |$)", self.markdown)
        for match in matches:
            self.markdown = self.markdown.replace(match[1], f"*{match[2]}*")
            self.rendered = self.rendered.replace(match[1], f"[underline]{match[2]}[/underline]")

        matches = re.findall(r"( |^)(\+(.*?)\+)( |$)", self.markdown)
        for match in matches:
            self.markdown = self.markdown.replace(match[1], f"*{match[2]}*")
            self.rendered = self.rendered.replace(match[1], f"[underline]{match[2]}[/underline]")

        matches = re.findall(r"( |^)(\^(.*?)\^)( |$)", self.markdown)
        for match in matches:
            self.markdown = self.markdown.replace(match[1], f"*{match[2]}*")
            self.rendered = self.rendered.replace(match[1], f"[underline]{match[2]}[/underline]")

        matches = re.findall(r"( |^)(~(.*?)~)( |$)", self.markdown)
        for match in matches:
            self.markdown = self.markdown.replace(match[1], f"*{match[2]}*")
            self.rendered = self.rendered.replace(match[1], f"[underline]{match[2]}[/underline]")

        # Put back protected links and monospaced strings
        for link in links:
            self.markdown = self.markdown.replace(link["key"], link["value"])
            self.rendered = self.rendered.replace(link["key"], link["rendered"])

        for monospaced in monospaceds:
            self.markdown = self.markdown.replace(monospaced["key"], monospaced["value"])
            self.rendered = self.rendered.replace(monospaced["key"], monospaced["rendered"])

    def __repr__(self):
        """
        Representation method of the object
        """
        return f"<Text Element: content='{self.content}'>"


[docs]class BlockElement(JiraElement):
    """
    This class represent block part element such as code snippets
    They could be multiline or note
    """

    def __init__(
        self, raw: str, content: str = "", content_type: str = None, content_style: str = None, oneline: bool = False,
    ):
        """
        Init the object

        Args:
            raw (str): The raw element
            content (str): The content of the element without the {block} tags
            content_type (str): The type of content extracted from the taf (example: python, bash etc...)
            content_style (str): The content style if it exists (For panel)
            oneline (bool): True if the block element is represented in oneline only
        """
        self.raw = raw
        self.content = content
        self.type = content_type
        self.style = content_style
        self.oneline = oneline
        self.renderer = ""
        self.markdown = ""
        self._parse()

    def _parse(self):
        """
        Convert the element into markdown
        It will convert different kind of block part:
        - {quote}
        - {code}
        - {color}
        - {panel}
        - {etc}
        """
        if self.oneline:
            self.rendered = f"[bold blue]|[italic] Quote: [/blue bold]{self.content}[/italic]"
            self.markdown = f"> {self.content}"
        else:
            if self.type == "quote":
                self.rendered = f"[bold blue]|[italic] Quote: [/blue bold]{self.content}[/italic]"
            else:
                self.rendered = Syntax(self.content, self.style, line_numbers=False)
            self.markdown = f"```{self.style if self.style else ''}\n{self.content}\n```"

    def __repr__(self):
        """
        Representation method of the element
        """
        repr_content = self.content.replace("\n", "\\n")
        return f"<Block Element: content='{repr_content}'>"


[docs]class HeadElement(JiraElement):
    """
    This class represents Header element
    """

    def __init__(self, raw, content: str = None, level: int = 1):
        """
        Init the object

        Args:
            raw (str): The raw element
            content (str): The content of the element without the 'h[0-9].' tag
            level (int): The value of the header level (example: h3. => 3)
        """
        self.raw = raw
        self.content = content
        self.level = level
        self.markdown = ""
        self.rendered = ""
        self._parse()

    def _parse(self):
        """
        Convert the element into markdown
        """
        self.rendered = f"[bold {JiraDocument.HEAD_COLORS[self.level - 1]}]{self.content}[/]"
        self.markdown = f"{'#' * self.level} {self.content}"

    def __repr__(self):
        """
        Representation method of the element
        """
        return f"<Head Element: level='{self.level}' content='{self.content}'>"


[docs]class QuoteElement(JiraElement):
    """
    Class representing a quote (tag bq. in Jira)
    """

    def __init__(self, raw, content: str = None):
        """
        Init the object

        Args:
            raw (str): The raw element
            content (str): The content of the element without the 'bq.' tag
        """
        self.raw = raw
        self.content = TextElement(content)
        self.markdown = ""
        self.rendered = ""
        self._parse()

    def _parse(self):
        """
        Convert the element into markdown
        """
        self.rendered = f"[bold blue]|[italic] Quote: [/blue bold]{self.content.rendered}[/italic]"
        self.markdown = f"> {self.content.markdown}"

    def __repr__(self):
        """
        Representation method of the element
        """
        return f"<Quote Element: content='{self.content}'>"


[docs]class ListItemElement(JiraElement):
    """
    Class representing a list item such as:
    * item
    """

    def __init__(self, raw: str, content: str = "", level: int = 1, item_type: str = "*"):
        """
        Init the object

        Args:
            raw (str): The raw element
            content (str): The content element without the '*' tag
            level (str): The level of the element inside the list
            item_type (str): The kind of list (Support only : '*')
        """
        self.raw = raw
        self.content = TextElement(content)
        self.level = level
        self.item_type = item_type
        self.rendered = ""
        self.markdown = ""
        self._parse()

    def _parse(self):
        """
        Convert the element into markdown
        """
        self.rendered = f"{' ' * 2 *(self.level - 1)}• {self.content.rendered}"
        self.markdown = f"{' ' * 2 *(self.level - 1)}{self.item_type} {self.content.markdown}"

    def __repr__(self):
        """
        Representation method of the element
        """
        return f"<ListItem Element: level='{self.level}' content='{self.content}' item_type='{self.item_type}'>"


[docs]class NewLineElement(JiraElement):
    """
    Class representing an emtpy line
    """

    def __init__(self):
        """
        Init the object
        """
        self.raw = ""
        self.content = ""
        self.markdown = ""
        self.rendered = ""

    def __repr__(self):
        """
        Representation method of the element
        """
        return "<NewLine Element: >"