Source code for moic.plugins.jira.utils.parser

"""
Module providing a parser that converts Jira's Textile-like markup to Markdown
"""
import random
import re
import string

from rich.syntax import Syntax


class JiraDocument:
    """
    Class which represents a Jira rich text field such as an issue
    description or a comment

    The raw text is split into lines and parsed into a list of element
    objects stored in ``elements``
    """

    # Rich color names used for headings, indexed by ``level - 1``
    HEAD_COLORS = [
        "dodger_blue3",
        "dodger_blue2",
        "dodger_blue1",
        "deep_sky_blue3",
        "deep_sky_blue2",
        "deep_sky_blue1",
        "bright_blue",
    ]

    # Opening prefixes of the block tags recognised by the parser
    _BLOCK_TAGS = ("{quote", "{code", "{panel", "{color")

    def __init__(self, raw: str):
        """
        Init a JiraDocument object

        Args:
            raw (str): The Jira rich text content (empty or None gives an empty document)
        """
        if raw:
            # Convert \n into \r\n to fulfill Jira's behaviour
            if "\n" in raw and "\r\n" not in raw:
                raw = raw.replace("\n", "\r\n")
            self.raw = raw
            self.elements = self._parse()
        else:
            self.raw = ""
            self.elements = []
        self.markdown = ""
        self.rendered = ""

    def _parse(self):
        """
        Parse the raw content line by line into element objects

        Lines between an opening and a closing block tag are grouped into a
        single BlockElement; every other line becomes a head, list item,
        quote, new line or text element.

        Returns:
            list: The elements of the document, in order
        """
        elements = []
        in_block = False
        block = ""
        block_type = ""
        block_style = ""

        for line in self.raw.split("\r\n"):
            if in_block:
                if any(tag in line for tag in self._BLOCK_TAGS):
                    # Closing tag: keep the text preceding it on the same line
                    in_block = False
                    before = re.search(r"(.*)\{.*\}.*", line)
                    if before:
                        block += before[1]
                    elements.append(
                        BlockElement(block, content=block, content_type=block_type, content_style=block_style)
                    )
                    block = ""
                    block_style = ""
                else:
                    block += line + "\n"
                continue

            if line.startswith(self._BLOCK_TAGS):
                oneline = self._split_oneline(line)
                if oneline is not None:
                    tag_type, tag_style, content = oneline
                    elements.append(
                        BlockElement(
                            line, content=content, content_type=tag_type, content_style=tag_style, oneline=True,
                        )
                    )
                    continue
                opening = re.match(r"\{(.*?)(:(.*?))?\}", line)
                if opening:
                    in_block = True
                    block_type = opening[1]
                    if opening[3]:
                        block_style = opening[3]
                    # Everything after the opening tag belongs to the block, braces included
                    block = line[opening.end():] + "\n"
                    continue
                # No closing brace on the tag: not a block, handle the line as regular content

            # Head Element
            search = re.search(r"^h([0-9])\. (.*)$", line)
            if search:
                elements.append(HeadElement(search[0], content=search[2], level=int(search[1])))
                continue

            # List Item Element
            search = re.search(r"^(\*+) (.*)$", line)
            if search:
                elements.append(ListItemElement(search[0], content=search[2], level=len(search[1]), item_type="*"))
                continue
            search = re.search(r"^(#+) (.*)$", line)
            if search:
                elements.append(ListItemElement(search[0], content=search[2], level=len(search[1]), item_type="#"))
                continue

            # Quote Element (the dot is escaped so that only a literal "bq. " prefix matches)
            search = re.search(r"^bq\. (.*)$", line)
            if search:
                elements.append(QuoteElement(search[0], content=search[1]))
                continue

            # New Line
            if line == "":
                elements.append(NewLineElement())
                continue

            # Text line
            elements.append(TextElement(line))

        if in_block:
            # The document ended before the closing tag: keep the collected content instead of dropping it
            elements.append(BlockElement(block, content=block, content_type=block_type, content_style=block_style))

        return elements

    @staticmethod
    def _split_oneline(line):
        """
        Split a one-line block into its parts

        Args:
            line (str): The line to split

        Returns:
            tuple: (type, style, content) when the line starts with an opening
            tag and contains the matching closing tag, None otherwise.
            style is None when the opening tag has no ':style' part
        """
        opening = re.match(r"\{(.*?)(:(.*?))?\}", line)
        if not opening:
            return None
        rest = line[opening.end():]
        closing_at = rest.rfind("{" + opening[1] + "}")
        if closing_at < 0:
            return None
        return opening[1], opening[3], rest[:closing_at]

    def isoneline(self, line: str):
        """
        Check if the given line is a oneline block, i.e. it starts with an
        opening {quote|code|etc...} tag and also holds the matching closing tag

        Args:
            line (str): The line to check

        Returns:
            bool: True if the line holds a complete pair of tags
        """
        return JiraDocument._split_oneline(line) is not None
class JiraElement:
    """
    Root type shared by the custom Jira elements

    It holds no state and performs no conversion on its own
    """

    def __init__(self):
        """
        Init the object (nothing to set up at this level)
        """

    def _parse(self):
        """
        Convert the element into markdown (no-op at this level)
        """
        return None
class TextElement:
    """
    A line of Jira text holding inline markup (user references, links,
    monospaced text, strong, emphasis, etc...)

    ``markdown`` holds the Markdown conversion and ``rendered`` the Rich
    markup conversion of the raw line
    """

    # Inline markers, applied in this order:
    # (delimiter regex, markdown delimiter, Rich style)
    _INLINE_STYLES = (
        (r"\*", "*", "bold"),  # *strong*
        (r"_", "_", "italic"),  # _emphasis_
        (r"\?\?", "*", "underline"),  # ??citation??
        (r"-", "*", "underline"),  # -deleted-
        (r"\+", "*", "underline"),  # +inserted+
        (r"\^", "*", "underline"),  # ^superscript^
        (r"~", "*", "underline"),  # ~subscript~
    )

    def __init__(self, raw: str):
        """
        Init the object

        Args:
            raw (str): The raw element
        """
        self.raw = raw
        self.content = raw
        self.rendered = raw
        self.markdown = raw
        self._parse()

    @staticmethod
    def _anchor():
        """
        Build a random bracketed placeholder used to protect a span of text
        """
        return f"[{''.join(random.choice(string.ascii_lowercase) for _ in range(10))}]"

    def _replace(self, old: str, markdown: str, rendered: str):
        """
        Replace every occurrence of old in both representations
        """
        self.markdown = self.markdown.replace(old, markdown)
        self.rendered = self.rendered.replace(old, rendered)

    def _parse(self):
        """
        Convert the element into markdown

        It will parse and convert syntax such as bold, italic, quotes etc...
        Links and monospaced spans are swapped for placeholders first so that
        the inline markers they contain (for example '-' or '_' in a URL)
        are left alone, and are put back at the end.
        """
        # Users references
        for full, user in re.findall(r"(\[~(.*?)\])", self.markdown):
            self._replace(full, f"**@{user}**", f"[bold]@{user}[/bold]")

        # (placeholder, markdown value, rendered value) of every protected span
        protected = []

        # Links
        for full, inner in re.findall(r"(\[(.*?)\])", self.markdown):
            if "|" in inner:
                parts = inner.split("|")
                title, url = parts[0], parts[1]
                value = f"[{title}]({url})"
                rendered = f"[blue]{title}[/blue] | [grey70]{url}[/grey70]"
            else:
                value = f"[{inner}]"
                rendered = f"[grey70]{inner}[/grey70]"
            anchor = self._anchor()
            protected.append((anchor, value, rendered))
            self._replace(full, anchor, anchor)

        # Monospaced
        for full, inner in re.findall(r"({{(.*?)}})", self.markdown):
            anchor = self._anchor()
            protected.append((anchor, f"`{inner}`", f"[dim]{inner}[/dim]"))
            self._replace(full, anchor, anchor)

        # Inline styles. The surrounding spaces are checked with lookarounds
        # instead of being consumed, so that two adjacent tokens separated by
        # a single space ("*a* *b*") are both found.
        for delimiter, md_mark, rich_style in self._INLINE_STYLES:
            pattern = rf"(?:(?<= )|^)({delimiter}(.*?){delimiter})(?= |$)"
            for full, inner in re.findall(pattern, self.markdown):
                self._replace(full, f"{md_mark}{inner}{md_mark}", f"[{rich_style}]{inner}[/{rich_style}]")

        # Put back protected links and monospaced strings
        for anchor, value, rendered in protected:
            self.markdown = self.markdown.replace(anchor, value)
            self.rendered = self.rendered.replace(anchor, rendered)

    def __repr__(self):
        """
        Representation method of the object
        """
        return f"<Text Element: content='{self.content}'>"
class BlockElement(JiraElement):
    """
    This class represents block part elements such as code snippets

    They could be multiline or not
    """

    def __init__(
        self, raw: str, content: str = "", content_type: str = None, content_style: str = None, oneline: bool = False,
    ):
        """
        Init the object

        Args:
            raw (str): The raw element
            content (str): The content of the element without the {block} tags
            content_type (str): The name of the block tag (example: code, quote, panel etc...)
            content_style (str): The part of the tag following ':' if it exists (example: python in {code:python})
            oneline (bool): True if the block element is represented in oneline only
        """
        self.raw = raw
        self.content = content
        self.type = content_type
        self.style = content_style
        self.oneline = oneline
        self.rendered = ""
        # Former misspelling of ``rendered``, kept so that code reading it does not break
        self.renderer = ""
        self.markdown = ""
        self._parse()

    def _parse(self):
        """
        Convert the element into markdown

        One-line blocks and {quote} blocks are rendered as a quote; any other
        multi-line block is rendered through Rich's Syntax, with the block
        style passed as the lexer name. Every multi-line block gets a fenced
        Markdown representation.
        """
        quote_markup = f"[bold blue]|[italic] Quote: [/blue bold]{self.content}[/italic]"

        # NOTE(review): one-line blocks are shown as a quote whatever their type - confirm this is intended
        if self.oneline:
            self.rendered = quote_markup
            self.markdown = f"> {self.content}"
            return

        if self.type == "quote":
            self.rendered = quote_markup
        else:
            self.rendered = Syntax(self.content, self.style, line_numbers=False)
        self.markdown = f"```{self.style if self.style else ''}\n{self.content}\n```"

    def __repr__(self):
        """
        Representation method of the element
        """
        repr_content = self.content.replace("\n", "\\n")
        return f"<Block Element: content='{repr_content}'>"
class HeadElement(JiraElement):
    """
    This class represents Header element
    """

    def __init__(self, raw, content: str = None, level: int = 1):
        """
        Init the object

        Args:
            raw (str): The raw element
            content (str): The content of the element without the 'h[0-9].' tag
            level (int): The value of the header level (example: h3. => 3)
        """
        self.raw = raw
        self.content = content
        self.level = level
        self.markdown = ""
        self.rendered = ""
        self._parse()

    def _parse(self):
        """
        Convert the element into markdown

        The heading color is picked from JiraDocument.HEAD_COLORS. The level
        is clamped to the available colors, so a level outside of the palette
        (for example h9.) uses the closest color instead of raising an
        IndexError.
        """
        colors = JiraDocument.HEAD_COLORS
        color = colors[min(max(self.level, 1), len(colors)) - 1]
        self.rendered = f"[bold {color}]{self.content}[/]"
        self.markdown = f"{'#' * self.level} {self.content}"

    def __repr__(self):
        """
        Representation method of the element
        """
        return f"<Head Element: level='{self.level}' content='{self.content}'>"
class QuoteElement(JiraElement):
    """
    A single-line quote (line starting with the 'bq.' tag in Jira)
    """

    def __init__(self, raw, content: str = None):
        """
        Build the quote and convert it right away

        Args:
            raw (str): The raw element
            content (str): The quoted text, without the 'bq.' tag
        """
        self.raw = raw
        # The quoted text may hold inline markup, so it is wrapped in a TextElement
        self.content = TextElement(content)
        self.markdown = ""
        self.rendered = ""
        self._parse()

    def _parse(self):
        """
        Fill markdown and rendered from the converted inner text
        """
        inner = self.content
        self.markdown = f"> {inner.markdown}"
        self.rendered = f"[bold blue]|[italic] Quote: [/blue bold]{inner.rendered}[/italic]"

    def __repr__(self):
        """
        Representation method of the element
        """
        inner = self.content
        return f"<Quote Element: content='{inner}'>"
class ListItemElement(JiraElement):
    """
    Class representing a list item such as:
    * item
    # item
    """

    def __init__(self, raw: str, content: str = "", level: int = 1, item_type: str = "*"):
        """
        Init the object

        Args:
            raw (str): The raw element
            content (str): The content element without the list tag
            level (int): The level of the element inside the list
            item_type (str): The kind of list: '*' (bullet) or '#' (numbered)
        """
        self.raw = raw
        self.content = TextElement(content)
        self.level = level
        self.item_type = item_type
        self.rendered = ""
        self.markdown = ""
        self._parse()

    def _parse(self):
        """
        Convert the element into markdown

        A numbered item ('#') is written with the '1.' marker, because a line
        starting with '# ' is a heading in Markdown. Nested items are
        indented by the width of the marker plus one space per level, which
        keeps the two spaces per level of bullet items.
        """
        depth = self.level - 1
        marker = "1." if self.item_type == "#" else self.item_type
        indent = " " * (len(marker) + 1) * depth
        self.rendered = f"{' ' * 2 * depth}{self.content.rendered}"
        self.markdown = f"{indent}{marker} {self.content.markdown}"

    def __repr__(self):
        """
        Representation method of the element
        """
        return f"<ListItem Element: level='{self.level}' content='{self.content}' item_type='{self.item_type}'>"
class NewLineElement(JiraElement):
    """
    An empty line of the document
    """

    def __init__(self):
        """
        Set every representation of the element to the empty string
        """
        self.raw = self.content = self.markdown = self.rendered = ""

    def __repr__(self):
        """
        Representation method of the element
        """
        label = "<NewLine Element: >"
        return label