53 lines
1.9 KiB
Python
53 lines
1.9 KiB
Python
import tomllib # For parsing the TOML-like section (Python 3.11+)
|
|
import re
|
|
from pathlib import Path
|
|
|
|
|
|
class PostParser:
|
|
def __init__(self, file_path):
|
|
"""
|
|
Initialize the parser with the path to the input file.
|
|
|
|
:param file_path: Path to the file that contains the input data.
|
|
"""
|
|
self.file_path = file_path
|
|
self.fixed_path = Path(self.file_path).expanduser()
|
|
|
|
def parse(self):
|
|
"""
|
|
Parses the file into two parts: a dictionary of TOML values and Markdown content.
|
|
|
|
:return: A tuple containing (TOML dictionary, Markdown string).
|
|
"""
|
|
try:
|
|
with open(self.fixed_path, "r") as f:
|
|
content = f.read()
|
|
|
|
# Extract the triple-quoted TOML section and Markdown using regex
|
|
match = re.match(r'"""(.*?)"""\n(.*)', content, re.DOTALL)
|
|
if match is None:
|
|
raise ValueError("Input file does not follow the expected format.")
|
|
|
|
toml_content, markdown_content = match.groups()
|
|
|
|
# Validate and parse the TOML section
|
|
self._validate_toml(toml_content.strip())
|
|
toml_dict = tomllib.loads(toml_content.strip())
|
|
|
|
return toml_dict, markdown_content.strip()
|
|
except Exception as e:
|
|
raise RuntimeError(f"Failed to parse the file '{self.file_path}'. Error: {e}")
|
|
|
|
def _validate_toml(self, toml_content):
|
|
"""
|
|
Validates the TOML section for known issues and provides descriptive error messages.
|
|
|
|
:param toml_content: The TOML content as a string.
|
|
:raises ValueError: If the TOML content contains known errors.
|
|
"""
|
|
# Check for uppercase booleans
|
|
if re.search(r"=\s*(True|False)", toml_content):
|
|
raise ValueError("TOML booleans must be lowercase (true/false).")
|
|
|
|
# Add additional TOML validation as needed
|