# Reconstructed from the patch "added new module 'message.py'"
# (commit 3b7f6f9563dde4400ba50b27181163a03ece0b96, juk0de, 2023-08-18),
# which creates chatmastermind/message.py.
"""
Module implementing message related functions and classes.
"""
import pathlib
import yaml
from typing import Type, TypeVar, ClassVar, Optional, Any, Union
from dataclasses import dataclass, asdict
from .tags import Tag, TagLine

QuestionInst = TypeVar('QuestionInst', bound='Question')
AnswerInst = TypeVar('AnswerInst', bound='Answer')
MessageInst = TypeVar('MessageInst', bound='Message')
YamlDict = dict[str, Union[QuestionInst, AnswerInst, set[Tag]]]


class MessageError(Exception):
    """
    Raised for invalid message content and for missing or unsupported message files.
    """


def str_presenter(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode:
    """
    Changes the YAML dump style to multiline syntax for multiline strings.
    Strings consisting of a single line are dumped in the default style.
    """
    if len(data.splitlines()) > 1:
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
    return dumper.represent_scalar('tag:yaml.org,2002:str', data)


# Registered at import time, so every subsequent 'yaml.dump()' of a 'str' uses it.
yaml.add_representer(str, str_presenter)


def source_code(text: str, include_delims: bool = False) -> list[str]:
    """
    Extract all source code sections from the given text, i. e. all lines
    surrounded by lines starting with '```'. If 'include_delims' is True,
    the surrounding lines are included, otherwise they are omitted. The
    result list contains every source code section as a single string
    (terminated by a newline). The order in the list represents the order
    of the sections in the text. A section that is opened but never closed
    is not part of the result.
    """
    code_sections: list[str] = []
    code_lines: list[str] = []
    in_code_block = False

    for line in text.split('\n'):
        if line.strip().startswith('```'):
            if include_delims:
                code_lines.append(line)
            if in_code_block:
                # closing delimiter: the collected lines form one section
                code_sections.append('\n'.join(code_lines) + '\n')
                code_lines.clear()
            in_code_block = not in_code_block
        elif in_code_block:
            code_lines.append(line)

    return code_sections


class Question(str):
    """
    A single question with a defined header.
    """
    txt_header: ClassVar[str] = '=== QUESTION ==='
    yaml_key: ClassVar[str] = 'question'

    def __new__(cls: Type[QuestionInst], string: str) -> QuestionInst:
        """
        Make sure the question string does not contain the header.
        Raises MessageError if it does.
        """
        if cls.txt_header in string:
            raise MessageError(f"Question '{string}' contains the header '{cls.txt_header}'")
        return super().__new__(cls, string)

    @classmethod
    def from_list(cls: Type[QuestionInst], strings: list[str]) -> QuestionInst:
        """
        Build Question from a list of strings. The strings are joined with
        newlines and the result is stripped of surrounding whitespace.
        Raises MessageError if any of the strings contains the header.
        """
        if any(cls.txt_header in string for string in strings):
            raise MessageError(f"Question contains the header '{cls.txt_header}'")
        return super().__new__(cls, '\n'.join(strings).strip())

    def source_code(self, include_delims: bool = False) -> list[str]:
        """
        Extract and return all source code sections.
        """
        return source_code(self, include_delims)


class Answer(str):
    """
    A single answer with a defined header.
    """
    txt_header: ClassVar[str] = '=== ANSWER ==='
    yaml_key: ClassVar[str] = 'answer'

    def __new__(cls: Type[AnswerInst], string: str) -> AnswerInst:
        """
        Make sure the answer string does not contain the header.
        Raises MessageError if it does.
        """
        if cls.txt_header in string:
            raise MessageError(f"Answer '{string}' contains the header '{cls.txt_header}'")
        return super().__new__(cls, string)

    @classmethod
    def from_list(cls: Type[AnswerInst], strings: list[str]) -> AnswerInst:
        """
        Build Answer from a list of strings. The strings are joined with
        newlines and the result is stripped of surrounding whitespace.
        Raises MessageError if any of the strings contains the header.
        """
        if any(cls.txt_header in string for string in strings):
            raise MessageError(f"Answer contains the header '{cls.txt_header}'")
        return super().__new__(cls, '\n'.join(strings).strip())

    def source_code(self, include_delims: bool = False) -> list[str]:
        """
        Extract and return all source code sections.
        """
        return source_code(self, include_delims)


@dataclass
class Message:
    """
    Single message. Consists of a question and optionally an answer, a set of tags
    and a file path.
    """
    question: Question
    answer: Optional[Answer]
    tags: Optional[set[Tag]]
    file_path: Optional[pathlib.Path]
    # supported message file types
    file_suffixes: ClassVar[list[str]] = ['.txt', '.yaml']
    tags_yaml_key: ClassVar[str] = 'tags'
    file_yaml_key: ClassVar[str] = 'file_path'

    @classmethod
    def from_dict(cls: Type[MessageInst], data: dict[str, Any]) -> MessageInst:
        """
        Create a Message from the given dict. The question is mandatory
        (KeyError if missing), all other keys are optional. Question and
        answer are converted to Question / Answer instances, which raises
        MessageError if they contain the respective header. A missing (or
        'None') tags entry results in an empty tag set.
        """
        raw_answer = data.get(Answer.yaml_key)
        # NOTE(review): tags are stored as given (e. g. plain strings when
        # loaded from YAML), not converted to Tag -- confirm whether Tag
        # instances are required here.
        return cls(question=Question(data[Question.yaml_key]),
                   answer=Answer(raw_answer) if raw_answer is not None else None,
                   tags=set(data.get(cls.tags_yaml_key) or []),
                   file_path=data.get(cls.file_yaml_key))

    @classmethod
    def tags_from_file(cls: Type[MessageInst], file_path: pathlib.Path) -> set[Tag]:
        """
        Return only the tags from the given Message file. For '.txt' files the
        tags are parsed from the first line, for '.yaml' files they are read
        from the Message.tags_yaml_key entry (empty set if there is none).
        Raises MessageError if the file does not exist or has an unsupported suffix.
        """
        if not file_path.exists():
            raise MessageError(f"Message file '{file_path}' does not exist")
        if file_path.suffix not in cls.file_suffixes:
            raise MessageError(f"File type '{file_path.suffix}' is not supported")
        if file_path.suffix == '.txt':
            with open(file_path, "r") as fd:
                return TagLine(fd.readline()).tags()
        # '.yaml'
        with open(file_path, "r") as fd:
            # NOTE(review): message files only seem to need plain strings and
            # lists, so 'yaml.safe_load' would presumably suffice -- confirm
            # before switching loaders.
            data = yaml.load(fd, Loader=yaml.FullLoader)
        # 'to_file()' omits the tags key for untagged messages, so it may be missing
        return set((data or {}).get(cls.tags_yaml_key) or [])

    @classmethod
    def from_file(cls: Type[MessageInst], file_path: pathlib.Path) -> MessageInst:
        """
        Create a Message from the given file. Expects the following file structures:
        For '.txt':
          * TagLine
          * Question.txt_header
          * Question
          * Answer.txt_header (optional)
          * Answer (optional)
        For '.yaml':
          * Question.yaml_key: single or multiline string
          * Answer.yaml_key: single or multiline string (optional)
          * Message.tags_yaml_key: list of strings (optional)
        Raises MessageError if the file does not exist or has an unsupported
        suffix, and ValueError if a '.txt' file lacks the question header.
        """
        if not file_path.exists():
            raise MessageError(f"Message file '{file_path}' does not exist")
        if file_path.suffix not in cls.file_suffixes:
            raise MessageError(f"File type '{file_path.suffix}' is not supported")

        if file_path.suffix == '.txt':
            tags: set[Tag]
            question: Question
            answer: Optional[Answer]
            with open(file_path, "r") as fd:
                tags = TagLine(fd.readline()).tags()
                text = fd.read().strip().split('\n')
            question_idx = text.index(Question.txt_header) + 1
            try:
                # only an answer header following the question header counts
                answer_idx = text.index(Answer.txt_header, question_idx)
            except ValueError:
                # no answer section: everything after the header is the question
                question = Question.from_list(text[question_idx:])
                answer = None
            else:
                question = Question.from_list(text[question_idx:answer_idx])
                answer = Answer.from_list(text[answer_idx + 1:])
            return cls(question, answer, tags, file_path)
        # '.yaml'
        with open(file_path, "r") as fd:
            # NOTE(review): see 'tags_from_file()' -- 'yaml.safe_load' would
            # presumably suffice for message files; confirm before switching.
            data = yaml.load(fd, Loader=yaml.FullLoader)
        data[cls.file_yaml_key] = file_path
        return cls.from_dict(data)

    def to_file(self, file_path: Optional[pathlib.Path] = None) -> None:
        """
        Write Message to the given file. If a path is given, it also becomes
        the new 'file_path' of the message, otherwise the current 'file_path'
        is used. Creates the following file structures:
        For '.txt':
          * TagLine
          * Question.txt_header
          * Question
          * Answer.txt_header (only if the message has an answer)
          * Answer (only if the message has an answer)
        For '.yaml':
          * Question.yaml_key: single or multiline string
          * Answer.yaml_key: single or multiline string (only if not empty)
          * Message.tags_yaml_key: list of strings (only if not empty)
        Raises MessageError if there is no path to write to or if the file
        suffix is not supported.
        """
        if file_path:
            self.file_path = file_path
        if not self.file_path:
            raise MessageError("Got no valid path to write message")
        if self.file_path.suffix not in self.file_suffixes:
            raise MessageError(f"File type '{self.file_path.suffix}' is not supported")
        if self.file_path.suffix == '.txt':
            with open(self.file_path, "w") as fd:
                msg_tags = self.tags or set()
                fd.write(f'{TagLine.from_set(msg_tags)}\n')
                fd.write(f'{Question.txt_header}\n{self.question}\n')
                # don't write the literal string 'None' for a missing answer
                if self.answer is not None:
                    fd.write(f'{Answer.txt_header}\n{self.answer}\n')
        elif self.file_path.suffix == '.yaml':
            with open(self.file_path, "w") as fd:
                data: YamlDict = {Question.yaml_key: str(self.question)}
                if self.answer:
                    data[Answer.yaml_key] = str(self.answer)
                if self.tags:
                    # sorted to get a stable tag order in the file
                    data[self.tags_yaml_key] = sorted([str(tag) for tag in self.tags])
                yaml.dump(data, fd)

    def as_dict(self) -> dict[str, Any]:
        """
        Return the message fields ('question', 'answer', 'tags', 'file_path')
        as a dict, created by 'dataclasses.asdict()'.
        """
        return asdict(self)