added module 'glossary.py'

2024-02-02 15:19:07 +01:00 · 2024-02-02 15:19:07 +01:00 · be873867ea
commit be873867ea
parent 82ad697b68
1 changed files with 128 additions and 0 deletions
--- a/chatmastermind/glossary.py
+++ b/chatmastermind/glossary.py
@@ -0,0 +1,128 @@
+"""
+Module implementing glossaries for translations.
+"""
+import yaml
+import tempfile
+import shutil
+import csv
+from pathlib import Path
+from dataclasses import dataclass, field
+from typing import Type, TypeVar
+
+# Type variable bound to 'Glossary', used to annotate 'Glossary.from_file()' so that
+# it is typed to return an instance of the class it is called on.
+GlossaryInst = TypeVar('GlossaryInst', bound='Glossary')
+
+
+class GlossaryError(Exception):
+    """Error raised by this module for invalid glossary files or paths."""
+
+
+def str_presenter(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode:
+    """
+    YAML string representer: strings that span more than one line are dumped
+    in literal block style ('|'), all other strings in the default style.
+
+    NOTE(review): nothing visible in this module registers this function
+    (e. g. via 'yaml.add_representer') - verify where it is hooked in.
+    """
+    # 'None' is the default style of 'represent_scalar()'
+    block_style = '|' if len(data.splitlines()) > 1 else None
+    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style=block_style)
+
+
+@dataclass
+class Glossary:
+    """
+    A translation glossary that can be stored as YAML and exchanged as CSV / TSV.
+
+    A glossary consists of the following parameters:
+        - Name (freely selectable)
+        - Path (full file path)
+        - Source language
+        - Target language
+        - Entries (pairs of source lang and target lang terms)
+        - ID (automatically generated / modified, required by DeepL)
+    """
+
+    name: str
+    source_lang: str
+    target_lang: str
+    entries: dict[str, str] = field(default_factory=dict)
+    file_path: Path | None = None
+    ID: str | None = None
+
+    @classmethod
+    def from_file(cls: Type[GlossaryInst], file_path: Path) -> GlossaryInst:
+        """
+        Create a glossary from the given YAML file (as written by 'to_file()').
+
+        Raises GlossaryError if the file content is not a valid glossary. Errors
+        raised while opening the file (e. g. FileNotFoundError) are passed through.
+        """
+        with open(file_path, "r", encoding="utf-8") as fd:
+            try:
+                # NOTE(review): FullLoader can construct more than plain data; consider
+                # 'yaml.safe_load()' if glossary files may come from untrusted sources.
+                data = yaml.load(fd, Loader=yaml.FullLoader)
+                # remove any quotes from the entries that YAML may have added while dumping
+                # (e. g. for special keywords like 'yes')
+                clean_entries = {key.strip('\"\' '): value for key, value in data['Entries'].items()}
+                return cls(name=data['Name'],
+                           source_lang=data['SourceLang'],
+                           target_lang=data['TargetLang'],
+                           entries=clean_entries,
+                           file_path=file_path,
+                           # 'to_file()' stores a missing ID as the string 'None'
+                           ID=data['ID'] if data['ID'] != 'None' else None)
+            except Exception as e:
+                # chain the cause so that the actual parsing problem stays visible
+                raise GlossaryError(f"'{file_path}' does not contain a valid glossary") from e
+
+    def to_file(self, file_path: Path | None = None) -> None:
+        """
+        Write the glossary as YAML to the given file (default: 'self.file_path').
+
+        A given 'file_path' is remembered in 'self.file_path'. The data is first
+        written to a temporary file in the target directory and only moved to the
+        final path after the dump succeeded.
+        Raises GlossaryError if neither 'file_path' nor 'self.file_path' is set.
+        """
+        if file_path:
+            self.file_path = file_path
+        if not self.file_path:
+            raise GlossaryError("Got no valid path to write glossary")
+        data = {'Name': self.name,
+                'ID': str(self.ID),
+                'SourceLang': self.source_lang,
+                'TargetLang': self.target_lang,
+                'Entries': self.entries}
+        # write YAML
+        temp_fd = tempfile.NamedTemporaryFile(dir=self.file_path.parent,
+                                              prefix=self.file_path.name,
+                                              mode="w",
+                                              encoding="utf-8",
+                                              delete=False)
+        temp_file_path = Path(temp_fd.name)
+        try:
+            with temp_fd:
+                yaml.dump(data, temp_fd, sort_keys=False)
+            shutil.move(temp_file_path, self.file_path)
+        finally:
+            # no-op after a successful move, removes the leftover temporary file otherwise
+            temp_file_path.unlink(missing_ok=True)
+
+    def export_csv(self, dictionary: dict[str, str], file_path: Path) -> None:
+        """
+        Export the 'entries' of this glossary to a file in CSV format (compatible with DeepL).
+
+        NOTE: 'dictionary' is not used, 'self.entries' is always exported. The
+        parameter is only kept so that existing callers keep working.
+        """
+        with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
+            writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
+            writer.writerows(self.entries.items())
+
+    def export_tsv(self, entries: dict[str, str], file_path: Path) -> None:
+        """
+        Export the 'entries' of this glossary to a file in TSV format (compatible with DeepL).
+
+        NOTE: 'entries' is not used, 'self.entries' is always exported. The
+        parameter is only kept so that existing callers keep working.
+        Terms are written verbatim, i. e. tabs or newlines inside a term are not escaped.
+        """
+        with open(file_path, 'w', encoding='utf-8') as file:
+            for source_entry, target_entry in self.entries.items():
+                file.write(f"{source_entry}\t{target_entry}\n")
+
+    def import_csv(self, file_path: Path) -> None:
+        """
+        Import the entries from the given CSV file to those of the current glossary.
+        Existing entries are overwritten. Rows with less than two columns are skipped,
+        additional columns are ignored.
+        Raises GlossaryError if the file cannot be read or parsed.
+        """
+        try:
+            # newline='' leaves line breaks inside quoted terms to the csv module
+            # (otherwise '\r' and '\r\n' inside a term are silently changed to '\n')
+            with open(file_path, mode='r', newline='', encoding='utf-8') as csvfile:
+                reader = csv.reader(csvfile, delimiter=',', quotechar='"')
+                self.entries = {rows[0]: rows[1] for rows in reader if len(rows) >= 2}
+        except Exception as e:
+            raise GlossaryError(f"Error importing CSV: {e}") from e
+
+    def import_tsv(self, file_path: Path) -> None:
+        """
+        Import the entries from the given TSV file to those of the current glossary.
+        Existing entries are overwritten. Lines that do not consist of exactly two
+        tab-separated terms (after stripping surrounding whitespace) are skipped.
+        Raises GlossaryError if the file cannot be read.
+        """
+        try:
+            new_entries: dict[str, str] = {}
+            with open(file_path, mode='r', encoding='utf-8') as tsvfile:
+                for line in tsvfile:
+                    parts = line.strip().split('\t')
+                    if len(parts) == 2:
+                        new_entries[parts[0]] = parts[1]
+            # replace the entries only after the whole file has been read successfully
+            self.entries = new_entries
+        except Exception as e:
+            raise GlossaryError(f"Error importing TSV: {e}") from e