""" Module implementing glossaries for translations. """ import yaml import tempfile import shutil import csv from pathlib import Path from dataclasses import dataclass, field from typing import Type, TypeVar GlossaryInst = TypeVar('GlossaryInst', bound='Glossary') class GlossaryError(Exception): pass def str_presenter(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode: """ Changes the YAML dump style to multiline syntax for multiline strings. """ if len(data.splitlines()) > 1: return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|') return dumper.represent_scalar('tag:yaml.org,2002:str', data) @dataclass class Glossary: """ A glossary consists of the following parameters: - Name (freely selectable) - Path (full file path) - Source language - Target language - Entries (pairs of source lang and target lang terms) - ID (automatically generated / modified, required by DeepL) """ name: str source_lang: str target_lang: str entries: dict[str, str] = field(default_factory=lambda: dict()) file_path: Path | None = None ID: str | None = None @classmethod def from_file(cls: Type[GlossaryInst], file_path: Path) -> GlossaryInst: """ Create a glossary from the given file. """ with open(file_path, "r") as fd: try: data = yaml.load(fd, Loader=yaml.FullLoader) # remove any quotes from the entries that YAML may have added while dumping # (e. g. for special keywords like 'yes') clean_entries = {key.strip('\"\' '): value for key, value in data['Entries'].items()} return cls(name=data['Name'], source_lang=data['SourceLang'], target_lang=data['TargetLang'], entries=clean_entries, file_path=file_path, ID=data['ID'] if data['ID'] != 'None' else None) except Exception: raise GlossaryError(f"'{file_path}' does not contain a valid glossary") def to_file(self, file_path: Path | None = None) -> None: """ Write glossary to given file. """ if file_path: self.file_path = file_path if not self.file_path: raise GlossaryError("Got no valid path to write glossary") # write YAML with tempfile.NamedTemporaryFile(dir=self.file_path.parent, prefix=self.file_path.name, mode="w", delete=False) as temp_fd: temp_file_path = Path(temp_fd.name) data = {'Name': self.name, 'ID': str(self.ID), 'SourceLang': self.source_lang, 'TargetLang': self.target_lang, 'Entries': self.entries} yaml.dump(data, temp_fd, sort_keys=False) shutil.move(temp_file_path, self.file_path) def export_csv(self, dictionary: dict[str, str], file_path: Path) -> None: """ Export the 'entries' of this glossary to a file in CSV format (compatible with DeepL). """ with open(file_path, 'w', newline='', encoding='utf-8') as csvfile: writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL) for source_entry, target_entry in self.entries.items(): writer.writerow([source_entry, target_entry]) def export_tsv(self, entries: dict[str, str], file_path: Path) -> None: """ Export the 'entries' of this glossary to a file in TSV format (compatible with DeepL). """ with open(file_path, 'w', encoding='utf-8') as file: for source_entry, target_entry in self.entries.items(): file.write(f"{source_entry}\t{target_entry}\n") def import_csv(self, file_path: Path) -> None: """ Import the entries from the given CSV file to those of the current glossary. Existing entries are overwritten. """ try: with open(file_path, mode='r', encoding='utf-8') as csvfile: reader = csv.reader(csvfile, delimiter=',', quotechar='"') self.entries = {rows[0]: rows[1] for rows in reader if len(rows) >= 2} except Exception as e: raise GlossaryError(f"Error importing CSV: {e}") def import_tsv(self, file_path: Path) -> None: """ Import the entries from the given CSV file to those of the current glossary. Existing entries are overwritten. """ try: with open(file_path, mode='r', encoding='utf-8') as tsvfile: self.entries = {} for line in tsvfile: parts = line.strip().split('\t') if len(parts) == 2: self.entries[parts[0]] = parts[1] except Exception as e: raise GlossaryError(f"Error importing TSV: {e}")