diff --git a/lmo2po.py b/lmo2po.py new file mode 100644 index 0000000..ecab460 --- /dev/null +++ b/lmo2po.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +import os +import sys + + +def die(msg): + print("ERROR:", msg) + sys.exit(1) + + +class LmoEntry: + def __init__(self, key_id = 0, val_id = 0, offset = 0, length = 0, val = None): + self.key_id = key_id + self.val_id = val_id + self.offset = offset + self.length = length + self.val = val + self.dup = 0 + + +class Lmo: + options = "" + entries = [] # list of LmoEntry + + def __init__(self): + self.options = "" + self.entries = [] + self.use_plural_num = None # value of entry.val_id + + def load_from_bin(self, filename): + self.entries = [] + with open(filename, "rb") as file: + data = file.read() + table_offset = int.from_bytes(data[-4:], byteorder='big') + #print("table_offset = 0x%X" % table_offset) + off = table_offset + while True: + if off + 16 >= len(data): + break + entry = LmoEntry() + entry.key_id = int.from_bytes(data[off :off+4] , byteorder='big') + entry.val_id = int.from_bytes(data[off+4 :off+8] , byteorder='big') + entry.offset = int.from_bytes(data[off+8 :off+12], byteorder='big') + entry.length = int.from_bytes(data[off+12:off+16], byteorder='big') + entry.val = data[entry.offset:entry.offset+entry.length] + #print("%08X %d %08X %d" % (entry.key_id, entry.val_id, entry.offset, entry.length)) + self.entries.append(entry) + off += 16 + if self.use_plural_num is None: + self.use_plural_num = True + for i, e in enumerate(self.entries): + if e.val_id > 10: + self.use_plural_num = False + break + self.entries = sorted(self.entries, key=lambda x: x.offset) + #self.dup_search() + + def dup_search(self): + count = 0 + for i, ent in enumerate(self.entries): + for k, ek in enumerate(self.entries): + if ent.key_id == ek.key_id and ent.offset != ek.offset: + ent.dup = 1 + count += 1 + break + return count + + def save_to_text(self, filename = None): + txt = "" + if 'k' in self.options: + entries = sorted(self.entries, key=lambda x: x.key_id) + else: + entries = sorted(self.entries, key=lambda x: x.offset) + self.dup_search() + for i, ent in enumerate(entries): + val = ent.val.decode('utf-8') + val = val.replace('\\', '\\\\') + val = val.replace('"', r'\"') + if ent.key_id == 0 and ent.val_id == 0 and ent.offset == 0: + val = val.replace('\n', r'\n') + txt += 'msgid ""' + '\n' + txt += 'msgstr ""' + '\n' + txt += '"Project-Id-Version: LuCI: {}\\n"'.format("base") + '\n' + txt += '"Language: {}\\n"'.format("en") + '\n' + txt += '"MIME-Version: 1.0\\n"' + '\n' + txt += '"Content-Type: text/plain; charset=UTF-8\\n"' + '\n' + txt += '"Content-Transfer-Encoding: 8bit\\n"' + '\n' + txt += '"Plural-Forms: {}\\n"'.format(val) + '\n' + txt += '"X-Generator: Weblate 4.8.1-dev\\n"' + '\n' + txt += '\n' + continue + prefix = '' + if self.use_plural_num and ent.val_id != 1: + prefix = '[%d]' % (ent.val_id - 1) + if ent.dup: + txt += '# DUP' + '\n' + txt += 'msgkey 0x{}'.format("%08X" % ent.key_id) + '\n' + line_limit = 77 + val = val.replace('\r', '') + if val.find('\n') >= 0: + txt += 'msgstr{} ""'.format(prefix) + '\n' + vlist = val.split('\n') + for i, v in enumerate(vlist): + v = v + r'\n' if i+1 < len(vlist) else v + if len(v) > 0: + txt += '"{}"'.format(v) + '\n' + elif val.find(r'\n') >= 0: + txt += 'msgstr{} ""'.format(prefix) + '\n' + vlist = val.split(r'\n') + for i, v in enumerate(vlist): + if len(v) > 0: + txt += '"{}"'.format(v) + '\n' + elif len(val) > line_limit - 10 and val.find(' ') > 0: + txt += 'msgstr{} ""'.format(prefix) + '\n' + wlist = val.split(' ') + v = "" + for i, word in enumerate(wlist): + if i > 0: + word = ' ' + word + if len(v) + len(word) < line_limit or len(v) == 0: + v += word + continue + txt += '"{}"'.format(v) + '\n' + v = word + if len(v) > 0: + txt += '"{}"'.format(v) + '\n' + else: + txt += 'msgstr{} "{}"'.format(prefix, val) + '\n' + txt += '\n' + if filename: + with open(filename, "wt", encoding='UTF-8', newline = "\n") as file: + file.write(txt) + return txt + + +if __name__ == "__main__": + fn_inp = sys.argv[1] + fn_out = sys.argv[2] + lmo = Lmo() + if len(sys.argv) > 3: + lmo.options = sys.argv[3] + lmo.load_from_bin(fn_inp) + if not ('m' in lmo.options): + lmo.save_to_text(fn_out) + print('\nPO-file saved to "{}"'.format(fn_out)) + sys.exit(1) + + # Merge 2 lmo-files + fn_inp2 = sys.argv[4] + lmo2 = Lmo() + lmo2.load_from_bin(fn_inp2) + for i, ent in enumerate(lmo2.entries): + dup = False + for k, ek in enumerate(lmo.entries): + if ek.key_id == ent.key_id: + dup = True + break + if not dup: + lmo.entries.append(ent) + lmo.options = 'k' + if not lmo2.use_plural_num: + lmo.use_plural_num = False + lmo.save_to_text(fn_out) + print('\nMerged PO-file saved to "{}"'.format(fn_out)) + + ''' + fn_out = fn_inp + import po2lmo + lmo3 = po2lmo.Lmo(verbose = 0) + lmo3.load_from_list(lmo.entries) + lmo3.save_to_bin(fn_out) + print('\nMerged LMO-file saved to "{}"'.format(fn_out)) + ''' + + diff --git a/po2lmo.py b/po2lmo.py new file mode 100644 index 0000000..6c5b175 --- /dev/null +++ b/po2lmo.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python3 + +import os +import sys +import io + + +def die(msg): + print("ERROR:", msg) + sys.exit(1) + + +class u32_t(int): + def __rshift__(self, other): + return u32_t(int.__rshift__(self, other) & 0xFFFFFFFF) + + def __lshift__(self, other): + return u32_t(int.__lshift__(self, other) & 0xFFFFFFFF) + + def __add__(self, other): + return u32_t(int.__add__(self, other) & 0xFFFFFFFF) + + def __xor__(self, other): + return u32_t(int.__xor__(self, other) & 0xFFFFFFFF) + +def sfh_int8(data, offset = 0): + x = int.from_bytes(data[offset:offset+1], byteorder='little') + return x if x < 0x80 else x - 0x100 + +def sfh_uint16(data, offset = 0): + return int.from_bytes(data[offset:offset+2], byteorder='little') + +# SuperFastHash algorithm from Paul Hsieh (LGPLv2.1) http://www.azillionmonkeys.com/qed/hash.html +def sfh_hash(data): + if data is None: + return 0 + if isinstance(data, str): + data = data.encode("utf-8") + size = len(data) + hash = u32_t(size) + if size <= 0: + return 0 + rem = size & 3 + length = size // 4 + for i in range(length): + hash += sfh_uint16(data, i*4) + tmp = sfh_uint16(data, i*4 + 2) << 11 + tmp ^= hash + hash = (hash << 16) ^ tmp + hash += hash >> 11 + i = length * 4 + if rem == 3: + hash += sfh_uint16(data, i) + hash ^= hash << 16 + hash ^= sfh_int8(data, i + 2) << 18 + hash += hash >> 11 + if rem == 2: + hash += sfh_uint16(data, i) + hash ^= hash << 11 + hash += hash >> 17 + if rem == 1: + hash += sfh_int8(data, i) + hash ^= hash << 10 + hash += hash >> 1 + hash ^= hash << 3 + hash += hash >> 5 + hash ^= hash << 4 + hash += hash >> 17 + hash ^= hash << 25 + hash += hash >> 6 + return hash & 0xFFFFFFFF + + +MSG_UNSPEC = 0 +MSG_CTXT = 1 +MSG_ID = 2 +MSG_ID_PLURAL = 3 +MSG_STR = 4 + +class Msg: + def __init__(self): + self.plural_num = -1 + self.ctxt = None + self.id = None + self.id_plural = None + self.val = [ None ] # list of string + self.cur = MSG_UNSPEC + self.key = None + + +class LmoEntry: + def __init__(self, key_id = 0, val_id = 0, offset = 0, length = 0, val = None): + self.key_id = key_id + self.val_id = val_id + self.offset = offset + self.length = length + self.val = val + + +class Lmo: + entries = [] # list of LmoEntry + + def __init__(self, verbose = 0): + self.verbose = verbose + self.entries = [] + self.msg = Msg() + + def add_entry(self, key_id, val_id, val): + entry = LmoEntry() + entry.key_id = key_id + entry.val_id = val_id + entry.offset = len(self.entries) + entry.length = len(val) + entry.val = val + self.entries.append(entry) + + def print_msg(self): + msg = self.msg + if msg.key is not None: + val = msg.val[msg.plural_num] + self.add_entry(msg.key, msg.plural_num + 1, val) + elif msg.id and msg.val[0]: + for i in range(msg.plural_num + 1): + if i >= len(msg.val): + continue + val = msg.val[i] + if val is None: + continue + if (msg.ctxt and msg.id_plural): + key = "%s\1%s\2%d" % (msg.ctxt, msg.id, i) + elif (msg.ctxt): + key = "%s\1%s" % (msg.ctxt, msg.id) + elif (msg.id_plural): + key = "%s\2%d" % (msg.id, i) + else: + key = msg.id + key_id = sfh_hash(key) + val_id = sfh_hash(val) + if key_id != val_id: + self.add_entry(key_id, msg.plural_num + 1, val) + elif msg.val[0]: + p = msg.val[0] + prefix = b'\\nPlural-Forms: ' + x = p.find(prefix) + if x > 0: + x += len(prefix) + x2 = p.find(b'\\n', x) + if x2 > 0: + field = p[x:x2] + self.add_entry(0, 0, field) + self.msg = None + self.msg = Msg() + #print('-------------') + return self.msg + + def extract_string(self, line): + if line.startswith('#'): + return None + x = line.find('"') + if x < 0: + return None + line = line[x+1:] + line = line.replace(r'\\', '\x02') + line = line.replace(r'\"', '\x01') + x = line.find('"') + if x >= 0: + line = line[:x] + line = line.replace('\x01', '"') + line = line.replace('\x02', '\\') + return line + + def process_line(self, line, eof = False): + msg = self.msg + if line.startswith('msgctxt "'): + if msg.id or msg.val[0]: + msg = self.print_msg() + msg.ctxt = "" + msg.cur = MSG_CTXT + elif eof or line.startswith('msgid "'): + if msg.id or msg.val[0]: + msg = self.print_msg() + msg.id = "" + msg.cur = MSG_ID + elif line.startswith('msgid_plural "'): + msg.id_plural = "" + msg.cur = MSG_ID_PLURAL + elif line.startswith('msgstr "') or line.startswith('msgstr['): + msg.plural_num = 0 + if line.startswith('msgstr['): + x1 = line.find('[') + x2 = line.find(']') + msg.plural_num = int(line[x1+1:x2]) + if msg.plural_num >= 10: + die("Too many plural forms") + if len(msg.val) <= msg.plural_num: + x = msg.plural_num - len(msg.val) + 1 + for i in range(x): + msg.val.append(None) + msg.val[msg.plural_num] = b'' + msg.cur = MSG_STR + elif line.startswith('msgkey 0x'): + if msg.id or msg.val[0]: + msg = self.print_msg() + msg.id = '\x01' + msg.plural_num = 0 + x = line.find('0x') + msg.key = int(line[x:], 16) + return + if eof: + return + if msg.cur != MSG_UNSPEC: + tmp = self.extract_string(line) + if tmp is not None and len(tmp) > 0: + if msg.cur == MSG_CTXT: + msg.ctxt += tmp + #print('mctxt = "{}"'.format(msg.ctxt)) + if msg.cur == MSG_ID: + msg.id += tmp + #print('msgid = "{}"'.format(msg.id)) + if msg.cur == MSG_ID_PLURAL: + msg.id_plural += tmp + if msg.cur == MSG_STR: + msg.val[msg.plural_num] += tmp.encode("utf-8") + #print('msgstr[{}] = "{}"'.format(msg.plural_num, tmp)) + self.msg = msg + + def load_from_text(self, filename): + self.entries = [] + self.msg = Msg() + with open(filename, "r", encoding='UTF-8') as file: + for line in file: + self.process_line(line.rstrip()) + else: + self.process_line("", eof=True) + + def load_from_list(self, entries): + self.entries = entries + + def save_to_bin(self, filename = None): + buf = bytearray(b'\x00' * 0x400000) # 4MiB + offset = 0 + elst = [] # new list of LmoEntry() + for i, ent in enumerate(self.entries): + val = ent.val + if isinstance(val, str): + val = val.encode('utf-8') + length = len(val) + buf[offset:offset+length] = val + val_id = ent.val_id + elst.append(LmoEntry(ent.key_id, val_id, offset, length, val)) + offset += length + if offset & 3 != 0: + offset += 4 - (offset & 3) + elst = sorted(elst, key=lambda x: x.key_id) + #if offset & 0xF != 0: + # offset += 0x10 - (offset & 0xF) + table_offset = offset + for i, ent in enumerate(elst): + buf[offset :offset+4] = ent.key_id.to_bytes(4, byteorder='big') + buf[offset+4 :offset+8] = ent.val_id.to_bytes(4, byteorder='big') + buf[offset+8 :offset+12] = ent.offset.to_bytes(4, byteorder='big') + buf[offset+12:offset+16] = ent.length.to_bytes(4, byteorder='big') + if self.verbose: + for k, ek in enumerate(elst): + if ent.key_id == ek.key_id and ent.offset != ek.offset: + val = "" if ent.val is None else ent.val.decode() + print('DUP: 0x%08X (0x%05X) "%s"' % (ent.key_id, ent.offset, val)) + offset += 16 + if offset > 0: + buf[offset:offset+4] = table_offset.to_bytes(4, byteorder='big') + offset += 4 + buf = buf[:offset] + if filename: + with open(filename, "wb") as file: + file.write(buf) + return buf + + +if __name__ == "__main__": + fn_inp = sys.argv[1] + fn_out = sys.argv[2] + lmo = Lmo(verbose = 99) + lmo.load_from_text(fn_inp) + lmo.save_to_bin(fn_out) + print('\nLMO-file saved to "{}"'.format(fn_out)) + + + +