[Fixed]-Sorting 10k data set to the dict by child basis

1👍

Here’s one possible solution. It iterates through the codes once to build a simple tree, then afterward turns that tree into the kind that you requested.

import re
from pprint import pprint
from collections import defaultdict

def build_tree(codes):
    """Build the tree from a list of codes (strings)"""

    # tree is a dictionary that maps each code to a list of codes of children.
    tree = defaultdict(list)
    roots = []
    for code in codes:
        if '000000' in code:
            tree[code] = []
            roots.append(code)
        else:
            nonzero = re.search(r'[1-9]0*$', code).start()
            parent = code[:nonzero] + '0' + code[1 + nonzero:]
            tree[parent].append(code)

    # sort children (optional)
    for v in tree.values():
        v.sort()

    # convert original dictionary to one in the desired form.
    def convert(old_parent):
        result = {}
        result['code'] = old_parent
        if len(tree[old_parent]) > 0:
            result['children'] = [convert(c) for c in tree[old_parent]]
        return result

    return [convert(root) for root in roots]

codes = ["01000000", "01100000", "01110000", "01111000", "01111100", "01111110",
         "01111111", "01111112", "01111113", "01111114", "01111115", "01111120",
         "01111121", "01111122", "01111123", "01111124", "01111130", "01111131",
         "01111132", "01111133", "01111134", "01111140", "01111141", "01111142",
         "01111143", "01111144"]

pprint(build_tree(codes))

Here is the output (excuse the formatting)

[{'children': [{'children': [{'children': [{'children': [{'children': [{'children': [{'code': '01111111'},
                                                                                     {'code': '01111112'},
                                                                                     {'code': '01111113'},
                                                                                     {'code': '01111114'},
                                                                                     {'code': '01111115'}],
                                                                        'code': '01111110'},
                                                                       {'children': [{'code': '01111121'},
                                                                                     {'code': '01111122'},
                                                                                     {'code': '01111123'},
                                                                                     {'code': '01111124'}],
                                                                        'code': '01111120'},
                                                                       {'children': [{'code': '01111131'},
                                                                                     {'code': '01111132'},
                                                                                     {'code': '01111133'},
                                                                                     {'code': '01111134'}],
                                                                        'code': '01111130'},
                                                                       {'children': [{'code': '01111141'},
                                                                                     {'code': '01111142'},
                                                                                     {'code': '01111143'},
                                                                                     {'code': '01111144'}],
                                                                        'code': '01111140'}],
                                                          'code': '01111100'}],
                                            'code': '01111000'}],
                              'code': '01110000'}],
                'code': '01100000'}],
  'code': '01000000'}]

Leave a comment