# Source code for avro_to_python.utils.avro.helpers

""" contains helper function for parsing avro schema """

from typing import List, Tuple

from avro_to_python.classes.reference import Reference
from avro_to_python.classes.field import Field

from avro_to_python.utils.exceptions import BadReferenceError
from avro_to_python.utils.avro.primitive_types import PRIMITIVE_TYPE_MAP


def _create_reference(file: dict) -> 'Reference':
    """ creates a reference object for file references

    Parameters
    ----------
        file: dict
            object containing information on a complex avro type to
            reference; must contain both the 'name' and 'namespace' keys

    Returns
    -------
        reference: Reference
            Reference built from the 'name' and 'namespace' of `file`

    Raises
    ------
        BadReferenceError
            if 'name' or 'namespace' is missing from `file`
    """
    # a reference can only be built when both identifying keys are present
    if 'name' not in file or 'namespace' not in file:
        raise BadReferenceError

    return Reference(
        name=file['name'],
        namespace=file['namespace']
    )


def _get_name(obj: dict) -> str:
    """ Returns the short (non-qualified) name of a named node.

    The value stored under the 'name' key is split on its last dot.
    When both the part before and the part after that dot are non-empty,
    the value is treated as a fullname and only the trailing part is
    returned. In every other case the stored value is returned unchanged.

    No check is made that the node is a named type; a missing 'name' key
    raises KeyError.


    Parameters
    ----------
        obj: dict
            serialized object resembling an avsc schema

    Returns
    -------
        String name with any namespace prefix removed.
    """
    full_name = obj['name']
    prefix, _sep, short_name = full_name.rpartition('.')
    # only a value of the form "<namespace>.<name>" counts as a fullname
    return short_name if prefix and short_name else full_name

def _get_namespace(obj: dict, parent_namespace: str=None) -> str:
    """ works out the namespace a schema object belongs to

    The first rule that yields a non-empty value wins:
        1. If 'name' is a fullname ("<namespace>.<name>"), the namespace
           part of that fullname. This mirrors the Java avro-tools jar,
           which is used as a reference implementation.
        2. The object's own 'namespace' entry.
        3. The namespace of the parent schema that references the object.
        4. '' - resembling the root of the input dir.


    Parameters
    ----------
        obj: dict
            serialized object resembling an avsc schema
        parent_namespace: str
            parent object namespace if applicable

    Returns
    -------
        String namespace or empty string.
    """
    prefix, _sep, short_name = obj.get('name', '').rpartition('.')
    if prefix and short_name:
        return prefix

    # fall through the remaining candidates, skipping empty/None values
    return obj.get('namespace') or parent_namespace or ''


def get_union_types(
    field: Field, PRIMITIVE_TYPE_MAP: dict=PRIMITIVE_TYPE_MAP
) -> str:
    """ Takes a field object and returns the types of the fields

    Parameters
    ----------
        field: Field
            field for a union type; each entry of `field.union_types`
            contributes one python type
        PRIMITIVE_TYPE_MAP: dict
            lookup table mapping avro types to python types

    Returns
    -------
        out_types: str
            comma separated string of python types

    Raises
    ------
        ValueError
            if a union member has a fieldtype other than 'primitive',
            'reference', 'array' or 'map'
    """
    out_types = []
    for obj in field.union_types:

        # primitive type
        # NOTE: an avrotype missing from the map yields None here, which
        # makes the final join fail with TypeError
        if obj.fieldtype == 'primitive':
            out_types.append(PRIMITIVE_TYPE_MAP.get(obj.avrotype))

        # reference to a named type
        elif obj.fieldtype == 'reference':
            out_types.append(obj.reference_name)

        elif obj.fieldtype == 'array':
            out_types.append('list')

        elif obj.fieldtype == 'map':
            out_types.append('dict')

        else:
            raise ValueError('unsupported type')

    return ','.join(out_types)


def dedupe_imports(imports: List[Reference]) -> List[Reference]:
    """ Dedupes list of imports

    Two imports are duplicates when they share both name and namespace.
    When duplicates exist, the last one seen is the one kept.

    Parameters
    ----------
        imports: list of Reference
            list of imports of a file

    Returns
    -------
        list of Reference
            new list with one entry per distinct (name, namespace) pair;
            the input list is not modified
    """
    # key on the (name, namespace) pair rather than the concatenated
    # string, so that e.g. ('ab', 'c') and ('a', 'bc') stay distinct
    unique = {(obj.name, obj.namespace): obj for obj in imports}
    return list(unique.values())


def split_namespace(s: str) -> Tuple[str, str]:
    """ Splits a namespace and name into their parts

    The split happens at the last dot of `s`. Without any dot the whole
    string is the name and the namespace is ''.

    Parameters
    ----------
        s: str
            string to be split

    Returns
    -------
        (tuple)
            namespace: str
            name: str
    """
    namespace, _sep, name = s.rpartition('.')
    return (namespace, name)