ultk.language.language

Classes for modeling languages as form-meaning mappings, most important among them the Language and Expression classes.

Example usage:
>>> from ultk.language.language import Expression, Language
>>> # assuming the meaning `a_few_meaning` has already been constructed
>>> # define the expression
>>> a_few = Expression(form="a few", meaning=a_few_meaning)
>>> # define a very small language
>>> lang_1 = Language([a_few])
>>> # or a slightly larger one with synonymy
>>> lang_2 = Language([a_few] * 3)
  1"""Classes for modeling languages as form-meaning mappings, most important among them the Language and Expression classes.
  2
  3Example usage:
  4
  5    >>> from ultk.language.language import Expression, Language
  6    >>> # assuming the meaning `a_few_meaning` has already been constructed
  7    >>> # define the expression
  8    >>> a_few = Expression(form="a few", meaning=a_few_meaning)
  9    >>> # define a very small language
 10    >>> lang_1 = Language([a_few])
 11    >>> # or a slightly larger one with synonymy
 12    >>> lang_2 = Language([a_few] * 3)
 13"""
 14
 15import numpy as np
 16from dataclasses import dataclass
 17from typing import Callable, Generic, Iterable, TypeVar
 18from ultk.language.semantics import Meaning, Referent, Universe
 19from ultk.util.frozendict import FrozenDict
 20
# TODO: require Python 3.12 and use type parameter syntax instead? https://docs.python.org/3/reference/compound_stmts.html#type-params
# Type parameter shared by `Expression` (declared `Generic[T]`) and the `Meaning[T]` it holds.
T = TypeVar("T")
 23
 24
 25@dataclass(eq=True, unsafe_hash=True)
 26class Expression(Generic[T]):
 27    """Minimally contains a form and a meaning."""
 28
 29    # gneric/dummy form and meaning if not specified
 30    # useful for hashing in certain cases
 31    # (e.g. a GrammaticalExpression which has not yet been evaluate()'d and so does not yet have a Meaning)
 32    form: str = ""
 33    meaning: Meaning[T] = Meaning(FrozenDict(), Universe(tuple(), tuple()))
 34
 35    def can_express(self, referent: Referent) -> bool:
 36        """Return True if the expression can express the input single meaning point and false otherwise."""
 37        return bool(self.meaning[referent])
 38
 39    def to_dict(self) -> dict:
 40        """Return a dictionary representation of the expression."""
 41        return {"form": self.form, "meaning": self.meaning}
 42
 43    def __str__(self) -> str:
 44        return self.form
 45
 46    def __lt__(self, other: object) -> bool:
 47        return isinstance(other, Expression) and (self.form, other.meaning) < (
 48            other.form,
 49            other.meaning,
 50        )
 51
 52    def __bool__(self) -> bool:
 53        return bool(self.form and self.meaning)
 54
 55
 56class Language:
 57    """Minimally contains Expression objects."""
 58
 59    def __init__(self, expressions: tuple[Expression, ...], **kwargs):
 60        if not expressions:
 61            raise ValueError("Language cannot be empty.")
 62
 63        universe: Universe = expressions[0].meaning.universe
 64        if not all(expr.meaning.universe == universe for expr in expressions):
 65            raise ValueError(
 66                "All expressions in a language must have the same universe."
 67            )
 68
 69        self.universe = universe
 70        self.expressions = frozenset(expressions)
 71        self.__dict__.update(**kwargs)
 72
 73    # TODO: revisit evolutionary algorithm; do we need Languages to be mutable?
 74    @property
 75    def expressions(self) -> frozenset[Expression]:
 76        return self._expressions
 77
 78    @expressions.setter
 79    def expressions(self, val: frozenset[Expression]) -> None:
 80        if not val:
 81            raise ValueError("list of Expressions must not be empty.")
 82        self._expressions = val
 83
 84    def add_expression(self, e: Expression):
 85        """Add an expression to the list of expressions in a language."""
 86        self.expressions = frozenset(tuple(self.expressions) + (e,))
 87
 88    def is_natural(self) -> bool:
 89        """Whether a language represents a human natural language."""
 90        raise NotImplementedError
 91
 92    def degree_property(self, property: Callable[[Expression], bool]) -> float:
 93        """Count what percentage of expressions in a language have a given property."""
 94        return sum([property(item) for item in self.expressions]) / len(self)
 95
 96    def binary_matrix(self) -> np.ndarray:
 97        """Get a binary matrix of shape `(num_meanings, num_expressions)`
 98        specifying which expressions can express which meanings."""
 99        return np.array(
100            [
101                [float(e.can_express(m)) for e in self.expressions]
102                for m in self.universe.referents
103            ]
104        )
105
106    def as_dict_with_properties(self, **kwargs) -> dict:
107        """Return a dictionary representation of the language, including additional properties as keyword arguments.
108
109        This is used in some examples to serialize the language to outputs."""
110        the_dict = {"expressions": [str(expr) for expr in self.expressions]}
111        the_dict.update(kwargs)
112        return the_dict
113
114    def __contains__(self, expression) -> bool:
115        """Whether the language has the expression"""
116        return expression in self.expressions
117
118    def __hash__(self) -> int:
119        return hash(self.expressions)
120
121    def __eq__(self, __o: object) -> bool:
122        return isinstance(__o, Language) and self.expressions == __o.expressions
123
124    def __len__(self) -> int:
125        return len(self.expressions)
126
127    def __lt__(self, other) -> bool:
128        return self.expressions < other.expressions
129
130    def __str__(self) -> str:
131        return (
132            "---------\nExpressions:\n"
133            + "\n-----\n".join(str(expression) for expression in self.expressions)
134            + "\n---------"
135        )
136
137
138# TODO: move this to effcomm?
def aggregate_expression_complexity(
    language: Language,
    expression_complexity_func: Callable[[Expression], float],
    aggregator: Callable[[Iterable[float]], float] = sum,
) -> float:
    """Combine per-expression complexities into one complexity for a `Language`.

    Args:
        language: the Language to measure
        expression_complexity_func: maps a single expression to its complexity
        aggregator: (optional, default = sum) reduces the individual complexities to one value

    Returns:
        a float, the complexity of a language
    """
    # Lazily score every expression; the aggregator consumes the iterator.
    per_expression = map(expression_complexity_func, language.expressions)
    return aggregator(per_expression)
@dataclass(eq=True, unsafe_hash=True)
class Expression(typing.Generic[~T]):
@dataclass(eq=True, unsafe_hash=True)
class Expression(Generic[T]):
    """Minimally contains a form and a meaning.

    `dataclass` generates equality and hashing from the `form` and `meaning` fields.
    """

    # Generic/dummy form and meaning if not specified.
    # Useful for hashing in certain cases
    # (e.g. a GrammaticalExpression which has not yet been evaluate()'d and so does not yet have a Meaning).
    form: str = ""
    meaning: Meaning[T] = Meaning(FrozenDict(), Universe(tuple(), tuple()))

    def can_express(self, referent: Referent) -> bool:
        """Return True if the expression can express the given referent, False otherwise.

        The result is the truthiness of `self.meaning[referent]`.
        """
        return bool(self.meaning[referent])

    def to_dict(self) -> dict:
        """Return a dictionary with the keys "form" and "meaning".

        The meaning is stored as-is; it is not converted to another representation.
        """
        return {"form": self.form, "meaning": self.meaning}

    def __str__(self) -> str:
        """Return the form of the expression."""
        return self.form

    def __lt__(self, other: object) -> bool:
        """Order expressions by form, breaking ties by meaning where possible.

        Returns False when `other` is not an Expression.
        """
        if not isinstance(other, Expression):
            return False
        if self.form != other.form:
            return self.form < other.form
        # Same form: compare the two meanings. The previous implementation compared
        # `other.meaning` with itself here, so the tie-break never took effect.
        try:
            return bool(self.meaning < other.meaning)
        except TypeError:
            # Meanings that define no ordering leave equal-form expressions
            # unordered, which is the result the previous implementation gave.
            return False

    def __bool__(self) -> bool:
        """Return True only if both the form and the meaning are truthy."""
        return bool(self.form and self.meaning)

Minimally contains a form and a meaning.

Expression( form: str = '', meaning: ultk.language.semantics.Meaning[~T] = Meaning(mapping=FrozenDict({}), universe=Universe(referents=(), prior=()), _dist=FrozenDict({})))
form: str = ''
meaning: ultk.language.semantics.Meaning[~T] = Meaning(mapping=FrozenDict({}), universe=Universe(referents=(), prior=()), _dist=FrozenDict({}))
def can_express(self, referent: ultk.language.semantics.Referent) -> bool:
36    def can_express(self, referent: Referent) -> bool:
37        """Return True if the expression can express the input single meaning point and false otherwise."""
38        return bool(self.meaning[referent])

Return True if the expression can express the input single meaning point and false otherwise.

def to_dict(self) -> dict:
40    def to_dict(self) -> dict:
41        """Return a dictionary representation of the expression."""
42        return {"form": self.form, "meaning": self.meaning}

Return a dictionary representation of the expression.

class Language:
 57class Language:
 58    """Minimally contains Expression objects."""
 59
 60    def __init__(self, expressions: tuple[Expression, ...], **kwargs):
 61        if not expressions:
 62            raise ValueError("Language cannot be empty.")
 63
 64        universe: Universe = expressions[0].meaning.universe
 65        if not all(expr.meaning.universe == universe for expr in expressions):
 66            raise ValueError(
 67                "All expressions in a language must have the same universe."
 68            )
 69
 70        self.universe = universe
 71        self.expressions = frozenset(expressions)
 72        self.__dict__.update(**kwargs)
 73
 74    # TODO: revisit evolutionary algorithm; do we need Languages to be mutable?
 75    @property
 76    def expressions(self) -> frozenset[Expression]:
 77        return self._expressions
 78
 79    @expressions.setter
 80    def expressions(self, val: frozenset[Expression]) -> None:
 81        if not val:
 82            raise ValueError("list of Expressions must not be empty.")
 83        self._expressions = val
 84
 85    def add_expression(self, e: Expression):
 86        """Add an expression to the list of expressions in a language."""
 87        self.expressions = frozenset(tuple(self.expressions) + (e,))
 88
 89    def is_natural(self) -> bool:
 90        """Whether a language represents a human natural language."""
 91        raise NotImplementedError
 92
 93    def degree_property(self, property: Callable[[Expression], bool]) -> float:
 94        """Count what percentage of expressions in a language have a given property."""
 95        return sum([property(item) for item in self.expressions]) / len(self)
 96
 97    def binary_matrix(self) -> np.ndarray:
 98        """Get a binary matrix of shape `(num_meanings, num_expressions)`
 99        specifying which expressions can express which meanings."""
100        return np.array(
101            [
102                [float(e.can_express(m)) for e in self.expressions]
103                for m in self.universe.referents
104            ]
105        )
106
107    def as_dict_with_properties(self, **kwargs) -> dict:
108        """Return a dictionary representation of the language, including additional properties as keyword arguments.
109
110        This is used in some examples to serialize the language to outputs."""
111        the_dict = {"expressions": [str(expr) for expr in self.expressions]}
112        the_dict.update(kwargs)
113        return the_dict
114
115    def __contains__(self, expression) -> bool:
116        """Whether the language has the expression"""
117        return expression in self.expressions
118
119    def __hash__(self) -> int:
120        return hash(self.expressions)
121
122    def __eq__(self, __o: object) -> bool:
123        return isinstance(__o, Language) and self.expressions == __o.expressions
124
125    def __len__(self) -> int:
126        return len(self.expressions)
127
128    def __lt__(self, other) -> bool:
129        return self.expressions < other.expressions
130
131    def __str__(self) -> str:
132        return (
133            "---------\nExpressions:\n"
134            + "\n-----\n".join(str(expression) for expression in self.expressions)
135            + "\n---------"
136        )

Minimally contains Expression objects.

Language(expressions: tuple[Expression, ...], **kwargs)
60    def __init__(self, expressions: tuple[Expression, ...], **kwargs):
61        if not expressions:
62            raise ValueError("Language cannot be empty.")
63
64        universe: Universe = expressions[0].meaning.universe
65        if not all(expr.meaning.universe == universe for expr in expressions):
66            raise ValueError(
67                "All expressions in a language must have the same universe."
68            )
69
70        self.universe = universe
71        self.expressions = frozenset(expressions)
72        self.__dict__.update(**kwargs)
universe
expressions: frozenset[Expression]
75    @property
76    def expressions(self) -> frozenset[Expression]:
77        return self._expressions
def add_expression(self, e: Expression):
85    def add_expression(self, e: Expression):
86        """Add an expression to the list of expressions in a language."""
87        self.expressions = frozenset(tuple(self.expressions) + (e,))

Add an expression to the list of expressions in a language.

def is_natural(self) -> bool:
89    def is_natural(self) -> bool:
90        """Whether a language represents a human natural language."""
91        raise NotImplementedError

Whether a language represents a human natural language.

def degree_property( self, property: Callable[[Expression], bool]) -> float:
93    def degree_property(self, property: Callable[[Expression], bool]) -> float:
94        """Count what percentage of expressions in a language have a given property."""
95        return sum([property(item) for item in self.expressions]) / len(self)

Count what percentage of expressions in a language have a given property.

def binary_matrix(self) -> numpy.ndarray:
 97    def binary_matrix(self) -> np.ndarray:
 98        """Get a binary matrix of shape `(num_meanings, num_expressions)`
 99        specifying which expressions can express which meanings."""
100        return np.array(
101            [
102                [float(e.can_express(m)) for e in self.expressions]
103                for m in self.universe.referents
104            ]
105        )

Get a binary matrix of shape (num_meanings, num_expressions) specifying which expressions can express which meanings.

def as_dict_with_properties(self, **kwargs) -> dict:
107    def as_dict_with_properties(self, **kwargs) -> dict:
108        """Return a dictionary representation of the language, including additional properties as keyword arguments.
109
110        This is used in some examples to serialize the language to outputs."""
111        the_dict = {"expressions": [str(expr) for expr in self.expressions]}
112        the_dict.update(kwargs)
113        return the_dict

Return a dictionary representation of the language, including additional properties as keyword arguments.

This is used in some examples to serialize the language to outputs.

def aggregate_expression_complexity( language: Language, expression_complexity_func: Callable[[Expression], float], aggregator: Callable[[Iterable[float]], float] = <built-in function sum>) -> float:
def aggregate_expression_complexity(
    language: Language,
    expression_complexity_func: Callable[[Expression], float],
    aggregator: Callable[[Iterable[float]], float] = sum,
) -> float:
    """Combine per-expression complexities into one complexity for a `Language`.

    Args:
        language: the Language to measure
        expression_complexity_func: maps a single expression to its complexity
        aggregator: (optional, default = sum) reduces the individual complexities to one value

    Returns:
        a float, the complexity of a language
    """
    # Lazily score every expression; the aggregator consumes the iterator.
    per_expression = map(expression_complexity_func, language.expressions)
    return aggregator(per_expression)

Aggregate complexities for individual Expressions into a complexity for a Language.

Arguments:
  • language: the Language to measure
  • expression_complexity_func: the function that returns the complexity of an individual expression
  • aggregator: (optional, default = sum) the function that aggregates individual complexities
Returns:

a float, the complexity of a language