ultk.language.language

Classes for modeling languages as form-meaning mappings, most important among them the Language and Expression classes.

Example usage:
>>> from ultk.language.language import Expression, Language
>>> # assuming the meaning `a_few_meaning` has already been constructed
>>> # define the expression
>>> a_few = Expression(form="a few", meaning=a_few_meaning)
>>> # define a very small language
>>> lang_1 = Language([a_few])
>>> # or a slightly larger one with synonymy
>>> lang_2 = Language([a_few] * 3)
  1"""Classes for modeling languages as form-meaning mappings, most important among them the Language and Expression classes.
  2
  3Example usage:
  4
  5    >>> from ultk.language.language import Expression, Language
  6    >>> # assuming the meaning `a_few_meaning` has already been constructed
  7    >>> # define the expression
  8    >>> a_few = Expression(form="a few", meaning=a_few_meaning)
  9    >>> # define a very small language
 10    >>> lang_1 = Language([a_few])
 11    >>> # or a slightly larger one with synonymy
 12    >>> lang_2 = Language([a_few] * 3)
 13"""
 14
 15import numpy as np
 16from dataclasses import dataclass
 17from typing import Callable, Generic, Iterable, TypeVar
 18from ultk.language.semantics import Meaning, Referent, Universe
 19
 20# TODO: require Python 3.12 and use type parameter syntax instead? https://docs.python.org/3/reference/compound_stmts.html#type-params
 21T = TypeVar("T")
 22
 23
 24@dataclass(eq=True, unsafe_hash=True)
 25class Expression(Generic[T]):
 26    """Minimally contains a form and a meaning."""
 27
 28    # gneric/dummy form and meaning if not specified
 29    # useful for hashing in certain cases
 30    # (e.g. a GrammaticalExpression which has not yet been evaluate()'d and so does not yet have a Meaning)
 31    form: str = ""
 32    meaning: Meaning[T] = Meaning(tuple(), Universe(tuple(), tuple()))
 33
 34    def can_express(self, referent: Referent) -> bool:
 35        """Return True if the expression can express the input single meaning point and false otherwise."""
 36        return bool(self.meaning[referent])
 37
 38    def to_dict(self) -> dict:
 39        """Return a dictionary representation of the expression."""
 40        return {"form": self.form, "meaning": self.meaning}
 41
 42    def __str__(self) -> str:
 43        return self.form
 44
 45    def __lt__(self, other: object) -> bool:
 46        return isinstance(other, Expression) and (self.form, other.meaning) < (
 47            other.form,
 48            other.meaning,
 49        )
 50
 51    def __bool__(self) -> bool:
 52        return bool(self.form and self.meaning)
 53
 54
 55class Language:
 56    """Minimally contains Expression objects."""
 57
 58    def __init__(self, expressions: tuple[Expression, ...], **kwargs):
 59        if not expressions:
 60            raise ValueError("Language cannot be empty.")
 61
 62        universe: Universe = expressions[0].meaning.universe
 63        if not all(expr.meaning.universe == universe for expr in expressions):
 64            raise ValueError(
 65                "All expressions in a language must have the same universe."
 66            )
 67
 68        self.universe = universe
 69        self.expressions = frozenset(expressions)
 70        self.__dict__.update(**kwargs)
 71
 72    # TODO: revisit evolutionary algorithm; do we need Languages to be mutable?
 73    @property
 74    def expressions(self) -> frozenset[Expression]:
 75        return self._expressions
 76
 77    @expressions.setter
 78    def expressions(self, val: frozenset[Expression]) -> None:
 79        if not val:
 80            raise ValueError("list of Expressions must not be empty.")
 81        self._expressions = val
 82
 83    def add_expression(self, e: Expression):
 84        """Add an expression to the list of expressions in a language."""
 85        self.expressions = frozenset(tuple(self.expressions) + (e,))
 86
 87    def is_natural(self) -> bool:
 88        """Whether a language represents a human natural language."""
 89        raise NotImplementedError
 90
 91    def degree_property(self, property: Callable[[Expression], bool]) -> float:
 92        """Count what percentage of expressions in a language have a given property."""
 93        return sum([property(item) for item in self.expressions]) / len(self)
 94
 95    def binary_matrix(self) -> np.ndarray:
 96        """Get a binary matrix of shape `(num_meanings, num_expressions)`
 97        specifying which expressions can express which meanings."""
 98        return np.array(
 99            [
100                [float(e.can_express(m)) for e in self.expressions]
101                for m in self.universe.referents
102            ]
103        )
104
105    def as_dict_with_properties(self, **kwargs) -> dict:
106        """Return a dictionary representation of the language, including additional properties as keyword arguments.
107
108        This is used in some examples to serialize the language to outputs."""
109        the_dict = {"expressions": [str(expr) for expr in self.expressions]}
110        the_dict.update(kwargs)
111        return the_dict
112
113    def __contains__(self, expression) -> bool:
114        """Whether the language has the expression"""
115        return expression in self.expressions
116
117    def __hash__(self) -> int:
118        return hash(self.expressions)
119
120    def __eq__(self, __o: object) -> bool:
121        return isinstance(__o, Language) and self.expressions == __o.expressions
122
123    def __len__(self) -> int:
124        return len(self.expressions)
125
126    def __lt__(self, other) -> bool:
127        return self.expressions < other.expressions
128
129    def __str__(self) -> str:
130        return (
131            "---------\nExpressions:\n"
132            + "\n-----\n".join(str(expression) for expression in self.expressions)
133            + "\n---------"
134        )
135
136
137# TODO: move this to effcomm?
def aggregate_expression_complexity(
    language: Language,
    expression_complexity_func: Callable[[Expression], float],
    aggregator: Callable[[Iterable[float]], float] = sum,
) -> float:
    """Combine per-expression complexities into one complexity value for a `Language`.

    Args:
        language: the Language whose expressions are measured
        expression_complexity_func: maps a single expression to its complexity
        aggregator: (optional, default = sum) reduces the lazily produced
            per-expression complexities to a single value

    Returns:
        whatever `aggregator` returns for the language's expression complexities
    """
    # Lazily apply the complexity function to each expression.
    per_expression = map(expression_complexity_func, language.expressions)
    return aggregator(per_expression)
@dataclass(eq=True, unsafe_hash=True)
class Expression(typing.Generic[~T]):
@dataclass(eq=True, unsafe_hash=True)
class Expression(Generic[T]):
    """A form paired with the meaning it expresses.

    Equality and hashing are generated by ``dataclass`` from ``form`` and
    ``meaning``.

    Attributes:
        form: the surface string of the expression.
        meaning: the Meaning associated with the form.
    """

    # Generic/dummy form and meaning if not specified;
    # useful for hashing in certain cases
    # (e.g. a GrammaticalExpression which has not yet been evaluate()'d and so does not yet have a Meaning)
    form: str = ""
    meaning: Meaning[T] = Meaning(tuple(), Universe(tuple(), tuple()))

    def can_express(self, referent: Referent) -> bool:
        """Return True if the expression can express the given referent, False otherwise."""
        return bool(self.meaning[referent])

    def to_dict(self) -> dict:
        """Return a dictionary with the keys ``"form"`` and ``"meaning"``."""
        return {"form": self.form, "meaning": self.meaning}

    def __str__(self) -> str:
        """Return the form of the expression."""
        return self.form

    def __lt__(self, other: object) -> bool:
        """Order expressions by form, breaking ties by meaning.

        Returns False when ``other`` is not an Expression.
        """
        if not isinstance(other, Expression):
            return False
        if self.form != other.form:
            return self.form < other.form
        # Tie-break on meaning so the ordering agrees with the (form, meaning)
        # equality generated by dataclass. If the meaning type defines no
        # ordering, treat the two expressions as unordered instead of raising.
        try:
            return bool(self.meaning < other.meaning)
        except TypeError:
            return False

    def __bool__(self) -> bool:
        """An expression is truthy only if both its form and its meaning are truthy."""
        return bool(self.form and self.meaning)

Minimally contains a form and a meaning.

Expression( form: str = '', meaning: ultk.language.semantics.Meaning[~T] = Meaning(mapping=(), universe=Universe(referents=(), prior=(), _ref_to_idx=FrozenDict({})), dist=()))
form: str = ''
meaning: ultk.language.semantics.Meaning[~T] = Meaning(mapping=(), universe=Universe(referents=(), prior=(), _ref_to_idx=FrozenDict({})), dist=())
def can_express(self, referent: ultk.language.semantics.Referent) -> bool:
35    def can_express(self, referent: Referent) -> bool:
36        """Return True if the expression can express the input single meaning point and false otherwise."""
37        return bool(self.meaning[referent])

Return True if the expression can express the input single meaning point and false otherwise.

def to_dict(self) -> dict:
39    def to_dict(self) -> dict:
40        """Return a dictionary representation of the expression."""
41        return {"form": self.form, "meaning": self.meaning}

Return a dictionary representation of the expression.

class Language:
 56class Language:
 57    """Minimally contains Expression objects."""
 58
 59    def __init__(self, expressions: tuple[Expression, ...], **kwargs):
 60        if not expressions:
 61            raise ValueError("Language cannot be empty.")
 62
 63        universe: Universe = expressions[0].meaning.universe
 64        if not all(expr.meaning.universe == universe for expr in expressions):
 65            raise ValueError(
 66                "All expressions in a language must have the same universe."
 67            )
 68
 69        self.universe = universe
 70        self.expressions = frozenset(expressions)
 71        self.__dict__.update(**kwargs)
 72
 73    # TODO: revisit evolutionary algorithm; do we need Languages to be mutable?
 74    @property
 75    def expressions(self) -> frozenset[Expression]:
 76        return self._expressions
 77
 78    @expressions.setter
 79    def expressions(self, val: frozenset[Expression]) -> None:
 80        if not val:
 81            raise ValueError("list of Expressions must not be empty.")
 82        self._expressions = val
 83
 84    def add_expression(self, e: Expression):
 85        """Add an expression to the list of expressions in a language."""
 86        self.expressions = frozenset(tuple(self.expressions) + (e,))
 87
 88    def is_natural(self) -> bool:
 89        """Whether a language represents a human natural language."""
 90        raise NotImplementedError
 91
 92    def degree_property(self, property: Callable[[Expression], bool]) -> float:
 93        """Count what percentage of expressions in a language have a given property."""
 94        return sum([property(item) for item in self.expressions]) / len(self)
 95
 96    def binary_matrix(self) -> np.ndarray:
 97        """Get a binary matrix of shape `(num_meanings, num_expressions)`
 98        specifying which expressions can express which meanings."""
 99        return np.array(
100            [
101                [float(e.can_express(m)) for e in self.expressions]
102                for m in self.universe.referents
103            ]
104        )
105
106    def as_dict_with_properties(self, **kwargs) -> dict:
107        """Return a dictionary representation of the language, including additional properties as keyword arguments.
108
109        This is used in some examples to serialize the language to outputs."""
110        the_dict = {"expressions": [str(expr) for expr in self.expressions]}
111        the_dict.update(kwargs)
112        return the_dict
113
114    def __contains__(self, expression) -> bool:
115        """Whether the language has the expression"""
116        return expression in self.expressions
117
118    def __hash__(self) -> int:
119        return hash(self.expressions)
120
121    def __eq__(self, __o: object) -> bool:
122        return isinstance(__o, Language) and self.expressions == __o.expressions
123
124    def __len__(self) -> int:
125        return len(self.expressions)
126
127    def __lt__(self, other) -> bool:
128        return self.expressions < other.expressions
129
130    def __str__(self) -> str:
131        return (
132            "---------\nExpressions:\n"
133            + "\n-----\n".join(str(expression) for expression in self.expressions)
134            + "\n---------"
135        )

Minimally contains Expression objects.

Language(expressions: tuple[Expression, ...], **kwargs)
59    def __init__(self, expressions: tuple[Expression, ...], **kwargs):
60        if not expressions:
61            raise ValueError("Language cannot be empty.")
62
63        universe: Universe = expressions[0].meaning.universe
64        if not all(expr.meaning.universe == universe for expr in expressions):
65            raise ValueError(
66                "All expressions in a language must have the same universe."
67            )
68
69        self.universe = universe
70        self.expressions = frozenset(expressions)
71        self.__dict__.update(**kwargs)
universe
expressions: frozenset[Expression]
74    @property
75    def expressions(self) -> frozenset[Expression]:
76        return self._expressions
def add_expression(self, e: Expression):
84    def add_expression(self, e: Expression):
85        """Add an expression to the list of expressions in a language."""
86        self.expressions = frozenset(tuple(self.expressions) + (e,))

Add an expression to the set of expressions in a language.

def is_natural(self) -> bool:
88    def is_natural(self) -> bool:
89        """Whether a language represents a human natural language."""
90        raise NotImplementedError

Whether a language represents a human natural language.

def degree_property( self, property: Callable[[Expression], bool]) -> float:
92    def degree_property(self, property: Callable[[Expression], bool]) -> float:
93        """Count what percentage of expressions in a language have a given property."""
94        return sum([property(item) for item in self.expressions]) / len(self)

Compute the fraction (between 0 and 1) of expressions in a language that have a given property.

def binary_matrix(self) -> numpy.ndarray:
 96    def binary_matrix(self) -> np.ndarray:
 97        """Get a binary matrix of shape `(num_meanings, num_expressions)`
 98        specifying which expressions can express which meanings."""
 99        return np.array(
100            [
101                [float(e.can_express(m)) for e in self.expressions]
102                for m in self.universe.referents
103            ]
104        )

Get a binary matrix of shape (num_meanings, num_expressions) specifying which expressions can express which meanings.

def as_dict_with_properties(self, **kwargs) -> dict:
106    def as_dict_with_properties(self, **kwargs) -> dict:
107        """Return a dictionary representation of the language, including additional properties as keyword arguments.
108
109        This is used in some examples to serialize the language to outputs."""
110        the_dict = {"expressions": [str(expr) for expr in self.expressions]}
111        the_dict.update(kwargs)
112        return the_dict

Return a dictionary representation of the language, including additional properties as keyword arguments.

This is used in some examples to serialize the language to outputs.

def aggregate_expression_complexity( language: Language, expression_complexity_func: Callable[[Expression], float], aggregator: Callable[[Iterable[float]], float] = <built-in function sum>) -> float:
def aggregate_expression_complexity(
    language: Language,
    expression_complexity_func: Callable[[Expression], float],
    aggregator: Callable[[Iterable[float]], float] = sum,
) -> float:
    """Combine per-expression complexities into one complexity value for a `Language`.

    Args:
        language: the Language whose expressions are measured
        expression_complexity_func: maps a single expression to its complexity
        aggregator: (optional, default = sum) reduces the lazily produced
            per-expression complexities to a single value

    Returns:
        whatever `aggregator` returns for the language's expression complexities
    """
    # Lazily apply the complexity function to each expression.
    per_expression = map(expression_complexity_func, language.expressions)
    return aggregator(per_expression)

Aggregate complexities for individual Expressions into a complexity for a Language.

Arguments:
  • language: the Language to measure
  • expression_complexity_func: the function that returns the complexity of an individual expression
  • aggregator: (optional, default = sum) the function that aggregates individual complexities
Returns:

a float, the complexity of a language