Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 248 additions & 0 deletions compilertoolkit/ntree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
"""Utilities for creation and use for arbitrarily sized tree structures.
The intent is for use in module/package trees"""

from typing import Any, Literal, Never, Protocol, Self, Sequence, overload


class Leaf(Protocol):
    """Structural interface that every leaf stored in a tree MUST satisfy."""

    def matches(self, name: object) -> bool:
        """Report whether this node is identified by ``name``.

        Used for lookups such as module name resolution.
        """
        ...

    def __eq__(self, other: Any) -> bool:
        """Report whether this leaf is equal to ``other``."""
        ...


class NTree[L: Leaf, I: str]():
"""
NTree[L: Leaf, I: str]
======
A tree structure with the ability to hold N# of "leaf" objects

L - The leaf type
I - The type of the tree indentifier

Note
#####

You *should* subclass this if you want to add more details or change how matching works.

"""

__slots__ = "children", "identifier"

children: list[L | Self]
identifier: I | None
"""An identifiable "name" of some kind. Useful for tree matching/comparison"""

def __init__(
self,
leaves: Sequence[L | Self] | None = None,
identifier: I | None = None,
):
self.children = []
self.identifier = identifier

if leaves is not None:
self.add_leaves(leaves)

def add_leaf(self, leaf: L | Self) -> Self:
"""Append a single leaf"""
self.children.append(leaf)
return self

def add_leaves(self, leaves: Sequence[L | Self]) -> Self:
"""Append a single leaf"""
for leaf in leaves:
self.add_leaf(
leaf
) # add leaf using function (this makes overwriting that function more impactful/useful)
return self

def set_leaves(self, leaves: Sequence[L | Self]) -> Self:
"""Append a single leaf"""
self.children = []
for leaf in leaves:
self.add_leaf(
leaf
) # add leaf using function (this makes overwriting that function more impactful/useful)
return self

def matches(self, name: object) -> bool:
"""Match against this node based on some input "name". Useful for package name resolution.
defaults to using __eq__ method
"""
if isinstance(name, NTree):
return self.identifier == name.identifier
return self.identifier == name

# overwrite to make your life easier!
def copy(self) -> Self:
return self.__class__(leaves=list(self.children), identifier=self.identifier)

@overload
def overlaps(self, other_tree: "NTree") -> bool: ...

@overload
def overlaps(self, other_tree: Any) -> Never: ...

def overlaps(self, other_tree: "NTree | Any") -> bool | Never:
"""Check for overlapping trees"""
if not isinstance(other_tree, NTree):
raise TypeError(other_tree)

return other_tree.matches(self.identifier) and len(
[
child # get overlap of subtrees
for child in self.children
for other_child in other_tree.children
if isinstance(child, NTree)
and isinstance(other_child, NTree)
and (child.overlaps(other_child))
]
+ [
child # get overlap of leaves
for other_child in other_tree.children
for child in self.children
if not isinstance(child, NTree)
and not isinstance(other_child, NTree)
and (child == other_child)
]
) == len(other_tree.children)

def _combine(self, other: Self) -> Sequence[L | Self]:
"""combine two trees- including sub-trees by identifying intersections"""

output = list(self.children)
for other_child in other.children:
for c, child in enumerate(output):
if not isinstance(other_child, self.__class__) or not isinstance(
child, self.__class__
):
if child == other_child:
break # we had a match- this element is already in our child list
continue # no match- move to next item
if child.matches(other_child):
output[c] = (
child | other_child
) # do a combine of these trees since they are the SAME tree
break
else: # use no-break to detect if there were ZERO MATCHES
output.append(
other_child
) # do typical appending since this element isnt found in our own child list
return output

def _intersect(self, other: "NTree[L, I]") -> Sequence[L | Self]:
output = []
for other_child in other.children:
for child in self.children:
if child in output:
continue
if not isinstance(other_child, NTree) or not isinstance(child, NTree):
if child == other_child:
output.append(child) # append child that had a match
elif child.matches(
other_child
): # both are children are trees and are the same tree
output.append(
child & other_child
) # get overlap of these trees since they are the SAME tree
return output

def __or__(self, other: Self) -> Self:
"""Calculate the combined tree"""
if not isinstance(other, NTree):
raise TypeError(other)

return self.copy().set_leaves(self._combine(other))

def __ior__(self, other: Self | object):
"""Calculate the combined tree"""
if not isinstance(other, self.__class__):
raise TypeError(other)

self.set_leaves(self._combine(other))

def __add__(self, other: Self | L | Sequence[Self | L]) -> Self:
if isinstance(other, Sequence):
return self.__class__(
leaves=self.children + list(other), identifier=self.identifier
)
return self.__class__(
leaves=self.children + [other], identifier=self.identifier
)

def __iadd__(self, other: Self | L | Sequence[Self | L]):
if isinstance(other, Sequence):
self.add_leaves(other)
return
self.add_leaf(other)

def __eq__(self, other: Any) -> bool:
if not isinstance(other, NTree):
return False
return other.identifier == self.identifier and other.children == self.children

def __and__(self, other: "NTree[Any, I] | object") -> Self:
"""Get overlap/intersection of trees (Useful for module/package resolution!)"""
if not isinstance(other, NTree):
raise TypeError(other)

return self.copy().set_leaves(self._intersect(other))

def __iand__(self, other: "NTree[Any, I] | object"):
"""Get overlap/intersection of trees (Useful for module/package resolution!)"""
if not isinstance(other, NTree):
raise TypeError(other)

self.children = []
self.add_leaves(self._intersect(other))

@overload
def __getitem__(self, key: I) -> Self:
"""Get a tree based on a tree identifier/matching"""
...

@overload
def __getitem__(self, key: object) -> L:
"""Get Any leaf node based on arbitrary key (will use .matches defined in Leaf protocol)"""
...

def __getitem__(self, key: I | object) -> L | Self:
"""Get a subtree or leaf node based on a key: I | Any"""
for child in self.children:
if child.matches(key):
return child
raise KeyError(key)

@overload
def __setitem__(self, key: I, value: Self):
"""Set a subtree item based on a tree identifier/matching"""
...

@overload
def __setitem__(self, key: object, value: L):
"""set a leaf node based on arbitrary key (will use .matches defined in Leaf protocol)"""
...

def __setitem__(self, key: I | object, value: Self | L):
"""Get a subtree or leaf node based on a key: I | Any"""
for c, child in enumerate(self.children):
if child.matches(key):
self.add_leaf(value)
self.children[c] = self.children.pop()
return
raise KeyError(key)

def __delitem__(self, key: I | object):
"""Deletes the *first* matching item"""
for c, child in enumerate(self.children):
if child.matches(key):
del self.children[c]
return
raise KeyError(key)

def __str__(self) -> str:
return f"(Tree: {self.identifier} | [{', '.join(str(child) for child in self.children)}])"
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ license = { text = "LGPL-3.0-only" }
keywords = [
"compilers",
"library",
"framework",
"trees",
"datastructures",
"tree manipulation",
"package",
"interpreters",
"parsers",
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ An opinionated library to help you build compilers.
- [x] Parser builder (WIP, needs to be more ergonomic)
- [ ] Parser check functions built into patterns to allow automatic syntax error parsing.
- [ ] Source error highlighting (fine grained highlights)
- [ ] Package and module tree utilities
- [x] Package and module tree utilities
- [x] Lexing via rply library (and utilities)
- [x] Parser token class builtin

Expand Down
Loading