"""Abstract base class for graph corpus readers.This module provides the foundational :class:`Corpus` class for managing collectionsof graphs in the decomp framework. The Corpus class serves as an abstract base thatconcrete corpus implementations extend to handle specific graph formats.The module defines a generic corpus container that:- Accepts raw graphs in an input format- Transforms them to an output format via an abstract graph builder- Provides dictionary-like access to the processed graphs- Handles errors during graph construction gracefullyType Variables--------------InGraph The input graph type that will be processed by the corpus reader.OutGraph The output graph type produced after processing.Type Aliases------------GraphDict[T] Generic dictionary mapping hashable identifiers to graphs of type T.Classes-------Corpus Abstract base class for graph corpus containers with generic type parameters for input and output graph formats."""fromabcimportABCMeta,abstractmethodfromcollections.abcimportHashable,ItemsView,IteratorfromloggingimportwarningfromrandomimportsamplefromtypingimportTypeVarInGraph=TypeVar('InGraph')# the input graph typeOutGraph=TypeVar('OutGraph')# the output graph typetypeGraphDict[T]=dict[Hashable,T]
[docs]classCorpus[InGraph,OutGraph](metaclass=ABCMeta):"""Container for graphs. Parameters ---------- graphs_raw A sequence of graphs in a format that the graphbuilder for a subclass of this abstract class can process. """
[docs]defitems(self)->ItemsView[Hashable,OutGraph]:"""Dictionary-like iterator for (graphid, graph) pairs."""returnself._graphs.items()
def__getitem__(self,k:Hashable)->OutGraph:returnself._graphs[k]def__contains__(self,k:Hashable)->bool:returnkinself._graphsdef__len__(self)->int:returnlen(self._graphs)def_build_graphs(self)->None:forgraphid,rawgraphinself._graphs_raw.items():try:self._graphs[graphid]=self._graphbuilder(graphid,rawgraph)exceptValueError:warning(f'{graphid} has no or multiple root nodes')exceptRecursionError:warning(f'{graphid} has loops')@abstractmethoddef_graphbuilder(self,graphid:Hashable,rawgraph:InGraph)->OutGraph:raiseNotImplementedError@propertydefgraphs(self)->dict[Hashable,OutGraph]:"""The graphs in corpus."""returnself._graphs@propertydefgraphids(self)->list[Hashable]:"""The graph ids in corpus."""returnlist(self._graphs)@propertydefngraphs(self)->int:"""Number of graphs in corpus."""returnlen(self._graphs)
[docs]defsample(self,k:int)->dict[Hashable,OutGraph]:"""Sample k graphs without replacement. Parameters ---------- k the number of graphs to sample """sampled_keys=sample(list(self._graphs.keys()),k=k)return{tid:self._graphs[tid]fortidinsampled_keys}