Network Construction

Source Code

import networkx as nx
import pandas as pd
from collections import Counter
def get_bipartite(df, student_col, object_col, attr_col=None, group_col=None):
    """
    Construct a weighted bipartite graph from a DataFrame.

    Every row of `df` is treated as one observed relationship between a
    student and a studied object (e.g. a coded construct, behavior, or
    subtask). Identical (student, object) pairs are collapsed into a single
    edge whose 'weight' is the number of rows in which the pair occurs.
    Optionally, group information is attached to student nodes and attribute
    information to object nodes.

    Parameters:
    -----------
    df : pandas.DataFrame
        The input DataFrame containing the data to construct the bipartite graph.
    student_col : str
        The column name in the DataFrame representing student nodes.
    object_col : str
        The column name in the DataFrame representing the studied object nodes.
    attr_col : str, optional
        The column name in the DataFrame holding an attribute of the object nodes
        (e.g. the dimension of coded constructs). If provided, each object node
        gets a node attribute named `attr_col`. Default is None.
    group_col : str, optional
        The column name in the DataFrame holding the group of the student nodes.
        If provided, each student node gets a node attribute named `group_col`.
        Default is None.

    Returns:
    --------
    networkx.Graph
        A graph with the following properties:
        - Nodes: one node per distinct value of `student_col` and of `object_col`.
          The node attribute 'bipartite' is set to the *column name* the node
          came from (`student_col` or `object_col`).
        - Edges: one edge per distinct (student, object) pair, with a 'weight'
          attribute equal to the number of rows containing that pair.
        - Node attributes: `group_col` on student nodes and `attr_col` on object
          nodes, when the respective column is given.

    Notes:
    ------
    - Nodes are keyed by the raw cell values, so a value that occurs in both
      columns becomes a single node; its 'bipartite' attribute ends up as
      `object_col` because object nodes are added last.
    - If a student (or object) is listed with several different groups
      (or attributes), the value from the last such row is kept.

    Example:
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'student': ['Alice', 'Bob', 'Alice', 'Charlie'],
    ...     'object': ['ask questions', 'answer questions', 'evaluating', 'monitoring'],
    ...     'group': ['A', 'B', 'A', 'B'],
    ...     'attr': ['cognitive', 'cognitive', 'metacognitive', 'metacognitive']
    ... })
    >>> B = get_bipartite(df, student_col='student', object_col='object', attr_col='attr', group_col='group')
    >>> for node, data in B.nodes(data=True):
    ...     print(node, data)
    Alice {'bipartite': 'student', 'group': 'A'}
    Bob {'bipartite': 'student', 'group': 'B'}
    Charlie {'bipartite': 'student', 'group': 'B'}
    ask questions {'bipartite': 'object', 'attr': 'cognitive'}
    answer questions {'bipartite': 'object', 'attr': 'cognitive'}
    evaluating {'bipartite': 'object', 'attr': 'metacognitive'}
    monitoring {'bipartite': 'object', 'attr': 'metacognitive'}
    """

    # Pair the two columns row by row rather than via ``df[[...]].values``:
    # ``.values`` coerces both columns to one common dtype (integer student
    # IDs would turn into floats next to a float object column), whereas
    # iterating each column separately keeps its own values.
    edge_weights = Counter(zip(df[student_col], df[object_col]))
    edgelist = [
        (student, obj, {'weight': weight})
        for (student, obj), weight in edge_weights.items()
    ]

    B = nx.Graph()
    # Students are added first, objects second; the order matters only when a
    # label appears in both columns (the later call overwrites 'bipartite').
    B.add_nodes_from([student for student, _, _ in edgelist], bipartite=student_col)
    B.add_nodes_from([obj for _, obj, _ in edgelist], bipartite=object_col)
    B.add_edges_from(edgelist)

    if group_col is not None:
        # student -> group lookup; with conflicting rows the last one wins.
        student_groups = (
            df[[student_col, group_col]]
            .drop_duplicates()
            .set_index(student_col)[group_col]
            .to_dict()
        )
        nx.set_node_attributes(B, student_groups, name=group_col)

    if attr_col is not None:
        # object -> attribute lookup; with conflicting rows the last one wins.
        object_attrs = (
            df[[object_col, attr_col]]
            .drop_duplicates()
            .set_index(object_col)[attr_col]
            .to_dict()
        )
        nx.set_node_attributes(B, object_attrs, name=attr_col)

    return B
def get_tripartite(df, student_col, object1_col, object2_col, group_col=None):
    """
    Construct a weighted student / joint-object graph from a DataFrame.

    Two object columns (e.g. codes from different modalities) are fused row by
    row into one joint label of the form ``"<object1>**<object2>"``; a missing
    value on either side is written as ``"NA"``. Each student is then linked to
    the joint labels they co-occur with, and the edge 'weight' counts the rows
    in which that (student, joint label) pair appears. This can be useful for
    multimodal data analysis. Group information can optionally be attached to
    student nodes.

    Structurally the result has two node layers (students and joint objects);
    the joint-object nodes are flagged with ``tripartite=True``.

    Parameters:
    -----------
    df : pandas.DataFrame
        The input DataFrame containing the data to construct the graph. It is
        not modified.
    student_col : str
        The column name in the DataFrame representing student nodes.
    object1_col : str
        The column name in the DataFrame representing the first type of object
        nodes. The column must support the pandas ``.str`` accessor (i.e. hold
        strings).
    object2_col : str
        The column name in the DataFrame representing the second type of object
        nodes. Must hold strings as well.
    group_col : str, optional
        The column name in the DataFrame holding the group of the student nodes.
        If provided, each student node gets a node attribute named `group_col`.
        Default is None.

    Returns:
    --------
    networkx.Graph
        A graph with the following properties:
        - Nodes: student nodes with ``bipartite=student_col``, and joint object
          nodes with ``bipartite="(<object1_col>,<object2_col>)"`` and
          ``tripartite=True``.
        - Edges: one edge per distinct (student, joint object) pair, with a
          'weight' attribute equal to the number of rows containing that pair.
        - Node attributes: `group_col` on student nodes when it is given. If a
          student is listed with several groups, the last one is kept.

    Example:
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'student': ['Alice', 'Bob', 'Alice', 'Charlie'],
    ...     'object1': ['ask questions', 'answer questions', 'evaluating', 'monitoring'],
    ...     'object2': ['tilt head', 'shake head', 'nod head', 'nod head'],
    ...     'group': ['A', 'B', 'A', 'B']
    ... })
    >>> T = get_tripartite(df, 'student', 'object1', 'object2', group_col='group')
    >>> for node, data in T.nodes(data=True):
    ...     print(node, data)
    Alice {'bipartite': 'student', 'group': 'A'}
    Bob {'bipartite': 'student', 'group': 'B'}
    Charlie {'bipartite': 'student', 'group': 'B'}
    ask questions**tilt head {'bipartite': '(object1,object2)', 'tripartite': True}
    answer questions**shake head {'bipartite': '(object1,object2)', 'tripartite': True}
    evaluating**nod head {'bipartite': '(object1,object2)', 'tripartite': True}
    monitoring**nod head {'bipartite': '(object1,object2)', 'tripartite': True}
    """
    # Keep the joint labels in a standalone Series instead of writing them into
    # a copy of `df` under a fixed column name: that would overwrite a user
    # column that happens to be called 'joint_objects' (corrupting the student
    # or group data read below) and needlessly duplicates the whole frame.
    joint_objects = df[object1_col].str.cat(df[object2_col], sep='**', na_rep='NA')

    # Both sequences are in row order, so zipping pairs each student with the
    # joint label of the same row.
    edge_weights = Counter(zip(df[student_col], joint_objects))
    edgelist = [
        (student, joint, {'weight': weight})
        for (student, joint), weight in edge_weights.items()
    ]

    T = nx.Graph()
    T.add_nodes_from([student for student, _, _ in edgelist], bipartite=student_col)
    T.add_nodes_from(
        [joint for _, joint, _ in edgelist],
        bipartite=f"({object1_col},{object2_col})",
        tripartite=True,
    )
    T.add_edges_from(edgelist)

    if group_col is not None:
        # student -> group lookup; with conflicting rows the last one wins.
        student_groups = (
            df[[student_col, group_col]]
            .drop_duplicates()
            .set_index(student_col)[group_col]
            .to_dict()
        )
        nx.set_node_attributes(T, student_groups, name=group_col)

    return T