Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Please complete the algorithm for computing PageRank in Python. The program is listed below.

ID: 3755378 • Letter: P

Question

Please complete the algorithm for computing PageRank in Python. The program is listed below. You need to fill in some lines which are indicated by the comments. Thanks!

# This Python file computes PageRank by using Power Iteration algorithm

class cEdge:
    """A directed, weighted edge plus its random-walk transition probability.

    The indentation lost in the original paste has been restored; the public
    getter/setter interface is unchanged so all existing callers still work.
    """

    def __init__(self):
        # Endpoints are stored as node indices into the owning graph.
        self.nBgnNodeIndex = 0
        self.nEndNodeIndex = 0
        self.dEdgeWeight = 0.0
        # Probability of a random surfer following this edge; filled in later
        # by cNode.ComputeOutgoingTransitionProbabilities().
        self.dTransitionProb = 0.0

    def setBgnNodeIndex(self, nNodeIndex):
        self.nBgnNodeIndex = nNodeIndex

    def getBgnNodeIndex(self):
        return self.nBgnNodeIndex

    def setEndNodeIndex(self, nNodeIndex):
        self.nEndNodeIndex = nNodeIndex

    def getEndNodeIndex(self):
        return self.nEndNodeIndex

    def setEdgeWeight(self, dWeight):
        self.dEdgeWeight = dWeight

    def getEdgeWeight(self):
        return self.dEdgeWeight

    def setTransitionProb(self, dTransProb):
        self.dTransitionProb = dTransProb

    def getTransitionProb(self):
        return self.dTransitionProb

class cNode:
    """A graph node: its index, current PageRank value, and adjacency.

    Out- and in-neighbors are dictionaries mapping a neighbor's node index to
    the connecting cEdge object.  Indentation lost in the original paste has
    been restored and the fill-in line in
    ComputeOutgoingTransitionProbabilities() has been completed.
    """

    def __init__(self, nNodeIndex):
        self.nNodeIndex = nNodeIndex
        self.dPageRankValue = 0.0
        self.vcOutNeighbors = {}  # neighbor index -> outgoing cEdge
        self.vcInNeighbors = {}   # neighbor index -> incoming cEdge
        self.nOutDegree = 0

    def getNodeIndex(self):
        return self.nNodeIndex

    def getPageRankValue(self):
        return self.dPageRankValue

    def setPageRankValue(self, dPageRankValue):
        self.dPageRankValue = dPageRankValue

    def addOutNeighbor(self, nIdxOfOutNeighbor, dWeight):
        """Record the directed edge self -> nIdxOfOutNeighbor."""
        aNewEdge = cEdge()
        aNewEdge.setBgnNodeIndex(self.nNodeIndex)
        aNewEdge.setEndNodeIndex(nIdxOfOutNeighbor)
        aNewEdge.setEdgeWeight(dWeight)
        self.vcOutNeighbors[nIdxOfOutNeighbor] = aNewEdge

    def addInNeighbor(self, nIdxOfInNeighbor, dWeight):
        """Record the directed edge nIdxOfInNeighbor -> self."""
        aNewEdge = cEdge()
        aNewEdge.setBgnNodeIndex(nIdxOfInNeighbor)
        aNewEdge.setEndNodeIndex(self.nNodeIndex)
        aNewEdge.setEdgeWeight(dWeight)
        self.vcInNeighbors[nIdxOfInNeighbor] = aNewEdge

    def getOutNeighbors(self):
        return self.vcOutNeighbors.keys()

    def getOutNeighborNodeIndex(self, nNodeIdx):
        return self.vcOutNeighbors[nNodeIdx].getEndNodeIndex()

    def getOutNeighborWeight(self, nNodeIdx):
        return self.vcOutNeighbors[nNodeIdx].getEdgeWeight()

    def getOutNeighborTransProb(self, nNodeIdx):
        return self.vcOutNeighbors[nNodeIdx].getTransitionProb()

    def getInNeighbors(self):
        return self.vcInNeighbors.keys()

    def getInNeighborWeight(self, nNodeIdx):
        return self.vcInNeighbors[nNodeIdx].getEdgeWeight()

    def getInNeighborTransProb(self, nNodeIdx):
        return self.vcInNeighbors[nNodeIdx].getTransitionProb()

    def setInNeighborTransProb(self, nNodeIndex, dTransProb):
        self.vcInNeighbors[nNodeIndex].setTransitionProb(dTransProb)

    def ComputeOutgoingTransitionProbabilities(self):
        """Turn outgoing edge weights into transition probabilities.

        Completed fill-in: each outgoing edge gets probability
        weight / (total outgoing weight), so the probabilities over all
        out-edges sum to 1.  For the unit weights used by
        cGraph.loadAdjacencyListFile this is simply 1 / out-degree.
        """
        self.nOutDegree = len(self.vcOutNeighbors)
        dTotalWeight = sum(aEdge.getEdgeWeight()
                           for aEdge in self.vcOutNeighbors.values())
        for nNodeIndex in self.vcOutNeighbors:
            aOneOutEdge = self.vcOutNeighbors[nNodeIndex]
            dWeight = aOneOutEdge.getEdgeWeight()
            dOutgoingTransProb = dWeight / dTotalWeight  # completed line
            aOneOutEdge.setTransitionProb(dOutgoingTransProb)

class cGraph:
    """Directed, weighted graph with PageRank computed by power iteration."""

    def __init__(self):
        self.vcNodeList = {}  # node index -> cNode
        self.nNumOfNodes = 0
        self.dDecayFactor = 0.0  # damping factor d, e.g. 0.85

    def setDecayFactor(self, dDecayFactor):
        self.dDecayFactor = dDecayFactor

    def addNode(self, nNodeIndex):
        """Create, register, and return a new cNode for nNodeIndex."""
        self.nNumOfNodes += 1
        aNewNode = cNode(nNodeIndex)
        self.vcNodeList[nNodeIndex] = aNewNode
        return aNewNode

    def getNode(self, nNodeIndex):
        """Return the node with this index, or None if it is absent."""
        return self.vcNodeList.get(nNodeIndex)

    def addEdge(self, nBgnNodeIndex, nEndNodeIndex, dWeight):
        """Add the directed edge begin -> end, creating endpoints on demand."""
        if nBgnNodeIndex not in self.vcNodeList:
            self.addNode(nBgnNodeIndex)
        if nEndNodeIndex not in self.vcNodeList:
            self.addNode(nEndNodeIndex)
        self.vcNodeList[nBgnNodeIndex].addOutNeighbor(nEndNodeIndex, dWeight)
        self.vcNodeList[nEndNodeIndex].addInNeighbor(nBgnNodeIndex, dWeight)

    def getAllNodes(self):
        return self.vcNodeList.keys()

    def loadAdjacencyListFile(self, sInputFile):
        """Load "src dst1 dst2 ..." lines; every edge gets weight 1.

        Fixes over the original: tokens come from line.split() (the old
        rstrip(' ').split(" ") left the trailing newline attached to the last
        token and produced empty tokens on repeated spaces, crashing int());
        blank lines are skipped; the file is closed even if parsing raises.
        """
        with open(sInputFile, "r") as fileAdjListData:
            for sLine in fileAdjListData:
                vsNodeIndices = sLine.split()
                if not vsNodeIndices:
                    continue  # skip blank lines
                vnIndicesOfNodes = list(map(int, vsNodeIndices))
                nBgnNodeIndex = vnIndicesOfNodes[0]
                for nEndNodeIndex in vnIndicesOfNodes[1:]:
                    self.addEdge(nBgnNodeIndex, nEndNodeIndex, 1)

    def displayEdgeList(self):
        """Print every directed edge as "( begin , end )"."""
        print("The list of edges of the graph:")
        for nIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nIndex]
            nBgnNodeIndex = aNode.getNodeIndex()
            for nOutNeighborIndex in aNode.getOutNeighbors():
                nEndNodeIndex = aNode.getOutNeighborNodeIndex(nOutNeighborIndex)
                print("( " + str(nBgnNodeIndex) + " , " + str(nEndNodeIndex) + " )")

    def ComputeTransitionProbabilities(self):
        """Compute out-edge transition probabilities, then mirror each one
        onto the matching in-edge of the destination node."""
        for nNodeIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nNodeIndex]
            aNode.ComputeOutgoingTransitionProbabilities()
            for nOutNeighborNodeIndex in aNode.getOutNeighbors():
                aOutNeighborNode = self.vcNodeList[nOutNeighborNodeIndex]
                dTransProb = aNode.getOutNeighborTransProb(nOutNeighborNodeIndex)
                aOutNeighborNode.setInNeighborTransProb(nNodeIndex, dTransProb)

    def RunOnePowerIteration(self):
        """One synchronous power-iteration step:

            PR(v) = (1 - d) / N + d * sum over in-neighbors u of
                    PR(u) * P(u -> v)

        The two fill-in lines are completed.  Additionally — fixing the
        original in-place update — all new values are computed from a
        snapshot of the previous iteration before any node is overwritten;
        updating nodes in place mixes old and new ranks within one sweep
        (Gauss-Seidel), which is not the power method.
        """
        vdNewPageRanks = {}
        for nNodeIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nNodeIndex]
            dTotalSumPageRank = 0.0
            for nIndexOfInNeighborNode in aNode.getInNeighbors():
                aInNeighborNode = self.vcNodeList[nIndexOfInNeighborNode]
                dPageRankValueOfInNeighbor = aInNeighborNode.getPageRankValue()
                dTransProb = aNode.getInNeighborTransProb(nIndexOfInNeighborNode)
                # Completed line: this in-neighbor's weighted contribution.
                dTotalSumPageRank += dPageRankValueOfInNeighbor * dTransProb
            # Completed line: damped PageRank update.
            vdNewPageRanks[nNodeIndex] = (
                (1.0 - self.dDecayFactor) / self.nNumOfNodes
                + self.dDecayFactor * dTotalSumPageRank)
        for nNodeIndex, dNewPageRankValue in vdNewPageRanks.items():
            self.vcNodeList[nNodeIndex].setPageRankValue(dNewPageRankValue)

    def ComputePageRank(self):
        """Run 29 power iterations from a uniform start and print results."""
        if self.nNumOfNodes == 0:
            # Empty graph: nothing to rank (the original divided by zero here).
            print("PageRank values:")
            return
        # Initialization: uniform distribution 1/N over all nodes.
        dInitialPRValue = 1 / self.nNumOfNodes
        for nIndex in self.vcNodeList.keys():
            self.vcNodeList[nIndex].setPageRankValue(dInitialPRValue)
        # Compute the transition probabilities once up front.
        self.ComputeTransitionProbabilities()
        # Power iteration (fixed 29 sweeps, as in the original).
        for nIndexOfIteration in range(1, 30):
            self.RunOnePowerIteration()
        # Output the final PageRank values.
        print("PageRank values:")
        for nIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nIndex]
            print(str(aNode.getNodeIndex()) + " : " + str(aNode.getPageRankValue()))

if __name__ == "__main__":
    # Input file.  Raw string is required: the original non-raw literal
    # "...\08DataMining..." contained the escape sequence "\0" (a NUL byte),
    # silently corrupting the path ("\G", "\9" etc. are also invalid escapes).
    sInputFile = r"E:\GSU\99PythonWorkspace\08DataMining\src\edu\gsu\01AdjacencyList.txt"
    # Build the graph from the adjacency-list file.
    aGraph = cGraph()
    aGraph.setDecayFactor(0.85)  # standard PageRank damping factor
    aGraph.loadAdjacencyListFile(sInputFile)
    aGraph.displayEdgeList()
    # Compute and print the PageRank values.
    aGraph.ComputePageRank()

Explanation / Answer

The following is the NetworkX reference code for calculating PageRank.

# NOTE(review): this is the NetworkX reference implementation of PageRank.
# As pasted, it depends on two names that are never imported in this snippet:
#   - `nx` (used below in nx.stochastic_graph) -> needs `import networkx as nx`
#   - `NetworkXError` (raised below)           -> needs `from networkx import NetworkXError`
# Without those imports the function fails with NameError at runtime.
def pagerank(G, alpha=0.85, personalization=None,

             max_iter=100, tol=1.0e-6, nstart=None, weight='weight',

             dangling=None):

    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on

    the structure of the incoming links. It was originally designed as

    an algorithm to rank web pages.

    Parameters

    ----------

    G : graph

      A NetworkX graph. Undirected graphs will be converted to a directed

      graph with two directed edges for each undirected edge.

    alpha : float, optional

      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional

      The "personalization vector" consisting of a dictionary with a

      key for every graph node and nonzero personalization value for each node.

      By default, a uniform distribution is used.

    max_iter : integer, optional

      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional

      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional

      Starting value of PageRank iteration for each node.

    weight : key, optional

      Edge data key to use as weight. If None weights are set to 1.

    dangling: dict, optional

      The outedges to be assigned to any "dangling" nodes, i.e., nodes without

      any outedges. The dict key is the node the outedge points to and the dict

      value is the weight of that outedge. By default, dangling nodes are given

      outedges according to the personalization vector (uniform if not

      specified). This must be selected to result in an irreducible transition

      matrix (see notes under google_matrix). It may be common to have the

      dangling dict to be the same as the personalization dict.

    Returns

    -------

    pagerank : dictionary

       Dictionary of nodes with PageRank as value

    Notes

    -----

    The eigenvector calculation is done by the power iteration method

    and has no guarantee of convergence. The iteration will stop

    after max_iter iterations or an error tolerance of

    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this

    algorithm does not check if the input graph is directed and will

    execute on undirected graphs by converting each edge in the

    directed graph to two edges.

    """

    if len(G) == 0:

        return {}

    if not G.is_directed():

        D = G.to_directed()

    else:

        D = G

    # Create a copy in (right) stochastic form

    W = nx.stochastic_graph(D, weight=weight)

    N = W.number_of_nodes()

    # Choose fixed starting vector if not given

    if nstart is None:

        x = dict.fromkeys(W, 1.0 / N)

    else:

        # Normalized nstart vector

        s = float(sum(nstart.values()))

        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:

        # Assign uniform personalization vector if not given

        p = dict.fromkeys(W, 1.0 / N)

    else:

        missing = set(G) - set(personalization)

        if missing:

            raise NetworkXError('Personalization dictionary '

                                'must have a value for every node. '

                                'Missing nodes %s' % missing)

        s = float(sum(personalization.values()))

        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:

        # Use personalization vector if dangling vector not specified

        dangling_weights = p

    else:

        missing = set(G) - set(dangling)

        if missing:

            raise NetworkXError('Dangling node dictionary '

                                'must have a value for every node. '

                                'Missing nodes %s' % missing)

        s = float(sum(dangling.values()))

        dangling_weights = dict((k, v/s) for k, v in dangling.items())

    # Nodes with no outgoing edges; their rank mass is redistributed below.
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations

    for _ in range(max_iter):

        xlast = x

        x = dict.fromkeys(xlast.keys(), 0)

        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)

        for n in x:

            # this matrix multiply looks odd because it is

            # doing a left multiply x^T=xlast^T*W

            for nbr in W[n]:

                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]

            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]

        # check convergence, l1 norm

        err = sum([abs(x[n] - xlast[n]) for n in x])

        if err < N*tol:

            return x

    raise NetworkXError('pagerank: power iteration failed to converge '

                        'in %d iterations.' % max_iter)

# NOTE(review): this is a verbatim duplicate of the pagerank() definition
# directly above.  Because it is defined second, this copy silently shadows
# the first when the module is executed — one of the two should be removed.
# The same missing imports apply (`nx`, `NetworkXError` from networkx).
def pagerank(G, alpha=0.85, personalization=None,

             max_iter=100, tol=1.0e-6, nstart=None, weight='weight',

             dangling=None):

    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on

    the structure of the incoming links. It was originally designed as

    an algorithm to rank web pages.

    Parameters

    ----------

    G : graph

      A NetworkX graph. Undirected graphs will be converted to a directed

      graph with two directed edges for each undirected edge.

    alpha : float, optional

      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional

      The "personalization vector" consisting of a dictionary with a

      key for every graph node and nonzero personalization value for each node.

      By default, a uniform distribution is used.

    max_iter : integer, optional

      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional

      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional

      Starting value of PageRank iteration for each node.

    weight : key, optional

      Edge data key to use as weight. If None weights are set to 1.

    dangling: dict, optional

      The outedges to be assigned to any "dangling" nodes, i.e., nodes without

      any outedges. The dict key is the node the outedge points to and the dict

      value is the weight of that outedge. By default, dangling nodes are given

      outedges according to the personalization vector (uniform if not

      specified). This must be selected to result in an irreducible transition

      matrix (see notes under google_matrix). It may be common to have the

      dangling dict to be the same as the personalization dict.

    Returns

    -------

    pagerank : dictionary

       Dictionary of nodes with PageRank as value

    Notes

    -----

    The eigenvector calculation is done by the power iteration method

    and has no guarantee of convergence. The iteration will stop

    after max_iter iterations or an error tolerance of

    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this

    algorithm does not check if the input graph is directed and will

    execute on undirected graphs by converting each edge in the

    directed graph to two edges.

    """

    if len(G) == 0:

        return {}

    if not G.is_directed():

        D = G.to_directed()

    else:

        D = G

    # Create a copy in (right) stochastic form

    W = nx.stochastic_graph(D, weight=weight)

    N = W.number_of_nodes()

    # Choose fixed starting vector if not given

    if nstart is None:

        x = dict.fromkeys(W, 1.0 / N)

    else:

        # Normalized nstart vector

        s = float(sum(nstart.values()))

        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:

        # Assign uniform personalization vector if not given

        p = dict.fromkeys(W, 1.0 / N)

    else:

        missing = set(G) - set(personalization)

        if missing:

            raise NetworkXError('Personalization dictionary '

                                'must have a value for every node. '

                                'Missing nodes %s' % missing)

        s = float(sum(personalization.values()))

        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:

        # Use personalization vector if dangling vector not specified

        dangling_weights = p

    else:

        missing = set(G) - set(dangling)

        if missing:

            raise NetworkXError('Dangling node dictionary '

                                'must have a value for every node. '

                                'Missing nodes %s' % missing)

        s = float(sum(dangling.values()))

        dangling_weights = dict((k, v/s) for k, v in dangling.items())

    # Nodes with no outgoing edges; their rank mass is redistributed below.
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations

    for _ in range(max_iter):

        xlast = x

        x = dict.fromkeys(xlast.keys(), 0)

        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)

        for n in x:

            # this matrix multiply looks odd because it is

            # doing a left multiply x^T=xlast^T*W

            for nbr in W[n]:

                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]

            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]

        # check convergence, l1 norm

        err = sum([abs(x[n] - xlast[n]) for n in x])

        if err < N*tol:

            return x

    raise NetworkXError('pagerank: power iteration failed to converge '

                        'in %d iterations.' % max_iter)

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Drop an Email at
drjack9650@gmail.com
Chat Now And Get Quote