# HITS (hubs and authorities)
import matplotlib.pyplot as plt
class HITS:
    """Iterative hubs-and-authorities (HITS) scoring on an adjacency matrix.

    ``adjacency_matrix[i][j] == 1`` is treated as a link from node ``i`` to
    node ``j``; any other value is treated as "no link".

    Attributes:
        adjacency_matrix: Square matrix (sequence of rows) describing links.
        max_iterations: Upper bound on the number of update rounds.
        tolerance: L1 change below which both score vectors count as
            converged.
        authorities: Authority score per node, ``None`` until ``run()``.
        hubs: Hub score per node, ``None`` until ``run()``.
    """

    def __init__(self, adjacency_matrix, max_iterations=100, tolerance=1e-6):
        self.adjacency_matrix = adjacency_matrix
        self.max_iterations = max_iterations
        self.tolerance = tolerance
        self.authorities = None
        self.hubs = None

    def normalize_vector(self, vector):
        """Return ``vector`` scaled to unit Euclidean length.

        A vector whose norm is zero (for example the scores of a graph with
        no links) cannot be scaled; it is returned as all zeros instead of
        raising ``ZeroDivisionError``.
        """
        norm = sum(x ** 2 for x in vector) ** 0.5
        if norm == 0:
            return [0.0 for _ in vector]
        return [x / norm for x in vector]

    def run(self):
        """Compute hub and authority scores and store them on the instance.

        Every round derives the new authority scores from the previous
        round's hub scores and the new hub scores from the previous round's
        authority scores, then normalizes both vectors. Iteration stops once
        the L1 change of both vectors is below ``tolerance`` or after
        ``max_iterations`` rounds.
        """
        matrix = self.adjacency_matrix
        num_nodes = len(matrix)
        nodes = range(num_nodes)

        self.authorities = [1] * num_nodes
        self.hubs = [1] * num_nodes

        for _ in range(self.max_iterations):
            prev_authorities = self.authorities
            prev_hubs = self.hubs

            # Authority of i: total hub score of the nodes linking to i.
            authorities = [
                sum(prev_hubs[j] for j in nodes if matrix[j][i] == 1)
                for i in nodes
            ]
            # Hub of i: total authority score of the nodes i links to.
            hubs = [
                sum(prev_authorities[j] for j in nodes if matrix[i][j] == 1)
                for i in nodes
            ]

            self.authorities = self.normalize_vector(authorities)
            self.hubs = self.normalize_vector(hubs)

            authority_diff = sum(
                abs(old - new)
                for old, new in zip(prev_authorities, self.authorities)
            )
            hubs_diff = sum(
                abs(old - new) for old, new in zip(prev_hubs, self.hubs)
            )
            if authority_diff < self.tolerance and hubs_diff < self.tolerance:
                break

    def plot_pie_chart(self):
        """Show the authority scores as a pie chart (call ``run()`` first)."""
        labels = [f'Node {i + 1}' for i in range(len(self.authorities))]
        plt.pie(self.authorities, labels=labels, autopct='%1.1f%%')
        plt.title('Authorities Scores')
        plt.axis('equal')
        plt.show()

    def plot_bar_graph(self):
        """Show the hub scores as a bar chart (call ``run()`` first)."""
        x = range(1, len(self.hubs) + 1)
        plt.bar(x, self.hubs)
        plt.xlabel('Node')
        plt.ylabel('Hub Score')
        plt.title('Hub Scores')
        plt.show()
# Example usage: score a small four-node graph and plot the results.
# Row i lists the outgoing links of node i (1 = link to that column's node).
adjacency_matrix = [
    [0, 1, 1, 0],
    [0, 0, 1, 1],
    [1, 0, 0, 1],
    [0, 1, 0, 0],
]
hits = HITS(adjacency_matrix)
hits.run()
hits.plot_pie_chart()
hits.plot_bar_graph()
# Apriori
from itertools import combinations
import matplotlib.pyplot as plt
class Apriori:
    """Level-wise frequent-itemset mining (Apriori).

    Attributes:
        transactions: Sequence of transactions, each an iterable of hashable,
            mutually orderable items.
        min_support: Minimum fraction of transactions that must contain an
            itemset for it to count as frequent.
        itemsets: After ``find_frequent_itemsets()``, ``itemsets[k - 1]`` is
            the list of frequent itemsets of size ``k``; each itemset is a
            list of items.
    """

    def __init__(self, transactions, min_support):
        self.transactions = transactions
        self.min_support = min_support
        self.itemsets = []

    def find_frequent_itemsets(self):
        """Populate ``self.itemsets`` with every frequent itemset, by size."""
        # Start from a clean slate so repeated calls do not pile new levels
        # on top of the results of an earlier run.
        self.itemsets = []
        total_transactions = len(self.transactions)

        # Count the transactions containing each item. dict.fromkeys drops
        # repeats inside one transaction (keeping first-seen order) so the
        # support agrees with the subset test used in prune().
        item_counts = {}
        for transaction in self.transactions:
            for item in dict.fromkeys(transaction):
                item_counts[item] = item_counts.get(item, 0) + 1

        frequent_1_itemsets = [
            [item]
            for item, count in item_counts.items()
            if count / total_transactions >= self.min_support
        ]
        self.itemsets.append(frequent_1_itemsets)

        # Grow itemsets one item at a time until a level comes back empty.
        k = 2
        while True:
            candidate_itemsets = self.generate_candidates(self.itemsets[k - 2], k)
            frequent_itemsets = self.prune(candidate_itemsets)
            if not frequent_itemsets:
                break
            self.itemsets.append(frequent_itemsets)
            k += 1

    def generate_candidates(self, itemsets, k):
        """Join size ``k - 1`` itemsets into size-``k`` candidates.

        Two itemsets are joined when their first ``k - 2`` items (in sorted
        order) are equal; the candidate is the sorted union of the pair.
        """
        ordered = [sorted(itemset) for itemset in itemsets]
        prefix_len = k - 2
        candidates = []
        for i, first in enumerate(ordered):
            for second in ordered[i + 1:]:
                if first[:prefix_len] == second[:prefix_len]:
                    candidates.append(sorted(set(first) | set(second)))
        return candidates

    def prune(self, candidate_itemsets):
        """Return the candidates whose support is at least ``min_support``."""
        total_transactions = len(self.transactions)
        # Build each transaction's set once rather than once per candidate.
        transaction_sets = [set(transaction) for transaction in self.transactions]
        frequent_itemsets = []
        for itemset in candidate_itemsets:
            wanted = set(itemset)
            count = sum(1 for items in transaction_sets if wanted <= items)
            if count / total_transactions >= self.min_support:
                frequent_itemsets.append(itemset)
        return frequent_itemsets

    def plot_histogram(self):
        """Show a histogram of the sizes of all frequent itemsets."""
        itemset_sizes = [
            len(itemset)
            for itemset_list in self.itemsets
            for itemset in itemset_list
        ]
        # default=1 keeps the bin range valid when nothing is frequent.
        largest = max(itemset_sizes, default=1)
        plt.hist(itemset_sizes, bins=range(1, largest + 2), align='left', rwidth=0.8)
        plt.xlabel('Itemset Size')
        plt.ylabel('Frequency')
        plt.title('Frequent Itemset Sizes')
        plt.show()

    def plot_bar_graph(self):
        """Show a bar chart of how many frequent itemsets exist per size."""
        itemset_counts = {}
        for itemset_list in self.itemsets:
            for itemset in itemset_list:
                size = len(itemset)
                itemset_counts[size] = itemset_counts.get(size, 0) + 1
        sizes = sorted(itemset_counts)
        counts = [itemset_counts[size] for size in sizes]
        plt.bar(sizes, counts)
        plt.xlabel('Itemset Size')
        plt.ylabel('Frequency')
        plt.title('Frequent Itemset Sizes')
        plt.show()
# Example usage: mine a small market-basket data set and plot the results.
transactions = [
    ['bread', 'milk', 'butter'],
    ['bread', 'butter'],
    ['milk', 'butter'],
    ['bread', 'milk'],
    ['bread', 'milk', 'butter', 'jam'],
]
# An itemset is frequent when at least 40% of the transactions contain it.
min_support = 0.4
apriori = Apriori(transactions, min_support)
apriori.find_frequent_itemsets()
apriori.plot_histogram()
apriori.plot_bar_graph()
# PageRank
import numpy as np
def pagerank(num_pages, damping_factor=0.85, epsilon=1.0e-6,
             adjacency_matrix=None, max_iterations=1000):
    """Compute PageRank scores by power iteration.

    Args:
        num_pages: Number of pages in the graph.
        damping_factor: Probability of following a link rather than jumping
            to a uniformly random page.
        epsilon: L1 change between successive score vectors below which the
            iteration is considered converged.
        adjacency_matrix: Optional ``num_pages x num_pages`` array-like in
            which a non-zero entry ``[j][i]`` means page ``j`` links to page
            ``i``. When omitted, the example chain graph is used: every page
            links to the next one and the last page has no outgoing links.
        max_iterations: Safety bound on the number of iterations; the latest
            estimate is returned if it is reached before convergence.

    Returns:
        A 1-D NumPy array of scores, one per page, summing to 1 (an empty
        array when ``num_pages`` is 0).

    Raises:
        ValueError: If ``num_pages`` is negative or ``adjacency_matrix`` does
            not have shape ``(num_pages, num_pages)``.
    """
    if num_pages < 0:
        raise ValueError("num_pages must be non-negative")
    if num_pages == 0:
        return np.zeros(0)

    if adjacency_matrix is None:
        # Example web graph: page i links to page i + 1.
        adjacency = np.zeros((num_pages, num_pages))
        sources = np.arange(num_pages - 1)
        adjacency[sources, sources + 1] = 1.0
    else:
        adjacency = (np.asarray(adjacency_matrix) != 0).astype(float)
        if adjacency.shape != (num_pages, num_pages):
            raise ValueError(
                "adjacency_matrix must have shape (num_pages, num_pages)"
            )

    # Out-degree of page j is the number of links in row j.
    out_degree = adjacency.sum(axis=1)
    dangling = out_degree == 0
    # Dangling rows are all zero, so dividing them by 1 leaves them zero
    # while avoiding a division by zero.
    safe_degree = np.where(dangling, 1.0, out_degree)
    transition = adjacency / safe_degree[:, np.newaxis]

    random_surfer_prob = np.full(num_pages, 1.0 / num_pages)
    pagerank_scores = random_surfer_prob.copy()

    for _ in range(max_iterations):
        # Rank held by pages without outgoing links is spread uniformly so
        # the scores keep summing to 1.
        dangling_mass = pagerank_scores[dangling].sum()
        followed = pagerank_scores @ transition + dangling_mass * random_surfer_prob
        new_pagerank_scores = (
            damping_factor * followed
            + (1 - damping_factor) * random_surfer_prob
        )
        change = np.abs(new_pagerank_scores - pagerank_scores).sum()
        pagerank_scores = new_pagerank_scores
        if change < epsilon:
            break

    return pagerank_scores
# Example usage: rank the pages of the built-in five-page chain graph.
num_pages = 5
scores = pagerank(num_pages)
# Display the PageRank scores
print("PageRank Scores:")
for i, score in enumerate(scores):
    print(f"Page {i+1}: {score:.4f}")