Facebook
From Fardin, 1 Month ago, written in Plain Text.
Embed
Download Paste or View Raw
Hits: 137
  1. HITS
  2.  
  3. import matplotlib.pyplot as plt
  4.  
  5. class HITS:
  6.     def __init__(self, adjacency_matrix, max_iterations=100, tolerance=1e-6):
  7.         self.adjacency_matrix = adjacency_matrix
  8.         self.max_iterations = max_iterations
  9.         self.tolerance = tolerance
  10.         self.authorities = None
  11.         self.hubs = None
  12.  
  13.     def normalize_vector(self, vector):
  14.         norm = sum(x**2 for x in vector) ** 0.5
  15.         return [x / norm for x in vector]
  16.  
  17.     def run(self):
  18.         num_nodes = len(self.adjacency_matrix)
  19.         self.authorities = [1] * num_nodes
  20.         self.hubs = [1] * num_nodes
  21.  
  22.         for _ in range(self.max_iterations):
  23.             prev_authorities = self.authorities[:]
  24.             prev_hubs = self.hubs[:]
  25.  
  26.             # Update authorities
  27.             for i in range(num_nodes):
  28.                 self.authorities[i] = sum(prev_hubs[j] for j in range(num_nodes) if self.adjacency_matrix[j][i] == 1)
  29.  
  30.             # Update hubs
  31.             for i in range(num_nodes):
  32.                 self.hubs[i] = sum(prev_authorities[j] for j in range(num_nodes) if self.adjacency_matrix[i][j] == 1)
  33.  
  34.             # Normalize vectors
  35.             self.authorities = self.normalize_vector(self.authorities)
  36.             self.hubs = self.normalize_vector(self.hubs)
  37.  
  38.             # Check convergence
  39.             authority_diff = sum(abs(prev_authorities[i] - self.authorities[i]) for i in range(num_nodes))
  40.             hubs_diff = sum(abs(prev_hubs[i] - self.hubs[i]) for i in range(num_nodes))
  41.  
  42.             if authority_diff < self.tolerance and hubs_diff < self.tolerance:
  43.                 break
  44.  
  45.     def plot_pie_chart(self):
  46.         labels = ['Node {}'.format(i+1) for i in range(len(self.authorities))]
  47.         plt.pie(self.authorities, labels=labels, autopct='%1.1f%%')
  48.         plt.title('Authorities Scores')
  49.         plt.axis('equal')
  50.         plt.show()
  51.  
  52.     def plot_bar_graph(self):
  53.         x = range(1, len(self.hubs) + 1)
  54.         plt.bar(x, self.hubs)
  55.         plt.xlabel('Node')
  56.         plt.ylabel('Hub Score')
  57.         plt.title('Hub Scores')
  58.         plt.show()
  59.  
  60. # Example usage
  61. adjacency_matrix = [
  62.     [0, 1, 1, 0],
  63.     [0, 0, 1, 1],
  64.     [1, 0, 0, 1],
  65.     [0, 1, 0, 0],
  66. ]
  67.  
  68. hits = HITS(adjacency_matrix)
  69. hits.run()
  70.  
  71. hits.plot_pie_chart()
  72. hits.plot_bar_graph()
  73.  
  74.  
  75.  
  76.  
  77.  
  78.  
  79. Apriori
  80.  
  81.  
  82. from itertools import combinations
  83. import matplotlib.pyplot as plt
  84.  
  85. class Apriori:
  86.     def __init__(self, transactions, min_support):
  87.          self.transacti
  88.         self.min_support = min_support
  89.         self.itemsets = []
  90.  
  91.     def find_frequent_itemsets(self):
  92.         # Count individual items
  93.         item_counts = {}
  94.         for transaction in self.transactions:
  95.             for item in transaction:
  96.                 if item in item_counts:
  97.                     item_counts[item] += 1
  98.                 else:
  99.                     item_counts[item] = 1
  100.  
  101.         # Find frequent 1-itemsets
  102.         frequent_1_itemsets = []
  103.          total_transacti
  104.         for item, count in item_counts.items():
  105.             support = count / total_transactions
  106.             if support >= self.min_support:
  107.                 frequent_1_itemsets.append([item])
  108.  
  109.         self.itemsets.append(frequent_1_itemsets)
  110.  
  111.         k = 2
  112.         while True:
  113.             candidate_itemsets = self.generate_candidates(self.itemsets[k-2], k)
  114.             frequent_itemsets = self.prune(candidate_itemsets)
  115.             if frequent_itemsets:
  116.                 self.itemsets.append(frequent_itemsets)
  117.                 k += 1
  118.             else:
  119.                 break
  120.  
  121.     def generate_candidates(self, itemsets, k):
  122.         candidates = []
  123.         for i in range(len(itemsets)):
  124.             for j in range(i + 1, len(itemsets)):
  125.                 # Join step
  126.                 itemset1 = sorted(itemsets[i])
  127.                 itemset2 = sorted(itemsets[j])
  128.                 if itemset1[:k-2] == itemset2[:k-2]:
  129.                     candidates.append(sorted(set(itemset1) | set(itemset2)))
  130.         return candidates
  131.  
  132.     def prune(self, candidate_itemsets):
  133.         frequent_itemsets = []
  134.         total_transactions = len(self.transactions)
  135.         for itemset in candidate_itemsets:
  136.             count = 0
  137.             for transaction in self.transactions:
  138.                 if set(itemset).issubset(set(transaction)):
  139.                     count += 1
  140.             support = count / total_transactions
  141.             if support >= self.min_support:
  142.                 frequent_itemsets.append(itemset)
  143.         return frequent_itemsets
  144.  
  145.     def plot_histogram(self):
  146.         itemset_sizes = [len(itemset) for itemset_list in self.itemsets for itemset in itemset_list]
  147.         plt.hist(itemset_sizes, bins=range(1, max(itemset_sizes) + 2), align='left', rwidth=0.8)
  148.         plt.xlabel('Itemset Size')
  149.         plt.ylabel('Frequency')
  150.         plt.title('Frequent Itemset Sizes')
  151.         plt.show()
  152.  
  153.     def plot_bar_graph(self):
  154.         itemset_counts = {}
  155.         for itemset_list in self.itemsets:
  156.             for itemset in itemset_list:
  157.                 size = len(itemset)
  158.                 if size in itemset_counts:
  159.                     itemset_counts[size] += 1
  160.                 else:
  161.                     itemset_counts[size] = 1
  162.  
  163.         sizes = sorted(itemset_counts.keys())
  164.         counts = [itemset_counts[size] for size in sizes]
  165.  
  166.         plt.bar(sizes, counts)
  167.         plt.xlabel('Itemset Size')
  168.         plt.ylabel('Frequency')
  169.         plt.title('Frequent Itemset Sizes')
  170.         plt.show()
  171.  
  172. # Example usage
  173. transactions = [
  174.     ['bread', 'milk', 'butter'],
  175.     ['bread', 'butter'],
  176.     ['milk', 'butter'],
  177.     ['bread', 'milk'],
  178.     ['bread', 'milk', 'butter', 'jam'],
  179. ]
  180.  
  181. min_support = 0.4
  182.  
  183. apriori = Apriori(transactions, min_support)
  184. apriori.find_frequent_itemsets()
  185.  
  186. apriori.plot_histogram()
  187. apriori.plot_bar_graph()
  188.  
  189.  
  190.  
PageRank
  192.  
  193.  
  194. import numpy as np
  195.  
  196. def pagerank(num_pages, damping_factor=0.85, epsilon=1.0e-6):
  197.     # Initialize transition probability matrix
  198.     transition_matrix = np.ones((num_pages, num_pages)) / num_pages
  199.    
  200.     # Initialize initial PageRank scores
  201.     pagerank_scores = np.ones(num_pages) / num_pages
  202.    
  203.     # Outlinks count
  204.     outlinks_count = np.zeros(num_pages)
  205.    
  206.     # Random surfer probability
  207.     random_surfer_prob = np.ones(num_pages) / num_pages
  208.  
  209.     # Example web graph (each page has a link to the next page)
  210.     for i in range(num_pages):
  211.         if i < num_pages - 1:
  212.             transition_matrix[i, i+1] = 1
  213.  
  214.     # Compute outlinks count
  215.     for i in range(num_pages):
  216.         outlinks_count[i] = np.sum(transition_matrix[:, i])
  217.  
  218.     # Iterate until convergence
  219.     while True:
  220.         new_pagerank_scores = np.zeros(num_pages)
  221.         for i in range(num_pages):
  222.             for j in range(num_pages):
  223.                 if transition_matrix[j, i] == 1:
  224.                     new_pagerank_scores[i] += pagerank_scores[j] / outlinks_count[j]
  225.  
  226.         # Damping factor
  227.         new_pagerank_scores = damping_factor * new_pagerank_scores + (1 - damping_factor) * random_surfer_prob
  228.  
  229.         # Check convergence
  230.         if np.sum(np.abs(new_pagerank_scores - pagerank_scores)) < epsilon:
  231.             break
  232.  
  233.         pagerank_scores = new_pagerank_scores
  234.  
  235.     return pagerank_scores
  236.  
  237. # Example usage
  238. num_pages = 5
  239. scores = pagerank(num_pages)
  240.  
  241. # Display the PageRank scores
  242. print("PageRank Scores:")
  243. for i, score in enumerate(scores):
  244.     print(f"Page {i+1}: {score:.4f}")
  245.