# Frequent items in transaction sets

# Greedy search for items that frequently occur together in transactions.
#
# Every item whose overall count is within ``top_n_to_consider`` of the most
# frequent item is used as a seed. Each seed is grown by repeatedly adding the
# item that most often accompanies the current itemset, until the itemset
# holds ``length_to_consider`` items. The distinct itemsets are then printed.

from collections import Counter

transactions = [
    {'milk', 'cereals', 'oranges', 'potatoes', 'celery', 'lettuce'},
    {'cheese', 'donut', 'croissant', 'bread', 'butter'},
    {'lime', 'cherries', 'onion', 'cabbage', 'olives', 'mushrooms', 'pork'},
    {'tomatoes', 'lettuce', 'carrots', 'cucumber', 'lemons', 'tangerines',
     'cookies', 'milk'},
    {'jam', 'toothpaste', 'toilet paper', 'bread', 'cheese', 'chicken'},
    {'beans', 'peas', 'ginger', 'tomatoes', 'cookies', 'banana juice',
     'orange juice'},
    {'croissant', 'bread', 'Russian salad', 'mackerel', 'lemons',
     'yellow cheese', 'apple juice'},
    {'cheese', 'mushrooms', 'tomatoes', 'cucumber', 'choco biscuits', 'rice',
     'lentils'},
    {'cauliflower', 'broccoli', 'garlic', 'onion', 'tomatoes', 'parsley',
     'celery'},
    {'potatoes', 'mushrooms', 'chicken', 'rice', 'olives', 'cookies',
     'orange juice'},
    {'watermelon', 'kiwi', 'mango', 'beans', 'lentils'},
    {'avocado', 'mango', 'cheese', 'chicken', 'jam', 'shaving blade'},
    {'avocado', 'lemons', 'ananas', 'cookies', 'milk', 'sugar', 'beans'},
    {'spaghetti', 'tomatoes', 'cheese', 'parsley', 'lemons', 'cucumber'},
    {'choco biscuits', 'orange juice', 'milk', 'potatoes', 'beans'},
    {'ananas', 'cucumber', 'oranges', 'parsley', 'tomatoes', 'chicken',
     'toothpaste', 'mushrooms', 'spaghetti'},
    {'jam', 'cookies', 'milk', 'onion', 'garlic', 'lentils'},
    {'cherries', 'banana juice', 'lemons', 'watermelon', 'potatoes',
     'ketchup'},
    {'choco biscuits', 'sugar', 'lettuce', 'avocado', 'spaghetti'},
    {'lemons', 'mineral water', 'olives', 'bread', 'mackerel', 'chicken'},
]


def reverse_sort_by_count(all_items_list):
    """Return ``(item, count)`` pairs ordered from most to least frequent.

    Items with equal counts keep the order in which they were first seen in
    *all_items_list*. An empty input yields an empty list.
    """
    # most_common() with no argument is a stable descending sort on the
    # count, i.e. the same ordering as sorting items() by count in reverse.
    return Counter(all_items_list).most_common()


def list_except(items):
    """Collect the companions of *items* from every matching transaction.

    For each transaction in the module-level ``transactions`` that contains
    all of *items* (a set), the transaction's remaining items are appended to
    the result. An item therefore appears once per transaction it shares with
    *items*, which makes the result suitable for counting co-occurrences.
    """
    this_common_items = []
    for trans in transactions:
        if items <= trans:  # items is a subset of this transaction
            this_common_items.extend(trans - items)
    return this_common_items


def grow_itemset(seed_item, target_length):
    """Greedily grow an itemset from *seed_item* up to *target_length* items.

    At each step the item that most often accompanies the current itemset is
    added. Growth stops early when no transaction contains the current
    itemset together with at least one further item, so the returned set may
    hold fewer than *target_length* items.
    """
    s = {seed_item}
    while len(s) < target_length:
        subset_sorted = reverse_sort_by_count(list_except(s))
        if not subset_sorted:
            # Nothing co-occurs with the current itemset; indexing [0] here
            # would raise IndexError, so keep the largest itemset found.
            break
        s.add(subset_sorted[0][0])
    return s


top_n_to_consider, length_to_consider = 3, 3

frequent_items = []
all_items_list = []
for itemset in transactions:
    all_items_list.extend(itemset)

sorted_by_counts = reverse_sort_by_count(all_items_list)
# An empty transaction list produces no counts; fall back to 0 so the seed
# loop below simply does nothing instead of failing on [0].
max_count = sorted_by_counts[0][1] if sorted_by_counts else 0
# Despite its name, this is the minimum count an item needs to be used as a
# seed: counts max_count, max_count - 1, ... down to this value qualify.
upper_bound_idx = max_count - top_n_to_consider + 1

for item, v in sorted_by_counts:
    if v < upper_bound_idx:
        # Counts are in descending order, so no later item can qualify.
        break
    s = grow_itemset(item, length_to_consider)
    if s not in frequent_items:
        frequent_items.append(s)

for freq_it in frequent_items:
    print(freq_it)

# Sample output from one run. Ties between equally frequent items are broken
# by set iteration order, so the exact itemsets, their order, and the order of
# elements within each printed set can differ between runs.
"""
{'tomatoes', 'cucumber', 'parsley'}
{'lemons', 'cookies', 'milk'}
{'mushrooms', 'chicken', 'rice'}
{'cheese', 'tomatoes', 'cucumber'}
{'lemons', 'cookies', 'beans'}
{'potatoes', 'choco biscuits', 'milk'}
{'bread', 'lemons', 'mackerel'}
{'mushrooms', 'tomatoes', 'cucumber'}
"""