# Example: mining frequent itemsets and association rules from a retail dataset with Apriori (mlxtend).
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
# Read the raw transaction table from disk.
df = pd.read_csv("retail_dataset.csv")
# Debug aid: print(df.head())

# Item vocabulary: the distinct values of the column labelled "0".
# NOTE(review): only column "0" is inspected, so an item that never occurs
# in that column will not get an encoded column later -- confirm that this
# is intended for the dataset in use.
items = df["0"].unique()
# Debug aid: print(items)
# Data preprocessing: one-hot encode the transactions.
# Each row of ``df`` is treated as one basket; for every known item we record
# whether it occurs anywhere in that row. The values are booleans (which
# still compare equal to the previous 0/1 encoding) because mlxtend's
# ``apriori`` is designed for a boolean DataFrame.

# NaN is a missing cell, not a product (presumably padding for short
# baskets -- TODO confirm), so it must never become an item column.
item_names = [item for item in items if pd.notna(item)]

encoded_vals = []
for basket in df.itertuples(index=False, name=None):
    # Build the set once per row so each membership test below is O(1).
    purchased = set(basket)
    encoded_vals.append({item: item in purchased for item in item_names})
# Debug aid: print(encoded_vals[100])

# One column per item, one row per transaction. Passing ``columns`` fixes
# the column order to the order of ``items`` (instead of set-iteration
# order), and ``astype(bool)`` guarantees the dtype even for an empty table.
ohe_df = pd.DataFrame(encoded_vals, columns=item_names).astype(bool)
# Debug aid: print(ohe_df)
# Frequent-itemset mining with Apriori on the one-hot encoded table.
# Itemsets are reported with their column names rather than column indices.
freq_items = apriori(
    ohe_df,
    min_support=0.2,
    use_colnames=True,
    verbose=1,
)
print(freq_items)

# Association-rule generation from the frequent itemsets, filtered on the
# "confidence" metric with a threshold of 0.6.
rules = association_rules(
    freq_items,
    metric="confidence",
    min_threshold=0.6,
)
print(rules)
# Visualization: one semi-transparent point per rule,
# x = rule support, y = rule confidence.
rule_support = rules['support']
rule_confidence = rules['confidence']
plt.scatter(rule_support, rule_confidence, alpha=0.5)
plt.xlabel('Support')
plt.ylabel('Confidence')
plt.title('Support vs Confidence')
plt.show()
# Optional export of the rule table (left disabled):
# rules.to_csv("association_analysis.csv")