#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

# Customer segmentation with K-Means.
#
# Pipeline: load a CSV, standardize the selected feature columns, fit K-Means,
# attach the cluster label to every row and report the silhouette score.

# Load your dataset
# Replace 'your_data.csv' with the path to your dataset file
data = pd.read_csv('your_data.csv')

# Select features relevant for customer segmentation
# Replace these column names with the ones relevant to your dataset
feature_columns = ['feature1', 'feature2', 'feature3']
features = data[feature_columns]

# Fail early on missing values: StandardScaler keeps NaNs in its output and
# KMeans then rejects them with an error that does not name the columns.
missing_per_column = features.isna().sum()
if missing_per_column.any():
    raise ValueError(
        'Feature columns contain missing values; impute or drop them first: '
        f'{missing_per_column[missing_per_column > 0].to_dict()}'
    )

# Choose the number of clusters (k) - This might require some experimentation
k = 5

# silhouette_score needs at least 2 clusters and fewer clusters than samples,
# so check this before spending time on fitting.
if not 2 <= k < len(features):
    raise ValueError(
        f'k={k} is not usable with {len(features)} rows: '
        'need 2 <= k <= number of rows - 1'
    )

# Data preprocessing: put every feature on zero mean / unit variance so that
# no single column dominates the Euclidean distances used by K-Means.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Build and train the model.
# n_init is pinned explicitly: its default changed between scikit-learn
# releases, which would otherwise make results (and warnings) version-dependent.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
kmeans.fit(features_scaled)

# Assign the clusters to each record in the dataset
data['Cluster'] = kmeans.labels_

# Evaluate the model using silhouette score
score = silhouette_score(features_scaled, kmeans.labels_)
print(f'Silhouette Score: {score}')

# Optional: Save the model for future use
# from joblib import dump
# dump(kmeans, 'kmeans_model.joblib')

# Optional: Export the segmented data to a new CSV file
# data.to_csv('segmented_customers.csv', index=False)


# In[ ]:




