Introduction
Density-Based Spatial Clustering of Applications with Noise (or DBSCAN) is an algorithm used in cluster analysis which is described in Wikipedia.
The basic idea of cluster analysis is to partition a set of points into clusters which have some relationship to each other. In the case of DBSCAN the user chooses the minimum number of points required to form a cluster and the maximum distance between points in each cluster. Each point is then considered in turn, along with its neighbours, and allocated to a cluster.
I was recently asked if I could implement this algorithm in C# as there appears to be no other implementation which is freely available.
The source code is listed below in case it is of interest to anyone else involved in this specialized field.
Source Code
using System;
using System.Collections.Generic;
using System.Linq;
class Point
{
public const int NOISE = -1;
public const int UNCLASSIFIED = 0;
public int X, Y, ClusterId;
public Point(int x, int y)
{
this.X = x;
this.Y = y;
this.ClusterId = UNCLASSIFIED;
}
public override string ToString()
{
return String.Format("({0}, {1})", X, Y);
}
public static int DistanceSquared(Point p1, Point p2)
{
int diffX = p2.X - p1.X;
int diffY = p2.Y - p1.Y;
return diffX * diffX + diffY * diffY;
}
}
static class DBSCAN
{
static void Main()
{
List<Point> points = new List<Point>();
// sample data
points.Add(new Point(0, 100));
points.Add(new Point(0, 200));
// ... (add more points)
double eps = 100.0;
int minPts = 3;
List<List<Point>> clusters = GetClusters(points, eps, minPts);
Console.Clear();
// Print points to console
Console.WriteLine("The {0} points are:\n", points.Count);
foreach (Point p in points)
Console.Write(" {0} ", p);
Console.WriteLine();
// Print clusters to console
int total = 0;
for (int i = 0; i < clusters.Count; i++)
{
int count = clusters[i].Count;
total += count;
string plural = (count != 1) ? "s" : "";
Console.WriteLine("\nCluster {0} consists of the following {1} point{2}:\n", i + 1, count, plural);
foreach (Point p in clusters[i])
Console.Write(" {0} ", p);
Console.WriteLine();
}
// Print any points which are NOISE
total = points.Count - total;
if (total > 0)
{
string plural = (total != 1) ? "s" : "";
string verb = (total != 1) ? "are" : "is";
Console.WriteLine("\nThe following {0} point{1} {2} NOISE:\n", total, plural, verb);
foreach (Point p in points)
{
if (p.ClusterId == Point.NOISE)
Console.Write(" {0} ", p);
}
Console.WriteLine();
}
else
{
Console.WriteLine("\nNo points are NOISE");
}
Console.ReadKey();
}
static List<List<Point>> GetClusters(List<Point> points, double eps, int minPts)
{
if (points == null)
return null;
List<List<Point>> clusters = new List<List<Point>>();
eps *= eps; // square eps
int clusterId = 1;
for (int i = 0; i < points.Count; i++)
{
Point p = points[i];
if (p.ClusterId == Point.UNCLASSIFIED)
{
if (ExpandCluster(points, p, clusterId, eps, minPts))
clusterId++;
}
}
// sort out points into their clusters, if any
int maxClusterId = points.OrderByDescending(p => p.ClusterId).First().ClusterId;
if (maxClusterId < 1)
return clusters; // no clusters, so list is empty
for (int i = 0; i < maxClusterId; i++)
clusters.Add(new List<Point>());
foreach (Point p in points)
{
if (p.ClusterId > 0)
clusters[p.ClusterId - 1].Add(p);
}
return clusters;
}
static List<Point> GetRegion(List<Point> points, Point p, double eps)
{
List<Point> region = new List<Point>();
for (int i = 0; i < points.Count; i++)
{
int distSquared = Point.DistanceSquared(p, points[i]);
if (distSquared <= eps)
region.Add(points[i]);
}
return region;
}
static bool ExpandCluster(List<Point> points, Point p, int clusterId, double eps, int minPts)
{
List<Point> seeds = GetRegion(points, p, eps);
if (seeds.Count < minPts) // no core point
{
p.ClusterId = Point.NOISE;
return false;
}
else // all points in seeds are density reachable from point 'p'
{
foreach (Point seed in seeds)
seed.ClusterId = clusterId;
seeds.Remove(p);
while (seeds.Count > 0)
{
Point currentP = seeds[0];
List<Point> result = GetRegion(points, currentP, eps);
if (result.Count >= minPts)
{
foreach (Point resultP in result)
{
if (resultP.ClusterId == Point.UNCLASSIFIED || resultP.ClusterId == Point.NOISE)
{
if (resultP.ClusterId == Point.UNCLASSIFIED)
seeds.Add(resultP);
resultP.ClusterId = clusterId;
}
}
}
seeds.Remove(currentP);
}
return true;
}
}
}
Output