Spaces:

smu-ai
/

global-incidents

Build error

App Files Files Community

global-incidents / IS424_Data_Mining /code /GIS_DataMining_G1 /Datamining_qf.Rmd

dh-mc

WIP

23c2526 4 months ago

raw

history blame contribute delete

6.5 kB

	```{r}
	# Install the spatial packages only when they are not already available, so
	# re-running or knitting this document does not reinstall them every time.
	spatial_pkgs <- c("sf", "raster")
	is_installed <- vapply(spatial_pkgs, requireNamespace, logical(1), quietly = TRUE)
	if (any(!is_installed)) {
	  install.packages(spatial_pkgs[!is_installed])
	}

	# The packages are attached with library() in a later chunk.
	```

	```{r}
	# Install dplyr only when it is missing, so repeated runs or knits of this
	# document do not reinstall it.
	if (!requireNamespace("dplyr", quietly = TRUE)) {
	  install.packages("dplyr")
	}
	```

	```{r}
	# Load necessary libraries
	library(sf)
	library(ggplot2)
	library(rnaturalearth)
	library(rnaturalearthdata)
	# Load the incident table from the CSV file in the working directory; the
	# first row is treated as column names (read.csv's default, spelled out here).
	data <- read.csv(file = "GIS_Purpose.csv", header = TRUE)
	```

	```{r}
	# Keep only the rows that have no missing value in any column.
	data_clean <- na.omit(data)

	# Build an sf point layer from the cleaned table: "lon"/"lat" provide the
	# coordinates and 4326 is declared as the coordinate reference system.
	data_sf <- st_as_sf(
	  data_clean,
	  coords = c("lon", "lat"),
	  crs = 4326
	)

	# Country polygons used as the basemap.
	world <- ne_countries(scale = "medium", returnclass = "sf")

	# Assemble the map step by step: basemap first, then the incident points
	# coloured by Severity, then theme, title and legend placement.
	my_plot <- ggplot(data = world) + geom_sf()
	my_plot <- my_plot +
	  geom_sf(data = data_sf, aes(color = Severity), size = 0.4)
	my_plot <- my_plot +
	  theme_minimal() +
	  labs(title = "Spatial Distribution of Incidents with World Map Basemap") +
	  theme(legend.position = "right")

	# Write the finished map to a PNG file.
	ggsave(
	  "my_spatial_plot.png",
	  plot = my_plot,
	  width = 10,
	  height = 8,
	  dpi = 300
	)
	```

	```{r}
	library(lubridate)
	library(ggplot2)
	library(forecast)

	# Build `ts_data`, a monthly time series of incident counts, from `data`.
	# Also (re)defines `data$Month`, `monthly_incidents`, `start_year`,
	# `start_month`, `end_year` and `end_month`.

	# Check for NA values and remove them
	data <- na.omit(data)

	# read.csv() returns Datetime as text, while floor_date() is documented to
	# operate on date-time objects, so parse the column first.
	# NOTE(review): as_datetime() expects ISO-8601-like strings
	# ("YYYY-MM-DD HH:MM:SS") - confirm against the format used in the CSV.
	if (!inherits(data$Datetime, c("Date", "POSIXt"))) {
	  data$Datetime <- as_datetime(as.character(data$Datetime))
	}
	if (all(is.na(data$Datetime))) {
	  stop("No usable 'Datetime' values to aggregate", call. = FALSE)
	}

	# Aggregate data by month; Month is the first day of the month as a Date
	data$Month <- as_date(floor_date(data$Datetime, "month"))
	monthly_incidents <- aggregate(Index ~ Month, data, length)

	# Make sure that there are no NA values
	monthly_incidents <- na.omit(monthly_incidents)

	# aggregate() only returns months that contain at least one incident, but
	# ts() treats its input as consecutive observations. Insert the missing
	# months with a count of zero so every value lands on the right month.
	first_month <- min(monthly_incidents$Month)
	last_month <- max(monthly_incidents$Month)
	all_months <- data.frame(Month = seq(first_month, last_month, by = "month"))
	monthly_incidents <- merge(
	  all_months,
	  monthly_incidents,
	  by = "Month",
	  all.x = TRUE
	)
	monthly_incidents$Index[is.na(monthly_incidents$Index)] <- 0L
	monthly_incidents <- monthly_incidents[order(monthly_incidents$Month), ]

	# Take year and month from the earliest/latest month itself. Computing
	# min(year) and min(month) independently would pair the earliest year with
	# the smallest month number seen in any year. Because first_month is a
	# minimum and last_month a maximum, the start can never be after the end.
	start_year <- year(first_month)
	start_month <- month(first_month)
	end_year <- year(last_month)
	end_month <- month(last_month)

	# Now create the time series object
	ts_data <- ts(
	  monthly_incidents$Index,
	  frequency = 12,
	  start = c(start_year, start_month)
	)

	```

	```{r}
	# Line plot of the monthly incident counts held in ts_data.
	plot(
	  ts_data,
	  col = "blue",
	  xlab = "Time",
	  ylab = "Number of Incidents",
	  main = "Monthly Incidents Time Series"
	)


	```

	```{r}
	# Classical decomposition of the monthly series; "additive" is decompose()'s
	# default type, written out explicitly here.
	decomposed_data <- decompose(x = ts_data, type = "additive")

	# Draw the decomposition result.
	plot(decomposed_data)

	```

	```{r}
	# Count the rows of `data` per Severity level; the count ends up in the
	# `Index` column of the result.
	incidents_by_severity <- aggregate(Index ~ Severity, data = data, FUN = length)

	# Bar chart of those counts, one bar per Severity level. geom_col() draws
	# bar heights straight from the y values, like geom_bar(stat = "identity").
	ggplot(
	  incidents_by_severity,
	  aes(x = Severity, y = Index, fill = Severity)
	) +
	  geom_col() +
	  labs(
	    title = "Frequency of Incidents by Severity",
	    x = "Severity",
	    y = "Frequency"
	  ) +
	  theme(axis.text.x = element_text(angle = 90, hjust = 1))
	```

	```{r}
	# Tabulate how many rows of `data` fall into each Severity level.
	severity_counts <- table(data$Severity)

	# Rank each level by its position in the ordered list Minor < Moderate <
	# Severe < Extreme. A level that is not in the list gets NA.
	severity_ranks <- as.numeric(
	  match(
	    names(severity_counts),
	    c("Minor", "Moderate", "Severe", "Extreme")
	  )
	)

	# Spearman rank correlation test between the severity ranks and the counts.
	cor.test(severity_ranks, severity_counts, method = "spearman")

	```

	```{r}
	# Count incidents per Category and keep the (up to) five most frequent ones.
	# `top_categories` is a named table: names are categories, values are counts.
	category_counts <- table(data$Category)
	sorted_counts <- sort(category_counts, decreasing = TRUE)

	# Cap the selection at the number of categories that actually exist; a fixed
	# [1:5] would add NA entries with NA names when there are fewer than five.
	top_categories <- sorted_counts[seq_len(min(5L, length(sorted_counts)))]
	```

	```{r}
	# Data-frame copy of the top-category table.
	top_categories_df <- as.data.frame(top_categories)

	# Keep only the rows of `data` whose Category is one of the top categories.
	top_category_names <- names(top_categories)
	top_data <- data[is.element(data$Category, top_category_names), ]

	```

	```{r}
	library(dplyr)
	library(ggplot2)
	library(maps)

	# Map the top incident categories on a world outline and save it as a PNG.
	# Reads `data` (columns Category, lon, lat) and `top_categories`.

	# Count incidents per category at each location, keeping only the top
	# categories. count() stores the number of rows in column `n`.
	top_data <- data %>%
	  count(Category, lon, lat) %>%
	  filter(Category %in% names(top_categories))

	# Get world map data
	world_map <- map_data("world")

	# Create the plot. It is stored as `top_category_map` rather than `plot`, so
	# the name of base R's plot() function is not reused for a ggplot object.
	top_category_map <- ggplot(data = world_map, aes(x = long, y = lat)) +
	  geom_polygon(aes(group = group), fill = "gray80", color = "white") +
	  geom_point(
	    data = top_data,
	    aes(x = lon, y = lat, color = Category, size = n),
	    alpha = 0.7
	  ) +
	  scale_size(range = c(4, 16)) + # Adjust the size range as needed
	  scale_color_brewer(palette = "Dark2") +
	  labs(
	    title = "Top 5 Categories of Incidents on World Map",
	    subtitle = "Size of point represents frequency of incidents",
	    size = "Number of Incidents"
	  ) +
	  theme_minimal() +
	  theme(legend.position = "bottom")

	# Save the plot
	ggsave(
	  "incident_map.png",
	  plot = top_category_map,
	  width = 20,
	  height = 10,
	  dpi = 300
	)

	```

	```{r}
	library(dplyr)
	library(ggplot2)
	library(maps)
	library(scales) # For more refined control over point sizes

	# Refined version of the top-category map: point size is driven by the square
	# root of the incident count and the map uses coord_quickmap().
	# Reads `data` (columns Category, lon, lat) and `top_categories`.

	# Count incidents per category at each location, keep the top categories,
	# and add `size` = sqrt(n) so very frequent locations do not dominate.
	top_data <- data %>%
	  count(Category, lon, lat) %>%
	  filter(Category %in% names(top_categories)) %>%
	  mutate(size = sqrt(n))

	# Get world map data
	world_map <- map_data("world")

	# Create the plot with improved aesthetics
	incident_map <- ggplot(data = world_map, aes(x = long, y = lat)) +
	  # The fill colours the map polygons themselves, not the area around them.
	  geom_polygon(aes(group = group), fill = "lightblue", color = "white") +
	  geom_point(
	    data = top_data,
	    aes(x = lon, y = lat, color = Category, size = size),
	    alpha = 0.6
	  ) +
	  # The identity transformation is the default, so only the range is set;
	  # the `trans` argument is deprecated in recent ggplot2 releases.
	  scale_size_continuous(range = c(1, 12)) +
	  scale_color_brewer(palette = "Dark2", name = "Category") +
	  labs(
	    title = "Top 5 Categories of Incidents on World Map",
	    subtitle = "Size of point represents frequency of incidents",
	    size = "Frequency (sqrt scale)" # Legend title reflects the sqrt scaling
	  ) +
	  coord_quickmap() + # Quick approximation that keeps the map's aspect ratio
	  theme_minimal() +
	  theme(
	    legend.position = "bottom",
	    legend.key.size = unit(0.5, "cm") # Smaller legend keys
	  )

	# Save the plot using the new variable name
	ggsave(
	  "incident_map_refined.png",
	  plot = incident_map,
	  width = 12,
	  height = 8,
	  dpi = 300
	)

	```