top of page
Black Background

Data Science Portfolio

Data Analytics Project: Retail Sales Forecasting

As a data scientist, I utilized predictive modeling and statistical analysis to forecast retail sales, enabling the client to optimize inventory management and increase profitability. Click here to view details of the project.

Machine Learning Project: Fraud Detection System

In this project, I developed a machine learning-based fraud detection system that significantly reduced false positives, saving the client millions in potential losses. Click here to explore the technical aspects and outcomes.

Data Engineering Project: Real-time Data Processing

I spearheaded the design and implementation of a real-time data processing architecture, empowering the client to make informed decisions based on up-to-the-minute insights. Click here to delve into the project's infrastructure and impact. The SQL code is attached below.

-- Customers ranked by purchase frequency, most frequent buyers first.
-- COUNT(SalesID) counts only rows whose SalesID is non-NULL.
-- No row limit is applied, so every customer in sales_data is returned.
SELECT
    sd.CustomerID,
    COUNT(sd.SalesID) AS PurchaseCount
FROM sales_data AS sd
GROUP BY sd.CustomerID
ORDER BY PurchaseCount DESC;

 

 

-- Ten customers with the largest total spend.
-- Step 1 sums SaleAmount per customer; step 2 ranks and keeps the top 10.
WITH customer_spend AS (
    SELECT
        sd.CustomerID,
        SUM(sd.SaleAmount) AS TotalSpent
    FROM sales_data AS sd
    GROUP BY sd.CustomerID
)
SELECT
    CustomerID,
    TotalSpent
FROM customer_spend
ORDER BY TotalSpent DESC
LIMIT 10;

 

-- Total revenue per product category, highest-earning category first.
-- The inner join keeps only sales whose ProductID matches a product_data row.
SELECT
    prod.Category,
    SUM(sale.SaleAmount) AS TotalCategoryRevenue
FROM sales_data AS sale
INNER JOIN product_data AS prod
    ON prod.ProductID = sale.ProductID
GROUP BY prod.Category
ORDER BY TotalCategoryRevenue DESC;

 

-- Top products purchased in a specific month.
-- Units sold are derived per sale as SaleAmount / Price and summed per product.
SELECT
    P.ProductID,
    P.ProductName,
    -- CAST forces real division: SQLite's "/" truncates when both operands
    -- are integers. NULLIF makes the zero-price case explicit: such rows
    -- evaluate to NULL and are skipped by SUM.
    SUM(CAST(S.SaleAmount AS REAL) / NULLIF(P.Price, 0)) AS TotalUnitsSold
FROM sales_data AS S
INNER JOIN product_data AS P
    ON S.ProductID = P.ProductID
WHERE strftime('%Y-%m', S.SaleDate) = '2023-01'  -- Specify the month (e.g., January 2023)
GROUP BY P.ProductID, P.ProductName
ORDER BY
    TotalUnitsSold DESC,
    P.ProductID;  -- tie-breaker so equal totals come back in a stable order

 

-- Monthly sales trends for products: derived units sold per product per
-- 'YYYY-MM' month, newest month first.
SELECT
    P.ProductID,
    P.ProductName,
    strftime('%Y-%m', S.SaleDate) AS SaleMonth,
    -- CAST forces real division: SQLite's "/" truncates when both operands
    -- are integers. NULLIF makes the zero-price case explicit: such rows
    -- evaluate to NULL and are skipped by SUM.
    SUM(CAST(S.SaleAmount AS REAL) / NULLIF(P.Price, 0)) AS TotalUnitsSold
FROM sales_data AS S
INNER JOIN product_data AS P
    ON S.ProductID = P.ProductID
GROUP BY P.ProductID, P.ProductName, SaleMonth
ORDER BY
    SaleMonth DESC,
    -- Within a month, list best sellers first; ProductID breaks exact ties
    -- so the row order is reproducible between runs.
    TotalUnitsSold DESC,
    P.ProductID;

 

-- Best-selling products: the ten products with the most derived units sold.
-- Units sold are derived per sale as SaleAmount / Price and summed per product.
SELECT
    P.ProductID,
    P.ProductName,
    -- CAST forces real division: SQLite's "/" truncates when both operands
    -- are integers. NULLIF makes the zero-price case explicit: such rows
    -- evaluate to NULL and are skipped by SUM.
    SUM(CAST(S.SaleAmount AS REAL) / NULLIF(P.Price, 0)) AS TotalUnitsSold
FROM sales_data AS S
INNER JOIN product_data AS P
    ON S.ProductID = P.ProductID
GROUP BY P.ProductID, P.ProductName
ORDER BY
    TotalUnitsSold DESC,
    P.ProductID  -- tie-breaker: without it, ties at the cutoff are picked arbitrarily
LIMIT 10;

 

-- Ten products with the highest total revenue.
-- Revenue is the sum of SaleAmount over each product's matched sales rows.
SELECT
    prod.ProductID,
    prod.ProductName,
    SUM(sale.SaleAmount) AS TotalRevenue
FROM sales_data AS sale
INNER JOIN product_data AS prod
    ON prod.ProductID = sale.ProductID
GROUP BY
    prod.ProductID,
    prod.ProductName
ORDER BY TotalRevenue DESC
LIMIT 10;

 

-- Ten customers generating the most revenue (sum of SaleAmount per customer).
SELECT
    sale.CustomerID,
    SUM(sale.SaleAmount) AS TotalRevenue
FROM sales_data AS sale
GROUP BY sale.CustomerID
ORDER BY TotalRevenue DESC
LIMIT 10;

 

-- Revenue trend per product category: one row per (category, 'YYYY-MM' month),
-- ordered by month ascending and then by category.
SELECT
    prod.Category,
    strftime('%Y-%m', sale.SaleDate) AS SaleMonth,
    SUM(sale.SaleAmount) AS TotalRevenue
FROM sales_data AS sale
INNER JOIN product_data AS prod
    ON prod.ProductID = sale.ProductID
GROUP BY
    prod.Category,
    SaleMonth
ORDER BY
    SaleMonth ASC,
    prod.Category ASC;

 

-- Find slow-moving products: products whose derived units sold fall below a
-- threshold, including products that have no sales rows at all.
WITH product_units AS (
    -- One row per product. The LEFT JOIN keeps products without any sales;
    -- for those SUM(...) is NULL, which COALESCE reports as 0 units.
    SELECT
        P.ProductID,
        P.ProductName,
        P.StockQuantity,
        -- CAST forces real division: SQLite's "/" truncates when both
        -- operands are integers. NULLIF makes the zero-price case explicit:
        -- such rows evaluate to NULL and are skipped by SUM.
        COALESCE(
            SUM(CAST(S.SaleAmount AS REAL) / NULLIF(P.Price, 0)),
            0
        ) AS TotalUnitsSold
    FROM product_data AS P
    LEFT JOIN sales_data AS S
        ON P.ProductID = S.ProductID
    GROUP BY P.ProductID, P.ProductName, P.StockQuantity
)
SELECT
    ProductID,
    ProductName,
    StockQuantity,
    TotalUnitsSold
FROM product_units
WHERE TotalUnitsSold < 5  -- Define the threshold for "slow-moving"
ORDER BY
    TotalUnitsSold ASC,
    ProductID;  -- tie-breaker so equal totals come back in a stable order

 

-- Top selling months: revenue is totalled per 'YYYY-MM' month, then the
-- highest-grossing months are kept.
WITH monthly_revenue AS (
    SELECT
        strftime('%Y-%m', sale.SaleDate) AS SaleMonth,
        SUM(sale.SaleAmount) AS TotalRevenue
    FROM sales_data AS sale
    GROUP BY SaleMonth
)
SELECT
    SaleMonth,
    TotalRevenue
FROM monthly_revenue
ORDER BY TotalRevenue DESC
LIMIT 10;  -- Shows top 10 selling months

bottom of page