-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinance_sql_project.sql
More file actions
126 lines (88 loc) · 2.85 KB
/
finance_sql_project.sql
File metadata and controls
126 lines (88 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
CREATE DATABASE finance;
GO
USE finance;
GO
USE finance;
GO
SELECT COUNT(*) AS total_rows FROM cc_data;
SELECT COUNT(*) AS total_rows FROM location_data;
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE';
-- 1) Calculate the total number of transactions in the cc_data table
SELECT COUNT(*) AS total_transactions
FROM cc_data;
-- 2) Identify the top 10 most frequent merchants in the cc_data table
select * from cc_data
select top 10 merchant,
count(*) as transaction_count
from cc_data
group by merchant
order by transaction_count DESC;
-- 3) Find the average transaction amount for each category of transactions in the cc_data table
select category, ROUND(AVG(amt), 2) AS avg_transaction_amount
from cc_data
group by category
order by avg_transaction_amount DESC;
-- 4) Determine the number of fraudulent transactions and the percentage of total transactions that they represent
SELECT
SUM(CASE WHEN is_fraud = 1 THEN 1 ELSE 0 END) AS fraud_transactions,
COUNT(*) AS total_transactions,
ROUND(
100.0 * SUM(CASE WHEN is_fraud = 1 THEN 1 ELSE 0 END) / COUNT(*),
2
) AS fraud_percentage
FROM cc_data;
-- 5 ) Join the cc_data and location_data tables to identify the latitude and longitude of each transaction
-- Method 1
SELECT
cc.trans_date_trans_time,
cc.amt,
cc.cc_num,
ld.lat,
ld.long
FROM cc_data cc
LEFT JOIN location_data ld
ON CAST(cc.cc_num AS FLOAT) = ld.cc_num;
-- Method 2
SELECT
trans_date_trans_time,
amt,
lat,
long
FROM cc_data;
-- 6 ) Identify the city with the highest population in the location_data table
SELECT TOP 1
city,
city_pop
FROM cc_data
ORDER BY city_pop DESC;
-- 7 ) Find the earliest and latest transaction dates in the cc_data table
SELECT
MIN(trans_date_trans_time) AS earliest_transaction,
MAX(trans_date_trans_time) AS latest_transaction
FROM cc_data;
-- 8 ) What is the total amount spent across all transactions in the cc_data table?
SELECT
ROUND(SUM(amt), 2) AS total_amount_spent
FROM cc_data;
-- 9 ) How many transactions occurred in each category in the cc_data table?
SELECT
category,
COUNT(*) AS transaction_count
FROM cc_data
GROUP BY category
ORDER BY transaction_count DESC;
-- 10 ) What is the average transaction amount for each gender in the cc_data table?
SELECT
gender,
ROUND(AVG(amt), 2) AS avg_transaction_amount
FROM cc_data
GROUP BY gender;
-- 11 ) Which day of the week has the highest average transaction amount in the cc_data table?
SELECT TOP 1
DATENAME(WEEKDAY, trans_date_trans_time) AS day_of_week,
ROUND(AVG(amt), 2) AS avg_transaction_amount
FROM cc_data
GROUP BY DATENAME(WEEKDAY, trans_date_trans_time)
ORDER BY avg_transaction_amount DESC;