Data Generation and Conversion
Data Generation and Conversion
Raw data often needs transformation before it's useful. This chapter covers SQL's built-in functions for manipulating strings, working with dates, and converting between data types.
String Functions
String Length
-- Character length
SELECT title, LENGTH(title) AS title_length
FROM books;| title | title_length |
|---|---|
| 1984 | 4 |
| Animal Farm | 11 |
| Pride and Prejudice | 19 |
| One Hundred Years of Solitude | 29 |
Note
PostgreSQL uses LENGTH() or CHAR_LENGTH(). MySQL uses both. SQL Server
uses LEN(). Always check your database's documentation.
Case Conversion
SELECT
title,
UPPER(title) AS uppercase,
LOWER(title) AS lowercase
FROM books;| title | uppercase | lowercase |
|---|---|---|
| Animal Farm | ANIMAL FARM | animal farm |
String Concatenation
-- MySQL, PostgreSQL
SELECT CONCAT(first_name, ' ', last_name) AS full_name
FROM customers;
-- PostgreSQL also supports ||
SELECT first_name || ' ' || last_name AS full_name
FROM customers;
-- SQL Server
SELECT first_name + ' ' + last_name AS full_name
FROM customers;| full_name |
|---|
| Alice Chen |
| Bob Martinez |
| Carol Taylor |
CONCAT_WS: Concatenate with Separator
-- Join multiple values with a separator
SELECT CONCAT_WS(', ', last_name, first_name) AS name_formatted
FROM authors;| name_formatted |
|---|
| Orwell, George |
| Austen, Jane |
| García Márquez, Gabriel |
Substring Extraction
-- SUBSTRING(string, start, length)
SELECT
title,
SUBSTRING(title, 1, 10) AS short_title
FROM books;| title | short_title |
|---|---|
| One Hundred Years of Solitude | One Hundre |
| Pride and Prejudice | Pride and |
-- LEFT and RIGHT extract from ends
SELECT
isbn,
LEFT(isbn, 3) AS prefix,
RIGHT(isbn, 4) AS suffix
FROM books;Finding and Replacing
-- POSITION / LOCATE: Find substring position
SELECT title, POSITION('and' IN title) AS and_position
FROM books
WHERE title LIKE '%and%';
-- REPLACE: Substitute text
SELECT
title,
REPLACE(title, 'and', '&') AS shortened
FROM books;| title | shortened |
|---|---|
| Pride and Prejudice | Pride & Prejudice |
Trimming Whitespace
-- Remove leading/trailing spaces
SELECT
TRIM(' hello ') AS trimmed,
LTRIM(' hello ') AS left_trimmed,
RTRIM(' hello ') AS right_trimmed;| trimmed | left_trimmed | right_trimmed |
|---|---|---|
| hello | hello | hello |
Padding Strings
-- Pad to fixed width
SELECT
customer_id,
LPAD(customer_id, 5, '0') AS padded_id
FROM customers;| customer_id | padded_id |
|---|---|
| 1 | 00001 |
| 2 | 00002 |
Numeric Functions
Rounding
SELECT
price,
ROUND(price, 1) AS rounded_1,
ROUND(price, 0) AS rounded_0,
FLOOR(price) AS floor_val,
CEILING(price) AS ceiling_val
FROM books;| price | rounded_1 | rounded_0 | floor_val | ceiling_val |
|---|---|---|---|---|
| 15.99 | 16.0 | 16 | 15 | 16 |
| 12.99 | 13.0 | 13 | 12 | 13 |
Absolute Value and Sign
SELECT
ABS(-15.5) AS absolute, -- 15.5
SIGN(-15) AS negative_sign, -- -1
SIGN(15) AS positive_sign, -- 1
SIGN(0) AS zero_sign; -- 0Mathematical Functions
SELECT
POWER(2, 10) AS two_to_ten, -- 1024
SQRT(16) AS square_root, -- 4
MOD(17, 5) AS remainder, -- 2 (17 % 5)
RANDOM() AS random_value; -- 0.0 to 1.0 (PostgreSQL)Date and Time Functions
Dates are critical in business applications. SQL provides extensive functions for date manipulation.
Current Date and Time
-- Current date/time (varies by database)
SELECT
CURRENT_DATE AS today,
CURRENT_TIME AS now_time,
CURRENT_TIMESTAMP AS now_datetime;
-- MySQL alternatives
SELECT
CURDATE() AS today,
NOW() AS now_datetime;Extracting Date Parts
SELECT
order_date,
EXTRACT(YEAR FROM order_date) AS year,
EXTRACT(MONTH FROM order_date) AS month,
EXTRACT(DAY FROM order_date) AS day,
EXTRACT(HOUR FROM order_date) AS hour
FROM orders;| order_date | year | month | day | hour |
|---|---|---|---|---|
| 2024-01-15 10:30:00 | 2024 | 1 | 15 | 10 |
MySQL also provides shorthand functions:
SELECT
YEAR(order_date) AS year,
MONTH(order_date) AS month,
DAY(order_date) AS day,
DAYNAME(order_date) AS day_name,
MONTHNAME(order_date) AS month_name
FROM orders;Date Arithmetic
-- Add/subtract intervals
SELECT
order_date,
order_date + INTERVAL 7 DAY AS plus_week,
order_date - INTERVAL 1 MONTH AS minus_month
FROM orders;
-- PostgreSQL syntax
SELECT order_date + INTERVAL '7 days' FROM orders;Difference Between Dates
-- MySQL: DATEDIFF returns days
SELECT
order_date,
DATEDIFF(CURRENT_DATE, order_date) AS days_ago
FROM orders;
-- PostgreSQL: subtract dates directly
SELECT
order_date,
CURRENT_DATE - order_date::date AS days_ago
FROM orders;Formatting Dates
-- MySQL: DATE_FORMAT
SELECT DATE_FORMAT(order_date, '%M %d, %Y') AS formatted
FROM orders;
-- Result: "January 15, 2024"
-- PostgreSQL: TO_CHAR
SELECT TO_CHAR(order_date, 'Month DD, YYYY') AS formatted
FROM orders;Common Date Format Codes (MySQL)
| Code | Meaning | Example |
|---|---|---|
| %Y | 4-digit year | 2024 |
| %y | 2-digit year | 24 |
| %M | Month name | January |
| %m | Month number | 01 |
| %d | Day of month | 15 |
| %H | Hour (24-hour) | 14 |
| %i | Minutes | 30 |
| %s | Seconds | 00 |
| %W | Weekday name | Monday |
Truncating Dates
-- Truncate to start of period (PostgreSQL)
SELECT
order_date,
DATE_TRUNC('month', order_date) AS month_start,
DATE_TRUNC('year', order_date) AS year_start
FROM orders;Type Conversion (Casting)
Convert values between data types using CAST or database-specific syntax.
CAST Function
SELECT
CAST(price AS INTEGER) AS price_int,
CAST(customer_id AS VARCHAR(10)) AS id_string,
CAST('2024-01-15' AS DATE) AS date_val
FROM books;Database-Specific Syntax
-- PostgreSQL double-colon syntax
SELECT
price::INTEGER,
customer_id::TEXT,
'2024-01-15'::DATE;
-- MySQL CONVERT
SELECT
CONVERT(price, SIGNED) AS price_int,
CONVERT(price, CHAR) AS price_string;Implicit vs. Explicit Conversion
SQL sometimes converts automatically:
-- Implicit: string to number for arithmetic
SELECT '10' + 5; -- Returns 15 in MySQL
-- Explicit: always clear about intent
SELECT CAST('10' AS INTEGER) + 5;Warning
Implicit conversions vary by database and can cause unexpected results. Be explicit about conversions in production code.
NULL Handling Functions
COALESCE: First Non-NULL Value
-- Return first non-NULL value
SELECT
first_name,
COALESCE(phone, email, 'No contact') AS contact_info
FROM customers;COALESCE is extremely useful for providing defaults:
SELECT
title,
COALESCE(stock_quantity, 0) AS stock
FROM books;NULLIF: Create NULL Conditionally
-- Return NULL if values are equal
SELECT NULLIF(stock_quantity, 0) AS stock
FROM books;
-- Returns NULL for books with 0 stockUseful to prevent division by zero:
SELECT
total_amount / NULLIF(item_count, 0) AS avg_item_price
FROM orders;
-- Returns NULL instead of error when item_count is 0IFNULL / NVL
-- MySQL: IFNULL
SELECT IFNULL(phone, 'N/A') AS phone FROM customers;
-- Oracle: NVL
SELECT NVL(phone, 'N/A') AS phone FROM customers;
-- Standard: COALESCE (works everywhere)
SELECT COALESCE(phone, 'N/A') AS phone FROM customers;Generating Data
Sequences and Series
-- Generate a series of numbers (PostgreSQL)
SELECT generate_series(1, 10) AS num;
-- Generate date series (PostgreSQL)
SELECT generate_series(
'2024-01-01'::DATE,
'2024-01-31'::DATE,
'1 day'::INTERVAL
) AS date;Creating Derived Values
-- Generate formatted IDs
SELECT
order_id,
CONCAT('ORD-', LPAD(order_id, 6, '0')) AS order_number
FROM orders;| order_id | order_number |
|---|---|
| 1 | ORD-000001 |
| 2 | ORD-000002 |
Practical Examples
Example 1: Formatting Names
SELECT
CONCAT(
UPPER(SUBSTRING(first_name, 1, 1)),
LOWER(SUBSTRING(first_name, 2))
) AS proper_first,
UPPER(last_name) AS upper_last
FROM customers;Example 2: Age Calculation
SELECT
first_name,
last_name,
birth_date,
EXTRACT(YEAR FROM AGE(birth_date)) AS age
FROM authors
WHERE birth_date IS NOT NULL;Example 3: Order Summary with Formatting
SELECT
CONCAT('Order #', LPAD(order_id, 5, '0')) AS order_number,
DATE_FORMAT(order_date, '%b %d, %Y') AS order_date,
CONCAT('$', FORMAT(total_amount, 2)) AS total,
UPPER(status) AS status
FROM orders;| order_number | order_date | total | status |
|---|---|---|---|
| Order #00001 | Jan 15, 2024 | $31.98 | COMPLETED |
Example 4: Search Normalization
-- Case-insensitive search with trimmed input
SELECT * FROM customers
WHERE LOWER(TRIM(email)) = LOWER(TRIM(' ALICE.CHEN@email.com '));What's Next
The next chapter introduces grouping and aggregate functions—summarizing data with COUNT, SUM, AVG, and GROUP BY.