문제
내가 작성한 정답
MySQL
: substring_index(column,구분자, 맨처음 구분자 후 앞부터 몇조각-숫자 마이너스면 뒤부터)
select business_postal_code, count(distinct street) num
from (select business_postal_code,
upper(case when substring_index(business_address,' ',1) regexp '^[0-9]'
then substring_index(substring(business_address,instr(business_address,' ')+1),' ',1)
else substring_index(business_address,' ',1) end) street
from sf_restaurant_health_violations) a
where business_postal_code is not null
group by business_postal_code
order by 1;PostgreSQL
: split_part(column,구분자,몇번째 조각)
select business_postal_code, count(distinct street) num
from (select business_postal_code,
lower(case when split_part(business_address,' ',1) ~'^[0-9]' then split_part(business_address,' ',2)
else split_part(business_address,' ',1) end) street
from sf_restaurant_health_violations) a
where business_postal_code is not null
group by business_postal_code
order by 1;Oracle
with a as(select business_postal_code, business_address
,instr(business_address,' ') f
,instr(business_address,' ',1,2) s
from sf_restaurant_health_violations),
b as (select business_postal_code
,lower(case
when regexp_like(business_address,'^[0-9]')
then
case
when s=0 then substr(business_address,f+1)
else substr(business_address,f+1,s-f-1) end
else substr(business_address,1,f-1) end) as street
from a)
select business_postal_code, count(distinct street) n_street
from b
where business_postal_code is not null
group by business_postal_code
order by 1페이지에서 알려주는 정답
SELECT business_postal_code,
COUNT (DISTINCT CASE
WHEN regexp_like(business_address, '^[0-9]')
THEN lower(regexp_substr(business_address, '[^ ]+', 1, 2))
ELSE lower(regexp_substr(business_address, '[^ ]+', 1, 1))
END) AS n_streets
FROM sf_restaurant_health_violations
WHERE business_postal_code IS NOT NULL
GROUP BY business_postal_code
ORDER BY n_streets DESC,
business_postal_code ASCREGEXP_LIKE(col, pattern [, match_param])- WHERE에서 패턴 매칭 (LIKE의 정규식 버전).
REGEXP_SUBSTR(col, pattern, ...)- 패턴에 매칭되는 부분 문자열 추출.
REGEXP_REPLACE(col, pattern, replacement, ...)- 매칭 부분을 다른 문자열로 치환.
REGEXP_INSTR(col, pattern, ...)- 매칭 위치 반환 (INSTR의 정규식 버전).
REGEXP_COUNT(col, pattern, ...)- 패턴이 몇 번 나오는지 개수.
자주 쓰는 메타문자
- 앵커:
^: 문자열 시작$: 문자열 끝
- 문자 클래스:
[0-9]: 숫자 하나[a-z]: 소문자 하나[^0-9]: 숫자가 아닌 문자
- 수량자:
- : 0회 이상
+: 1회 이상?: 0 또는 1회{m}: 정확히 m회{m,n}: m~n회
- 특수:
.: 아무 문자 1개 (줄바꿈/NULL 제외)\s: 공백 문자 (스페이스, 탭 등)\S: 공백이 아닌 문자(): 그룹, 캡처
패턴 예제
-- 숫자로 시작하는 문자열
WHERE REGEXP_LIKE(col, '^[0-9]')
-- 숫자로만 구성
WHERE REGEXP_LIKE(col, '^[0-9]+$')
-- 문자열에서 숫자만 남기기 (나머지 삭제)
SELECT REGEXP_REPLACE(col, '[^0-9]', '') AS only_digits
FROM your_table;
-- 첫 단어만 추출 (공백 기준)
SELECT REGEXP_SUBSTR(col, '^\S+') AS first_word
FROM your_table;Share article