ルギア君の戯言

雑多な記事。

CMake で sed を実装してみた。誰得。

# Read all of standard input. CMake has no command that reads stdin, so the
# text is captured from an external `cat` process into RET.
execute_process(COMMAND cat
  OUTPUT_VARIABLE RET)

# Turn the captured text into a CMake list with one element per input line.
# The single trailing newline is dropped BEFORE semicolons are escaped:
# stripping a trailing ";" after the split would also eat the ";" of an
# escaped "\;" when the last line ends with a semicolon and has no final
# newline, leaving a stray backslash behind.
string(REGEX REPLACE "\n$" "" RET "${RET}")
# Escape literal semicolons so they are not taken as list separators.
string(REPLACE ";" "\\;" RET "${RET}")
string(REPLACE "\n" ";" RET "${RET}")

# Collect the command-line arguments of this script-mode run into ARGV,
# starting at CMAKE_ARGV1.
# foreach(RANGE) includes its stop value and the highest defined entry is
# CMAKE_ARGV<CMAKE_ARGC - 1>, so the loop must stop one short of CMAKE_ARGC;
# otherwise an undefined variable is read and an empty argument is appended.
math(EXPR LAST_ARG "${CMAKE_ARGC} - 1")
if(LAST_ARG GREATER 0)
  foreach(I RANGE 1 ${LAST_ARG})
    list(APPEND ARGV "${CMAKE_ARGV${I}}")
  endforeach()
endif()

# DEL (ASCII 127) is the marker value the `d` command stores in a line so
# that the output step can recognise the line as deleted.
string(ASCII 127 DEL)

# Scan the collected arguments for the two supported sed options:
#   -e SCRIPT / -eSCRIPT   append SCRIPT to COMMAND
#   -f PATH   / -fPATH     append PATH to FILE
# COMMAND_MODE / FILE_MODE record that the previous argument was a bare
# "-e" / "-f", meaning the current argument is its value. Every other
# argument is ignored.
foreach(ARG IN LISTS ARGV)
  if(COMMAND_MODE)
    set(COMMAND_MODE OFF)
    set(COMMAND ${COMMAND} "${ARG}")
  elseif(FILE_MODE)
    set(FILE_MODE OFF)
    set(FILE ${FILE} "${ARG}")
  elseif(ARG MATCHES "^-e")
    # Text glued to the option, if any, is the script itself.
    string(REGEX REPLACE "^-e(.*)$" "\\1" ARG_VALUE "${ARG}")
    string(LENGTH "${ARG_VALUE}" ARG_VALUE_LEN)
    if(ARG_VALUE_LEN GREATER 0)
      set(COMMAND ${COMMAND} "${ARG_VALUE}")
    else()
      set(COMMAND_MODE ON)
    endif()
  elseif(ARG MATCHES "^-f")
    # Text glued to the option, if any, is the script file path.
    string(REGEX REPLACE "^-f(.*)$" "\\1" ARG_VALUE "${ARG}")
    string(LENGTH "${ARG_VALUE}" ARG_VALUE_LEN)
    if(ARG_VALUE_LEN GREATER 0)
      set(FILE ${FILE} "${ARG_VALUE}")
    else()
      set(FILE_MODE ON)
    endif()
  endif()
endforeach()

# Load every script file collected in FILE and append its contents to
# COMMAND, with newlines turned into list separators so that each line of
# the file becomes its own command entry.
foreach(SCRIPT_PATH IN LISTS FILE)
  file(READ "${SCRIPT_PATH}" SCRIPT_TEXT)
  string(REPLACE "\n" ";" SCRIPT_LINES "${SCRIPT_TEXT}")
  set(COMMAND "${COMMAND};${SCRIPT_LINES}")
endforeach()

# Main loop: apply every entry of COMMAND to each input line in RET and
# print the result unless the line was deleted.
# Supported commands: s (substitute), h (copy line to hold space),
# g (replace line with hold space), d (delete line).

# Total number of input lines; a "$" address resolves to this line number.
list(LENGTH RET LAST_LINE)

# CNT is the 1-based number of the line being processed.
set(CNT 1)
foreach(RR IN LISTS RET)
  # RR is the line as read; RC is the working copy the commands modify.
  set(RC "${RR}")
  foreach(C IN LISTS COMMAND)

    # Reset the per-command address state. ADR_RE holds "\n" when there is
    # no /regex/ address.
    set(ADR_RE "\n")
    set(ADR_RA1 OFF)
    set(ADR_RA2 OFF)
    # C1/C2/C3 are switched ON when the command carries no /regex/ address,
    # no single-line address and no "N,M" range address respectively.
    set(C1 OFF)
    set(C2 OFF)
    set(C3 OFF)

    # CC is the command text from the first s/g/h/d character onwards and
    # CH is that character.
    # NOTE(review): the search is not address-aware, so one of these letters
    # inside a /regex/ address is taken as the command character.
    string(REGEX MATCH "[sghd].*$" CC "${C}")
    string(SUBSTRING "${CC}" 0 1 CH)

    # Each probe below returns C unchanged when its address form is absent.
    string(REGEX REPLACE "^/(.*)/ *[sghd].*$" "\\1" ADR_RE "${C}")
    if(ADR_RE STREQUAL C)
      set(C1 ON)
      set(ADR_RE "\n")
    endif()
    string(REGEX REPLACE "^([0-9$]*) *[sghd].*$" "\\1" ADR_RA1 "${C}")
    if(ADR_RA1 STREQUAL "" OR ADR_RA1 STREQUAL C)
      set(C2 ON)
    endif()
    string(REGEX REPLACE
      "^([0-9$]*),([0-9$]*) *[sghd].*$" "\\1;\\2" ADR_RA2 "${C}")
    if(ADR_RA2 STREQUAL C)
      set(C3 ON)
    endif()

    # Normalise to an inclusive range ADR_RA1..ADR_RA2.
    if(NOT C2)
      # Single-line address: the range starts and ends on the same line.
      set(ADR_RA2 "${ADR_RA1}")
    endif()
    if(NOT C3)
      # "N,M" address: split the "N;M" pair produced above.
      list(GET ADR_RA2 0 ADR_RA1)
      list(GET ADR_RA2 1 ADR_RA3)
      set(ADR_RA2 ${ADR_RA3})
    endif()
    if(C1 AND C2 AND C3)
      # No address at all: the command applies to every line.
      set(ADR_RA1 "0")
      set(ADR_RA2 "2147483647")
    endif()
    # "$" addresses the last input line. Using the real line count (rather
    # than 0 / a huge number) makes a lone "$" select only the last line
    # instead of every line.
    if(ADR_RA1 STREQUAL "$")
      set(ADR_RA1 "${LAST_LINE}")
    endif()
    if(ADR_RA2 STREQUAL "$")
      set(ADR_RA2 "${LAST_LINE}")
    endif()

    # From here on C1 means "this command applies to the current line".
    if(RR MATCHES "${ADR_RE}")
      set(C1 ON)
    elseif(CNT EQUAL ADR_RA1 OR CNT EQUAL ADR_RA2)
      set(C1 ON)
    elseif(CNT GREATER ADR_RA1 AND CNT LESS ADR_RA2)
      set(C1 ON)
    else()
      set(C1 OFF)
    endif()

    if(C1)
      if("${CH}" STREQUAL "s")
        # CCH is the delimiter character that follows "s". The command is
        # split into pattern (REGEXP), replacement (REGREP) and flags
        # (REGMOD).
        string(SUBSTRING "${CC}" 1 1 CCH)
        set(REGMOD "")
        string(REGEX REPLACE
          "^s${CCH}(.*)${CCH}(.*)${CCH}(.*)$" "\\1" REGEXP "${CC}")
        string(REGEX REPLACE
          "^s${CCH}(.*)${CCH}(.*)${CCH}(.*)$" "\\2" REGREP "${CC}")
        string(REGEX REPLACE
          "^s${CCH}(.*)${CCH}(.*)${CCH}(.*)$" "\\3" REGMOD "${CC}")
        if(REGMOD STREQUAL "g")
          # Global flag: replace every match.
          string(REGEX REPLACE "${REGEXP}" "${REGREP}" RC "${RC}")
        elseif(REGEXP MATCHES "^\\^$")
          # Pattern is exactly "^": prepend the replacement.
          set(RC "${REGREP}${RC}")
        elseif(REGEXP MATCHES "^\\\$$")
          # Pattern is exactly "$": append the replacement.
          set(RC "${RC}${REGREP}")
        elseif(REGEXP MATCHES "^\\^")
          # Pattern starts with "^": passed to REGEX REPLACE as is.
          string(REGEX REPLACE "${REGEXP}" "${REGREP}" RC "${RC}")
        elseif(REGEXP MATCHES "\\\$$")
          # Pattern ends with "$": passed to REGEX REPLACE as is.
          string(REGEX REPLACE "${REGEXP}" "${REGREP}" RC "${RC}")
        else()
          # No "g" flag and no anchor.
          # NOTE(review): this appears to emulate "replace the first match
          # only" by splitting the line around the first match and building
          # one anchored whole-line regex from the pieces - confirm.
          # BREGEXP: the pattern with unescaped parentheses removed.
          string(REGEX REPLACE "([^\\])\\(" "\\1" BREGEXP "${REGEXP}")
          string(REGEX REPLACE "^\\(" "" BREGEXP "${BREGEXP}")
          string(REGEX REPLACE "([^\\])\\)" "\\1" BREGEXP "${BREGEXP}")
          # RCP: the line with backslashes doubled and semicolons escaped.
          string(REGEX REPLACE "\\\\" "\\\\\\\\" RCP "${RC}")
          string(REPLACE ";" "\\;" RCP "${RCP}")
          # BREGEXP becomes the two-element list "<before>;<after>" around
          # the match.
          string(REGEX REPLACE "(\\\\?)${BREGEXP}(.*)$" "\\1\\1\\1;\\2"
            BREGEXP "${RCP}")
          # BBREGEXP: the same pair with regex metacharacters
          # backslash-escaped.
          string(REGEX REPLACE "([^\\])([][*+?|{()}^$])" "\\1\\\\\\2"
            BBREGEXP "${BREGEXP}")
          string(REGEX REPLACE "^([][*+?|{()}^$])" "\\\\\\1"
            BBREGEXP "${BBREGEXP}")
          list(GET BREGEXP 0 B1)
          list(GET BREGEXP 1 B2)
          list(GET BBREGEXP 0 BB1)
          list(GET BBREGEXP 1 BB2)
          # Drop a trailing doubled backslash from both "before" parts.
          string(REGEX REPLACE "\\\\\\\\$" "" BB1 "${BB1}")
          string(REGEX REPLACE "\\\\\\\\$" "" B1 "${B1}")
          set(BREG "${BB1}${REGEXP}${BB2}")
          set(BREP "${B1}${REGREP}${B2}")
          string(REGEX REPLACE "^${BREG}$" "${BREP}" RC "${RC}")
        endif()
      elseif("${CH}" STREQUAL "h")
        set(HOLD_SPACE "${RC}")
      elseif("${CH}" STREQUAL "g")
        set(RC "${HOLD_SPACE}")
      elseif("${CH}" STREQUAL "d")
        # Mark the line as deleted; it is skipped at output time.
        set(RC "${DEL}")
      endif()
    endif()
  endforeach()
  # Print through `cmake -E echo` because message() writes to stderr rather
  # than stdout.
  if(NOT RC STREQUAL "${DEL}")
    execute_process(COMMAND
      ${CMAKE_COMMAND} -E echo "${RC}")
  endif()
  math(EXPR CNT "${CNT} + 1")
endforeach()

フル仕様には全然至らず、対応するコマンドは s, h, g, d だけだけど、とりあえず自分の目的は果たせそうだからここで終わりかな。


って思ったけど正規表現の扱いが sed と全然違うので共通する動作を得るには正規表現に工夫が必要…。


ちなみに、CMake には標準入力から読み込む機能がないので、cat コマンドを叩いて読み込むという暴挙に出ています。


ものにもよりますが、本物の sed の 10 倍ぐらい遅いです。


【追記】 message(...) が stdout ではなく stderr に書き出していることが発覚したため cmake -E echo を実行するように変更。


【追記】 s/foo/bar/ (g なし!) にバグがあったので修正した。バックスラッシュトルネード!! (意味不明)