#!/usr/bin/env bash

# Abort as soon as any unguarded command exits with a non-zero status.
set -e

# Copyright (C) 2021 Tessa Nordgren
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# Extra ffmpeg arguments used when extracting the audio track: an aresample
# filter (soxr resampler, shibata dither, precision 28, mono channel layout)
# and a 16000 Hz output rate. Kept as a single string because the main loop
# expands it unquoted so that it word-splits into separate arguments.
RESAMPLING_OPTS="-af aresample=resampler=soxr:dither_method=shibata:precision=28:out_channel_layout=mono -ar 16000"
# Extra arguments for `gcloud ml speech recognize-long-running`. The encoding
# and sample rate mirror what RESAMPLING_OPTS and the .opus output produce.
# Also expanded unquoted on purpose.
ML_SPEECH_OPTS="--language-code=en --include-word-time-offsets --encoding=ogg-opus --sample-rate=16000"

#######################################
# Verify that the external tools this script relies on can be invoked.
# Arguments: none
# Outputs:   on the first missing tool, writes
#            "missing <tool>, please install!" to stderr
# Returns:   0 when every tool is found, 1 as soon as one is missing
#######################################
function check_deps() {
  local tool
  for tool in gcloud ffmpeg python3; do
    # `command -v` is a shell builtin, so the check itself needs no external
    # helper (unlike `which`) and also recognises functions and builtins.
    if ! command -v "${tool}" > /dev/null 2>&1; then
      echo "missing ${tool}, please install!" >&2
      return 1
    fi
  done
  return 0
}

#######################################
# Print the command-line synopsis and the per-option help.
# Arguments: none
# Outputs:   help text on stdout
#######################################
function usage() {
  # ${0##*/} strips the directory part without forking basename and without
  # word-splitting a script path that contains whitespace.
  echo "Usage: ${0##*/} [-h] [-n] video [video ...]"
  echo -e "\tvideo\tvideo file(s) to convert audio to subtitles."
  echo -e "\t-h\tshows this usage message."
  echo -e "\t-n\tdon't include audio in output."
}

# 1 keeps the audio stream in the output; argparse switches it to 0 for -n.
INCLUDE_AUDIO=1

#######################################
# Parse the command line: handle option flags and collect the video files.
# Globals:   INCLUDE_AUDIO (written and exported as 0 when -n is given)
#            ARGS (written; array of the positional arguments left after
#            the options)
# Arguments: the script's own arguments ("$@")
# Outputs:   error messages on stderr; usage text on stdout
# Returns:   1 when no video file remains after option parsing
# Exits:     0 after -h, 2 on an unknown option, 1 when an option is missing
#            its argument (no current option takes one)
#######################################
function argparse() {
  local optstring=":hn"
  local arg
  # Keep the getopts cursor local so every call starts at the first argument.
  local OPTIND=1
  local OPTARG
  while getopts "${optstring}" arg; do
    case "${arg}" in
      h)
        usage
        exit 0
        ;;
      n)
        export INCLUDE_AUDIO=0
        ;;
      :)
        echo -e "$0: must supply argument to -$OPTARG.\n" >&2
        exit 1
        ;;
      \?)
        # Escaped so it matches the literal '?' getopts reports for an
        # unknown option instead of acting as a one-character glob.
        echo -e "$0: invalid option: -${OPTARG}\n" >&2
        usage
        exit 2
        ;;
    esac
  done
  shift $((OPTIND - 1))
  # Checked after the options are consumed so that an invocation with only
  # flags (e.g. just -n) is rejected instead of silently doing nothing.
  if [[ ${#} -eq 0 ]]; then
    echo -e "$0: must supply one or more video files to process\n" >&2
    usage
    return 1
  fi
  # Plain assignment: bash cannot export array variables, so `export` here
  # would have no effect on child processes.
  ARGS=("$@")
}

#######################################
# Print a heading for the file about to be processed, followed by an
# underline of the same width: dashes under the fixed text and '=' under
# the file name.
# Arguments: $1 - name shown in the heading
# Outputs:   two lines on stdout
#######################################
function title() {
  local underline
  # Pad an empty string to the length of the name, then turn the padding
  # into '='. This yields exactly ${#1} characters (none for an empty name)
  # and avoids forking seq.
  printf -v underline '%*s' "${#1}" ''
  underline="${underline// /=}"
  echo "Processing '$1'..."
  # 12 dashes sit under "Processing '" and 4 under "'...".
  echo "------------${underline}----"
}

# Main flow: verify the tools, parse the command line, then process each
# video. Intermediate artifacts are cached under ./tmp and a step is skipped
# when its artifact already exists.
check_deps
argparse "$@"
mkdir -p tmp
for FILE in "${ARGS[@]}"; do
  title "${FILE}"
  # Name artifacts after the input file without its directory and extension.
  BASENAME="$(basename "${FILE%.*}")"
  AUDIO="tmp/${BASENAME}.opus"
  YML="tmp/${BASENAME}.yml"
  SRT="tmp/${BASENAME}.srt"
  OUT="${BASENAME}_with_subs.mkv"

  if [[ ! -f "${AUDIO}" ]]; then
    echo "extracting audio..."
    # Drop a partially written file on failure; otherwise the -f check above
    # would treat it as a valid cached artifact on the next run.
    # shellcheck disable=SC2086 # RESAMPLING_OPTS is word-split on purpose
    ffmpeg -loglevel error -y -i "${FILE}" ${RESAMPLING_OPTS} "${AUDIO}" || {
      STATUS=$?
      rm -f -- "${AUDIO}"
      exit "${STATUS}"
    }
  fi
  if [[ ! -f "${YML}" ]]; then
    echo "converting audio to text..."
    # The redirection creates its target before gcloud even starts, so write
    # to a scratch file and rename only on success; a failed or interrupted
    # run then never leaves an empty or truncated ${YML} behind.
    # shellcheck disable=SC2086 # ML_SPEECH_OPTS is word-split on purpose
    gcloud -q --format yaml ml speech recognize-long-running "${AUDIO}" ${ML_SPEECH_OPTS} > "${YML}.partial" || {
      STATUS=$?
      rm -f -- "${YML}.partial"
      exit "${STATUS}"
    }
    mv -- "${YML}.partial" "${YML}"
  fi
  if [[ ! -f "${SRT}" ]]; then
    echo "converting google yaml data to subtitle data..."
    # NOTE(review): presumably ml2srt.py writes ${SRT} next to the YAML file;
    # confirm against that script. The ./ path means this must be run from
    # the directory that contains ml2srt.py.
    ./ml2srt.py "${YML}"
  fi

  # Reset for every file so that nothing inherited from the environment (or
  # from a previous iteration) ends up on the ffmpeg command line.
  AUDIO_OPTS=()
  if [[ "${INCLUDE_AUDIO}" == 0 ]]; then
    AUDIO_OPTS=(-an)
  fi

  ffmpeg -loglevel error -y -i "${FILE}" -i "${SRT}" "${AUDIO_OPTS[@]}" -c copy "${OUT}"
  echo -e "Finished, result is in: '${OUT}'\n"
done