voice2subs/voice2subs.sh

108 lines
2.9 KiB
Bash
Executable File

#!/usr/bin/env bash
# voice2subs.sh - generate subtitles for video files from their audio track.
#
# For each video given on the command line the script:
#   1. extracts the audio with ffmpeg into tmp/<name>.opus,
#   2. transcribes it with `gcloud ml speech recognize-long-running`
#      into tmp/<name>.yml,
#   3. runs ./ml2srt.py on that YAML (expected to produce tmp/<name>.srt),
#   4. muxes the video and the .srt into <video>_with_subs.mkv.
# Intermediate files in tmp/ are reused on later runs if they already exist.
#
# Abort the whole script as soon as a command fails.
set -e
# Copyright (C) 2021 Tessa Nordgren
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# Extra ffmpeg arguments for the audio-extraction step: an aresample filter
# using the soxr resampler (shibata dither, precision 28, mono channel layout)
# and a 16000 Hz output sample rate.
# Stored as one string and expanded unquoted where it is used, so the shell
# splits it into separate arguments.
RESAMPLING_OPTS="-af aresample=resampler=soxr:dither_method=shibata:precision=28:out_channel_layout=mono -ar 16000"
# Extra arguments for `gcloud ml speech recognize-long-running`: language code
# "en", word time offsets included in the result, and the input declared as
# ogg-opus at 16000 Hz (the same rate requested from ffmpeg above).
# Also stored as one string and expanded unquoted where it is used.
ML_SPEECH_OPTS="--language-code=en --include-word-time-offsets --encoding=ogg-opus --sample-rate=16000"
#######################################
# Verify that every external tool the script relies on can be found.
# Arguments: none
# Outputs:   "missing <tool>, please install!" on stderr for the first
#            tool that is not available
# Returns:   0 if all tools are available, 1 otherwise
#######################################
function check_deps() {
  local tool
  for tool in gcloud ffmpeg python3; do
    # `command -v` is a shell builtin, so the check itself does not depend on
    # an external `which` being installed or on its output format.
    if ! command -v "${tool}" >/dev/null 2>&1; then
      echo "missing ${tool}, please install!" >&2
      return 1
    fi
  done
  return 0
}
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   four lines on stdout: the synopsis, then one tab-separated
#            line each for the video argument, -h and -n
#######################################
function usage() {
  # ${0##*/} strips the directory part without word-splitting, so a script
  # path containing whitespace is handled correctly.
  printf 'Usage: %s [-h] [-n] video [video ...]\n' "${0##*/}"
  printf '\tvideo\tvideo file(s) to convert audio to subtitles.\n'
  printf '\t-h\tshows this usage message.\n'
  printf "\t-n\tdon't include audio in output.\n"
}
# Whether the audio track is kept in the output: 1 = keep (default),
# 0 = drop (set by the -n option).
INCLUDE_AUDIO=1

#######################################
# Parse the command line.
# Globals:
#   INCLUDE_AUDIO (written, exported) - set to 0 when -n is given
#   ARGS (written) - array holding the remaining positional arguments,
#                    i.e. the video files to process
# Arguments: the script's command-line arguments ("$@")
# Outputs:   error messages on stderr; usage text on stdout
# Returns:   0 on success, 1 when no video file was supplied.
#            Exits the shell with 0 after -h, 1 on a missing option
#            argument and 2 on an invalid option.
#######################################
function argparse() {
  # Keep the getopts state private so parsing always starts at the first
  # argument, no matter what ran getopts before.
  local OPTIND=1 OPTARG arg
  local optstring=":hn"

  while getopts "${optstring}" arg; do
    case "${arg}" in
      h)
        usage
        exit 0
        ;;
      n)
        export INCLUDE_AUDIO=0
        ;;
      :)
        printf '%s: must supply argument to -%s.\n\n' "$0" "${OPTARG}" >&2
        exit 1
        ;;
      \?)
        printf '%s: invalid option: -%s\n\n' "$0" "${OPTARG}" >&2
        usage
        exit 2
        ;;
    esac
  done
  shift $((OPTIND - 1))

  # Checked after the options have been consumed, so that an invocation with
  # only options (e.g. just `-n`) is rejected instead of silently doing nothing.
  if [[ ${#} -eq 0 ]]; then
    printf '%s: must supply one or more video files to process\n\n' "$0" >&2
    usage
    return 1
  fi

  # Plain assignment: bash cannot export array variables, so `export` would
  # have no effect here.
  ARGS=( "$@" )
}
#######################################
# Print a two-line heading announcing the file being processed.
# Arguments: $1 - name of the file
# Outputs:   "Processing '<name>'..." followed by an underline of the same
#            length: '-' under the fixed text and '=' under the file name
#######################################
function title() {
  local underline
  # Build a run of ${#1} spaces and turn each into '='. This gives exactly one
  # '=' per character of the name, so both lines have the same length, and it
  # needs no external `seq`.
  printf -v underline '%*s' "${#1}" ''
  underline="${underline// /=}"
  printf '%s\n' "Processing '$1'..."
  # 12 dashes sit under "Processing '" and 4 under "'...".
  printf '%s\n' "------------${underline}----"
}
# ---- main ----
# Both calls return non-zero on failure, which ends the script under `set -e`.
check_deps
argparse "$@"

# Intermediate files (audio, transcript, subtitles) are cached here, relative
# to the current working directory.
mkdir -p tmp

# -an is passed to the final ffmpeg call when the audio track is not wanted.
# Decided once up front: INCLUDE_AUDIO does not change while files are
# processed. An array expands to nothing when empty and needs no unquoted
# expansion.
AUDIO_OPTS=()
if [[ "${INCLUDE_AUDIO}" == 0 ]]; then
  AUDIO_OPTS=(-an)
fi

for FILE in "${ARGS[@]}"; do
  title "${FILE}"

  # Cache files are keyed on the input's base name without its extension.
  BASENAME="$(basename "${FILE%.*}")"
  AUDIO="tmp/${BASENAME}.opus"
  YML="tmp/${BASENAME}.yml"
  SRT="tmp/${BASENAME}.srt"
  OUT="${FILE%.*}_with_subs.mkv"

  if [[ ! -f "${AUDIO}" ]]; then
    echo "extracting audio..."
    # RESAMPLING_OPTS is deliberately unquoted: it is a string of several
    # ffmpeg arguments that must be word-split.
    # shellcheck disable=SC2086
    ffmpeg -loglevel error -y -i "${FILE}" ${RESAMPLING_OPTS} "${AUDIO}" || {
      STATUS=$?
      # Remove a partially written file so the next run does not mistake it
      # for a valid cached result.
      rm -f -- "${AUDIO}"
      exit "${STATUS}"
    }
  fi

  if [[ ! -f "${YML}" ]]; then
    echo "converting audio to text..."
    # ML_SPEECH_OPTS is deliberately unquoted for the same reason as above.
    # shellcheck disable=SC2086
    gcloud -q --format yaml ml speech recognize-long-running "${AUDIO}" ${ML_SPEECH_OPTS} > "${YML}" || {
      STATUS=$?
      # The redirection creates the file before gcloud even runs; drop it on
      # failure so an empty or truncated transcript is not reused later.
      rm -f -- "${YML}"
      exit "${STATUS}"
    }
  fi

  if [[ ! -f "${SRT}" ]]; then
    echo "converting google yaml data to subtitle data..."
    # NOTE(review): the helper is looked up in the current directory, and it is
    # presumably what writes ${SRT} next to the YAML file - confirm in ml2srt.py.
    ./ml2srt.py "${YML}"
  fi

  # Mux the original streams and the subtitle file into one Matroska file
  # without re-encoding (-c copy).
  ffmpeg -loglevel error -y -i "${FILE}" -i "${SRT}" "${AUDIO_OPTS[@]}" -c copy "${OUT}"
  printf "Finished, result is in: '%s'\n\n" "${OUT}"
done