#!/usr/bin/env bash
# Copyright (C) 2021 Tessa Nordgren
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see https://www.gnu.org/licenses/.
#
# For each video given on the command line this script:
#   1. extracts a mono 16 kHz Opus audio track with ffmpeg,
#   2. sends it to `gcloud ml speech recognize-long-running`,
#   3. runs ./ml2srt.py on the resulting YAML,
#   4. muxes the video and the .srt file into "<name>_with_subs.mkv".
# Intermediate files live in ./tmp and are reused on later runs.

set -euo pipefail

# ffmpeg options used when extracting the audio track.
RESAMPLING_OPTS=(
  -af "aresample=resampler=soxr:dither_method=shibata:precision=28:out_channel_layout=mono"
  -ar 16000
)
readonly RESAMPLING_OPTS

# Options passed to `gcloud ml speech recognize-long-running`.
ML_SPEECH_OPTS=(
  --language-code=en
  --include-word-time-offsets
  --encoding=ogg-opus
  --sample-rate=16000
)
readonly ML_SPEECH_OPTS

# 1 = keep the audio stream in the output, 0 = drop it (-n).
INCLUDE_AUDIO=1

# Video files left on the command line after option parsing.
ARGS=()

#######################################
# Print a message to stderr and abort.
# Arguments: message words
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Verify that the external tools this script invokes are on PATH.
# Outputs:  one line on stderr per missing tool
# Returns:  0 if all tools are present, 1 otherwise
#######################################
check_deps() {
  local tool
  local missing=0
  for tool in gcloud ffmpeg python3; do
    if ! command -v "${tool}" >/dev/null 2>&1; then
      echo "missing ${tool}, please install!" >&2
      missing=1
    fi
  done
  return "${missing}"
}

#######################################
# Print the usage message to stdout.
#######################################
usage() {
  printf 'Usage: %s [-h] [-n] video [video ...]\n' "${0##*/}"
  printf '\tvideo\tvideo file(s) to convert audio to subtitles.\n'
  printf '\t-h\tshows this usage message.\n'
  printf "\t-n\tdon't include audio in output.\n"
}

#######################################
# Parse command-line options and collect the video files.
# Globals:   INCLUDE_AUDIO (written), ARGS (written)
# Arguments: the script's command line ("$@")
# Returns:   1 if no video file was supplied; exits 0 on -h,
#            2 on an unknown option
#######################################
argparse() {
  local opt
  local OPTIND=1

  while getopts ':hn' opt; do
    case "${opt}" in
      h)
        usage
        exit 0
        ;;
      n)
        # NOTE(review): the export is kept from the original; nothing in this
        # script needs it exported -- confirm whether a child process reads it.
        export INCLUDE_AUDIO=0
        ;;
      :)
        # Not reachable with the current optstring (no option takes an
        # argument); kept so adding one later reports errors correctly.
        printf '%s: must supply argument to -%s.\n\n' "$0" "${OPTARG}" >&2
        exit 1
        ;;
      \?)
        printf '%s: invalid option: -%s\n\n' "$0" "${OPTARG}" >&2
        usage >&2
        exit 2
        ;;
    esac
  done
  shift $((OPTIND - 1))

  # Checked after option parsing so that e.g. `script -n` with no files is
  # rejected instead of silently doing nothing.
  if (($# == 0)); then
    printf '%s: must supply one or more video files to process\n\n' "$0" >&2
    usage >&2
    return 1
  fi

  ARGS=("$@")
}

#######################################
# Print a heading for the file being processed, with an underline of the
# same width ('=' under the file name, '-' under the fixed text).
# Arguments: $1 - file name to show
#######################################
title() {
  local name=$1
  local underline

  # Pad to the name's length with spaces, then turn the spaces into '='.
  printf -v underline '%*s' "${#name}" ''
  underline=${underline// /=}

  printf "Processing '%s'...\n" "${name}"
  printf '%s\n' "------------${underline}----"
}

#######################################
# Run the full extract / transcribe / convert / mux pipeline for one video.
# Globals:   RESAMPLING_OPTS, ML_SPEECH_OPTS, INCLUDE_AUDIO (read)
# Arguments: $1 - path to the video file
# Outputs:   progress messages on stdout; writes tmp/<name>.{opus,yml,srt}
#            and <name>_with_subs.mkv in the current directory
#######################################
process_video() {
  local file=$1
  local name stem audio yml srt out
  local -a mux_args

  title "${file}"

  # Take the basename first, then drop the extension, so a dot in a
  # directory name is never mistaken for the extension separator.
  name=$(basename -- "${file}")
  stem=${name%.*}
  [[ -n "${stem}" ]] || stem=${name}

  audio="tmp/${stem}.opus"
  yml="tmp/${stem}.yml"
  srt="tmp/${stem}.srt"
  out="${stem}_with_subs.mkv"

  # Each cached artifact is produced under a ".partial" name and renamed only
  # on success, so a failed or interrupted step is redone on the next run.
  if [[ ! -f "${audio}" ]]; then
    echo "extracting audio..."
    # ffmpeg picks the container from the extension, so keep ".opus" last.
    ffmpeg -loglevel error -y -i "${file}" "${RESAMPLING_OPTS[@]}" \
      "tmp/${stem}.partial.opus"
    mv -- "tmp/${stem}.partial.opus" "${audio}"
  fi

  if [[ ! -f "${yml}" ]]; then
    echo "converting audio to text..."
    gcloud -q --format yaml ml speech recognize-long-running "${audio}" \
      "${ML_SPEECH_OPTS[@]}" >"${yml}.partial"
    mv -- "${yml}.partial" "${yml}"
  fi

  if [[ ! -f "${srt}" ]]; then
    echo "converting google yaml data to subtitle data..."
    # NOTE(review): ml2srt.py is resolved relative to the current directory
    # and is presumed to write ${srt} next to the YAML file -- confirm.
    ./ml2srt.py "${yml}"
    [[ -f "${srt}" ]] || die "$0: ./ml2srt.py did not produce '${srt}'"
  fi

  mux_args=(-loglevel error -y -i "${file}" -i "${srt}")
  if [[ "${INCLUDE_AUDIO}" == 0 ]]; then
    mux_args+=(-an)
  fi
  mux_args+=(-c copy "${out}")
  ffmpeg "${mux_args[@]}"

  printf "Finished, result is in: '%s'\n\n" "${out}"
}

#######################################
# Entry point: check dependencies, parse arguments, process every video.
# Arguments: the script's command line ("$@")
#######################################
main() {
  local file

  check_deps
  argparse "$@"

  mkdir -p tmp
  for file in "${ARGS[@]}"; do
    process_video "${file}"
  done
}

main "$@"