voice2subs/ml2srt.py

#!/usr/bin/env python3

# Copyright (C) 2021 Tessa Nordgren
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import argparse
from datetime import timedelta
from pathlib import Path
import srt
from yaml import load as yload
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader


def init(argv=None):
    """Parse the command-line arguments of the converter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which
            is what the script itself relies on.

    Returns:
        The ``argparse.Namespace`` produced by the parser; its ``yml``
        attribute is a list holding one or more input file names.

    Raises:
        SystemExit: Raised by argparse when no file is given, on an
            unknown option, or after printing ``--help``.
    """
    parser = argparse.ArgumentParser(
        description='converts `gcloud ml speech` yaml into srt subtitles')
    parser.add_argument('yml', nargs='+', help='yaml file(s) to convert')
    return parser.parse_args(argv)


def _to_timedelta(offset):
    """Turn an offset string with an ``s`` suffix (e.g. ``'1.500s'``) into a timedelta."""
    return timedelta(seconds=float(offset.strip('s')))


def _convert(in_filename):
    """Convert one transcript YAML file into an ``.srt`` file beside it.

    The output path is ``in_filename`` with its suffix replaced by
    ``.srt``. Each entry of ``data['results']`` contributes one subtitle
    built from its first alternative: the cue starts at the first word's
    ``startTime``, ends at the last word's ``endTime`` and carries the
    alternative's ``transcript`` as text.

    Args:
        in_filename: Path of the YAML transcript to read.
    """
    import sys

    out_filename = Path(in_filename).with_suffix('.srt')

    # Both files are opened as UTF-8 so the result does not depend on the
    # locale, and with context managers so the handles are always closed.
    with open(in_filename, encoding='utf-8') as source:
        # NOTE(security): the full (C)Loader can construct arbitrary Python
        # objects. Only feed this script transcripts you trust; consider
        # switching to SafeLoader/CSafeLoader if inputs may be untrusted.
        data = yload(source, Loader=Loader)

    subs = []
    for result in data['results']:
        sentence = result['alternatives'][0]
        words = sentence.get('words')
        if not words:
            # Without word timings the cue cannot be placed on the
            # timeline; skip it instead of aborting the whole file.
            print(f'{in_filename}: skipping result without word timings',
                  file=sys.stderr)
            continue
        subs.append(srt.Subtitle(
            index=len(subs) + 1,  # consecutive 1-based cue numbers
            start=_to_timedelta(words[0]['startTime']),
            end=_to_timedelta(words[-1]['endTime']),
            content=sentence['transcript'],
        ))

    with open(out_filename, 'w', encoding='utf-8') as sink:
        sink.write(srt.compose(subs))


args = init()
for in_filename in args.yml:
    _convert(in_filename)