#!/usr/bin/env python3
# Copyright (C) 2021 Tessa Nordgren
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""Convert `gcloud ml speech` YAML transcripts into SRT subtitle files."""

import argparse
from datetime import timedelta
from pathlib import Path

import srt
from yaml import load as yload

try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader


def init():
    """Parse the command line.

    Returns:
        The argparse namespace; its ``yml`` attribute is a non-empty list
        of YAML file names to convert.
    """
    parser = argparse.ArgumentParser(
        description='converts `gcloud ml speech` yaml into srt subtitles')
    parser.add_argument('yml', nargs='+', help='yaml file(s) to convert')
    return parser.parse_args()


def _parse_offset(stamp):
    """Turn an offset such as ``'1.200s'`` into a ``timedelta``."""
    # str() guards against a bare number having been parsed as int/float.
    return timedelta(seconds=float(str(stamp).strip('s')))


def _build_subtitles(data):
    """Build one ``srt.Subtitle`` per recognition result in *data*.

    Reads ``data['results'][i]['alternatives'][0]`` and uses the first
    word's ``startTime`` and the last word's ``endTime`` as the cue span.
    Results without alternatives or without word timings are skipped,
    because no cue span can be derived for them.

    Returns:
        A list of subtitles indexed consecutively from 1.
    """
    subs = []
    # An empty document loads as None and a transcript without speech may
    # lack 'results' (presumably `{}` from gcloud -- TODO confirm); both
    # yield an empty subtitle list instead of an exception.
    for result in (data or {}).get('results') or []:
        alternatives = result.get('alternatives') or []
        if not alternatives:
            continue
        sentence = alternatives[0]
        words = sentence.get('words')
        if not words:
            continue
        subs.append(srt.Subtitle(
            # Number cues consecutively; the index used to stay at 1.
            index=len(subs) + 1,
            start=_parse_offset(words[0]['startTime']),
            end=_parse_offset(words[-1]['endTime']),
            content=sentence['transcript'],
        ))
    return subs


def _convert(in_filename):
    """Write the SRT rendering of *in_filename* next to it (suffix ``.srt``)."""
    out_filename = Path(in_filename).with_suffix('.srt')
    # NOTE(review): Loader/CLoader is PyYAML's full loader and can build
    # arbitrary Python objects from tagged input. Only feed this trusted
    # transcripts, or consider switching to SafeLoader/CSafeLoader.
    with open(in_filename, encoding='utf-8') as source:
        data = yload(source, Loader=Loader)
    sub_data = srt.compose(_build_subtitles(data))
    # Explicit UTF-8 keeps non-ASCII transcripts intact on every platform.
    with open(out_filename, 'w', encoding='utf-8') as sink:
        sink.write(sub_data)


args = init()
for in_filename in args.yml:
    _convert(in_filename)