voice2subs/ml2srt.py

#!/bin/env python3

# Copyright (C) 2021 Tessa Nordgren
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import argparse
from datetime import timedelta
from pathlib import Path
import srt
from yaml import load as yload
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader


def init(argv=None):
    """Parse the command-line arguments of the converter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, which is
            the behavior callers passing no argument already rely on.

    Returns:
        The ``argparse.Namespace`` produced by the parser; its ``yml``
        attribute is the list of one or more YAML file paths given.
    """
    parser = argparse.ArgumentParser(
        description='converts `gcloud ml speech` yaml into srt subtitles')
    # nargs='+' requires at least one input file and collects them in a list.
    parser.add_argument('yml', nargs='+', help='yaml file(s) to convert')
    return parser.parse_args(argv)


def _seconds(stamp):
    """Convert a duration string such as ``'1.500s'`` into a timedelta."""
    return timedelta(seconds=float(stamp.rstrip('s')))


def _build_subtitles(data):
    """Build the list of ``srt.Subtitle`` cues for one parsed YAML document.

    For every entry of ``data['results']`` the first alternative is used:
    the cue starts at the ``startTime`` of its first word, ends at the
    ``endTime`` of its last word, and carries the alternative's
    ``transcript`` as content. Entries without alternatives or without
    word timings are skipped because no start/end time can be derived.

    Args:
        data: The object loaded from the YAML file (a mapping with a
            ``results`` list is expected; anything empty yields no cues).

    Returns:
        A list of ``srt.Subtitle`` objects indexed sequentially from 1.
    """
    subs = []
    for result in (data or {}).get('results') or []:
        alternatives = result.get('alternatives') or []
        if not alternatives:
            continue
        best = alternatives[0]
        words = best.get('words') or []
        if not words:
            continue
        subs.append(srt.Subtitle(
            # Number cues in order of appearance, starting at 1.
            index=len(subs) + 1,
            start=_seconds(words[0]['startTime']),
            end=_seconds(words[-1]['endTime']),
            content=best['transcript'],
        ))
    return subs


args = init()
for in_filename in args.yml:
    # The subtitle file is written next to the input, with a .srt suffix.
    out_filename = Path(in_filename).with_suffix('.srt')
    # NOTE(review): Loader is PyYAML's full (C)Loader, which can construct
    # arbitrary Python objects from tagged YAML. If the input files may come
    # from an untrusted source, switch to CSafeLoader/SafeLoader.
    # NOTE(review): UTF-8 is assumed for both files so non-ASCII transcripts
    # do not depend on the platform's locale encoding - confirm for inputs
    # produced on other platforms.
    with open(in_filename, encoding='utf-8') as in_file:
        data = yload(in_file, Loader=Loader)
    sub_data = srt.compose(_build_subtitles(data))
    with open(out_filename, 'w', encoding='utf-8') as out_file:
        out_file.write(sub_data)
first pass, seems to work 2021-11-05 06:03:00 +00:00			`#!/bin/env python3`

			`# Copyright (C) 2021 Tessa Nordgren`
			`#`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU Affero General Public License as`
			`# published by the Free Software Foundation, version 3.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU Affero General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU Affero General Public License`
			`# along with this program. If not, see <https://www.gnu.org/licenses/>.`

			`import argparse`
			`from datetime import timedelta`
			`from pathlib import Path`
			`import srt`
			`from yaml import load as yload`
			`try:`
			`from yaml import CLoader as Loader`
			`except ImportError:`
			`from yaml import Loader`


			`def init():`
			parser = argparse.ArgumentParser(description='converts `glcoud ml speech` yaml into srt subtitles')
			`parser.add_argument('yml', nargs='+', help='yaml file(s) to convert')`
			`return parser.parse_args()`


			`args = init()`
			`for in_filename in args.yml:`
			`out_filename = Path(in_filename).with_suffix('.srt')`
			`input = open(in_filename)`
			`data = yload(input, Loader=Loader)`
			`subs = []`
			`index = 1`
			`for sentence in [x['alternatives'][0] for x in data['results']]:`
			`start = timedelta(seconds=float(sentence['words'][0]['startTime'].strip('s')))`
			`end = timedelta(seconds=float(sentence['words'][-1]['endTime'].strip('s')))`
			`sub = srt.Subtitle(index=index, start=start, end=end, content=sentence['transcript'])`
			`subs.append(sub)`
			`sub_data = srt.compose(subs)`
			`output = open(out_filename, 'w')`
			`output.write(sub_data)`
			`output.close()`