104 lines
3.2 KiB
Java
104 lines
3.2 KiB
Java
package org.schabi.newpipe.streams;
|
|
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.nodes.Node;
|
|
import org.jsoup.nodes.TextNode;
|
|
import org.jsoup.parser.Parser;
|
|
import org.jsoup.select.Elements;
|
|
import org.schabi.newpipe.streams.io.SharpStream;
|
|
|
|
import java.io.ByteArrayInputStream;
|
|
import java.io.IOException;
|
|
import java.nio.charset.Charset;
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
/**
|
|
* @author kapodamy
|
|
*/
|
|
public class SrtFromTtmlWriter {
|
|
private static final String NEW_LINE = "\r\n";
|
|
|
|
private final SharpStream out;
|
|
private final boolean ignoreEmptyFrames;
|
|
private final Charset charset = StandardCharsets.UTF_8;
|
|
|
|
private int frameIndex = 0;
|
|
|
|
public SrtFromTtmlWriter(final SharpStream out, final boolean ignoreEmptyFrames) {
|
|
this.out = out;
|
|
this.ignoreEmptyFrames = ignoreEmptyFrames;
|
|
}
|
|
|
|
private static String getTimestamp(final Element frame, final String attr) {
|
|
return frame
|
|
.attr(attr)
|
|
.replace('.', ','); // SRT subtitles uses comma as decimal separator
|
|
}
|
|
|
|
private void writeFrame(final String begin, final String end, final StringBuilder text)
|
|
throws IOException {
|
|
writeString(String.valueOf(frameIndex++));
|
|
writeString(NEW_LINE);
|
|
writeString(begin);
|
|
writeString(" --> ");
|
|
writeString(end);
|
|
writeString(NEW_LINE);
|
|
writeString(text.toString());
|
|
writeString(NEW_LINE);
|
|
writeString(NEW_LINE);
|
|
}
|
|
|
|
private void writeString(final String text) throws IOException {
|
|
out.write(text.getBytes(charset));
|
|
}
|
|
|
|
public void build(final SharpStream ttml) throws IOException {
|
|
/*
|
|
* TTML parser with BASIC support
|
|
* multiple CUE is not supported
|
|
* styling is not supported
|
|
* tag timestamps (in auto-generated subtitles) are not supported, maybe in the future
|
|
* also TimestampTagOption enum is not applicable
|
|
* Language parsing is not supported
|
|
*/
|
|
|
|
// parse XML
|
|
final byte[] buffer = new byte[(int) ttml.available()];
|
|
ttml.read(buffer);
|
|
final Document doc = Jsoup.parse(new ByteArrayInputStream(buffer), "UTF-8", "",
|
|
Parser.xmlParser());
|
|
|
|
final StringBuilder text = new StringBuilder(128);
|
|
final Elements paragraphList = doc.select("body > div > p");
|
|
|
|
// check if has frames
|
|
if (paragraphList.size() < 1) {
|
|
return;
|
|
}
|
|
|
|
for (final Element paragraph : paragraphList) {
|
|
text.setLength(0);
|
|
|
|
for (final Node children : paragraph.childNodes()) {
|
|
if (children instanceof TextNode) {
|
|
text.append(((TextNode) children).text());
|
|
} else if (children instanceof Element
|
|
&& ((Element) children).tagName().equalsIgnoreCase("br")) {
|
|
text.append(NEW_LINE);
|
|
}
|
|
}
|
|
|
|
if (ignoreEmptyFrames && text.length() < 1) {
|
|
continue;
|
|
}
|
|
|
|
final String begin = getTimestamp(paragraph, "begin");
|
|
final String end = getTimestamp(paragraph, "end");
|
|
|
|
writeFrame(begin, end, text);
|
|
}
|
|
}
|
|
}
|