1From f587861bbe12ad0b10370f835592746aafedbf56 Mon Sep 17 00:00:00 2001
2From: Mingke Wang <mingke.wang@freescale.com>
3Date: Thu, 19 Mar 2015 14:17:10 +0800
4Subject: [PATCH 2/4] ssaparse: enhance SSA text lines parsing.
5
6Some parsers pass in the original SSA text line, which starts with "Dialogue:",
7and there may be multiple Dialogue lines in one input buffer.
8
9Upstream-Status: Submitted [https://bugzilla.gnome.org/show_bug.cgi?id=747496]
10
11Signed-off-by: Mingke Wang <mingke.wang@freescale.com>
12---
13 gst/subparse/gstssaparse.c | 150 +++++++++++++++++++++++++++++++++----
14 1 file changed, 134 insertions(+), 16 deletions(-)
15 mode change 100644 => 100755 gst/subparse/gstssaparse.c
16
17diff --git a/gst/subparse/gstssaparse.c b/gst/subparse/gstssaparse.c
18old mode 100644
19new mode 100755
20index c849c08..4b9636c
21--- a/gst/subparse/gstssaparse.c
22+++ b/gst/subparse/gstssaparse.c
23@@ -262,6 +262,7 @@ gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt)
24  * gst_ssa_parse_push_line:
25  * @parse: caller element
26  * @txt: text to push
27+ * @size: size of the text to parse, in bytes
28  * @start: timestamp for the buffer
29  * @duration: duration for the buffer
30  *
31@@ -271,27 +272,133 @@ gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt)
32  * Returns: result of the push of the created buffer
33  */
34 static GstFlowReturn
35-gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt,
36+gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt, gint size,
37     GstClockTime start, GstClockTime duration)
38 {
39   GstFlowReturn ret;
40   GstBuffer *buf;
41-  gchar *t, *escaped;
42+  gchar *t, *text, *p, *escaped, *p_start, *p_end;
43   gint num, i, len;
44+  GstClockTime start_time = G_MAXUINT64, end_time = 0;
45
46-  num = atoi (txt);
47-  GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT,
48-      num, GST_TIME_ARGS (start));
49-
50-  /* skip all non-text fields before the actual text */
51+  p = text = g_malloc(size + 1);
52+  *p = '\0';
53   t = txt;
54-  for (i = 0; i < 8; ++i) {
55-    t = strchr (t, ',');
56+
57+  /* there may be multiple dialogue lines in a single buffer */
58+  while (*t) {
59+    /* ignore leading white space characters */
60+    while (isspace(*t))
61+      t++;
62+
63+    /* ignore Format: and Style: lines */
64+    if (strncmp(t, "Format:", 7) == 0 || strncmp(t, "Style:", 6) == 0) {
65+      while (*t != '\0' && *t != '\n') {
66+        t++;
67+      }
68+    }
69+
70+    if (*t == '\0')
71+      break;
72+
73+    /* continue with next line */
74+    if (*t == '\n') {
75+      t++;
76+      continue;
77+    }
78+
79+    if(strncmp(t, "Dialogue:", 9) != 0) {
80+      /* not started with "Dialogue:", it must be a line trimmed by demuxer */
81+      num = atoi (t);
82+      GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT,
83+          num, GST_TIME_ARGS (start));
84+
85+      /* skip all non-text fields before the actual text */
86+      for (i = 0; i < 8; ++i) {
87+        t = strchr (t, ',');
88+        if (t == NULL)
89+          break;
90+        ++t;
91+      }
92+    } else {
93+      /* started with "Dialogue:", update timestamp and duration */
94+      /* a line looks like Dialogue:Mark,0:00:01.02,0:00:03.04,xx,xxx,... */
95+      guint hour, min, sec, msec, len;
96+      GstClockTime tmp;
97+      gchar t_str[12] = {0};
98+
99+      /* find the first ',' */
100+      p_start = strchr (t, ',');
101+      if (p_start)
102+        p_end = strchr (++p_start, ',');
103+
104+      if (p_start && p_end) {
105+        /* copy text between first ',' and second ',' */
106+        strncpy(t_str, p_start, p_end - p_start);
107+        if (sscanf (t_str, "%u:%u:%u.%u", &hour, &min, &sec, &msec) == 4) {
108+          tmp = ((hour*3600) + (min*60) + sec) * GST_SECOND + msec*GST_MSECOND;
109+          GST_DEBUG_OBJECT (parse, "Get start time:%02d:%02d:%02d:%03d\n",
110+              hour, min, sec, msec);
111+          if (start_time > tmp)
112+            start_time = tmp;
113+        } else {
114+          GST_WARNING_OBJECT (parse,
115+              "failed to parse ssa start timestamp string :%s", t_str);
116+        }
117+
118+        p_start = p_end;
119+        p_end = strchr (++p_start, ',');
120+        if (p_end) {
121+          /* copy text between second ',' and third ',' */
122+          strncpy(t_str, p_start, p_end - p_start);
123+          if (sscanf (t_str, "%u:%u:%u.%u", &hour, &min, &sec, &msec) == 4) {
124+            tmp = ((hour*3600) + (min*60) + sec)*GST_SECOND + msec*GST_MSECOND;
125+            GST_DEBUG_OBJECT(parse, "Get end time:%02d:%02d:%02d:%03d\n",
126+                hour, min, sec, msec);
127+            if (end_time < tmp)
128+              end_time = tmp;
129+          } else {
130+            GST_WARNING_OBJECT (parse,
131+                "failed to parse ssa end timestamp string :%s", t_str);
132+          }
133+        }
134+      }
135+
136+      /* now skip all non-text fields before the actual text */
137+      for (i = 0; i <= 8; ++i) {
138+        t = strchr (t, ',');
139+        if (t == NULL)
140+          break;
141+        ++t;
142+      }
143+    }
144+
145+    /* line end before expected number of ',', not a Dialogue line */
146     if (t == NULL)
147-      return GST_FLOW_ERROR;
148-    ++t;
149+      break;
150+
151+    /* if not the first line, and the last character of previous line is '\0',
152+     * then replace it with '\N' */
153+    if (p != text && *p == '\0') {
154+      *p++ = '\\';
155+      *p++ = 'N';
156+    }
157+
158+    /* copy all actual text of this line */
159+    while ((*t != '\0') && (*t != '\n'))
160+      *p++ = *t++;
161+
162+    /* add a terminator at the end */
163+    *p = '\0';
164+  }
165+
166+  /* no valid text found in this buffer, report an error to the caller */
167+  if (strlen(text) <= 0) {
168+    GST_WARNING_OBJECT (parse, "Not valid text found in this buffer\n");
169+    return GST_FLOW_ERROR;
170   }
171
172+  t = text;
173   GST_LOG_OBJECT (parse, "Text : %s", t);
174
175   if (gst_ssa_parse_remove_override_codes (parse, t)) {
176@@ -309,13 +416,22 @@ gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt,
177   gst_buffer_fill (buf, 0, escaped, len + 1);
178   gst_buffer_set_size (buf, len);
179   g_free (escaped);
180+  g_free(t);
181+
182+  if (start_time != G_MAXUINT64)
183+    GST_BUFFER_TIMESTAMP (buf) = start_time;
184+  else
185+    GST_BUFFER_TIMESTAMP (buf) = start;
186
187-  GST_BUFFER_TIMESTAMP (buf) = start;
188-  GST_BUFFER_DURATION (buf) = duration;
189+  if (end_time > start_time)
190+    GST_BUFFER_DURATION (buf) = end_time - start_time;
191+  else
192+    GST_BUFFER_DURATION (buf) = duration;
193
194   GST_LOG_OBJECT (parse, "Pushing buffer with timestamp %" GST_TIME_FORMAT
195-      " and duration %" GST_TIME_FORMAT, GST_TIME_ARGS (start),
196-      GST_TIME_ARGS (duration));
197+      " and duration %" GST_TIME_FORMAT,
198+      GST_TIME_ARGS (GST_BUFFER_TIMESTAMP (buf)),
199+      GST_TIME_ARGS (GST_BUFFER_DURATION (buf)));
200
201   ret = gst_pad_push (parse->srcpad, buf);
202
203@@ -335,6 +451,7 @@ gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf)
204   GstClockTime ts;
205   gchar *txt;
206   GstMapInfo map;
207+  gint size;
208
209   if (G_UNLIKELY (!parse->framed))
210     goto not_framed;
211@@ -352,13 +469,14 @@ gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf)
212   /* make double-sure it's 0-terminated and all */
213   gst_buffer_map (buf, &map, GST_MAP_READ);
214   txt = g_strndup ((gchar *) map.data, map.size);
215+  size = map.size;
216   gst_buffer_unmap (buf, &map);
217
218   if (txt == NULL)
219     goto empty_text;
220
221   ts = GST_BUFFER_TIMESTAMP (buf);
222-  ret = gst_ssa_parse_push_line (parse, txt, ts, GST_BUFFER_DURATION (buf));
223+  ret = gst_ssa_parse_push_line (parse, txt, size, ts, GST_BUFFER_DURATION (buf));
224
225   if (ret != GST_FLOW_OK && GST_CLOCK_TIME_IS_VALID (ts)) {
226     GstSegment segment;
227--
2282.28.0
229
230