[PATCH gst-omapfb 1/2] Set self->enabled flag to false in stop()

57 views
Skip to first unread message

Anatolij Gustschin

unread,
Feb 7, 2012, 5:09:34 AM2/7/12
to gst...@googlegroups.com
We always need to set the self->enabled flag to 'false' when stopping
since some apps (e.g. as seen with Qt for Embedded Linux and Phonon
using gstreamer plugin) might call start()/stop() sequence repeatedly
and this will cause plugin crash since buffer_alloc() won't alloc/mmap
buffers properly when self->enabled wasn't previously set to 'false'
in stop().

Signed-off-by: Anatolij Gustschin <ag...@denx.de>
---
omapfb.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/omapfb.c b/omapfb.c
index 5bc208f..6db3c50 100644
--- a/omapfb.c
+++ b/omapfb.c
@@ -371,6 +371,7 @@ stop(GstBaseSink *base)
return false;
}

+ self->enabled = false;
return true;
}

--
1.7.7.6

Anatolij Gustschin

unread,
Feb 7, 2012, 5:09:35 AM2/7/12
to gst...@googlegroups.com
With this patch we used reduced memory buffers for overlay planes
and achieved better performance when rendering video on AM3517
based board. To compile for AM3517 setup environment for cross
compiling and run 'CPU_AM3517=1 make'.

Signed-off-by: Anatolij Gustschin <ag...@denx.de>
---

Makefile | 14 +++++++-
csp_conv.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
omapfb.c | 68 ++++++++++++++++++++++++++++++++++++-
3 files changed, 189 insertions(+), 3 deletions(-)
create mode 100644 csp_conv.c

diff --git a/Makefile b/Makefile
index fa8ec7d..939e0d0 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,12 @@ CC := $(CROSS_COMPILE)gcc
CFLAGS := -O2 -ggdb -Wall -Wextra -Wno-unused-parameter -Wmissing-prototypes -ansi -std=c99
LDFLAGS := -Wl,--no-undefined -Wl,--as-needed

+ifdef CPU_AM3517
+CFLAGS := -DCPU_AM3517 -Wall -Wextra -Wno-unused-parameter -Wmissing-prototypes -ansi \
+ -std=c99 -O3 -march=armv7-a -mfpu=neon -mfloat-abi=softfp -ftree-vectorize \
+ -ffast-math -fsingle-precision-constant
+endif
+
override CFLAGS += -D_GNU_SOURCE -DGST_DISABLE_DEPRECATED

GST_CFLAGS := $(shell pkg-config --cflags gstreamer-0.10 gstreamer-base-0.10)
@@ -19,7 +25,13 @@ D = $(DESTDIR)

# plugin

-libgstomapfb.so: omapfb.o log.o
+OBJS := omapfb.o log.o
+
+ifdef CPU_AM3517
+OBJS += csp_conv.o
+endif
+
+libgstomapfb.so: ${OBJS}
libgstomapfb.so: override CFLAGS += $(GST_CFLAGS) -fPIC \
-D VERSION='"$(version)"' -I./include
libgstomapfb.so: override LIBS += $(GST_LIBS)
diff --git a/csp_conv.c b/csp_conv.c
new file mode 100644
index 0000000..a02b454
--- /dev/null
+++ b/csp_conv.c
@@ -0,0 +1,110 @@
+/*
+ * NEON implementation of YV12/I420 to UYVY conversion
+ * Copyright 2008 Ian Rickards, <ian.ri...@arm.com>
+ */
+typedef unsigned char uint8_t;
+
+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch,
+ uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
+
+void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch,
+ uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest)
+{
+ int x, y;
+ uint8_t *dest_even = dest;
+ uint8_t *dest_odd = dest + w * 2;
+ uint8_t *y_p_even = y_p;
+ uint8_t *y_p_odd = y_p + y_pitch;
+
+ /*ErrorF("in uv12_to_uyvy, w: %d, pitch: %d\n", w, pitch);*/
+ if (w<16)
+ {
+ for (y=0; y<h; y+=2)
+ {
+ for (x=0; x<w; x+=2)
+ {
+ /* Output two 2x1 macroblocks to form a 2x2 block from input */
+ uint8_t u_val = *u_p++;
+ uint8_t v_val = *v_p++;
+
+ /* Even row, first pixel */
+ *dest_even++ = u_val;
+ *dest_even++ = *y_p_even++;
+
+ /* Even row, second pixel */
+ *dest_even++ = v_val;
+ *dest_even++ = *y_p_even++;
+
+ /* Odd row, first pixel */
+ *dest_odd++ = u_val;
+ *dest_odd++ = *y_p_odd++;
+
+ /* Odd row, second pixel */
+ *dest_odd++ = v_val;
+ *dest_odd++ = *y_p_odd++;
+ }
+
+ dest_even += w * 2;
+ dest_odd += w * 2;
+
+ u_p += ((uv_pitch << 1) - w) >> 1;
+ v_p += ((uv_pitch << 1) - w) >> 1;
+
+ y_p_even += (y_pitch - w) + y_pitch;
+ y_p_odd += (y_pitch - w) + y_pitch;
+ }
+ }
+ else
+ {
+ for (y=0; y<h; y+=2)
+ {
+ x=w;
+ do {
+ // avoid using d8-d15 (q4-q7) aapcs callee-save registers
+ __asm__ volatile (
+ "1:\n\t"
+ "vld1.u8 {d0}, [%[u_p]]!\n\t"
+ "sub %[x],%[x],#16\n\t"
+ "cmp %[x],#16\n\t"
+ "vld1.u8 {d1}, [%[v_p]]!\n\t"
+ "vld1.u8 {q1}, [%[y_p_even]]!\n\t"
+ "vzip.u8 d0, d1\n\t"
+ "vld1.u8 {q2}, [%[y_p_odd]]!\n\t"
+ // use 2-element struct stores to zip up y with y&v
+ "vst2.u8 {q0,q1}, [%[dest_even]]!\n\t"
+ "vmov.u8 q1, q2\n\t"
+ "vst2.u8 {q0,q1}, [%[dest_odd]]!\n\t"
+ "bhs 1b\n\t"
+ : [u_p] "+r" (u_p), [v_p] "+r" (v_p), [y_p_even] "+r" (y_p_even), [y_p_odd] "+r" (y_p_odd),
+ [dest_even] "+r" (dest_even), [dest_odd] "+r" (dest_odd),
+ [x] "+r" (x)
+ :
+ : "cc", "memory", "d0","d1","d2","d3","d4","d5"
+ );
+ if (x!=0)
+ {
+ // overlap final 16-pixel block to process requested width exactly
+ x = 16-x;
+ u_p -= x/2;
+ v_p -= x/2;
+ y_p_even -= x;
+ y_p_odd -= x;
+ dest_even -= x*2;
+ dest_odd -= x*2;
+ x = 16;
+ // do another 16-pixel block
+ }
+ }
+ while (x!=0);
+
+ dest_even += w * 2;
+ dest_odd += w * 2;
+
+ u_p += ((uv_pitch << 1) - w) >> 1;
+ v_p += ((uv_pitch << 1) - w) >> 1;
+
+ y_p_even += (y_pitch - w) + y_pitch;
+ y_p_odd += (y_pitch - w) + y_pitch;
+ }
+ }
+}
diff --git a/omapfb.c b/omapfb.c
index 6db3c50..32d19e0 100644
--- a/omapfb.c
+++ b/omapfb.c
@@ -32,6 +32,12 @@ static void *parent_class;
GstDebugCategory *omapfb_debug;
#endif

+#ifdef CPU_AM3517
+#define GST_OMAPFB_NR_PAGES 2
+#else
+#define GST_OMAPFB_NR_PAGES 4
+#endif
+
struct page {
unsigned yoffset;
void *buf;
@@ -50,6 +56,7 @@ struct gst_omapfb_sink {
unsigned char *framebuffer;
bool enabled;
bool manual_update;
+ int cur_idx;

struct page *pages;
int nr_pages;
@@ -94,7 +101,11 @@ generate_sink_template(void)
gst_value_set_fourcc(&val, GST_MAKE_FOURCC('U', 'Y', 'V', 'Y'));
gst_value_list_append_value(&list, &val);
#else
+#ifdef CPU_AM3517
+ gst_value_set_fourcc(&val, GST_MAKE_FOURCC ('Y', 'V', '1', '2'));
+#else
gst_value_set_fourcc(&val, GST_MAKE_FOURCC('U', 'Y', 'V', 'Y'));
+#endif
gst_value_list_append_value(&list, &val);
#endif

@@ -109,6 +120,7 @@ generate_sink_template(void)
return caps;
}

+#ifndef CPU_AM3517
static void
update(struct gst_omapfb_sink *self)
{
@@ -153,6 +165,7 @@ static struct page *get_page(struct gst_omapfb_sink *self)
page->used = true;
return page;
}
+#endif

static gboolean
setup(struct gst_omapfb_sink *self, GstCaps *caps)
@@ -193,6 +206,7 @@ setup(struct gst_omapfb_sink *self, GstCaps *caps)
pr_err(self, "memory map failed");
return false;
}
+ memset(self->framebuffer, 0, self->mem_info.size);

self->overlay_info.xres = width;
self->overlay_info.yres = height;
@@ -261,6 +275,7 @@ setup(struct gst_omapfb_sink *self, GstCaps *caps)
return true;
}

+#ifndef CPU_AM3517
static GstFlowReturn
buffer_alloc(GstBaseSink *base, guint64 offset, guint size, GstCaps *caps, GstBuffer **buf)
{
@@ -290,6 +305,18 @@ missing:
*buf = NULL;
return GST_FLOW_OK;
}
+#else
+/*
+ * we will work with pre-allocated gstreamer buffers and do optimized
+ * color space conversion in the plugin. So just set *buf to NULL here.
+ */
+static GstFlowReturn
+buffer_alloc_am3517(GstBaseSink *base, guint64 offset, guint size, GstCaps *caps, GstBuffer **buf)
+{
+ *buf = NULL;
+ return GST_FLOW_OK;
+}
+#endif

static gboolean
setcaps(GstBaseSink *base, GstCaps *caps)
@@ -306,8 +333,9 @@ start(GstBaseSink *base)
struct gst_omapfb_sink *self = (struct gst_omapfb_sink *)base;
int fd;

- self->nr_pages = 4;
+ self->nr_pages = GST_OMAPFB_NR_PAGES;
self->cur_page = self->old_page = NULL;
+ self->cur_idx = 0;

fd = open("/dev/fb0", O_RDWR);

@@ -375,6 +403,7 @@ stop(GstBaseSink *base)
return true;
}

+#ifndef CPU_AM3517
static GstFlowReturn
render(GstBaseSink *base, GstBuffer *buffer)
{
@@ -409,6 +438,35 @@ render(GstBaseSink *base, GstBuffer *buffer)

return GST_FLOW_OK;
}
+#else /* CPU_AM3517 */
+typedef unsigned char uint8_t;
+extern void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch,
+ uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
+
+static GstFlowReturn
+render_am3517(GstBaseSink *base, GstBuffer *buffer)
+{
+ struct gst_omapfb_sink *self = (struct gst_omapfb_sink *)base;
+ int w, h, y_pitch, uv_pitch;
+ uint8_t *yb, *ub, *vb;
+ uint8_t *db = self->pages[self->cur_idx].buf;
+
+ w = self->overlay_info.xres;
+ h = self->overlay_info.yres;
+ y_pitch = (w + 3) & ~3;
+ uv_pitch = (((y_pitch >> 1) + 3) & ~3);
+ yb = GST_BUFFER_DATA(buffer);
+ vb = yb + (y_pitch * h);
+ ub = vb + (uv_pitch * (h / 2));
+ uv12_to_uyvy_neon(w & ~15, h & ~15, y_pitch, uv_pitch,
+ yb, ub, vb, db);
+ self->overlay_info.yoffset = self->pages[self->cur_idx].yoffset;
+ ioctl(self->overlay_fd, FBIOPAN_DISPLAY, &self->overlay_info);
+ self->cur_idx = self->cur_idx ? 0 : 1;
+
+ return GST_FLOW_OK;
+}
+#endif /* CPU_AM3517 */

static void
class_init(void *g_class, void *class_data)
@@ -420,11 +478,17 @@ class_init(void *g_class, void *class_data)
parent_class = g_type_class_ref(GST_OMAPFB_SINK_TYPE);

base_sink_class->set_caps = setcaps;
- base_sink_class->buffer_alloc = buffer_alloc;
base_sink_class->start = start;
base_sink_class->stop = stop;
+#ifdef CPU_AM3517
+ base_sink_class->buffer_alloc = buffer_alloc_am3517;
+ base_sink_class->render = render_am3517;
+ base_sink_class->preroll = render_am3517;
+#else
+ base_sink_class->buffer_alloc = buffer_alloc;
base_sink_class->render = render;
base_sink_class->preroll = render;
+#endif
}

static void
--
1.7.7.6

Felipe Contreras

unread,
Feb 14, 2012, 3:00:37 PM2/14/12
to Anatolij Gustschin, gst...@googlegroups.com
On Tue, Feb 7, 2012 at 12:09 PM, Anatolij Gustschin <ag...@denx.de> wrote:
> We always need to set the self->enabled flag to 'false' when stopping
> since some apps (e.g. as seen with Qt for Embedded Linux and Phonon
> using gstreamer plugin) might call start()/stop() sequence repeatedly
> and this will cause plugin crash since buffer_alloc() won't alloc/mmap
> buffers properly when self->enabled wasn't previously set to 'false'
> in stop().

All right, looks good to me.

--
Felipe Contreras

Felipe Contreras

unread,
Feb 14, 2012, 3:21:32 PM2/14/12
to Anatolij Gustschin, gst...@googlegroups.com
On Tue, Feb 7, 2012 at 12:09 PM, Anatolij Gustschin <ag...@denx.de> wrote:
> With this patch we used reduced memory buffers for overlay planes
> and achieved better performance when rendering video on AM3517
> based board. To compile for AM3517 setup environment for cross
> compiling and run 'CPU_AM3517=1 make'.

We certainly need this, but I don't like to have board-specific
checks. Most of the changes you propose can be implemented
differently.

> Signed-off-by: Anatolij Gustschin <ag...@denx.de>
> ---
>  Makefile   |   14 +++++++-
>  csp_conv.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  omapfb.c   |   68 ++++++++++++++++++++++++++++++++++++-
>  3 files changed, 189 insertions(+), 3 deletions(-)
>  create mode 100644 csp_conv.c
>
> diff --git a/Makefile b/Makefile
> index fa8ec7d..939e0d0 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -4,6 +4,12 @@ CC := $(CROSS_COMPILE)gcc
>  CFLAGS := -O2 -ggdb -Wall -Wextra -Wno-unused-parameter -Wmissing-prototypes -ansi -std=c99
>  LDFLAGS := -Wl,--no-undefined -Wl,--as-needed
>
> +ifdef CPU_AM3517
> +CFLAGS := -DCPU_AM3517 -Wall -Wextra -Wno-unused-parameter -Wmissing-prototypes -ansi \
> +       -std=c99

Up to this point you are mostly repeating the previous flags; you can
do CFLAGS += instead to add the extra flags you want.

> -O3 -march=armv7-a -mfpu=neon -mfloat-abi=softfp -ftree-vectorize \
> +       -ffast-math -fsingle-precision-constant

These seem to be related to the NEON code below. I think this code
should be dependent on a NEON flag, rather than CPU_AM3517, as there's
many other boards that do have NEON.

> +endif
> +
>  override CFLAGS += -D_GNU_SOURCE -DGST_DISABLE_DEPRECATED
>
>  GST_CFLAGS := $(shell pkg-config --cflags gstreamer-0.10 gstreamer-base-0.10)
> @@ -19,7 +25,13 @@ D = $(DESTDIR)
>
>  # plugin
>
> -libgstomapfb.so: omapfb.o log.o
> +OBJS := omapfb.o log.o
> +
> +ifdef CPU_AM3517
> +OBJS += csp_conv.o
> +endif

Again, depending on NEON.

> +libgstomapfb.so: ${OBJS}
>  libgstomapfb.so: override CFLAGS += $(GST_CFLAGS) -fPIC \
>        -D VERSION='"$(version)"' -I./include
>  libgstomapfb.so: override LIBS += $(GST_LIBS)
> diff --git a/csp_conv.c b/csp_conv.c
> new file mode 100644
> index 0000000..a02b454
> --- /dev/null
> +++ b/csp_conv.c
> @@ -0,0 +1,110 @@
> +/*
> + * NEON implementation of YV12/I420 to UYVY conversion
> + * Copyright 2008 Ian Rickards, <ian.ri...@arm.com>
> + */

What license is this?

I would like this code to follow the rest of the code-style (see
checkpatch), but I can do that myself. Also, I would like to take a
look to the code in omapfbplay since Mans Rullgard really know these
NEON optimization bits.

> diff --git a/omapfb.c b/omapfb.c
> index 6db3c50..32d19e0 100644
> --- a/omapfb.c
> +++ b/omapfb.c
> @@ -32,6 +32,12 @@ static void *parent_class;
>  GstDebugCategory *omapfb_debug;
>  #endif
>
> +#ifdef CPU_AM3517
> +#define GST_OMAPFB_NR_PAGES    2
> +#else
> +#define GST_OMAPFB_NR_PAGES    4
> +#endif

Again, I don't think this is specific to CPU_AM3517. If this is really
needed, I think there should be a configuration in the Makefile for
this, so other people can set the pages they want.

But why is this needed? I didn't see any explanation. Is it some
memory limitation? If so, maybe we can find that out on-the-fly.

>  struct page {
>        unsigned yoffset;
>        void *buf;
> @@ -50,6 +56,7 @@ struct gst_omapfb_sink {
>        unsigned char *framebuffer;
>        bool enabled;
>        bool manual_update;
> +       int cur_idx;

Unrelated change?

>
>        struct page *pages;
>        int nr_pages;
> @@ -94,7 +101,11 @@ generate_sink_template(void)
>                gst_value_set_fourcc(&val, GST_MAKE_FOURCC('U', 'Y', 'V', 'Y'));
>                gst_value_list_append_value(&list, &val);
>  #else
> +#ifdef CPU_AM3517
> +               gst_value_set_fourcc(&val, GST_MAKE_FOURCC ('Y', 'V', '1', '2'));
> +#else

This should depend on NEON.

>                gst_value_set_fourcc(&val, GST_MAKE_FOURCC('U', 'Y', 'V', 'Y'));

This one should be enabled regardless. CPU_AM3517 can render UYVY,
it's just that you are adding an extra option to convert from YV12 to
UYVY, which all OMAP boards can benefit from. Even better would be to
convert I420 as well.

> +#endif
>                gst_value_list_append_value(&list, &val);
>  #endif
>
> @@ -109,6 +120,7 @@ generate_sink_template(void)
>        return caps;
>  }
>
> +#ifndef CPU_AM3517
>  static void
>  update(struct gst_omapfb_sink *self)
>  {
> @@ -153,6 +165,7 @@ static struct page *get_page(struct gst_omapfb_sink *self)
>                page->used = true;
>        return page;
>  }
> +#endif

Why are you disabling this? If the panel doesn't support
OMAPFB_MANUAL_UPDATE, maybe OMAPFB_SET_UPDATE_MODE should not succeed.
I thought that was the purpose of it.

>  static gboolean
>  setup(struct gst_omapfb_sink *self, GstCaps *caps)
> @@ -193,6 +206,7 @@ setup(struct gst_omapfb_sink *self, GstCaps *caps)
>                pr_err(self, "memory map failed");
>                return false;
>        }
> +       memset(self->framebuffer, 0, self->mem_info.size);

This seems to be an unrelated change, and probably depends on the
color-format; would trigger a green frame on UYVY.

>
>        self->overlay_info.xres = width;
>        self->overlay_info.yres = height;
> @@ -261,6 +275,7 @@ setup(struct gst_omapfb_sink *self, GstCaps *caps)
>        return true;
>  }
>
> +#ifndef CPU_AM3517
>  static GstFlowReturn
>  buffer_alloc(GstBaseSink *base, guint64 offset, guint size, GstCaps *caps, GstBuffer **buf)
>  {
> @@ -290,6 +305,18 @@ missing:
>        *buf = NULL;
>        return GST_FLOW_OK;
>  }
> +#else
> +/*
> + * we will work with pre-allocated gstreamer buffers and do optimized
> + * color space conversion in the plugin. So just set *buf to NULL here.
> + */
> +static GstFlowReturn
> +buffer_alloc_am3517(GstBaseSink *base, guint64 offset, guint size, GstCaps *caps, GstBuffer **buf)
> +{
> +       *buf = NULL;
> +       return GST_FLOW_OK;
> +}
> +#endif

Instead of having a specific function for am3517, the original one
should be checking the color-format, and return *buf = NULL if it's
YV12/I420.

>  static gboolean
>  setcaps(GstBaseSink *base, GstCaps *caps)
> @@ -306,8 +333,9 @@ start(GstBaseSink *base)
>        struct gst_omapfb_sink *self = (struct gst_omapfb_sink *)base;
>        int fd;
>
> -       self->nr_pages = 4;
> +       self->nr_pages = GST_OMAPFB_NR_PAGES;
>        self->cur_page = self->old_page = NULL;
> +       self->cur_idx = 0;

Unrelated change?

Again, this should depend on the color-format, not board.

Thanks for contributing these patches, I was planning to do a similar
conversion myself (as GStreamer's ffmpegcolorspace element is not
ideal), but I think the changes have to be done in such a way that
everybody can benefit from them.

Cheers.

--
Felipe Contreras

Anatolij Gustschin

unread,
Jun 5, 2013, 2:11:58 AM6/5/13
to jorge.mo...@gmail.com, gst...@googlegroups.com
Hi,

On Tue, 4 Jun 2013 22:54:47 -0700 (PDT)
jorge.mo...@gmail.com wrote:

> I've just download the gst-omapfb project from git but I haven't found
> anything related
> to this. Is in the repository? Was the patch discarted?

this patch is not in the repository. It has some issues as explained
by Felipe, and unfortunately I didn't have time to address comments
from Felipe.

Thanks,

Anatolij

Reply all
Reply to author
Forward
0 new messages