[ANNOUNCE] UTF-8 patch 0.0.3 with Freetype2, enAIO for vdr-1.3.24

Message ID 20050515170504.GA4089@defiant.wachendorf.lan
State New
Headers

Commit Message

Ludwig Nussel May 15, 2005, 5:05 p.m. UTC
  Ludwig Nussel wrote:
> Lars Altenhain wrote:
> > thanks for this patch. Finally the mp3 plugin can display the correct names of 
> > the mp3s.
> > 
> > My vdr runs since 2 days with this patch and I have found only a small 
> > error :-) If vdr is allowed to update the channel names, the names revert back
> > to the ISO8859 encoded version. The transmitted channel names are not 
> > converted to utf8 before they are stored in the config data. The attached 
> > patch fixes this. 
> 
> It cannot work this way: you blindly recode ISO-8859-1 to UTF-8.
> Judging from how xawtv does it, each DVB string declares its own encoding.
> Therefore the central place to recode all strings must be
> decodeText() in libsi/si.c. That also means that the encoding does
> not need to be configured via channels.conf.

I've ripped the code from xawtv and put it into libsi. It seems to work
fine, but someone with non-German stations should probably try it to
actually prove that.


cu
Ludwig
  

Patch

Index: vdr-1.3.24/libsi/Makefile
===================================================================
--- vdr-1.3.24.orig/libsi/Makefile
+++ vdr-1.3.24/libsi/Makefile
@@ -23,10 +23,13 @@  LIBS +=
 
 ### The object files (add further files here):
 
-OBJS = util.o si.o section.o descriptor.o
+OBJS = util.o si.o section.o descriptor.o charconv.o
 
 ### Implicit rules:
 
+charconv.o: charconv.c
+	$(CC) $(CFLAGS) -c $(DEFINES) $(INCLUDES) $<
+
 %.o: %.c
 	$(CXX) $(CXXFLAGS) -c $(DEFINES) $(INCLUDES) $<
 
Index: vdr-1.3.24/libsi/charconv.h
===================================================================
--- /dev/null
+++ vdr-1.3.24/libsi/charconv.h
@@ -0,0 +1,22 @@ 
+#ifndef LIBSI_CHARCONV_H
+#define LIBSI_CHARCONV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Decode a DVB PSI/SI text field into a NUL-terminated UTF-8 string.
+ *
+ * src/slen:  raw field bytes; a leading byte below 0x20 selects the
+ *            character table.
+ * dest/dlen: output buffer and its size in bytes (must be > 0); it is
+ *            zero-filled first, and output that does not fit is truncated.
+ */
+void mpeg_parse_psi_string(const unsigned char *src, int slen, unsigned char *dest, int dlen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBSI_CHARCONV_H */
Index: vdr-1.3.24/libsi/si.c
===================================================================
--- vdr-1.3.24.orig/libsi/si.c
+++ vdr-1.3.24/libsi/si.c
@@ -11,8 +11,10 @@ 
  ***************************************************************************/
 
 #include <string.h>
+#include <stdio.h>
 #include "si.h"
 #include "descriptor.h"
+#include "charconv.h"
 
 namespace SI {
 
@@ -240,8 +242,13 @@  char *String::getText(char *buffer, char
    return buffer;
 }
 
-//taken from libdtv, Copyright Rolf Hakenes <hakenes@hippomi.de>
 void String::decodeText(char *buffer, int size) {
+   const unsigned char *src=reinterpret_cast<const unsigned char*>(data.getData(0));
+   unsigned slen = getLength();
+   mpeg_parse_psi_string(src, slen, reinterpret_cast<unsigned char*>(buffer), size);
+}
+
+#if 0
    const unsigned char *from=data.getData(0);
    char *to=buffer;
 
@@ -271,8 +278,19 @@  void String::decodeText(char *buffer, in
    }
    *to = '\0';
 }
+#endif
+
 
 void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion) {
+   const unsigned char *src=reinterpret_cast<const unsigned char*>(data.getData(0));
+   unsigned slen = getLength();
+   mpeg_parse_psi_string(src, slen, reinterpret_cast<unsigned char*>(buffer), sizeBuffer);
+//   mpeg_parse_psi_string(src, slen, reinterpret_cast<unsigned char*>(shortVersion), sizeShortVersion);
+   // FIXME: need to handle special chars
+   strncpy(shortVersion, buffer, sizeShortVersion);
+   shortVersion[sizeShortVersion-1] = '\0';
+}
+#if 0
    const unsigned char *from=data.getData(0);
    char *to=buffer;
    char *toShort=shortVersion;
@@ -306,6 +324,7 @@  void String::decodeText(char *buffer, ch
    *to = '\0';
    *toShort = '\0';
 }
+#endif
 
 Descriptor *Descriptor::getDescriptor(CharArray da, DescriptorTagDomain domain, bool returnUnimplemetedDescriptor) {
    Descriptor *d=0;
Index: vdr-1.3.24/libsi/charconv.c
===================================================================
--- /dev/null
+++ vdr-1.3.24/libsi/charconv.c
@@ -0,0 +1,156 @@ 
+#include <stdio.h>
+#include <string.h>
+#include <iconv.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "charconv.h"
+
+/* from xawtv4, copyright Gerd Knorr <kraxel@bytesex.org>, GPL */
+
+/*
+ * iconv charset name for each value of the leading selector byte.
+ * "reserved" and "fixme" are placeholders rather than charset names;
+ * iconv_open() is expected to reject them, which yields empty output.
+ */
+static const char *psi_charset[0x20] = {
+    [ 0x00 ... 0x1f ] = "reserved",
+    [ 0x00 ] = "ISO-8859-1",
+    [ 0x01 ] = "ISO-8859-5",
+    [ 0x02 ] = "ISO-8859-6",
+    [ 0x03 ] = "ISO-8859-7",
+    [ 0x04 ] = "ISO-8859-8",
+    [ 0x05 ] = "ISO-8859-9",
+    [ 0x06 ] = "ISO-8859-10",
+    [ 0x07 ] = "ISO-8859-11",
+    [ 0x08 ] = "ISO-8859-12",
+    [ 0x09 ] = "ISO-8859-13",
+    [ 0x0a ] = "ISO-8859-14",
+    [ 0x0b ] = "ISO-8859-15",
+    [ 0x10 ] = "fixme",
+    [ 0x11 ] = "UCS-2BE",        // correct?
+    [ 0x12 ] = "EUC-KR",
+    [ 0x13 ] = "GB2312",
+    [ 0x14 ] = "BIG5"
+};
+
+/* Conversion descriptors, opened on first use; (iconv_t)-1 marks a failed open. */
+static iconv_t iconv_charset[0x20] = {0};
+
+/*
+ * Convert len bytes at src from charset psi_charset[ch] to charset "to",
+ * storing at most max-1 bytes plus a terminating NUL in dst.  A len of
+ * (size_t)-1 means src is NUL-terminated.  Bytes iconv() rejects are
+ * written as a \xNN escape.  Returns the number of bytes stored, or 0
+ * when the charset is unknown or there is no room for output.
+ */
+static int iconv_string(unsigned char ch, const char *to,
+			const char *src, size_t len,
+			char *dst, size_t max)
+{
+    size_t ilen, olen;
+    char *in = (char *)src;    /* iconv() wants char**, input is not modified */
+    iconv_t ic;
+
+    if (0 == max)
+	return 0;
+    dst[0] = 0;
+    if (ch >= sizeof(psi_charset)/sizeof(psi_charset[0]))
+	return 0;
+
+    ic = iconv_charset[ch];
+    if ((iconv_t)-1 == ic)
+	return 0;
+    if (NULL == ic) {
+	ic = iconv_charset[ch] = iconv_open(to, psi_charset[ch]);
+	if (NULL == ic || (iconv_t)-1 == ic)
+	    return 0;
+    }
+    /* the descriptor is cached, so drop state left by an earlier string */
+    iconv(ic, NULL, NULL, NULL, NULL);
+
+    ilen = ((size_t)-1 != len) ? len : strlen(src);
+    olen = max-1;
+    while (ilen > 0) {
+	if ((size_t)-1 == iconv(ic, &in, &ilen, &dst, &olen)) {
+	    /* skip + quote broken byte unless we are out of space */
+	    if (E2BIG == errno)
+		break;
+	    if (olen < 4)
+		break;
+	    snprintf(dst, 5, "\\x%02x", (unsigned)(unsigned char)in[0]);
+	    in   += 1;
+	    dst  += 4;
+	    ilen -= 1;
+	    olen -= 4;
+	}
+    }
+    dst[0] = 0;
+    return (int)(max-1 - olen);
+}
+
+/*
+ * Copy slen bytes of 8-bit text from src to dest (at most dlen bytes),
+ * translating control codes: 0x8a (line break) becomes '\n', 0x1a (^Z)
+ * becomes a blank, every other byte in 0x80..0x9f (including emphasis
+ * on/off, 0x86/0x87) is dropped.  Returns the number of bytes written.
+ */
+static int handle_control_8(const unsigned char *src,  int slen,
+			    unsigned char *dest, int dlen)
+{
+    int s,d;
+
+    for (s = 0, d = 0; s < slen && d < dlen; s++) {
+	unsigned char c = src[s];
+
+	if (0x1a == c)
+	    dest[d++] = ' ';	/* was unreachable inside the 0x80..0x9f switch */
+	else if (0x8a == c)
+	    dest[d++] = '\n';
+	else if (c < 0x80 || c > 0x9f)
+	    dest[d++] = c;
+	/* remaining C1 control codes produce no output */
+    }
+    return d;
+}
+
+/*
+ * Decode a DVB PSI/SI text field of slen bytes at src into dest as
+ * NUL-terminated UTF-8.  dest is zero-filled first; dlen is its size.
+ * A first byte below 0x20 selects the character table, otherwise
+ * table 0x00 is used.
+ */
+void mpeg_parse_psi_string(const unsigned char *src, int slen,
+			   unsigned char *dest, int dlen)
+{
+    unsigned char *tmp;
+    int tlen,ch = 0;
+
+    if (NULL == dest || dlen <= 0)
+	return;
+    memset(dest,0,dlen);
+    /* an empty field has no selector byte to look at */
+    if (NULL == src || slen <= 0)
+	return;
+
+    if (src[0] < 0x20) {
+	ch = src[0];
+	src++;
+	slen--;
+    }
+    if (0 == slen)
+	return;
+
+    if (ch < 0x10) {
+	/* 8bit charset */
+	tmp = malloc(slen);
+	if (NULL == tmp)
+	    return;
+	tlen = handle_control_8(src, slen, tmp, slen);
+	iconv_string(ch, "UTF-8", (const char *)tmp, tlen, (char *)dest, dlen);
+	free(tmp);
+    } else {
+	/* 16bit charset */
+	iconv_string(ch, "UTF-8", (const char *)src, slen, (char *)dest, dlen);
+    }
+}
Index: vdr-1.3.24/channels.c
===================================================================
--- vdr-1.3.24.orig/channels.c
+++ vdr-1.3.24/channels.c
@@ -649,7 +649,7 @@  cString cChannel::ToText(const cChannel 
      q = caidbuf;
      q += IntArrayToString(q, Channel->caids, 16);
      *q = 0;
-     asprintf(&buffer, "%s:%d:%s:%s:%d:%s:%s:%d:%s:%d:%d:%d:%d:%s\n", FullName, Channel->frequency, *Channel->ParametersToString(), *cSource::ToString(Channel->source), Channel->srate, vpidbuf, apidbuf, Channel->tpid, caidbuf, Channel->sid, Channel->nid, Channel->tid, Channel->rid, Channel->charset );
+     asprintf(&buffer, "%s:%d:%s:%s:%d:%s:%s:%d:%s:%d:%d:%d:%d\n", FullName, Channel->frequency, *Channel->ParametersToString(), *cSource::ToString(Channel->source), Channel->srate, vpidbuf, apidbuf, Channel->tpid, caidbuf, Channel->sid, Channel->nid, Channel->tid, Channel->rid);
      }
   return cString(buffer, true);
 }
@@ -684,8 +684,7 @@  bool cChannel::Parse(const char *s)
      char *vpidbuf = NULL;
      char *apidbuf = NULL;
      char *caidbuf = NULL;
-     char *charsetbuf = NULL;
-     int fields = sscanf(s, "%a[^:]:%d :%a[^:]:%a[^:] :%d :%a[^:]:%a[^:]:%d :%a[^:]:%d :%d :%d :%d :%a[^:]", &namebuf, &frequency, &parambuf, &sourcebuf, &srate, &vpidbuf, &apidbuf, &tpid, &caidbuf, &sid, &nid, &tid, &rid, &charsetbuf);
+     int fields = sscanf(s, "%a[^:]:%d :%a[^:]:%a[^:] :%d :%a[^:]:%a[^:]:%d :%a[^:]:%d :%d :%d :%d ", &namebuf, &frequency, &parambuf, &sourcebuf, &srate, &vpidbuf, &apidbuf, &tpid, &caidbuf, &sid, &nid, &tid, &rid);
      if (fields >= 9) {
         if (fields == 9) {
            // allow reading of old format
@@ -792,18 +791,12 @@  bool cChannel::Parse(const char *s)
            shortName = strcpyrealloc(shortName, p);
            }
         name = strcpyrealloc(name, namebuf);
-	if(charsetbuf != NULL){
-	    charset = strcpyrealloc(charset, charsetbuf);
-	}else{
-	    charset = strcpyrealloc(charset, "ISO8859-15");
-	}
         free(parambuf);
         free(sourcebuf);
         free(vpidbuf);
         free(apidbuf);
         free(caidbuf);
         free(namebuf);
-        free(charsetbuf);
         if (!GetChannelID().Valid()) {
            esyslog("ERROR: channel data results in invalid ID!");
            return false;
Index: vdr-1.3.24/eit.c
===================================================================
--- vdr-1.3.24.orig/eit.c
+++ vdr-1.3.24/eit.c
@@ -210,21 +210,12 @@  cEIT::cEIT(cSchedules *Schedules, int So
       if (!rEvent) {
          if (ShortEventDescriptor) {
             char buffer[256];
-	    char tmpiconvbuf[256*4];
-	    ShortEventDescriptor->name.getText(buffer, sizeof(buffer));
-	    CharSetConv(tmpiconvbuf,sizeof(tmpiconvbuf), buffer, strlen(buffer)+1, channel->Charset(), "UTF8");
-            pEvent->SetTitle(tmpiconvbuf);
-
-	    ShortEventDescriptor->text.getText(buffer, sizeof(buffer));
-	    CharSetConv(tmpiconvbuf,sizeof(tmpiconvbuf), buffer, strlen(buffer)+1, channel->Charset(), "UTF8");
-            pEvent->SetShortText(tmpiconvbuf);
+	    pEvent->SetTitle(ShortEventDescriptor->name.getText(buffer, sizeof(buffer)));
+	    pEvent->SetShortText(ShortEventDescriptor->text.getText(buffer, sizeof(buffer)));
             }
          if (ExtendedEventDescriptors) {
             char buffer[ExtendedEventDescriptors->getMaximumTextLength(": ") + 1];
-	    char tmpiconvbuf[sizeof(buffer)*4];
-	    ExtendedEventDescriptors->getText(buffer, sizeof(buffer), ": ");
-	    CharSetConv(tmpiconvbuf,sizeof(tmpiconvbuf), buffer, strlen(buffer)+1, channel->Charset(), "UTF8");
-            pEvent->SetDescription(tmpiconvbuf);
+	    pEvent->SetDescription(ExtendedEventDescriptors->getText(buffer, sizeof(buffer), ": "));
             }
          }
       delete ExtendedEventDescriptors;
Index: vdr-1.3.24/channels.h
===================================================================
--- vdr-1.3.24.orig/channels.h
+++ vdr-1.3.24/channels.h
@@ -113,7 +113,6 @@  private:
   int tid;
   int sid;
   int rid;
-  char *charset;
   int number;    // Sequence number assigned on load
   bool groupSep;
   char polarization;
@@ -143,7 +142,6 @@  public:
   const char *ShortName(bool OrName = false) const { return (OrName && isempty(shortName)) ? name : shortName; }
   const char *Provider(void) const { return provider; }
   const char *PortalName(void) const { return portalName; }
-  const char *Charset(void) const { return charset; }
   int Frequency(void) const { return frequency; } ///< Returns the actual frequency, as given in 'channels.conf'
   int Transponder(void) const;                    ///< Returns the transponder frequency in MHz, plus the polarization in case of sat
   static int Transponder(int Frequency, char Polarization); ///< builds the transponder from the given Frequency and Polarization