From patchwork Sun May 15 17:05:04 2005 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ludwig Nussel X-Patchwork-Id: 11877 Received: from p549459f9.dip.t-dialin.net ([84.148.89.249] helo=defiant.wachendorf.lan) by www.linuxtv.org with esmtp (Exim 4.34) id 1DXMY8-0008Pt-T1 for vdr@linuxtv.org; Sun, 15 May 2005 19:05:10 +0200 Received: by defiant.wachendorf.lan (Postfix, from userid 1701) id 51E0A5715C; Sun, 15 May 2005 19:05:04 +0200 (CEST) Date: Sun, 15 May 2005 19:05:04 +0200 From: Ludwig Nussel To: vdr@linuxtv.org Subject: Re: [vdr] [ANNOUNCE] UTF-8 patch 0.0.3 with Freetype2, enAIO for vdr-1.3.24 Message-ID: <20050515170504.GA4089@defiant.wachendorf.lan> Mail-Followup-To: vdr@linuxtv.org References: <4283BDAF.7060207@t-online.de> <200505150028.55475.lars@altenhain.de> <20050515115200.GA14455@defiant.wachendorf.lan> Mime-Version: 1.0 Content-Disposition: inline In-Reply-To: <20050515115200.GA14455@defiant.wachendorf.lan> X-Operating-System: SuSE Linux 9.3 (x86-64) X-Accept-Language: de en X-PGP-Fingerprint: 2987 6870 60BA 3158 F587 88C5 2394 431A FF81 35CE X-Message-Flag: virtual memory exhausted, please reboot User-Agent: Mutt/1.5.9i X-BeenThere: vdr@linuxtv.org X-Mailman-Version: 2.1.5 Precedence: list Reply-To: Klaus Schmidinger's VDR List-Id: Klaus Schmidinger's VDR List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 15 May 2005 17:05:10 -0000 Status: O X-Status: X-Keywords: X-UID: 2225 Ludwig Nussel wrote: > Lars Altenhain wrote: > > thanks for this patch. Finally the mp3 plugin can display the correct names of > > the mp3s. > > > > My vdr has been running for 2 days with this patch and I have found only a small > > error :-) If vdr is allowed to update the channel names, the names revert back > > to the ISO8859 encoded version. The transmitted channel names are not > > converted to utf8 before they are stored in the config data. 
The attached > > patch fixes this. > > It cannot work this way, you blindly recode iso-8859-1 to utf8. > Judging from how xawtv does it, each DVB string tells its encoding. > Therefore the central place to recode all strings must be > decodeText() in libsi/si.c. That also means that the encoding does > not need to be configured via channels.conf. I've ripped the code from xawtv and put it into libsi. Seems to work fine but someone with non-German stations should probably try it to actually prove that. cu Ludwig Index: vdr-1.3.24/libsi/Makefile =================================================================== --- vdr-1.3.24.orig/libsi/Makefile +++ vdr-1.3.24/libsi/Makefile @@ -23,10 +23,13 @@ LIBS += ### The object files (add further files here): -OBJS = util.o si.o section.o descriptor.o +OBJS = util.o si.o section.o descriptor.o charconv.o ### Implicit rules: +charconv.o: charconv.c + $(CC) $(CFLAGS) -c $(DEFINES) $(INCLUDES) $< + %.o: %.c $(CXX) $(CXXFLAGS) -c $(DEFINES) $(INCLUDES) $< Index: vdr-1.3.24/libsi/charconv.h =================================================================== --- /dev/null +++ vdr-1.3.24/libsi/charconv.h @@ -0,0 +1,7 @@ +#ifdef __cplusplus +extern "C" { +#endif +void mpeg_parse_psi_string(const unsigned char *src, int slen, unsigned char *dest, int dlen); +#ifdef __cplusplus +} +#endif Index: vdr-1.3.24/libsi/si.c =================================================================== --- vdr-1.3.24.orig/libsi/si.c +++ vdr-1.3.24/libsi/si.c @@ -11,8 +11,10 @@ ***************************************************************************/ #include +#include #include "si.h" #include "descriptor.h" +#include "charconv.h" namespace SI { @@ -240,8 +242,13 @@ char *String::getText(char *buffer, char return buffer; } -//taken from libdtv, Copyright Rolf Hakenes void String::decodeText(char *buffer, int size) { + const unsigned char *src=reinterpret_cast(data.getData(0)); + unsigned slen = getLength(); + mpeg_parse_psi_string(src, slen, 
reinterpret_cast(buffer), size); +} + +#if 0 const unsigned char *from=data.getData(0); char *to=buffer; @@ -271,8 +278,19 @@ void String::decodeText(char *buffer, in } *to = '\0'; } +#endif + void String::decodeText(char *buffer, char *shortVersion, int sizeBuffer, int sizeShortVersion) { + const unsigned char *src=reinterpret_cast(data.getData(0)); + unsigned slen = getLength(); + mpeg_parse_psi_string(src, slen, reinterpret_cast(buffer), sizeBuffer); +// mpeg_parse_psi_string(src, slen, reinterpret_cast(shortVersion), sizeShortVersion); + // FIXME: need to handle special chars + strncpy(shortVersion, buffer, sizeShortVersion); + shortVersion[sizeShortVersion-1] = '\0'; +} +#if 0 const unsigned char *from=data.getData(0); char *to=buffer; char *toShort=shortVersion; @@ -306,6 +324,7 @@ void String::decodeText(char *buffer, ch *to = '\0'; *toShort = '\0'; } +#endif Descriptor *Descriptor::getDescriptor(CharArray da, DescriptorTagDomain domain, bool returnUnimplemetedDescriptor) { Descriptor *d=0; Index: vdr-1.3.24/libsi/charconv.c =================================================================== --- /dev/null +++ vdr-1.3.24/libsi/charconv.c @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include + +#include "charconv.h" + +/* from xawtv4, copyright Gerd Knorr , GPL */ + +static const char *psi_charset[0x20] = { + [ 0x00 ... 0x1f ] = "reserved", + [ 0x00 ] = "ISO-8859-1", + [ 0x01 ] = "ISO-8859-5", + [ 0x02 ] = "ISO-8859-6", + [ 0x03 ] = "ISO-8859-7", + [ 0x04 ] = "ISO-8859-8", + [ 0x05 ] = "ISO-8859-9", + [ 0x06 ] = "ISO-8859-10", + [ 0x07 ] = "ISO-8859-11", + [ 0x08 ] = "ISO-8859-12", + [ 0x09 ] = "ISO-8859-13", + [ 0x0a ] = "ISO-8859-14", + [ 0x0b ] = "ISO-8859-15", + [ 0x10 ] = "fixme", + [ 0x11 ] = "UCS-2BE", // correct? 
+ [ 0x12 ] = "EUC-KR", + [ 0x13 ] = "GB2312", + [ 0x14 ] = "BIG5" +}; + +static iconv_t iconv_charset[0x20] = {0}; + +static int iconv_string(unsigned char ch, char *to, + const char *src, size_t len, + char *dst, size_t max) +{ + size_t ilen = (-1 != len) ? len : strlen(src); + size_t olen = max-1; + const char *from = psi_charset[ch]; + iconv_t ic = iconv_charset[ch]; + + if((iconv_t)-1 == ic) + return 0; + + if(NULL == ic) + { + ic = iconv_charset[ch] = iconv_open(to,from); + if (NULL == ic || (iconv_t)-1 == ic) + return 0; + } + + while (ilen > 0) { + if (-1 == iconv(ic,(char**)&src,&ilen,&dst,&olen)) { + /* skip + quote broken byte unless we are out of space */ + if (E2BIG == errno) + break; + if (olen < 4) + break; + sprintf(dst,"\\x%02x",(int)(unsigned char)src[0]); + src += 1; + dst += 4; + ilen -= 1; + olen -= 4; + } + } + dst[0] = 0; + return max-1 - olen; +} + +static int handle_control_8(const unsigned char *src, int slen, + unsigned char *dest, int dlen) +{ + int s,d; + + for (s = 0, d = 0; s < slen && d < dlen;) { + if (src[s] >= 0x80 && src[s] <= 0x9f) { + switch (src[s]) { + case 0x86: /* */ + case 0x87: /* */ + s++; + break; + case 0x1a: /* ^Z */ + dest[d++] = ' '; + s++; + break; + case 0x8a: /*
*/ + dest[d++] = '\n'; + s++; + break; + default: + s++; + } + } else { + dest[d++] = src[s++]; + } + } + return d; +} + +void mpeg_parse_psi_string(const unsigned char *src, int slen, + unsigned char *dest, int dlen) +{ + unsigned char *tmp; + int tlen,ch = 0; + + if (src[0] < 0x20) { + ch = src[0]; + src++; + slen--; + } + + memset(dest,0,dlen); + if (ch < 0x10) { + /* 8bit charset */ + tmp = malloc(slen); + tlen = handle_control_8(src, slen, tmp, slen); + iconv_string(ch, "UTF-8", tmp, tlen, dest, dlen); + free(tmp); + } else { + /* 16bit charset */ + iconv_string(ch, "UTF-8", src, slen, dest, dlen); + } +} Index: vdr-1.3.24/channels.c =================================================================== --- vdr-1.3.24.orig/channels.c +++ vdr-1.3.24/channels.c @@ -649,7 +649,7 @@ cString cChannel::ToText(const cChannel q = caidbuf; q += IntArrayToString(q, Channel->caids, 16); *q = 0; - asprintf(&buffer, "%s:%d:%s:%s:%d:%s:%s:%d:%s:%d:%d:%d:%d:%s\n", FullName, Channel->frequency, *Channel->ParametersToString(), *cSource::ToString(Channel->source), Channel->srate, vpidbuf, apidbuf, Channel->tpid, caidbuf, Channel->sid, Channel->nid, Channel->tid, Channel->rid, Channel->charset ); + asprintf(&buffer, "%s:%d:%s:%s:%d:%s:%s:%d:%s:%d:%d:%d:%d\n", FullName, Channel->frequency, *Channel->ParametersToString(), *cSource::ToString(Channel->source), Channel->srate, vpidbuf, apidbuf, Channel->tpid, caidbuf, Channel->sid, Channel->nid, Channel->tid, Channel->rid); } return cString(buffer, true); } @@ -684,8 +684,7 @@ bool cChannel::Parse(const char *s) char *vpidbuf = NULL; char *apidbuf = NULL; char *caidbuf = NULL; - char *charsetbuf = NULL; - int fields = sscanf(s, "%a[^:]:%d :%a[^:]:%a[^:] :%d :%a[^:]:%a[^:]:%d :%a[^:]:%d :%d :%d :%d :%a[^:]", &namebuf, &frequency, &parambuf, &sourcebuf, &srate, &vpidbuf, &apidbuf, &tpid, &caidbuf, &sid, &nid, &tid, &rid, &charsetbuf); + int fields = sscanf(s, "%a[^:]:%d :%a[^:]:%a[^:] :%d :%a[^:]:%a[^:]:%d :%a[^:]:%d :%d :%d :%d ", &namebuf, 
&frequency, &parambuf, &sourcebuf, &srate, &vpidbuf, &apidbuf, &tpid, &caidbuf, &sid, &nid, &tid, &rid); if (fields >= 9) { if (fields == 9) { // allow reading of old format @@ -792,18 +791,12 @@ bool cChannel::Parse(const char *s) shortName = strcpyrealloc(shortName, p); } name = strcpyrealloc(name, namebuf); - if(charsetbuf != NULL){ - charset = strcpyrealloc(charset, charsetbuf); - }else{ - charset = strcpyrealloc(charset, "ISO8859-15"); - } free(parambuf); free(sourcebuf); free(vpidbuf); free(apidbuf); free(caidbuf); free(namebuf); - free(charsetbuf); if (!GetChannelID().Valid()) { esyslog("ERROR: channel data results in invalid ID!"); return false; Index: vdr-1.3.24/eit.c =================================================================== --- vdr-1.3.24.orig/eit.c +++ vdr-1.3.24/eit.c @@ -210,21 +210,12 @@ cEIT::cEIT(cSchedules *Schedules, int So if (!rEvent) { if (ShortEventDescriptor) { char buffer[256]; - char tmpiconvbuf[256*4]; - ShortEventDescriptor->name.getText(buffer, sizeof(buffer)); - CharSetConv(tmpiconvbuf,sizeof(tmpiconvbuf), buffer, strlen(buffer)+1, channel->Charset(), "UTF8"); - pEvent->SetTitle(tmpiconvbuf); - - ShortEventDescriptor->text.getText(buffer, sizeof(buffer)); - CharSetConv(tmpiconvbuf,sizeof(tmpiconvbuf), buffer, strlen(buffer)+1, channel->Charset(), "UTF8"); - pEvent->SetShortText(tmpiconvbuf); + pEvent->SetTitle(ShortEventDescriptor->name.getText(buffer, sizeof(buffer))); + pEvent->SetShortText(ShortEventDescriptor->text.getText(buffer, sizeof(buffer))); } if (ExtendedEventDescriptors) { char buffer[ExtendedEventDescriptors->getMaximumTextLength(": ") + 1]; - char tmpiconvbuf[sizeof(buffer)*4]; - ExtendedEventDescriptors->getText(buffer, sizeof(buffer), ": "); - CharSetConv(tmpiconvbuf,sizeof(tmpiconvbuf), buffer, strlen(buffer)+1, channel->Charset(), "UTF8"); - pEvent->SetDescription(tmpiconvbuf); + pEvent->SetDescription(ExtendedEventDescriptors->getText(buffer, sizeof(buffer), ": ")); } } delete ExtendedEventDescriptors; Index: 
vdr-1.3.24/channels.h =================================================================== --- vdr-1.3.24.orig/channels.h +++ vdr-1.3.24/channels.h @@ -113,7 +113,6 @@ private: int tid; int sid; int rid; - char *charset; int number; // Sequence number assigned on load bool groupSep; char polarization; @@ -143,7 +142,6 @@ public: const char *ShortName(bool OrName = false) const { return (OrName && isempty(shortName)) ? name : shortName; } const char *Provider(void) const { return provider; } const char *PortalName(void) const { return portalName; } - const char *Charset(void) const { return charset; } int Frequency(void) const { return frequency; } ///< Returns the actual frequency, as given in 'channels.conf' int Transponder(void) const; ///< Returns the transponder frequency in MHz, plus the polarization in case of sat static int Transponder(int Frequency, char Polarization); ///< builds the transponder from the given Frequency and Polarization