<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:series="http://unfoldingneurons.com/"
	>

<channel>
	<title>The Software Developer &#187; Code</title>
	<atom:link href="http://blog.hachmeister.org/category/code/feed/" rel="self" type="application/rss+xml" />
	<link>http://blog.hachmeister.org</link>
	<description></description>
	<pubDate>Thu, 04 Jun 2009 16:11:53 +0000</pubDate>
	<generator>http://wordpress.org/?v=2.7.1</generator>
	<language>de</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
			<item>
		<title>Ein einfacher C++ Tokenizer</title>
		<link>http://blog.hachmeister.org/2009/03/ein-einfacher-cpp-tokenizer/</link>
		<comments>http://blog.hachmeister.org/2009/03/ein-einfacher-cpp-tokenizer/#comments</comments>
		<pubDate>Wed, 25 Mar 2009 12:56:16 +0000</pubDate>
		<dc:creator>Karsten</dc:creator>
		
		<category><![CDATA[Code]]></category>

		<category><![CDATA[c++]]></category>

		<category><![CDATA[tokenizer]]></category>

		<guid isPermaLink="false">http://blog.hachmeister.org/?p=82</guid>
		<description><![CDATA[Sehr viele Daten müssen erstmal aus einer Datei eingelesen werden. Dabei kann ein Tokenizer gute Dienste leisten, um strukturierte Daten in die entsprechenden Stücke zu unterteilen. 
#ifndef TOKENIZER_H_
#define TOKENIZER_H_

#include &#60;string&#62;

using namespace std;

enum DelimType
{
  WHITESPACE = 0, ORDINARY = 1
};

class Tokenizer
{
public:
  Tokenizer(const string text);
  virtual ~Tokenizer();

  bool has_next();
  string next();

  void add_delims(int begin, int end, DelimType [...]]]></description>
			<content:encoded><![CDATA[<p>Sehr viele Daten müssen erstmal aus einer Datei eingelesen werden. Dabei kann ein Tokenizer gute Dienste leisten, um strukturierte Daten in die entsprechenden Stücke zu unterteilen. <span id="more-82"></span></p>
<pre class="prettyprint lang-cpp">#ifndef TOKENIZER_H_
#define TOKENIZER_H_

#include &lt;string&gt;

using namespace std;

// Classification the tokenizer assigns to each character code.
enum DelimType
{
  WHITESPACE = 0,  // separator: never part of a token, ends a started token
  ORDINARY = 1     // token character: appended to the token being built
};

class Tokenizer
{
public:
  Tokenizer(const string text);
  virtual ~Tokenizer();

  bool has_next();
  string next();

  void add_delims(int begin, int end, DelimType type);

private:
  void find_token();

private:
  unsigned int _pos;
  string _text;
  string _token;
  bool _token_available;
  DelimType _delims[256];
};

#endif</pre>
<pre class="prettyprint lang-cpp">#include &lt;iostream&gt;

#include "Tokenizer.h"

// Builds a tokenizer over text. Initially every character code is a
// separator except ASCII digits and letters, which form tokens.
Tokenizer::Tokenizer(const string text)
  : _pos(0),
    _text(text),
    _token(),
    _token_available(false)
{
  add_delims(0x00, 0xFF, WHITESPACE);  // start with everything as separator
  add_delims(0x30, 0x39, ORDINARY);    // ASCII '0'..'9'
  add_delims(0x41, 0x5A, ORDINARY);    // ASCII 'A'..'Z'
  add_delims(0x61, 0x7A, ORDINARY);    // ASCII 'a'..'z'
}

// Nothing to release explicitly; the destructor body is intentionally empty.
Tokenizer::~Tokenizer() {}

bool Tokenizer::has_next()
{
  find_token();

  if (_token_available)
  {
    return true;
  }

  return false;
}

// Returns the next token and marks it as consumed.
// Throws the string literal "No next token!" when no token is left.
string Tokenizer::next()
{
  find_token();

  if (!_token_available)
  {
    throw "No next token!";
  }

  _token_available = false;
  return _token;
}

void Tokenizer::add_delims(int begin, int end, DelimType type)
{
  if (begin &lt; 0)
  {
    begin = 0;
  }

  if (end &gt; 255)
  {
    end = 255;
  }

  for (int i = begin; i &lt;= end; ++i)
  {
    _delims[i] = type;
  }
}

void Tokenizer::find_token()
{
  if (_pos &gt;= _text.length() || _token_available)
  {
    return;
  }

  _token.clear();
  _token_available = false;

  while (_pos &lt; _text.length())
  {
    char chr = _text.at(_pos);
    DelimType type = _delims[(int) chr];
    _pos++;

    if (type == ORDINARY)
    {
      _token.append(1, chr);
      _token_available = true;
    }
    else if (_token_available &amp;&amp; type == WHITESPACE)
    {
      break;
    }
  }
}</pre>
]]></content:encoded>
			<wfw:commentRss>http://blog.hachmeister.org/2009/03/ein-einfacher-cpp-tokenizer/feed/</wfw:commentRss>
		</item>
	</channel>
</rss>
