/***************************************************************************
 *
 * def.h
 *
 * $Id: def.h 648752 2008-04-16 17:01:56Z faridz $
 *
 ***************************************************************************
 *
 * Licensed to the Apache Software  Foundation (ASF) under one or more
 * contributor  license agreements.  See  the NOTICE  file distributed
 * with  this  work  for  additional information  regarding  copyright
 * ownership.   The ASF  licenses this  file to  you under  the Apache
 * License, Version  2.0 (the  "License"); you may  not use  this file
 * except in  compliance with the License.   You may obtain  a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the  License is distributed on an  "AS IS" BASIS,
 * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
 * implied.   See  the License  for  the  specific language  governing
 * permissions and limitations under the License.
 *
 * Copyright 2001-2006 Rogue Wave Software.
 * 
 **************************************************************************/

#ifndef RWSTD_UTIL_DEF_H_INCLUDED
#define RWSTD_UTIL_DEF_H_INCLUDED

#include <list>
#include <locale>              // for ctype_base
#include <map>
#include <set>
#include <string>
#include <vector>

#include <cassert>             // for assert()
#include <climits>             // for UCHAR_MAX
#include <cstddef>             // for size_t

#include <loc/_localedef.h>

#include "scanner.h"
#include "charmap.h"


// Def holds the state for, and declares the operations that process,
// a locale definition file and that write out the individual LC_*
// category files (see the write_*() members below)
class Def
{
public:

    // the constructor takes the name of the file that holds the locale
    // definition, the name of the locale being created, a reference to
    // the character map, and the value of the no_position flag (see
    // no_position_ below)
    Def(const char* filename,
        const char* out_name, 
        Charmap& charmap, bool no_position);
    
    // free up all the dynamically allocated memory
    ~Def ();

    // start point for processing the input files
    void process_input ();

    // write the LC_CTYPE file to the specified directory
    void write_ctype(std::string dir_name);

    // NOTE(review): presumably writes the codecvt file to the specified
    // directory, by analogy with the other write_*() members -- confirm
    void write_codecvt(std::string dir_name);

    // write the LC_NUMERIC file to the specified directory
    void write_numeric(std::string dir_name);

    // write the LC_MONETARY file to the specified directory
    void write_monetary(std::string dir_name);

    // write the LC_TIME file to the specified directory
    void write_time(std::string dir_name);

    // write the LC_MESSAGES file to the specified directory
    void write_messages(std::string dir_name);

    // write the LC_COLLATE file to the specified directory
    void write_collate(std::string dir_name);
 
    // dump the collate information
    void dump_collate ();

    // have warnings occurred (public data member)
    bool warnings_occurred_;

    // was the content of the locale definition file scanned ahead
    // (public data member)
    bool scan_ahead_;

    // NOTE(review): collate_entry_t and collate_elem_t are aliases
    // for the same pair type
    typedef Scanner::token_t                token_t;
    typedef std::pair<token_t,token_t>      token_pair_t;
    typedef std::list<token_t>              token_list_t;
    typedef std::pair<token_t,token_list_t> collate_entry_t;
    typedef std::pair<token_t,token_list_t> collate_elem_t;
    typedef std::list<collate_entry_t>      collate_entry_list_t;
    struct collate_section_t;

    // a named collate section: its name, a list of order tokens,
    // and the list of its entries
    struct collate_section_t {
        std::string          name;
        token_list_t         order;
        collate_entry_list_t entries;
    };

private:
    // forward declarations of the nested structs defined further below;
    // both are granted friend access to Def
    struct ce_info_t;
    struct collate_info_t;
    friend struct ce_info_t;
    friend struct collate_info_t;

    // a struct used to represent the weights for each collating element
    struct Weights_t {
        unsigned char size;
        unsigned int weight[256];
    };
    
    /////////////////////////////////////////////////////////////////////
    // collate preprocessing information

    token_list_t  script_list_;
    token_list_t  cs_list_;
    token_list_t  sym_list_;
    std::list<collate_elem_t>     ce_list_;
    std::list<collate_section_t>  section_list_;

    // preprocessing for collate section
    void preprocess_collate ();
    void preprocess_order   ();
    void preprocess_reorder ();
    void preprocess_reorder_section ();
    void preprocess_collation_definitions();

    // NOTE(review): near-duplicate of process_collate_definition()
    // declared below; this one takes the third argument by value rather
    // than by reference -- confirm that both are defined and used
    void process_collation_definition ( bool, collate_entry_t&,
                                        unsigned int, unsigned int);
    unsigned int process_order_stmt (collate_section_t&);

    bool insert_entries (token_t&, collate_entry_list_t&);
    void remove_entry   (collate_entry_t&);
    void list_collate   ();

    // automatically fill any ctype categories that depend upon characters
    // being defined in other categories
    void auto_fill ();

    // copy a category from one locale into the current locale
    void copy_category(int cat, std::string name);

    // copy a file
    void copy_file(const std::string &name, const std::string &outname);
    
    // process absolute ellipsis
    std::size_t process_abs_ellipsis (const Scanner::token_t&,
                                      std::ctype_base::mask);

    // process hexadecimal symbolic ellipsis, decimal symbolic ellipsis,
    // and double increment hexadecimal symbolic ellipsis
    std::size_t process_sym_ellipsis (const std::string&,
                                      const std::string&,
                                      Scanner::token_id,
                                      std::ctype_base::mask);

    // parse the era string
    void parse_era (const token_t&);

    // process the ctype category specified by the mask argument
    // (e.g. std::ctype_base::upper)
    // NOTE(review): the original comment contained an unfinished
    // "with the exception of" clause -- confirm what is excluded
    void process_mask (std::ctype_base::mask, const char*);

    // process the ctype toupper and tolower definitions
    void process_upper_lower(Scanner::token_id tok);

    // process the ctype section of the locale definition file
    void process_ctype();

    // process transliteration information
    void process_xlit ();

    void process_xlit_statement (std::size_t&);

    // process the collate section of the locale definition file
    void process_collate ();

    // processing of collating definition statements
    void process_collate_definition (bool, collate_entry_t&,
                                     unsigned int&, unsigned int);

    // helper function for process_collate() that processes the collation
    // order of the collating elements
    void process_order      (collate_section_t&, unsigned int&);

    // helper function for process_order() that processes the sequence
    // of weights for each collating element
    void process_weights(collate_entry_t&);

    // get the next weight
    bool get_weight (token_t&, Weights_t*, int);

    // add a symbolic name to the collation array
    void add_to_coll (const wchar_t val, 
                      const Weights_t* weight_template, 
                      const unsigned int coll_value, 
                      const std::vector<bool>& ordinal_weights,
                      bool undefined_value);

    // add missing values when the UNDEFINED keyword is found or at the
    // end of the collation array if UNDEFINED is not found
    void add_missing_values (const std::vector<bool> &ordinal_weights,
                             const Weights_t* weights_template,
                             unsigned int &coll_value, bool give_warning);

    // process the monetary section of the locale definition file
    void process_monetary();

    // create the monetary formats
    void create_format (char [4], char, char, char, bool);

    // process the numeric section of the locale definition file
    void process_numeric();

    // extracts and converts an array of strings such as those
    // representing the names of weekdays in the LC_TIME section
    Scanner::token_t
    extract_string_array (std::string*, std::wstring*, std::size_t);


    // process the time section of the locale definition file
    void process_time();

    // process the messages section of the locale definition file
    void process_messages();

    std::string convert_string   (const std::string&);
    std::wstring convert_wstring (const token_t&);
    std::wstring convert_wstring (const std::vector<std::string>&);

    void strip_pair(const std::string&, std::string&, std::string&);

    // encode a wchar_t into utf8 encoding
    std::string utf8_encode (wchar_t ch);

    // convert a utf8 encoded string to the encoding for this locale
    // NOTE(review): the parameter is a single wchar_t, not a string --
    // confirm the comment against the implementation
    std::string convert_to_ext (wchar_t val);

    bool get_n_val (const Scanner::token_t&, unsigned char &val);
    bool get_w_val (const Scanner::token_t&, wchar_t &val);

    // initialize the coll_map with all the characters in the codeset
    void init_coll_map();

    void gen_n_to_w_coll_tables (const std::string &charp, 
                                unsigned int tab_num);

    void gen_w_to_n_coll_tables (const std::string &charp, 
                                 unsigned int tab_num);

    // the next usable offset for collating elements greater than UCHAR_MAX
    unsigned int next_offset_;

    Scanner::token_t next;
    
    // the name of the locale we are creating
    std::string output_name_;
        
    // the charmap used to process the character map definition file
    Charmap& charmap_;

    // the scanner used to process the locale definition file
    Scanner scanner_;

    bool ctype_symlink_;
    std::string ctype_filename_;
    

    // maps characters to a mask value
    std::map<wchar_t, unsigned int> mask_;
    
    // maps characters to their lower case representation
    std::map<wchar_t, wchar_t> lower_;

    // maps characters to their upper case representation
    std::map<wchar_t, wchar_t> upper_;

    typedef std::map<std::string, unsigned int>::iterator mb_char_off_map_iter;

    // a table of offsets with one slot for every unsigned char value
    struct codecvt_offset_tab_t {
        unsigned int off [UCHAR_MAX + 1];
    };

    void create_wchar_utf8_table ();
    std::map<std::string, std::string> wchar_utf8_to_ext_;
    typedef std::map<std::string, std::string>::iterator wchar_utf8_iter;

    void gen_valid_coll_wchar_set ();

    std::set<std::string> valid_coll_wchar_set_;
    typedef std::set<std::string>::iterator valid_coll_wchar_set_iter;
    std::set<std::string> valid_codecvt_wchar_set_;
    typedef std::set<std::string>::iterator valid_codecvt_wchar_set_iter;

    typedef std::map<unsigned, const codecvt_offset_tab_t*>
    codecvt_offsets_map_t;

    // generates conversion tables of all valid multibyte characters
    // from a multibyte character map populated from the character
    // set description file
    std::size_t
    gen_mbchar_tables (codecvt_offsets_map_t&,
                       std::map<std::string, unsigned>&,
                       const std::string& = "",
                       unsigned = 0);

    std::size_t
    gen_wchar_tables (codecvt_offsets_map_t&,
                      const std::string& = "",
                      unsigned = 0);

    std::size_t
    gen_utf8_tables (codecvt_offsets_map_t&,
                     std::map<std::string, unsigned>&,
                     const std::string& = "",
                     unsigned = 0);

    std::set<std::string> valid_coll_mb_set_;

    void gen_valid_coll_mb_set();

    // generation of transliteration tables
    void gen_xlit_data ();

    // specifies if the locale file has already been written such as when
    // the "copy" directive is used in a locale definition file
    bool ctype_written_, codecvt_written_, collate_written_, time_written_,
        num_written_, mon_written_, messages_written_;
    bool ctype_def_found_, collate_def_found_, 
        time_def_found_, num_def_found_, mon_def_found_, messages_def_found_;

    // specifies if the keyword UNDEFINED is used in the LC_COLLATE definition
    bool undefined_keyword_found_;

    // no_position_ is set by the "--no_position" command line option
    // when true forward,position orders will be treated like forward orders
    bool no_position_;

    // collate maps

    // a first offset plus a table of offsets with one slot for every
    // unsigned char value
    struct offset_tab_t {
        int first_offset;
        unsigned int off[UCHAR_MAX + 1];
    };

    std::map<unsigned int, offset_tab_t> char_offs_;
    typedef std::map<unsigned int, offset_tab_t>::iterator char_offs_iter;

    std::map<unsigned int, offset_tab_t> w_to_n_coll_;
    typedef std::map<unsigned int, offset_tab_t>::iterator w_to_n_coll_iter;

    unsigned int next_tab_num_;
    unsigned int next_wchar_coll_tab_num_;


    // same as offset_tab_t with the addition of a last offset
    struct ce_offset_tab_t {
        int first_offset;
        int last_offset;
        unsigned int off[UCHAR_MAX + 1];
    };
    std::map<unsigned int, ce_offset_tab_t> n_ce_offs_;
    typedef std::map<unsigned int, ce_offset_tab_t>::iterator n_ce_offs_iter;

    std::map<unsigned int, ce_offset_tab_t> w_ce_offs_;
    typedef std::map<unsigned int, ce_offset_tab_t>::iterator w_ce_offs_iter;
    
    std::set<std::string> valid_n_ce_set;
    typedef std::set<std::string>::iterator valid_n_ce_set_iter;
    
    // NOTE(review): both gen_*_ce_tables() functions take the set by
    // value, so it is copied on every call -- confirm this is intended
    void gen_n_ce_tables (const std::set<std::string>,
                          unsigned int, unsigned int);
    unsigned int next_n_ce_tab_num_;
    void gen_w_ce_tables (const std::set<std::string>,
                          unsigned int, unsigned int);
    unsigned int next_w_ce_tab_num_;

    std::map<std::string, std::string>ce_sym_map_;
    std::map<std::string, std::string>ce_wsym_map_;
    typedef std::map<std::string, std::string>::iterator ce_sym_map_iter;
    // off_mapr_ maps an offset value to the symbol name or collating element
    std::map<unsigned int, std::string> off_mapr_;

    // cs_map_ maps a collating symbol name to a collation value
    std::map<std::string, unsigned int> cs_map_;
    typedef std::map<std::string, unsigned int>::iterator cs_map_iter;

    // transliteration information
    struct xlit_offset_table {
        unsigned int offset_table [UCHAR_MAX + 1];
    };
    typedef struct xlit_offset_table  xlit_offset_table_t;
    typedef std::map<wchar_t,std::list<std::string> >  xlit_map_t;
    typedef std::map<wchar_t, unsigned int>            xlit_data_offset_map_t;
    typedef std::map<unsigned int,xlit_offset_table_t> xlit_table_map_t;
    xlit_map_t                    xlit_map_;
    xlit_data_offset_map_t        xlit_data_offset_map_;
    xlit_table_map_t              xlit_table_map_;

    // the collate_info_t struct contains information concerning the collation
    // of each character
    struct collate_info_t{
        unsigned int offset;
        unsigned int coll_val;
        unsigned int order;
        Weights_t *weights;
    };

    // we need one collate_info_t to hold information about the undefined
    // characters.  All the other characters have collate_info_ts that are
    // located in the coll_map.
    collate_info_t undef_char_info_;

    // the ce_info_t struct contains information concerning the collation
    // of a collating element.

    struct ce_info_t {
        unsigned int offset;
        unsigned int coll_val;
        unsigned int order;
        Weights_t *weights;
        std::wstring ce_wstr;
    };

    // The coll_map_ contains a mapping from the wide char value to the
    // collation information about that value.
    std::map<wchar_t, collate_info_t> coll_map_;
    typedef std::map<wchar_t, collate_info_t>::iterator coll_map_iter;

    // the ce_map_ contains a mapping from the symbolic collating element
    // name to the collation information about that element
    std::map <std::string, ce_info_t> ce_map_;
    typedef std::map <std::string, ce_info_t>::iterator ce_map_iter;
    
    // iterator type definitions for the maps
    typedef std::map<wchar_t, unsigned int>::iterator mask_iter;
    typedef std::map<wchar_t, wchar_t>::iterator upper_iter;
    typedef std::map<wchar_t, wchar_t>::iterator lower_iter;
    typedef std::map< std::string, unsigned char >::const_iterator n_cmap_iter;
    typedef std::map<std::string, wchar_t>::const_iterator mb_cmap_iter;
    typedef std::map<wchar_t, std::string>::const_iterator rmb_cmap_iter;
    typedef std::map<std::string, wchar_t >::const_iterator w_cmap_iter;
    typedef std::map<wchar_t, std::string >::const_iterator rw_cmap_iter;
    typedef std::map<unsigned int, std::string>::iterator off_mapr_iter;
    typedef std::map<std::string, wchar_t>::const_iterator ucs4_cmap_iter;

    typedef std::list<std::string>::const_iterator symnames_list_iter;

    // the structures used to hold the offsets for each locale category
    // and any non-pointer locale information
    _RW::__rw_punct_t num_punct_out_;
    _RW::__rw_ctype_t ctype_out_;
    _RW::__rw_time_t time_out_;
    _RW::__rw_collate_t collate_out_;
    _RW::__rw_mon_t mon_out_;
    _RW::__rw_num_t num_out_;
    _RW::__rw_messages_t messages_out_;

    // structures used for internally holding locale information
    // LC_CTYPE structures


    struct ctype_t {
        struct mask_elm {
            wchar_t ch;             // the wide character value
            unsigned int mask;      // the mask for that character
        };
        struct upper_elm {
            wchar_t lower;    // the lower case wide character
            wchar_t upper;    // the upper case wide character 
        };
        struct lower_elm {
            wchar_t upper;    // the upper case wide character
            wchar_t lower;    // the lower case wide character 
        };

        char max_mb_s;                  // the max number of bytes in a char
        upper_elm* wtoupper_tab;        // the wide char to_upper table
        lower_elm* wtolower_tab;        // the wide char to_lower table
        mask_elm* wmask_tab;            // the wide char mask_table

    };
    

    // a single era: its name and format in both narrow and wide form,
    // along with the era_t structure for it
    struct era_st {
        std::string name;     
        std::string fmt;
        std::wstring wname;
        std::wstring wfmt;
        _RW::__rw_time_t::era_t era_out;
    };

    std::list<era_st> era_list_;
    typedef std::list<era_st>::iterator era_list_iter;

    // LC_COLLATE structure (currently has no members)
    struct collate_t {
    } ;


    // LC_MONETARY structure
    struct mon_t {
        std::string int_curr_symbol;    // narrow char* int_curr_symbol
        std::string currency_symbol;    // narrow char* currency_symbol
        std::string mon_decimal_point;  // narrow char* mon_decimal_point
        std::string mon_thousands_sep;  // narrow char* mon_thousands_sep
        std::string mon_grouping;       // narrow char* mon_grouping
        std::string positive_sign;      // narrow char* positive_sign
        std::string negative_sign;      // narrow char* negative_sign
        std::wstring wint_curr_symbol;   // wide wchar_t* int_curr_symbol
        std::wstring wcurrency_symbol;   // wide wchar_t* currency_symbol
        std::wstring wmon_decimal_point; // wide wchar_t* mon_decimal_point
        std::wstring wmon_thousands_sep; // wide wchar_t* mon_thousands_sep
        std::wstring wpositive_sign;     // wide wchar_t* positive_sign
        std::wstring wnegative_sign;     // wide wchar_t* negative_sign
    };


    // LC_NUMERIC structure
    struct num_t {
        std::string decimal_point;  // narrow char* decimal_point
        std::string thousands_sep;  // narrow char* thousands_sep
        std::string grouping;       // narrow char* grouping
        std::string truename;       // narrow char* truename
        std::string falsename;      // narrow char* falsename

        std::wstring wdecimal_point;   // wide wchar_t* decimal_point
        std::wstring wthousands_sep;   // wide wchar_t* thousands_sep     
        std::wstring wtruename;        // wide wchar_t* truename
        std::wstring wfalsename;       // wide wchar_t* falsename
    };

    // list to hold the alternate digits
    struct alt_digit_t {
        std::string n_alt_digit;
        std::wstring w_alt_digit;
        unsigned int n_offset;
        unsigned int w_offset;
    };
    std::list<alt_digit_t> alt_digits_;
    typedef std::list<alt_digit_t>::iterator alt_digits_iter;

    // LC_TIME structure
    // NOTE(review): within the scope of Def this nested name hides any
    // global ::time_t that happens to be visible
    struct time_t {
        std::string abday[7];      // narrow array of abbreviated days
        std::string day[7];        // narrow array of days
        std::string abmon[12];     // narrow array of abbreviated months
        std::string mon[12];       // narrow array of months
        std::string am_pm[2];      // narrow array of am/pm specifiers
        std::string d_t_fmt;       // narrow date and time format string
        std::string d_fmt;         // narrow date format string
        std::string t_fmt;         // narrow time format string
        std::string t_fmt_ampm;    // narrow time format string with am/pm
        std::string era_d_t_fmt;   // narrow era date and time format string
        std::string era_d_fmt;     // narrow era date format string
        std::string era_t_fmt;     // narrow era time format string


        std::wstring wabday[7];      // wide array of abbreviated days
        std::wstring wday[7];        // wide array of days
        std::wstring wabmon[12];     // wide array of abbreviated months
        std::wstring wmon[12];       // wide array of months
        std::wstring wam_pm[2];      // wide array of am/pm specifiers
        std::wstring wd_t_fmt;       // wide date and time format string
        std::wstring wd_fmt;         // wide date format string
        std::wstring wt_fmt;         // wide time format string
        std::wstring wt_fmt_ampm;    // wide time format string with am/pm
        std::wstring wera_d_t_fmt;   // wide era date and time format string
        std::wstring wera_d_fmt;     // wide era date format string
        std::wstring wera_t_fmt;     // wide era time format string
    } ;

    // LC_MESSAGES structure
    struct messages_t {
        std::string yesexpr;
        std::string noexpr;

        std::wstring wyesexpr;
        std::wstring wnoexpr;
    };

    // one instance of each of the internal category structures above
    messages_t messages_st_;
    time_t time_st_;
    ctype_t ctype_st_;
    mon_t mon_st_;
    num_t num_st_;
    collate_t collate_st_;


};


// strips the leading double quote from str together with the last
// double quote and anything that follows it, and returns the characters
// in between; str must be non-empty and must begin with a double quote
inline std::string strip_quotes (const std::string& str)
{
    assert (!str.empty ());
    assert ('\"' == str [0]);

    // position of the end-quote; this is 0 when the only double quote
    // in str is the leading one
    const std::string::size_type end_quote = str.rfind ('\"');

    // number of characters between the two quotes; when end_quote is 0
    // the unsigned subtraction wraps around to npos so that the whole
    // remainder of str is taken
    const std::string::size_type length = end_quote - 1;

    return str.substr (1, length);
}


#endif   // RWSTD_UTIL_DEF_H_INCLUDED