精华内容
下载资源
问答
  • 用C++进行哈夫曼编码计算信源熵及编码效率 统计各种概率
  • 哈夫曼编码与压缩效率分析

    千次阅读 2017-07-09 17:51:10
    1、本实验中Huffman编码算法  (1)将文件以ASCII字符流的形式读入,统计每个符号的发生频率;  (2)将所有文件中出现过的字符按照频率从小到大的顺序排列;  (3)每一次选出最小的两个值,作为二叉树的两个叶子节点...

    一、实验原理 
    1
    、本实验中Huffman编码算法 
    (1)
    将文件以ASCII字符流的形式读入,统计每个符号的发生频率
    (2)
    将所有文件中出现过的字符按照频率从小到大的顺序排列
    (3)
    每一次选出最小的两个值,作为二叉树的两个叶子节点,将和作为它们的根节点,这两个叶子节点不再参与比较,新的根节点参与比较
    (4)
    重复3,直到最后得到和为1的根节点
    (5)
    将形成的二叉树的左节点标0,右节点标1,把从最上面的根节点到最下面的叶子节点途中遇到的01序列串起来,得到了各个字符的编码表示。 
    2
    Huffman编码的数据结构设计,在程序实现中使用一种叫做二叉树的数据结构实现Huffman编码。 
    (1)
    哈夫曼节点结构
    typedef struct huffman_node_tag
    {
       unsigned char isLeaf;//
    是否为树叶 
       unsigned long count;//
    节点代表的符号加权和
       struct huffman_node_tag *parent;//
    父节点指针
       union 
       {
           struct 
           {
            struct huffman_node_tag *zero, *one; //
    子节点指针,分别代表0,1子节点指针 
            };
           unsigned char symbol;//
    节点代表的符号
       };
    } huffman_node;

    (2)哈夫曼码结构

    typedef struct huffman_code_tag 
    {
    unsigned long numbits;//
    该码所用的比特数 
    unsigned char *bits; //
    指向该码比特串的指针
    } huffman_code;

     二、主函数代码

    /*
     *  huffman - Encode/Decode files using Huffman encoding.
     *  http://huffman.sourceforge.net
     *  Copyright (C) 2003  Douglas Ryan Richardson; Gauss Interprise, Inc
     *
     *  This library is free software; you can redistribute it and/or
     *  modify it under the terms of the GNU Lesser General Public
     *  License as published by the Free Software Foundation; either
     *  version 2.1 of the License, or (at your option) any later version.
     *
     *  This library is distributed in the hope that it will be useful,
     *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     *  Lesser General Public License for more details.
     *
     *  You should have received a copy of the GNU Lesser General Public
     *  License along with this library; if not, write to the Free Software
     *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     */


    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <assert.h>
    #include "huffman.h"


    #define DEBUG true


    #ifdef WIN32
    #include <winsock2.h>
    #include <malloc.h>
    #define alloca _alloca
    #else
    #include <netinet/in.h>
    #endif




    typedef struct huffman_node_tag
    {
    unsigned char isLeaf;/*whether this node is leaf*/
    unsigned long count;/*how many times the symbol
    occured in the information source*/
    struct huffman_node_tag *parent;/*parent node indicator*/


    union
    {/*to save storage.If the node is a leaf,the union stands for the Sequencesymbol;
    else the union stands for the indicator of the left and right childnode.
    */
    struct
    {
    struct huffman_node_tag *zero, *one;
    };
    unsigned char symbol;
    };
    } huffman_node;


    typedef struct huffman_code_tag
    {
    /* The length of this code in bits. */
    unsigned long numbits;


    /* The bits that make up this code. The first
      bit is at position 0 in bits[0]. The second
      bit is at position 1 in bits[0]. The eighth
      bit is at position 7 in bits[0]. The ninth
      bit is at position 0 in bits[1]. */
    unsigned char *bits;
    } huffman_code;


    //step2:add by yzhang for huffman statistics
    typedef struct huffman_statistics_result
    {
    float freq[256];
    unsigned long numbits[256];
    unsigned char bits[256][100];
    }huffman_stat;


    /*huffman_stat *init_huffstatistics()
    { huffman_stat *p;
        int i;
    p = (huffman_stat*)malloc(sizeof(huffman_stat));
    p->freq = (float *)malloc(sizeof(float)*256 );
    p->numbits = (unsigned long *) malloc(sizeof(unsigned long)*256);
        for (i=0 ; i<256;i++)
    p->bits[i] = (unsigned char *)malloc(sizeof(unsigned char)*100); 
    return p;
    }*/
    //end by yzhang




    /* A ceiling funtion for numbits/8,
       used to calculate the byte number of huffman_code.
    */
    static unsigned long
    numbytes_from_numbits(unsigned long numbits)
    {
    return numbits / 8 + (numbits % 8 ? 1 : 0);
    }


    /*
     * get_bit returns the ith bit in the bits array
     * in the 0th position of the return value.
     */
    static unsigned char
    get_bit(unsigned char* bits, unsigned long i)
    {
    return (bits[i / 8] >> i % 8) & 1;
    }
    /*In order to reverse the bit order of the whole code.*/
    static void
    reverse_bits(unsigned char* bits, unsigned long numbits)
    {
    unsigned long numbytes = numbytes_from_numbits(numbits);
    unsigned char *tmp =
       (unsigned char*)alloca(numbytes);
    /*The funciton "alloca" applies for space on the stack
    and is released automatically.*/
    unsigned long curbit;
    long curbyte = 0;

    memset(tmp, 0, numbytes);


    for(curbit = 0; curbit < numbits; ++curbit)
    {
    unsigned int bitpos = curbit % 8;
    /*bitpos:the position,where curbit 
    is located in the current byte.*/


    if(curbit > 0 && curbit % 8 == 0)
    ++curbyte;
    /*Get inverted bit and put in the 0th bit,
    then shift left to the positive bit position. */
    tmp[curbyte] |= (get_bit(bits, numbits - curbit - 1) << bitpos);
    }


    memcpy(bits, tmp, numbytes);
    /*Copy "numbytes" bytes from the begining of tmp to "bits"*/
    }


    /*
     * new_code builds a huffman_code from a leaf in
     * a Huffman tree.
     */
    static huffman_code*
    new_code(const huffman_node* leaf)
    {
    /* Build the huffman code by walking up to
    * the root node and then reversing the bits,
    * since the Huffman code is calculated by
    * walking down the tree. */
    unsigned long numbits = 0;/*code length*/
    unsigned char* bits = NULL;/*the first address of the code*/
    huffman_code *p;
    /*
    *leaf!=NULL: 
       The current huffman_node exists,which needs to be encoded.
    *leaf->parent!=NULL:
       The current huffman_node has parent,indicating that
    the encoding process of the current node
    (backtracking from leaf to root)
    has not yet completed.
    */
    while(leaf && leaf->parent)
    {
    huffman_node *parent = leaf->parent;
    unsigned char cur_bit = (unsigned char)(numbits % 8);
    /*cur_bit:position of the encoding bit in the current byte,
    ranging from 0 to 7.*/
    unsigned long cur_byte = numbits / 8;
    /*cur_byte:
    Currently,how many complete bytes has been encoded.*/


    /* cur_bit==0:The encoding bit is the first bit of a byte,
    and the last byte has completely encoded, 
    so it needs to build a new coding byte.
      If we need another byte to hold the code,
      then allocate it. ==> newSize=cur_byte+1
      */
    if(cur_bit == 0)
    {
    size_t newSize = cur_byte + 1;
    bits = (char*)realloc(bits, newSize);
    bits[newSize - 1] = 0; /* Initialize the new byte. */
    /*
    Function"realloc" is different from function "malloc".
    "Realloc" can reallocate new space in the case of
    keeping the original data unchanged.
    The original data lies in the front of new space.
    (The space's address may be changed.)
    */
    }


    /* If a "one" must be added then or it in. If a zero
    * must be added then do nothing, since the byte
    * was initialized to zero. */
    if(leaf == parent->one)
    bits[cur_byte] |= 1 << cur_bit;
    /*Shift 1 left to the encoding bit.*/
    ++numbits;
    leaf = parent;/*backtracking*/
    }


    if(bits)
    reverse_bits(bits, numbits);
    /*From the above it can be seen that the encoding process
    is from leaf backtracking to root.(Leaf lies in the low bit,and root in the high.)
    So the bit order of code is inverted.
    Function "reverse_bits" is used to reverse the bit order of the whole code.*/


    p = (huffman_code*)malloc(sizeof(huffman_code));
    p->numbits = numbits;
    p->bits = bits;
    /*The bytes number is integer.
    Coresponding with numbits,the real codeword can be obtained*/
    return p;
    }


    #define MAX_SYMBOLS 256
    typedef huffman_node* SymbolFrequencies[MAX_SYMBOLS];
    /*indicator array:save huffman tree*/
    typedef huffman_code* SymbolEncoder[MAX_SYMBOLS];
    /*indicator array:256 incators of huffman_code,whose position is 
    coresponding to the order of ASCII, used to save code table.*/
    /*New a leaf node and initialize it.*/
    static huffman_node*
    new_leaf_node(unsigned char symbol)
    {
    huffman_node *p = (huffman_node*)malloc(sizeof(huffman_node));
    p->isLeaf = 1;
    p->symbol = symbol;
    p->count = 0;
    p->parent = 0;
    return p;
    }


    static huffman_node*
    new_nonleaf_node(unsigned long count, huffman_node *zero, huffman_node *one)
    {
    huffman_node *p = (huffman_node*)malloc(sizeof(huffman_node));
    p->isLeaf = 0;
    p->count = count;
    p->zero = zero;
    p->one = one;
    p->parent = 0;

    return p;
    }


    static void
    free_huffman_tree(huffman_node *subtree)
    {
    if(subtree == NULL)
    return;


    if(!subtree->isLeaf)
    {
    free_huffman_tree(subtree->zero);
    free_huffman_tree(subtree->one);
    }

    free(subtree);
    }


    static void
    free_code(huffman_code* p)
    {
    free(p->bits);
    free(p);
    }


    static void
    free_encoder(SymbolEncoder *pSE)
    {
    unsigned long i;
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    huffman_code *p = (*pSE)[i];
    if(p)
    free_code(p);
    }


    free(pSE);
    }


    static void
    init_frequencies(SymbolFrequencies *pSF)
    {
    memset(*pSF, 0, sizeof(SymbolFrequencies));
    #if DEBUG
    unsigned int i;
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    unsigned char uc = (unsigned char)i;
    (*pSF)[i] = new_leaf_node(uc);
    }
    #endif
    }


    typedef struct buf_cache_tag
    {
    unsigned char *cache;
    unsigned int cache_len;
    unsigned int cache_cur;
    unsigned char **pbufout;
    unsigned int *pbufoutlen;
    } buf_cache;


    static int init_cache(buf_cache* pc,
     unsigned int cache_size,
     unsigned char **pbufout,
     unsigned int *pbufoutlen)
    {
    assert(pc && pbufout && pbufoutlen);
    if(!pbufout || !pbufoutlen)
    return 1;

    pc->cache = (unsigned char*)malloc(cache_size);
    pc->cache_len = cache_size;
    pc->cache_cur = 0;
    pc->pbufout = pbufout;
    *pbufout = NULL;
    pc->pbufoutlen = pbufoutlen;
    *pbufoutlen = 0;


    return pc->cache ? 0 : 1;
    }


    static void free_cache(buf_cache* pc)
    {
    assert(pc);
    if(pc->cache)
    {
    free(pc->cache);
    pc->cache = NULL;
    }
    }


    static int flush_cache(buf_cache* pc)
    {
    assert(pc);

    if(pc->cache_cur > 0)
    {
    unsigned int newlen = pc->cache_cur + *pc->pbufoutlen;
    unsigned char* tmp = realloc(*pc->pbufout, newlen);
    if(!tmp)
    return 1;


    memcpy(tmp + *pc->pbufoutlen, pc->cache, pc->cache_cur);


    *pc->pbufout = tmp;
    *pc->pbufoutlen = newlen;
    pc->cache_cur = 0;
    }


    return 0;
    }


    static int write_cache(buf_cache* pc,
      const void *to_write,
      unsigned int to_write_len)
    {
    unsigned char* tmp;


    assert(pc && to_write);
    assert(pc->cache_len >= pc->cache_cur);

    /* If trying to write more than the cache will hold
    * flush the cache and allocate enough space immediately,
    * that is, don't use the cache. */
    if(to_write_len > pc->cache_len - pc->cache_cur)
    {
    unsigned int newlen;
    flush_cache(pc);
    newlen = *pc->pbufoutlen + to_write_len;
    tmp = realloc(*pc->pbufout, newlen);
    if(!tmp)
    return 1;
    memcpy(tmp + *pc->pbufoutlen, to_write, to_write_len);
    *pc->pbufout = tmp;
    *pc->pbufoutlen = newlen;
    }
    else
    {
    /* Write the data to the cache. */
    memcpy(pc->cache + pc->cache_cur, to_write, to_write_len);
    pc->cache_cur += to_write_len;
    }


    return 0;
    }


    static unsigned int
    get_symbol_frequencies(SymbolFrequencies *pSF, FILE *in)
    {
    int c;
    /*The total number of source symbols is initialized to 0.*/
    unsigned int total_count = 0;

    /* Set all frequencies to 0. */
    init_frequencies(pSF);

    /* Count the frequency of each symbol in the input file.
    Scan the whole file for the first time.*/
    while((c = fgetc(in)) != EOF)
    {
    unsigned char uc = c;
    /* If c(that is, uc) is new symbol,
    a new leaf node for that character is generated*/
    if(!(*pSF)[uc])
    (*pSF)[uc] = new_leaf_node(uc);
    /*the frequency(Unit:times) of the current symbol +1 */
    ++(*pSF)[uc]->count;
    /*the total number of source Ssymbol +1 */
    ++total_count;
    }


    return total_count;
    }


    static unsigned int
    get_symbol_frequencies_from_memory(SymbolFrequencies *pSF,
      const unsigned char *bufin,
      unsigned int bufinlen)
    {
    unsigned int i;
    unsigned int total_count = 0;

    /* Set all frequencies to 0. */
    init_frequencies(pSF);

    /* Count the frequency of each symbol in the input file. */
    for(i = 0; i < bufinlen; ++i)
    {
    unsigned char uc = bufin[i];
    if(!(*pSF)[uc])
    (*pSF)[uc] = new_leaf_node(uc);
    ++(*pSF)[uc]->count;
    ++total_count;
    }


    return total_count;
    }


    /*
     * When used by qsort, SFComp sorts the array so that
     * the symbol with the lowest frequency is first. Any
     * NULL entries will be sorted to the end of the list.
     */
    static int
    SFComp(const void *p1, const void *p2)
    {
    const huffman_node *hn1 = *(const huffman_node**)p1;
    const huffman_node *hn2 = *(const huffman_node**)p2;


    /* Sort all NULLs to the end. */
    if(hn1 == NULL && hn2 == NULL)
    return 0;
    if(hn1 == NULL)
    return 1;
    if(hn2 == NULL)
    return -1;

    if(hn1->count > hn2->count)
    return 1;
    else if(hn1->count < hn2->count)
    return -1;


    return 0;
    }


    #if DEBUG
    static void
    print_freqs(SymbolFrequencies * pSF)
    {
    size_t i;
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    if((*pSF)[i])
    printf("%d, %ld\n", (*pSF)[i]->symbol, (*pSF)[i]->count);
    else
    printf("NULL\n");
    }
    }
    #endif


    /*
     * build_symbol_encoder builds a SymbolEncoder by walking
     * from root down to the leaves of the Huffman tree and then,
     * for each leaf, determines its code.
     * Recursive operation
     */
    static void
    build_symbol_encoder(huffman_node *subtree, SymbolEncoder *pSF)
    {
    if(subtree == NULL)
    return;
    /*If subtree is a leaf, a huffman_code is generated.*/
    if(subtree->isLeaf)
    (*pSF)[subtree->symbol] = new_code(subtree);//
    else
    {
    /*Depth traversal*/
    build_symbol_encoder(subtree->zero, pSF);
    build_symbol_encoder(subtree->one, pSF);
    }
    }


    /*
     * calculate_huffman_codes turns pSF into an array
     * with a single entry that is the root of the
     * huffman tree. The return value is a SymbolEncoder,
     * which is an array of huffman codes index by symbol value.
     */
    static SymbolEncoder*
    calculate_huffman_codes(SymbolFrequencies * pSF)
    {
    unsigned int i = 0;
    unsigned int n = 0;
    huffman_node *m1 = NULL, *m2 = NULL;
    SymbolEncoder *pSE = NULL;

    #if DEBUG
    printf("BEFORE SORT\n");
    print_freqs(pSF);   //to present the use of stack
    #endif


    /* Sort the symbol frequency array by ascending frequency. */
    qsort((*pSF), MAX_SYMBOLS, sizeof((*pSF)[0]), SFComp);   
    /*Function SFComp is called by function qsort to 
    compare two element of array by the frequency of source symbol 
    and return some certain value to indicate qsort how to rank.*/
    #if DEBUG
    printf("AFTER SORT\n");
    print_freqs(pSF);
    #endif


    /* Get the number of symbols. */
    for(n = 0; n < MAX_SYMBOLS && (*pSF)[n]; ++n)
    ;


    /*
    * Construct a Huffman tree. This code is based
    * on the algorithm given in Managing Gigabytes
    * by Ian Witten et al, 2nd edition, page 34.
    * Note that this implementation uses a simple
    * count instead of probability.
    */

    /*Build huffman tree.
     It needs to merge n-1 times, so cycle n-1 times.*/
    for(i = 0; i < n - 1; ++i)
    {
    /* Set m1 and m2 to the two subsets of least probability. */
    m1 = (*pSF)[0];
    m2 = (*pSF)[1];


    /* Replace m1 and m2 with a set {m1, m2} whose probability
    * is the sum of that of m1 and m2. */
    (*pSF)[0] = m1->parent = m2->parent =
    new_nonleaf_node(m1->count + m2->count, m1, m2);
    (*pSF)[1] = NULL;

    /* Put newSet into the correct count position in pSF. */
    qsort((*pSF), n, sizeof((*pSF)[0]), SFComp);
    }

    /* Build the SymbolEncoder array from the tree. */
    pSE = (SymbolEncoder*)malloc(sizeof(SymbolEncoder));
    memset(pSE, 0, sizeof(SymbolEncoder));
    build_symbol_encoder((*pSF)[0], pSE);
    return pSE;
    }


    /*
     * Write the huffman code table. The format is:
     * 4 byte code count in network byte order.
     * 4 byte number of bytes encoded
     *   (if you decode the data, you should get this number of bytes)
     * code1
     * ...
     * codeN, where N is the count read at the begginning of the file.
     * Each codeI has the following format:
     * 1 byte symbol, 1 byte code bit length, code bytes.
     * Each entry has numbytes_from_numbits code bytes.
     * The last byte of each code may have extra bits, if the number of
     * bits in the code is not a multiple of 8.
     */
    static int
    write_code_table(FILE* out, SymbolEncoder *se, unsigned int symbol_count)
    {
    unsigned long i, count = 0;

    /* Determine the number of entries in se. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    if((*se)[i])
    ++count;
    }
    /*
    *htonl():This function converts a 32-bit number
    *from the host byte order to the network byte order of an unsigned long integer.


    *In the network transmission, big-endian order is used,
    *for 0x0A0B0C0D, the order of transmission is 0A 0B 0C 0D,
    *so we take big-endian as network byte order,
    *little-endian as host byte order.


    *The advantage of little-endian order is
    *that unsigned char / short / int/ long type conversion
    *can be accomplished without the storage location change.
    */
    /* Write the number of entries in network byte order. */
    i = htonl(count);    
    if(fwrite(&i, sizeof(i), 1, out) != 1)
    return 1;


    /* Write the number of bytes that will be encoded. */
    symbol_count = htonl(symbol_count);
    if(fwrite(&symbol_count, sizeof(symbol_count), 1, out) != 1)
    return 1;


    /* Write the entries. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    huffman_code *p = (*se)[i];
    if(p)
    {
    unsigned int numbytes;
    /* Write the 1 byte symbol. */
    fputc((unsigned char)i, out);
    /* Write the 1 byte code bit length. */
    fputc(p->numbits, out);
    /* Write the code bytes. */
    numbytes = numbytes_from_numbits(p->numbits);
    if(fwrite(p->bits, 1, numbytes, out) != numbytes)
    return 1;
    }
    }


    return 0;
    }


    /*
     * Allocates memory and sets *pbufout to point to it. The memory
     * contains the code table.
     */
    static int
    write_code_table_to_memory(buf_cache *pc,
      SymbolEncoder *se,
      unsigned int symbol_count)
    {
    unsigned long i, count = 0;


    /* Determine the number of entries in se. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    if((*se)[i])
    ++count;
    }


    /* Write the number of entries in network byte order. */
    i = htonl(count);

    if(write_cache(pc, &i, sizeof(i)))
    return 1;


    /* Write the number of bytes that will be encoded. */
    symbol_count = htonl(symbol_count);
    if(write_cache(pc, &symbol_count, sizeof(symbol_count)))
    return 1;


    /* Write the entries. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    huffman_code *p = (*se)[i];
    if(p)
    {
    unsigned int numbytes;
    /* The value of i is < MAX_SYMBOLS (256), so it can
    be stored in an unsigned char. */
    unsigned char uc = (unsigned char)i;
    /* Write the 1 byte symbol. */
    if(write_cache(pc, &uc, sizeof(uc)))
    return 1;
    /* Write the 1 byte code bit length. */
    uc = (unsigned char)p->numbits;
    if(write_cache(pc, &uc, sizeof(uc)))
    return 1;
    /* Write the code bytes. */
    numbytes = numbytes_from_numbits(p->numbits);
    if(write_cache(pc, p->bits, numbytes))
    return 1;
    }
    }


    return 0;
    }


    /*
     * read_code_table builds a Huffman tree from the code
     * in the in file. This function returns NULL on error.
     * The returned value should be freed with free_huffman_tree.
     */
    static huffman_node*
    read_code_table(FILE* in, unsigned int *pDataBytes)
    {
    huffman_node *root = new_nonleaf_node(0, NULL, NULL);
    unsigned int count;

    /* Read the number of entries.
      (it is stored in network byte order). */
    if(fread(&count, sizeof(count), 1, in) != 1)
    {
    free_huffman_tree(root);
    return NULL;
    }


    count = ntohl(count);


    /* Read the number of data bytes this encoding represents. */
    if(fread(pDataBytes, sizeof(*pDataBytes), 1, in) != 1)
    {
    free_huffman_tree(root);
    return NULL;
    }


    *pDataBytes = ntohl(*pDataBytes);




    /* Read the entries. */
    while(count-- > 0)
    {
    int c;
    unsigned int curbit;
    unsigned char symbol;
    unsigned char numbits;
    unsigned char numbytes;
    unsigned char *bytes;
    huffman_node *p = root;

    if((c = fgetc(in)) == EOF)
    {
    free_huffman_tree(root);
    return NULL;
    }
    symbol = (unsigned char)c;

    if((c = fgetc(in)) == EOF)
    {
    free_huffman_tree(root);
    return NULL;
    }

    numbits = (unsigned char)c;
    numbytes = (unsigned char)numbytes_from_numbits(numbits);
    bytes = (unsigned char*)malloc(numbytes);
    if(fread(bytes, 1, numbytes, in) != numbytes)
    {
    free(bytes);
    free_huffman_tree(root);
    return NULL;
    }


    /*
    * Add the entry to the Huffman tree. The value
    * of the current bit is used switch between
    * zero and one child nodes in the tree. New nodes
    * are added as needed in the tree.
    */
    for(curbit = 0; curbit < numbits; ++curbit)
    {
    if(get_bit(bytes, curbit))
    {
    if(p->one == NULL)
    {
    p->one = curbit == (unsigned char)(numbits - 1)
    ? new_leaf_node(symbol)
    : new_nonleaf_node(0, NULL, NULL);
    p->one->parent = p;
    }
    p = p->one;
    }
    else
    {
    if(p->zero == NULL)
    {
    p->zero = curbit == (unsigned char)(numbits - 1)
    ? new_leaf_node(symbol)
    : new_nonleaf_node(0, NULL, NULL);
    p->zero->parent = p;
    }
    p = p->zero;
    }
    }

    free(bytes);
    }


    return root;
    }


    static int
    memread(const unsigned char* buf,
    unsigned int buflen,
    unsigned int *pindex,
    void* bufout,
    unsigned int readlen)
    {
    assert(buf && pindex && bufout);
    assert(buflen >= *pindex);
    if(buflen < *pindex)
    return 1;
    if(readlen + *pindex >= buflen)
    return 1;
    memcpy(bufout, buf + *pindex, readlen);
    *pindex += readlen;
    return 0;
    }


    static huffman_node*
    read_code_table_from_memory(const unsigned char* bufin,
    unsigned int bufinlen,
    unsigned int *pindex,
    unsigned int *pDataBytes)
    {
    huffman_node *root = new_nonleaf_node(0, NULL, NULL);
    unsigned int count;

    /* Read the number of entries.
      (it is stored in network byte order). */
    if(memread(bufin, bufinlen, pindex, &count, sizeof(count)))
    {
    free_huffman_tree(root);
    return NULL;
    }


    count = ntohl(count);


    /* Read the number of data bytes this encoding represents. */
    if(memread(bufin, bufinlen, pindex, pDataBytes, sizeof(*pDataBytes)))
    {
    free_huffman_tree(root);
    return NULL;
    }


    *pDataBytes = ntohl(*pDataBytes);


    /* Read the entries. */
    while(count-- > 0)
    {
    unsigned int curbit;
    unsigned char symbol;
    unsigned char numbits;
    unsigned char numbytes;
    unsigned char *bytes;
    huffman_node *p = root;


    if(memread(bufin, bufinlen, pindex, &symbol, sizeof(symbol)))
    {
    free_huffman_tree(root);
    return NULL;
    }


    if(memread(bufin, bufinlen, pindex, &numbits, sizeof(numbits)))
    {
    free_huffman_tree(root);
    return NULL;
    }

    numbytes = (unsigned char)numbytes_from_numbits(numbits);
    bytes = (unsigned char*)malloc(numbytes);
    if(memread(bufin, bufinlen, pindex, bytes, numbytes))
    {
    free(bytes);
    free_huffman_tree(root);
    return NULL;
    }


    /*
    * Add the entry to the Huffman tree. The value
    * of the current bit is used switch between
    * zero and one child nodes in the tree. New nodes
    * are added as needed in the tree.
    */
    for(curbit = 0; curbit < numbits; ++curbit)
    {
    if(get_bit(bytes, curbit))
    {
    if(p->one == NULL)
    {
    p->one = curbit == (unsigned char)(numbits - 1)
    ? new_leaf_node(symbol)
    : new_nonleaf_node(0, NULL, NULL);
    p->one->parent = p;
    }
    p = p->one;
    }
    else
    {
    if(p->zero == NULL)
    {
    p->zero = curbit == (unsigned char)(numbits - 1)
    ? new_leaf_node(symbol)
    : new_nonleaf_node(0, NULL, NULL);
    p->zero->parent = p;
    }
    p = p->zero;
    }
    }

    free(bytes);
    }


    return root;
    }
    /*This function is used in the second file scan
     in order to encode the whole input file by looking up the code table.*/
    static int
    do_file_encode(FILE* in, FILE* out, SymbolEncoder *se)
    {
    unsigned char curbyte = 0;
    unsigned char curbit = 0;
    int c;
    /*Traverse each symbol(/bytes) of the file.*/
    while((c = fgetc(in)) != EOF)
    {
    unsigned char uc = (unsigned char)c;
    huffman_code *code = (*se)[uc];/*Look up the code table.*/
    unsigned long i;

    for(i = 0; i < code->numbits; ++i)
    {
    /* Add the current bit to curbyte. */
    curbyte |= get_bit(code->bits, i) << curbit;


    /* If this byte is filled up then write it
    * out and reset the curbit and curbyte. */
    if(++curbit == 8)
    {
    fputc(curbyte, out);
    curbyte = 0;
    curbit = 0;
    }
    }
    }


    /*
    * If there is data in curbyte that has not been
    * output yet, which means that the last encoded
    * character did not fall on a byte boundary,
    * then output it.
    */
    if(curbit > 0)
    fputc(curbyte, out);


    return 0;
    }


    static int
    do_memory_encode(buf_cache *pc,
    const unsigned char* bufin,
    unsigned int bufinlen,
    SymbolEncoder *se)
    {
    unsigned char curbyte = 0;
    unsigned char curbit = 0;
    unsigned int i;

    for(i = 0; i < bufinlen; ++i)
    {
    unsigned char uc = bufin[i];
    huffman_code *code = (*se)[uc];
    unsigned long i;

    for(i = 0; i < code->numbits; ++i)
    {
    /* Add the current bit to curbyte. */
    curbyte |= get_bit(code->bits, i) << curbit;


    /* If this byte is filled up then write it
    * out and reset the curbit and curbyte. */
    if(++curbit == 8)
    {
    if(write_cache(pc, &curbyte, sizeof(curbyte)))
    return 1;
    curbyte = 0;
    curbit = 0;
    }
    }
    }


    /*
    * If there is data in curbyte that has not been
    * output yet, which means that the last encoded
    * character did not fall on a byte boundary,
    * then output it.
    */
    return curbit > 0 ? write_cache(pc, &curbyte, sizeof(curbyte)) : 0;
    }


    //step3:add by yzhang for huffman statistics
    /*In order to calculate frequencies by appearence times(count).*/
    int huffST_getSymFrequencies(SymbolFrequencies *SF, huffman_stat *st,int total_count)
    {
    int i,count =0;
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    if((*SF)[i])
    {
    st->freq[i]=(float)(*SF)[i]->count/total_count;
    count+=(*SF)[i]->count;
    }
    else 
    {
    st->freq[i]= 0;
    }
    }
    if(count==total_count)
    return 1;
    else
    return 0;
    }
    /*Get codeword and its other information and save them
     to structure huffman_stat in order to output codeword table
     to txt file.*/
    int huffST_getcodeword(SymbolEncoder *se, huffman_stat *st)
    {
    unsigned long i,j;


    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    huffman_code *p = (*se)[i];
    if(p)
    {
    unsigned int numbytes;
                st->numbits[i] = p->numbits;
    numbytes = numbytes_from_numbits(p->numbits);
    for (j=0;j<numbytes;j++)
       st->bits[i][j] = p->bits[j];
    }
    else
    st->numbits[i] =0;
    }


    return 0;
    }
    /*Output the statistic results and huffman code table to file.*/
    void output_huffman_statistics(huffman_stat *st,FILE *out_Table)
    {
    int i,j;
    unsigned char c;
    fprintf(out_Table,"symbol\t   freq\t   codelength\t   code\n");
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
    /*Use the key "Tab" to seperate each element of huffman_stat,
    so that the file can be read by other software, such as Excel.
    */
    fprintf(out_Table,"%d\t   ",i);
    fprintf(out_Table,"%f\t   ",st->freq[i]);
    fprintf(out_Table,"%d\t    ",st->numbits[i]);
    if(st->numbits[i])
    {
    for(j = 0; j < st->numbits[i]; ++j)
    {
    c =get_bit(st->bits[i], j);
    fprintf(out_Table,"%d",c);
    }
    }
    fprintf(out_Table,"\n");
    }
    }
    //end by yzhang
    /*
     * huffman_encode_file huffman encodes in to out.
     */
    int
    huffman_encode_file(FILE *in, FILE *out, FILE *out_Table)  //step1:changed by yzhang for huffman statistics from (FILE *in, FILE *out) to (FILE *in, FILE *out, FILE *out_Table)
    {
    SymbolFrequencies sf;
    SymbolEncoder *se;
    huffman_node *root = NULL;
    int rc;
    unsigned int symbol_count;
        //step2:add by yzhang for huffman statistics
    huffman_stat hs;
    //end by yzhang


    /* Get the frequency of each symbol in the input file. */
    symbol_count = get_symbol_frequencies(&sf, in);


    //step3:add by yzhang for huffman statistics,...  get the frequency of each symbol 
        huffST_getSymFrequencies(&sf,&hs,symbol_count);
        //end by yzhang


    /* Build an optimal table from the symbolCount. */
    se = calculate_huffman_codes(&sf);
    root = sf[0];
        
    //step3:add by yzhang for huffman statistics... output the statistics to file
    huffST_getcodeword(se, &hs);
    output_huffman_statistics(&hs,out_Table);
    //end by yzhang


    /* Scan the file again and, using the table
      previously built, encode it into the output file. */
    //Reset the file incator to the beginning of the file.
    rewind(in);
    //First,write huffman code table to the output file.
    rc = write_code_table(out, se, symbol_count);
    /*"rc==0" stands for the fact that writing code table is successful.
     And then encode the whole input file by huffman code table
     into output file.
    */
    if(rc == 0)
    rc = do_file_encode(in, out, se);


    /* Free the Huffman tree. */
    free_huffman_tree(root);
    free_encoder(se);
    return rc;
    }


    int
    huffman_decode_file(FILE *in, FILE *out)
    {
    huffman_node *root, *p;
    int c;
    unsigned int data_count;

    /* Read the Huffman code table. */
    root = read_code_table(in, &data_count);
    if(!root)
    return 1;


    /* Decode the file. */
    p = root;
    while(data_count > 0 && (c = fgetc(in)) != EOF)
    {
    unsigned char byte = (unsigned char)c;
    unsigned char mask = 1;
    while(data_count > 0 && mask)
    {
    p = byte & mask ? p->one : p->zero;
    mask <<= 1;


    if(p->isLeaf)
    {
    fputc(p->symbol, out);
    p = root;
    --data_count;
    }
    }
    }


    free_huffman_tree(root);
    return 0;
    }


    #define CACHE_SIZE 1024


    int huffman_encode_memory(const unsigned char *bufin,
     unsigned int bufinlen,
     unsigned char **pbufout,
     unsigned int *pbufoutlen)
    {
    SymbolFrequencies sf;
    SymbolEncoder *se;
    huffman_node *root = NULL;
    int rc;
    unsigned int symbol_count;
    buf_cache cache;


    /* Ensure the arguments are valid. */
    if(!pbufout || !pbufoutlen)
    return 1;


    if(init_cache(&cache, CACHE_SIZE, pbufout, pbufoutlen))
    return 1;


    /* Get the frequency of each symbol in the input memory. */
    symbol_count = get_symbol_frequencies_from_memory(&sf, bufin, bufinlen);


    /* Build an optimal table from the symbolCount. */
    se = calculate_huffman_codes(&sf);
    root = sf[0];


    /* Scan the memory again and, using the table
      previously built, encode it into the output memory. */
    rc = write_code_table_to_memory(&cache, se, symbol_count);
    if(rc == 0)
    rc = do_memory_encode(&cache, bufin, bufinlen, se);


    /* Flush the cache. */
    flush_cache(&cache);

    /* Free the Huffman tree. */
    free_huffman_tree(root);
    free_encoder(se);
    free_cache(&cache);
    return rc;
    }


    int huffman_decode_memory(const unsigned char *bufin,
     unsigned int bufinlen,
     unsigned char **pbufout,
     unsigned int *pbufoutlen)
    {
    huffman_node *root, *p;
    unsigned int data_count;
    unsigned int i = 0;
    unsigned char *buf;
    unsigned int bufcur = 0;


    /* Ensure the arguments are valid. */
    if(!pbufout || !pbufoutlen)
    return 1;


    /* Read the Huffman code table. */
    root = read_code_table_from_memory(bufin, bufinlen, &i, &data_count);
    if(!root)
    return 1;


    buf = (unsigned char*)malloc(data_count);


    /* Decode the memory. */
    p = root;
    for(; i < bufinlen && data_count > 0; ++i) 
    {
    unsigned char byte = bufin[i];
    unsigned char mask = 1;
    while(data_count > 0 && mask)
    {
    p = byte & mask ? p->one : p->zero;
    mask <<= 1;


    if(p->isLeaf)
    {
    buf[bufcur++] = p->symbol;
    p = root;
    --data_count;
    }
    }
    }


    free_huffman_tree(root);
    *pbufout = buf;
    *pbufoutlen = bufcur;
    return 0;
    }

    三、压缩效率分析结果




    展开全文
  • 哈夫曼编码,保证正确的同时提高了效率 是一种二进制最短前缀编码 带权最短路径最短的树–哈夫曼树,可以用优先队列(底层实质上是堆)实现 对于输入的n个带权结点,初始为 n个只有根结点的树 每次选择权最小的两个...

    哈夫曼编码,保证正确的同时提高了效率
    是一种二进制最短前缀编码
    带权最短路径最短的树–哈夫曼树,可以用优先队列生成,注意,本次使用了指向结构体指针型的优先队列
    对于输入的n个带权结点,初始为 n个只有根结点的树
    每次选择权最小的两个树将他们的根结点合并,生成新的树,放入原来的森林中,
    重复上述操作,直到只有一棵树,此时即得到哈夫曼树
    哈夫曼树不唯一例如
    输入
    输入的第一行包含一个正整数n,表示共有n个字符需要编码。其中n不超过100。
    第二行中有n个用空格隔开的正整数,分别表示n个字符的权值。
    输出
    共n行,每行一个字符串,表示对应字符的赫夫曼编码
    测试样例
    8
    5 29 7 8 14 23 3 11
    他的两个哈夫曼树如图在这里插入图片描述
    在这里插入图片描述
    对应的哈夫曼编码自然也不同
    本程序对于测试样例生成的是第一棵哈夫曼树
    下图是两个哈夫曼树所以叶结点的权乘以路径长度之和
    二者相等,即可证明一个事实,对同一组叶子结点来说,哈夫曼树可以不唯一,但最小带权路径长度是唯一的
    在这里插入图片描述

    #include<cstdio>
    #include<algorithm>
    #include<iostream>
    #include<vector>
    #include<cstring>
    #include<queue>
    //输入n个数,表示字符的权值,如何用哈夫曼编码求出n个字符的前缀编码
    //首先构建哈夫曼树(最短带权路径树),最初是n个树的森林,每个树只有一个结点
    //不断将权值最小的两个树合并直到只剩下一颗树,用结构体指针的优先队列实现
    //合并操作,union()输入两个根结点指针,将这两个树合并,即新建一个结点其权值为两结点权值之和
    //其左右子树为这两棵树,返回新树的根结点指针
    //然后将新建结点的指针放入森林中
    //生成的树从根向叶结点的路径中,向左编码一个0,向右编码一个1
    using namespace std;
    struct node
    {
        int w;
        node* lchild;
        node* rchild;
    };
    struct cmp{
    bool operator()(node *a,node *b)
    {
        return a->w>b->w;
    }
    };
    node* newnode(int x)
    {
        node* Node=new node;
        Node->w=x;
        Node->lchild=Node->rchild=NULL;
        return Node;
    }
    priority_queue<node*,vector<node*>,cmp>forest;
    vector<char>ans;
    void init()
    {
        int n;
        scanf("%d",&n);
        while(n--){
            int temp;
            scanf("%d",&temp);
            forest.push(newnode(temp));
        }
    }
    node* Union(node*a,node*b)//合并两棵树
    {
        node* Node=newnode(a->w+b->w);
        Node->lchild=a;
        Node->rchild=b;
        return Node;
    }
    void Layerorder(node *root)
    {
        if(root==NULL){
            return ;
        }
        queue<node*>q;
        q.push(root);
        while(!q.empty()){
            node* Node=q.front();
            q.pop();
            printf("%d ",Node->w);
            if(Node->lchild!=NULL){
                q.push(Node->lchild);
            }
            if(Node->rchild!=NULL){
                q.push(Node->rchild);
            }
        }
    }
    void preorder(node* root)//先序遍历哈夫曼树,即可得到哈夫曼编码
    {
        if(root==NULL){
            return;
        }
        if(root->lchild==NULL&&root->rchild==NULL){//叶结点
            printf("weight=%d haffmancode=",root->w);
            for(int i=0;i<ans.size();i++){
                printf("%c",ans[i]);
            }
            printf("\n");
    
        }
        if(root->lchild!=NULL){
            ans.push_back('0');
            preorder(root->lchild);
            ans.pop_back();
        }
        if(root->rchild!=NULL){
            ans.push_back('1');
            preorder(root->rchild);
            ans.pop_back();
        }
    
    }
    void inorder(node* root)
    {
        if(root==NULL){
            return;
        }
        inorder(root->lchild);
        printf("%d ",root->w);
        inorder(root->rchild);
    }
    int main()
    {
        init();
        while(forest.size()>1){
            node* a=forest.top();
            forest.pop();
            node* b=forest.top();
            forest.pop();
            forest.push(Union(a,b));
        }
        preorder(forest.top());
        return 0;
    
    }
    
    

    运行结果如下
    在这里插入图片描述

    展开全文
  • 哈夫曼树要解决的问题是对不同频率元素查找的效率最高,哈夫曼编码代码如下: 首先要明白,哈夫曼树要查找的元素都在叶结点上,而且哈夫曼树的最终元素是确定的为2*n-1个。这样我们可以用结点数组来表示树,每一个...

    哈夫曼树要解决的问题是对不同频率元素查找的效率最高,哈夫曼编码代码如下:

    首先要明白,哈夫曼树要查找的元素都在叶结点上,而且哈夫曼树的最终元素是确定的为2*n-1个。这样我们可以用结点数组来表示树,每一个结点的 child和parent指向自己的父亲和儿子的索引。首先实现select函数,从结点数组选出权重最小的两个合并,存储新结点,构造出哈夫曼树,然后从叶结点开始向上查找,左儿子为0,右儿子为1得到哈夫曼编码,最后再把编码反转即可。

    //huffmanCoding.c
    #include <stdio.h>
    #include <limits.h>
    #include <string.h>
    #include <stdlib.h>
    #define N 6
    
    typedef struct huffNode
    {
    	 int weight;   //权重
    	 int lchild, rchild, parent;  //左右子节点和父节点
    }HTNode, *HuffTree;
    typedef char **HuffCode;
    
    //找出数组中无父节点且权值最小的两个节点下标,分别用s1和s2保存
    void select(const HuffTree &HT, int n, int &s1, int &s2);
    //HT:哈夫曼树,HC:哈夫曼编码,w:构造哈夫曼树节点的权值,n:构造哈夫曼树节点的个数
    void HuffmanCode(HuffTree &HT, HuffCode &HC, int *w, int n);
    
    
    int main()
    {
    	int i;
    	char key[N] = { '0','A','B','C','D','E' };//第0个元素保留不用
    	int w[N] = { 0,1,2,4,5,6 }; //第0个元素保留不用
    	HuffTree HT;
    	HuffCode HC;
    	HuffmanCode(HT, HC, w, N - 1);
    	for (i = 1; i < N; i++)
    		printf("%c:%s\n", key[i], HC[i]);
    
    	printf("\n");
    	return 0;
    }
    
    
    
    
    //找出数组中权值最小的两个节点下标,分别用s1和s2保存
    void select(const HuffTree &HT, int n, int &s1, int &s2)
    {
    	int i;
    	s1 = s2 = 0;
    	int min1 = INT_MAX;//最小值,INT_MAX在<limits.h>中定义的
    	int min2 = INT_MAX;//次小值
    
    	for (i = 1; i <= n; ++i)
    	{
    		if (HT[i].parent == 0)
    		{//筛选没有父节点的最小和次小权值下标
    			if (HT[i].weight < min1)
    			{//如果比最小值小
    				min2 = min1;
    				s2 = s1;
    				min1 = HT[i].weight;
    				s1 = i;
    			}
    			else if ((HT[i].weight >= min1) && (HT[i].weight < min2))
    			{//如果大于等于最小值,且小于次小值
    				min2 = HT[i].weight;
    				s2 = i;
    			}
    			else
    			{//如果大于次小值,则什么都不做
    				;
    			}
    		}
    	}
    }
    
    //HT:哈夫曼树,HC:哈夫曼编码,w:构造哈夫曼树节点的权值,n:构造哈夫曼树节点的个数
    void HuffmanCode(HuffTree &HT, HuffCode &HC, int *w, int n)
    {
    	int s1;
    	int s2;
    	int m = 2 * n - 1;       //容易知道n个节点构造的哈夫曼树是2n-1个节点
    	int i, c, f, j;
    	char *code;  //暂存编码的
    	HT = (HuffTree)malloc((m + 1) * sizeof(HTNode));  //0单元未使用
    
    
    	for (i = 1; i <= n; i++)
    		HT[i] = { w[i],0,0,0 };//初始化前n个节点(构造哈夫曼树的原始节点)
    
    	for (i = n + 1; i <= m; i++)
    		HT[i] = { 0,0,0,0 };  //初始化后n-1个节点
    
    							  //构建哈夫曼树
    	for (i = n + 1; i <= m; i++)
    	{
    		select(HT, i - 1, s1, s2);//找出前i-1个节点中权值最小的节点下标
    		HT[s1].parent = i;
    		HT[s2].parent = i;
    		HT[i].lchild = s1;
    		HT[i].rchild = s2;
    		HT[i].weight = HT[s1].weight + HT[s2].weight;
    	}
    	//哈夫曼编码
    	HC = (char **)malloc((n) * sizeof(char *));
    	//暂存编码
    	code = (char *)malloc(n * sizeof(char));//使用了第0单元
    	for (i = 1; i <= n; i++)
    	{
    		for (c = i, f = HT[c].parent, j = 0; f != 0; c = HT[c].parent, f = HT[c].parent, j++)
    		{//从叶子扫描到根
    			if (HT[f].lchild == c)
    			{
    				code[j] = '0';
    			}
    			else if (HT[f].rchild == c)
    			{
    				code[j] = '1';
    			}
    			else
    			{//否则什么也不做
    				;
    			}
    		}
    		code[j] = '\0';
    		for (int k = 0; k < j/2; k++)
    		{
    			char tmp = code[k];
    			code[k] = code[j - 1- k];
    			code[j - 1- k] = tmp;
    		}
    		HC[i] = (char *)malloc(strlen(code) * sizeof(char));
    		strcpy(HC[i], code);
    	}
    }
    
    
    
    


    展开全文
  • 实验名称文件压缩问题 班级20132012 学号 姓名 时间2015-6-9 一问题描述 哈夫曼编码是一种常用的数据压缩技术对数据文件进行哈夫曼编码可 大大缩短文件的传输长度提高信道利用率及传输效率要求采用哈夫曼编码原 ...
  • 实验课名称数据结构实验 实验名称:文件压缩问题 班级203212 学号: 姓名 时间2015 一问题描述 哈夫曼编码就是一种常用得数据压缩技术,对数据文件进行哈夫曼编码可大大缩短文件得传输长度,提高信道利用率及传输效率....
  • 哈夫曼树与哈夫曼编码 什么是哈夫曼树 判定树 考虑学生成绩的分布的概率 查找效率 修改判定树 根据结点不同的查找概率构造更有效的搜索树 哈夫曼树的定义 带权路径长度(WPL):设二叉树有n个叶子节点,每个...

    哈夫曼树与哈夫曼编码

    什么是哈夫曼树

    • 判定树
    • 考虑学生成绩的分布的概率
      • 查找效率
    • 修改判定树
      • 根据结点不同的查找概率构造更有效的搜索树
    • 哈夫曼树的定义
      • 带权路径长度(WPL):设二叉树有n个叶子节点,每个叶子节点带有权值WkW_k,从根结点到每个叶子结点的长度为IkI_k,则每个叶子结点的带权路径长度之和就是:WPL=k=1nwklkWPL=\sum_{k=1} ^n w_kl_k
      • 最优二叉树或哈夫曼树:WPL最小的二叉树

    哈夫曼树的构造

    • 每次把权值最小的两棵二叉树合并
    typedef struct TreeNode *HuffmanTree;
    struct TreeNode{
        int Weight;
        HuffmanTree Laft, Right;
    };
    HuffManTree Huffman( MinHeap H )
    { // 假设H->Size个权值已经存在H->Elements[]->Weight里
        int i; HuffmanTree T;
        BuildMinHeap(H); //将H->Elements[]按权值调整为最小堆
        for(i = 1; i < H->Size; i++ ) {//做H->Size-1次合并
            T = malloc( sizeof( struct TreeNode) );//建立新结点
            T->Left = DeleteMin(H);
            	//从最小堆中删除一个结点,作为新T的左子结点
            T->Right = DeleteMin(H);
            	//从最小堆中删除一个结点,作为新T的右子结点
            T->Weight = T->Left->Weight+T->Right->Weight;
            	//计算新权值
            Insert( H, T ); //将新T插入最小堆
        }
        T = DeleteMin(H);
        return T;
    }
    • 整体复杂度为O(N logN)

    哈夫曼树的特点

    • 没有度为1的结点
    • n个 叶子结点的哈夫曼树共有2n-1个结点
    • 哈夫曼树的任意非叶结点的左右子树交换后仍为哈夫曼树
    • 对同一组权值,存在不同构的哈夫曼树,但WPL值相同

    哈夫曼编码

    • 怎么进行不等长编码?
    • 如何避免二义性?
      • 前缀码:任何字符的编码都不是另一字符编码的前缀
        • 可以无二义地编码
      • 二叉树用于编码
        • 左右分支:0、1
        • 字符只出现在叶结点上
    • 怎么构造一棵编码代价最小的二叉树?
      • 哈夫曼树(无二义、代价小)
    展开全文
  • 哈夫曼编码的实现

    万次阅读 多人点赞 2016-08-08 21:13:17
    而我对哈夫曼编码的理解也仅仅局限在其用于编码领域,可以提高数据传输效率,或者是用于压缩文件?这些可能并不准确,我没有细细的去查证。 哈夫曼编码可以通过构建哈夫曼树来得到。 我们用一个简单的例子,来简单...
  • Huffman Tree,中文名是哈夫曼树或霍夫曼树,它是最优二叉树。 引入 如果有一篇文章,由若干个字符构成。每个ABC…Z都由7位编码,文章有1w个字符,那么有7w位进行编码。一个字节8位,首位是0。需要占用1W个字节。 ...
  • 浅谈哈夫曼编码(含matlab代码)

    千次阅读 2020-06-12 12:28:21
    文章目录浅谈哈夫曼编码哈夫曼树哈夫曼树的构造哈夫曼树WPL值的计算哈夫曼编码引入哈夫曼编码哈夫曼编码的原理哈夫曼编码的编码压缩效率通过matlab代码实现哈夫曼编码思路及代码哈夫曼编码实例完整代码已上传到...
  • 1.哈夫曼编码 在讲哈夫曼树之前先来介绍一下哈夫曼编码的概念。在信息编码、数据压缩等方面,我们总是希望编码能够尽可能的简短一点,如此才能节省空间,提高传输效率。例如:如果采用等长编码的话编码'abcd',则为...
  • 哈夫曼树 1.历史 首先是为了找到最有效的高效的(空间,整体)编码,然后借助数据结构种的树型结构,发现通过构建这样一颗二叉树,可以得到最有效的编码 2.概念 给定n个权值作为n个叶子结点,构造一棵二叉树,...
  • 根据香农编码,费诺编码和哈夫曼编码的最佳编码思想,...5.输出信源熵,平均码长,编码效率等 6.编程加注解 附加要求(5): 1.能实现哈夫曼多进制(元)编码(3进制) 或 2.能实现哈夫曼多重(扩展)编码(2重,信源2-3个符号)
  • 哈夫曼编码的c语言实现,代码中有注释。有编码和译码功能,能输出每个字符的Huffman码。可以输入一段Huffman码反应成文本,也可以输入一段文本翻译成Huffman码。计算了信源熵,编码效率,和平均编码长度。
  • 平衡树插入删除效率高的前提各个节点的访问概率相等, 哈夫曼树在建树过程中考虑了节点的访问概率. 哈夫曼树是 N 叉树,这里讨论的是二叉哈夫曼树。 定义带权路径长度: WPL=∑i=0n−1wiliWPL = \sum_{i=0}^{n-1}w_{i}...
  • 对26个英文字母(已知它们的概率分布)进行了哈夫曼编码,并计算了编码效率。有助于大家理解哈夫曼编码以及信息论的相关知识哦。
  • 并进行编码效率的计算,对一幅BMP格式的灰度图像进行二 元Fano编码、译码 ) 原始图片 灰度处理 编码生成的码表:noldList: 解码后图片 编码结果 fano编码实现 源码: 哈夫曼编码实现 #Writen by james ...
  • 贪心算法-哈夫曼编码

    2020-05-13 17:34:18
    哈夫曼编码 哈夫曼编码可以有效的压缩数据。 定长编码和变长编码 定长编码指使用固定长度的二进制串表示一个字符,如a=000; b=001;c=002;d=003。 变长编码具有更加好的压缩效率(思想是赋予高频字符短码,低频...
  • 图像处理课要求对一幅图像进行哈夫曼编码/解码,并计算编码效率和平均编码长度。哈夫曼编码的原理就不写了,也可以在网上找到比较详细的介绍,比如这个博客。这种数据结构方面的代码其实最好用C写,用Python反而有些...
  • 哈夫曼树是一种特殊的树,结合前面做书上动态规划题的了解,哈夫曼树就是最优二叉树。  建立一颗哈夫曼树前...查找效率会更好,通过搜索频率(权值)与节点离根节点的路径距离计算出WPL(带权路径长),当词典树...
  • 其中提到了哈夫曼编码.今天要开始学习相关内容了. 如何根据频率构造效率最好的搜索树? 哈夫曼树(最优二叉树):WPL(带权路径长度)最小的二叉树. 哈夫曼树: 选取最小的两个权值进行合并:使用堆 ...
  • 当Kotlin遇见数据结构丨哈夫曼编码

    多人点赞 2019-10-08 08:50:27
    哈夫曼编码是一种编码格式,属于可变字长编码的一种,该方法依照字符出现的概率来构建异字头的平均长度最短的码字,最终实现根据使用频率来最大化节省码字(字符)的存储空间和提高传输效率的目的,在数据压缩和通讯...
  • 哈夫曼编码是一种效率比较高的变长无失真信源编码方法。哈夫曼编码的编码方法,步骤如下: 将信源符号按概率从大到小的顺序排列,为方便起见,令p(a1)>=p(a2)>=…>=p(ai); 给两个概率最小的信源符号p(a(n...
  • 哈夫曼(huffman)树/哈夫曼编码 ASCII码表中,8位转为一个具体的字符,如字母e和字母p,1万字的文章即一共8万位,即1万字节,但实际文章中字母e出现的频率远大于其他字母,故可以将字母e设计为5位或者4位,一些不...
  • 哈夫曼编码是为了实现编码的压缩,提高传输的效率。在编码端进行压缩,解码端进行相应的解码,得到传输的真实内容。编码时,先统计字符出现的频率,按照从低到高的顺序进行统计,出现频率越高的编码越简单,从而对...
  • 哈夫曼编码是一种编码格式,属于可变字长编码的一种,该方法依照字符出现的概率来构建异字头的平均长度最短的码字,最终实现根据使用频率来最大化节省码字(字符)的存储空间和提高传输效率的目的,在数据压缩和通讯...
  • Matlab 函数实现哈夫曼编码的算法 一 设计目的和意义 在当今信息化 代 数字信号充斥着各个角落 在数字信号的 理和 中信源 是首先遇到的 一个信源 的好坏 劣直接影响到了后面的 理和 如何无失真地 如何使 的效率最高 ...

空空如也

空空如也

1 2 3 4 5 ... 8
收藏数 158
精华内容 63
关键字:

哈夫曼编码效率