精华内容
下载资源
问答
  • Hadoop天气系统

    2020-09-13 23:45:05
    3.hadoop jar weather.jar com.hadoop.mr.weather.WeatherSystem hdfs dfs -ls /data/weather/output hdfs dfs -cat /data/weather/output/part-r-00000 也可以把内容copy到当前的目录 hdfs dfs -get /data/weather/...

    1.写完计数程序打包成jar
    只要class文件即可
    2.上传到node1上
    3.hadoop jar weather.jar com.hadoop.mr.weather.WeatherSystem

    hdfs dfs -ls /data/weather/output
    hdfs dfs -cat /data/weather/output/part-r-00000
    也可以把内容copy到当前的目录
    hdfs dfs -get /data/weather/output/* ./

    public class WeatherSystem {

        /**
         * Job driver: reads weather records from HDFS and finds the top
         * temperatures per month using a composite key (WeatherData) with a
         * custom sort comparator, partitioner and grouping comparator.
         *
         * @param args unused; paths are hard-coded below
         * @throws Exception on any job-setup or HDFS failure
         */
        public static void main(String[] args) throws Exception {
            Configuration configuration = new Configuration(true);
            Job job = Job.getInstance(configuration);

            job.setJarByClass(WeatherSystem.class);
            job.setJobName("weather");

            // map start
            job.setMapperClass(WeatherMapper.class);
            job.setMapOutputKeyClass(WeatherData.class);
            // BUG FIX: the original called setOutputValueClass() here; in the
            // map section the MAP output value class must be set explicitly.
            job.setMapOutputValueClass(IntWritable.class);
            job.setPartitionerClass(WeatherPartition.class);
            job.setSortComparatorClass(WeatherComparator.class);
            // job.setCombinerClass(cls);
            // map end

            // reduce start
            job.setGroupingComparatorClass(WeatherGroupComparator.class);
            job.setReducerClass(WeatherReducer.class);
            // final (reducer) output value type
            job.setOutputValueClass(IntWritable.class);
            job.setNumReduceTasks(2);
            // reduce end

            Path input = new Path("/data/weather/input/weather.txt");
            FileInputFormat.addInputPath(job, input);
            Path output = new Path("/data/weather/output");
            // Test convenience: a stale output directory makes the job fail on
            // startup, so remove it first.
            if (output.getFileSystem(configuration).exists(output)) {
                output.getFileSystem(configuration).delete(output, true);
            }
            FileOutputFormat.setOutputPath(job, output);

            // Submit the job, then poll for progress until the job is complete
            job.waitForCompletion(true);
        }

    }
    
    
    public class WeatherMapper extends Mapper<LongWritable, Text, WeatherData, IntWritable>{

        // Reused across map() calls to avoid a per-record allocation.
        private final WeatherData wdkey = new WeatherData();
        private final IntWritable ivalue = new IntWritable();
        // SimpleDateFormat is not thread-safe, but a Mapper instance is driven
        // by a single task thread, so hoisting it out of map() is safe and
        // avoids re-parsing the pattern for every record.
        private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

        /**
         * Parses one input line of the form "yyyy-MM-dd<TAB>NNc" and emits
         * (WeatherData(year, month, day, temperature), temperature).
         */
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String str[] = StringUtils.split(value.toString(), '\t');

            try {
                Date date = sdf.parse(str[0]);
                Calendar calendar = Calendar.getInstance();
                calendar.setTime(date);
                // FIX: read the static Calendar constants via the class, not an
                // instance (the original used calendar.YEAR etc.).
                wdkey.setYear(calendar.get(Calendar.YEAR));
                wdkey.setMonth(calendar.get(Calendar.MONTH) + 1); // Calendar months are 0-based
                wdkey.setDay(calendar.get(Calendar.DAY_OF_MONTH));

                // Strip the trailing unit character, e.g. "38c" -> 38.
                int temperature = Integer.parseInt(str[1].substring(0, str[1].length() - 1));
                wdkey.setTemperature(temperature);

                ivalue.set(temperature);

                context.write(wdkey, ivalue);
            } catch (Exception e) {
                // BUG FIX: the original swallowed parse failures silently; at
                // least surface the bad record so data problems are visible.
                System.err.println("skipping malformed weather record: " + value);
            }
        }

    }
    
    
    public class WeatherReducer extends Reducer<WeatherData, IntWritable, Text, IntWritable>{
        // Keys that compare equal under WeatherGroupComparator (same year+month)
        // arrive as one reduce group; the sort comparator has already ordered
        // the group by temperature descending, e.g.:
        //1999 01 01 38(key)  38(value)
        //1999 01 11 32(key)  32(value)
        //1999 01 12 38(key)  38(value)
        //1999 01 11 28(key)  28(value)
        private Text key = new Text();
        private IntWritable result = new IntWritable();

        /**
         * Emits the two highest temperatures of one year-month group that fall
         * on different days.
         *
         * NOTE(review): Hadoop mutates {@code data} in place as the value
         * iterator advances, so data.getDay()/getTemperature() track the
         * current record — the loop deliberately relies on that behavior.
         */
        @Override
        protected void reduce(WeatherData data, Iterable<IntWritable> iterable,
            Reducer<WeatherData, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {

            int flag = 0;  // becomes non-zero once the hottest record is written
            int day = 0;   // day of the first emitted record, to skip same-day duplicates
            for(IntWritable writable : iterable){

                if(flag == 0){
                    // First record of the group is the hottest, e.g. 1999-01-01 38.
                    key.set(data.getYear()+"-"+data.getMonth()+"-"+data.getDay()+" " + data.getTemperature());
                    result.set(data.getTemperature());
                    flag ++;
                    day = data.getDay();
                    context.write(key, result);
                }
                if(flag != 0 && day != data.getDay()){
                    // First subsequent record on a DIFFERENT day is the runner-up.
                    key.set(data.getYear()+"-"+data.getMonth()+"-"+data.getDay()+" " + data.getTemperature());
                    result.set(data.getTemperature());
                    context.write(key, result);
                    break;
                }
            }

        }

    }
    
    
    public class WeatherPartition extends Partitioner<WeatherData, IntWritable>{

        /**
         * Routes each map output record to a reduce partition.  Keep this
         * method cheap so it does not slow down the map side.
         */
        @Override
        public int getPartition(WeatherData key, IntWritable value, int numPartitions) {
            // BUG FIX: hashCode() may be negative, and a negative remainder makes
            // Hadoop reject the partition number ("Illegal partition").  Mask the
            // sign bit before taking the modulus.
            return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
        }

    }
    
    
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    
    public class WeatherComparator extends WritableComparator{

        public WeatherComparator(){
            super(WeatherData.class, true);
        }

        /**
         * Shuffle sort order: year ascending, then month ascending, then
         * temperature descending (hottest first within a year-month).
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            WeatherData left = (WeatherData) a;
            WeatherData right = (WeatherData) b;

            int order = Integer.compare(left.getYear(), right.getYear());
            if (order != 0) {
                return order;
            }
            order = Integer.compare(left.getMonth(), right.getMonth());
            if (order != 0) {
                return order;
            }
            // Swapped arguments yield descending temperature.
            return Integer.compare(right.getTemperature(), left.getTemperature());
        }

    }
    
    
    package com.hadoop.mr.weather;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    
    import org.apache.hadoop.io.WritableComparable;
    
    public class WeatherData implements WritableComparable<WeatherData>{

        // Composite map-output key: the calendar date plus the observed temperature.
        private int year;
        private int month;
        private int day;
        private int temperature;

        /** Serializes the four fields in a fixed order for the shuffle. */
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeInt(year);
            out.writeInt(month);
            out.writeInt(day);
            out.writeInt(temperature);
        }

        /** Deserializes in the same field order as {@link #write}. */
        @Override
        public void readFields(DataInput in) throws IOException {
            year = in.readInt();
            month = in.readInt();
            day = in.readInt();
            temperature = in.readInt();
        }

        /** Natural order: chronological by year, month, day (temperature ignored). */
        @Override
        public int compareTo(WeatherData that) {
            int yearCompare = Integer.compare(this.year, that.year);
            if (yearCompare == 0) {
                int monthCompare = Integer.compare(this.month, that.month);
                if (monthCompare == 0) {
                    return Integer.compare(this.day, that.day);
                }
                return monthCompare;
            }
            return yearCompare;
        }

        /**
         * BUG FIX: this key is partitioned via hashCode() in WeatherPartition,
         * but the class previously inherited Object's identity hash, which
         * differs between JVM instances — equal keys emitted by different map
         * tasks could land in different reducers.  Hash only year and month so
         * every record of one month (the grouping unit) meets in one partition.
         */
        @Override
        public int hashCode() {
            return 31 * year + month;
        }

        /** Value equality over all four fields (equal objects hash equally). */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof WeatherData)) {
                return false;
            }
            WeatherData other = (WeatherData) obj;
            return year == other.year && month == other.month
                    && day == other.day && temperature == other.temperature;
        }

        /** @return the year */
        public int getYear() { return year; }

        /** @param year the year to set */
        public void setYear(int year) { this.year = year; }

        /** @return the month (1-12) */
        public int getMonth() { return month; }

        /** @param month the month to set */
        public void setMonth(int month) { this.month = month; }

        /** @return the day of month */
        public int getDay() { return day; }

        /** @param day the day to set */
        public void setDay(int day) { this.day = day; }

        /** @return the temperature */
        public int getTemperature() { return temperature; }

        /** @param temperature the temperature to set */
        public void setTemperature(int temperature) { this.temperature = temperature; }

    }
    
    
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    
    public class WeatherGroupComparator extends WritableComparator{

        public WeatherGroupComparator(){
            super(WeatherData.class, true);
        }

        /**
         * Groups reduce input by year and month only, so all records of one
         * month are delivered to a single reduce() call.
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            WeatherData left = (WeatherData) a;
            WeatherData right = (WeatherData) b;
            int byYear = Integer.compare(left.getYear(), right.getYear());
            return byYear != 0 ? byYear : Integer.compare(left.getMonth(), right.getMonth());
        }
    }
    
    
    展开全文
  • hadoop 天气案例

    2019-01-14 21:49:00
    hdfs://hadoop01:9000 " );  //windows下面运行添加一下两个配置 conf. set ( " mapreduce.app-submission.cross-platform " , " true " ); conf. set ( " mapreduce.framework.name " , " local " ); Job job...

    对下面一组气温数据进行处理,得到每个月份最高的两个气温值

    2018-12-12 14:30 25c
    2018-12-12 15:30 26c
    2017-12-12 12:30 36c
    2019-01-01 14:30 22c
    2018-05-05 15:30 26c
    2018-05-26 15:30 37c
    2018-05-06 15:30 36c
    2018-07-05 15:30 36c
    2018-07-05 12:30 40c
    2017-12-15 12:30 16c

     

    输出格式如下:

    2019-1 22
    2018-12 26
    2018-12 25
    2018-7 40
    2018-7 36
    2018-5 37
    2018-5 36
    2017-12 36
    2017-12 16

     

    public class App {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration(true);
            conf.set("fs.defaultFS","hdfs://hadoop01:9000");
         //windows下面运行添加一下两个配置 conf.
    set("mapreduce.app-submission.cross-platform","true"); conf.set("mapreduce.framework.name","local"); Job job = Job.getInstance(conf); //设置jobName job.setJobName("myJob"); job.setJarByClass(App.class); //配置map //mapper类 job.setMapperClass(MyMapperClass.class); //输出的key类型 job.setMapOutputKeyClass(TQ.class); //输出的value类型 job.setMapOutputValueClass(IntWritable.class); //将输出的(K,V)=>(K,V,P) //job.setPartitionerClass(MyPartitioner.class); //数据在内存spill(溢写)之前先排序,注:继承WritableComparator job.setSortComparatorClass(MySortComparator.class); //配置reduce //根据需求确定分组的维度,继承自WritableComparator job.setGroupingComparatorClass(MyGrouping.class); //如map阶段根据年、月、温度三个维度排序,而reduce只根据年、月两个维度 job.setReducerClass(MyReduce.class); Path input=new Path("/input/weather.txt"); Path out=new Path("/output/weather"); if(out.getFileSystem(conf).exists(out)){ out.getFileSystem(conf).delete(out,true); } //数据来源 HDFS路径 FileInputFormat.addInputPath(job,input); //计算结果的输出目录 FileOutputFormat.setOutputPath(job,out); //job.setNumReduceTasks(2); job.waitForCompletion(true); } }

     

    public class TQ implements WritableComparable<TQ> {

        private int year;
        private int month;
        private int day;
        /** temperature reading */
        private int temp;

        public int getYear() { return year; }

        public void setYear(int year) { this.year = year; }

        public int getMonth() { return month; }

        public void setMonth(int month) { this.month = month; }

        public int getDay() { return day; }

        public void setDay(int day) { this.day = day; }

        public int getTemp() { return temp; }

        public void setTemp(int temp) { this.temp = temp; }

        /** Natural order: year ascending, then month ascending. */
        @Override
        public int compareTo(TQ other) {
            int byYear = Integer.compare(getYear(), other.getYear());
            return byYear == 0 ? Integer.compare(getMonth(), other.getMonth()) : byYear;
        }

        /** Serializes the fields in a fixed order. */
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeInt(this.year);
            out.writeInt(this.month);
            out.writeInt(this.day);
            out.writeInt(this.temp);
        }

        /** Mirrors the field order of {@link #write}. */
        @Override
        public void readFields(DataInput in) throws IOException {
            this.year = in.readInt();
            this.month = in.readInt();
            this.day = in.readInt();
            this.temp = in.readInt();
        }
    }

     

     

     

    /**
     * Groups map output by year-month for the reduce phase.
     */
    public class MyGrouping extends WritableComparator {
        public MyGrouping(){
            super(TQ.class,true);
        }

        /**
         * BUG FIX: the original returned 1 for every non-equal pair, violating
         * the comparator contract (compare(a,b) and compare(b,a) were both
         * positive).  Return a properly signed ordering on (year, month);
         * grouping only needs zero/non-zero, but a lawful comparator is safe
         * under any caller.
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            TQ tq1 = (TQ) a;
            TQ tq2 = (TQ) b;
            int byYear = Integer.compare(tq1.getYear(), tq2.getYear());
            if (byYear != 0) {
                return byYear;
            }
            return Integer.compare(tq1.getMonth(), tq2.getMonth());
        }
    }
    public class MyMapperClass extends Mapper<LongWritable,Text,TQ, IntWritable> {
        // Reused output key/value to avoid per-record allocations.
        TQ tq=new TQ();
        IntWritable outVal=new IntWritable();

        /**
         * Parses a record of the form "yyyy-MM-dd HH:mm NNc" and emits
         * (TQ(year, month, day, temp), temp).
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(" ");
            String[] ymd = fields[0].split("-");

            tq.setYear(Integer.parseInt(ymd[0]));
            tq.setMonth(Integer.parseInt(ymd[1]));
            tq.setDay(Integer.parseInt(ymd[2]));

            // "25c" -> 25
            int temp = Integer.parseInt(fields[2].replace("c", ""));
            tq.setTemp(temp);
            outVal.set(temp);

            context.write(tq, outVal);
        }
    }
    public class MyReduce extends Reducer<TQ, IntWritable, Text,IntWritable> {
        Text txtKey=new Text();

        /**
         * Emits the first two values of each year-month group; the sort
         * comparator already placed the highest temperatures first, so these
         * are the month's top two readings.
         */
        @Override
        protected void reduce(TQ key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int emitted = 0;
            for (IntWritable val : values) {
                if (emitted == 2) {
                    break;
                }
                txtKey.set(String.format("%s-%s",key.getYear(),key.getMonth()));
                context.write(txtKey, val);
                emitted++;
            }
        }
    }
    /**
     * Orders records before they spill to disk: year descending, month
     * descending, temperature descending.
     */
    public class MySortComparator extends WritableComparator {
        public MySortComparator(){
            super(TQ.class,true);
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            TQ tq1=(TQ)a;
            TQ tq2=(TQ)b;

            // Comparing with swapped arguments yields descending order on
            // every level (equivalent to negating the ascending result).
            int byYear = Integer.compare(tq2.getYear(), tq1.getYear());
            if (byYear != 0) {
                return byYear;
            }
            int byMonth = Integer.compare(tq2.getMonth(), tq1.getMonth());
            if (byMonth != 0) {
                return byMonth;
            }
            return Integer.compare(tq2.getTemp(), tq1.getTemp());
        }
    }

     

    转载于:https://www.cnblogs.com/yehuabin/p/10269248.html

    展开全文
  • Hadoop天气数据分析案例

    千次阅读 2019-09-21 18:29:00
    * 实现天气 年月正序,温度倒序 */ public class TqSortComparator extends WritableComparator {  Tq t1 = null;  Tq t2 = null;      public TqSortComparator() {  super(Tq.class...

    需求:

    找出每个月气温最高的2天(案例测试)

    数据源:

    1949-10-01 14:21:02    34c

    1949-10-01 19:21:02    38c

    1949-10-02 14:01:02    36c

    1950-01-01 11:21:02    32c

    1950-10-01 12:21:02    37c

    1951-12-01 12:21:02    23c

    1950-10-02 12:21:02    41c

    1950-10-03 12:21:02    27c

    1951-07-01 12:21:02    45c

    1951-07-02 12:21:02    46c

    1951-07-03 12:21:03    47c

    项目结构:

    TQtest.java

    package com.tq.test;

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;

    import org.apache.hadoop.fs.Path;

    import org.apache.hadoop.io.IntWritable;

    import org.apache.hadoop.mapreduce.Job;

    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class TQtest {

           public static void main(String[] args) throws IOException,  ClassNotFoundException, InterruptedException {

                 // TODO Auto-generated method stub

                 // 1配置

                 Configuration conf = new Configuration();

                 Job job = Job.getInstance(conf);

                 job.setJarByClass(TQtest.class);

                 job.setJobName("tq");

                 // 2设置输入路径和输出路径

                 Path inpath = new Path("/tq/input");

                 FileInputFormat.addInputPath(job, inpath);

                 Path outpath = new Path("/tq/output");

                 if (outpath.getFileSystem(conf).exists(outpath))

                        outpath.getFileSystem(conf).delete(outpath, true);

                 FileOutputFormat.setOutputPath(job, outpath);

                 // 3设置Mapper

                 job.setMapperClass(Tmapper.class);

                 job.setMapOutputKeyClass(Tq.class);

                 job.setMapOutputValueClass(IntWritable.class);

                 // 4 自定义比较器

                 job.setSortComparatorClass(TqSortComparator.class);

                 // 5自定义分区器

                 job.setPartitionerClass(TPartitioner.class);

                 // 6 自定义组排序

                 job.setGroupingComparatorClass(TGroupCmparator.class);

                 // 7 设置reducetask数量

                 job.setNumReduceTasks(2);

                 // 8 设置reducer

                 job.setReducerClass(Treducer.class);

                 // 9

                 job.waitForCompletion(true);

           }

    }

    Tmapper.java

    package com.tq.test;

     

     

    import java.io.IOException;

    import java.text.ParseException;

    import java.text.SimpleDateFormat;

    import java.util.Calendar;

    import java.util.Date;

     

     

    import org.apache.hadoop.io.IntWritable;

    import org.apache.hadoop.io.LongWritable;

    import org.apache.hadoop.io.Text;

    import org.apache.hadoop.mapreduce.Mapper;

    import org.jboss.netty.util.internal.StringUtil;

     

     

    public class Tmapper extends Mapper<LongWritable, Text, Tq, IntWritable> {

     

     

        Tq tkey = new Tq();

        IntWritable tvalue = new IntWritable();

     

     

        @Override

        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            // 获得时间 温度数组

            String[] words = StringUtil.split(value.toString(), '\t');

            String pattern = "yyyy-MM-dd";

            SimpleDateFormat sdf = new SimpleDateFormat(pattern);

            try {

                // 处理日期

                Date date = sdf.parse(words[0]);

                Calendar cal = Calendar.getInstance();

                cal.setTime(date);

                tkey.setYear(cal.get(Calendar.YEAR));

                tkey.setMonth(cal.get(Calendar.MONTH) + 1);

                tkey.setDay(cal.get(Calendar.DAY_OF_MONTH));

                // 处理温度

                int temp = Integer.parseInt(words[1].substring(0, words[1].lastIndexOf("c")));

                tkey.setTemp(temp);

                tvalue.set(temp);

                context.write(tkey, tvalue);

            } catch (ParseException e) {

                // TODO Auto-generated catch block

                e.printStackTrace();

            }

     

     

        }

    }

     

     

    Tq.java

    package com.tq.test;

     

     

    import java.io.DataInput;

    import java.io.DataOutput;

    import java.io.IOException;

     

     

    import org.apache.hadoop.io.WritableComparable;

     

     

    public class Tq implements WritableComparable<Tq> {

     

     

        private int year;

        private int month;

        private int day;

        private int temp;

     

     

        public int getYear() {

            return year;

        }

     

     

        public void setYear(int year) {

            this.year = year;

        }

     

     

        public int getMonth() {

            return month;

        }

     

     

        public void setMonth(int month) {

            this.month = month;

        }

     

     

        public int getDay() {

            return day;

        }

     

     

        public void setDay(int day) {

            this.day = day;

        }

     

     

        public int getTemp() {

            return temp;

        }

     

     

        public void setTemp(int temp) {

            this.temp = temp;

        }

     

     

        public void write(DataOutput out) throws IOException {

            out.writeInt(this.getYear());

            out.writeInt(this.getMonth());

            out.writeInt(this.getDay());

            out.writeInt(this.getTemp());

     

     

        }

     

     

        public void readFields(DataInput in) throws IOException {

            this.setYear(in.readInt());

            this.setMonth(in.readInt());

            this.setDay(in.readInt());

            this.setTemp(in.readInt());

        }

     

     

        public int compareTo(Tq o) {

            int c1 = Integer.compare(this.getYear(), o.getYear());

            if (c1 == 0) {

                int c2 = Integer.compare(this.getMonth(), o.getMonth());

                if (c2 == 0) {

                    return Integer.compare(this.getDay(), o.getDay());

                }

                return c2;

            }

            return c1;

        }

     

     

        @Override

        public String toString() {

            return year + "-" + month + "-" + day;

        }

     

     

    }

    TqSortComparator.java

    package com.tq.test;

     

     

    import org.apache.hadoop.io.WritableComparable;

    import org.apache.hadoop.io.WritableComparator;

     

     

    /**

    * 实现天气 年月正序,温度倒序

    */

    public class TqSortComparator extends WritableComparator {

        Tq t1 = null;

        Tq t2 = null;

     

     

        public TqSortComparator() {

            super(Tq.class, true);

        }

     

     

        public int compare(WritableComparable a, WritableComparable b) {

            t1 = (Tq) a;

            t2 = (Tq) b;

            int c1 = Integer.compare(t1.getYear(), t2.getYear());

            if (c1 == 0) {

                int c2 = Integer.compare(t1.getMonth(), t2.getMonth());

                if (c2 == 0) {

                    return -Integer.compare(t1.getTemp(), t2.getTemp());

                }

                return c2;

            }

            return c1;

        }

    }

     

    TPartitioner.java

    package com.tq.test;

     

     

    import org.apache.hadoop.io.IntWritable;

    import org.apache.hadoop.mapreduce.Partitioner;

     

     

    public class TPartitioner extends Partitioner<Tq, IntWritable> {

     

     

        @Override

        public int getPartition(Tq key, IntWritable value, int numPartitions) {

            // TODO Auto-generated method stub

            return key.getYear() % numPartitions;

        }

     

     

    }

     

    TGroupCmparator.java

    package com.tq.test;

     

     

    import org.apache.hadoop.io.WritableComparable;

    import org.apache.hadoop.io.WritableComparator;

     

     

    public class TGroupCmparator extends WritableComparator {

        Tq t1 = null;

        Tq t2 = null;

     

     

        public TGroupCmparator() {

            super(Tq.class, true);

        }

     

     

        public int compare(WritableComparable a, WritableComparable b) {

            t1 = (Tq) a;

            t2 = (Tq) b;

            int c1 = Integer.compare(t1.getYear(), t2.getYear());

            if (c1 == 0) {

                return Integer.compare(t1.getMonth(), t2.getMonth());

            }

            return c1;

        }

    }

     

    Treducer.java

    package com.tq.test;

     

     

    import java.io.IOException;

     

     

    import org.apache.hadoop.io.IntWritable;

    import org.apache.hadoop.io.Text;

    import org.apache.hadoop.mapreduce.Reducer;

     

     

    public class Treducer extends Reducer<Tq, IntWritable, Text, IntWritable> {

        Text tkey = new Text();

        IntWritable tval = new IntWritable();

     

     

        @Override

        protected void reduce(Tq key, Iterable<IntWritable> vals, Context context)

                throws IOException, InterruptedException {

            int flag = 0;

            int day = 0;

            for (IntWritable val : vals) {

                if (flag == 0) {

                    tkey.set(key.toString());

                    tval.set(val.get());

                    context.write(tkey, tval);

                    flag++;

                    day = key.getDay();

                }

                if (flag != 0 && day != key.getDay()) {

                    tkey.set(key.toString());

                    tval.set(val.get());

                    context.write(tkey, tval);

                    return;

                }

            }

        }

    }

     

     

    打包成jar,放到linux虚拟机上执行(要先启动hdfs yarn zookeeper DFSZKFailoverController)

    hadoop jar tq.jar com.tq.test.TQtest

    执行结果:

     

    展开全文
  • hadoop权威指南》的天气数据可以在ftp://ftp3.ncdc.noaa.gov/pub/data/noaa下载,在网上看到这个数据好开心,打开ftp发现个问题,呀呀,这么多文件啊,我一个个去点另存为,得点到啥时候啊,迅雷应该有批量下载,...
    这篇文章主要介绍了python从ftp下载数据到本地保存的实例代码方法,大家参考使用吧

    《hadoop权威指南》的天气数据可以在ftp://ftp3.ncdc.noaa.gov/pub/data/noaa下载,在网上看到这个数据好开心,打开ftp发现个问题,呀呀,这么多文件啊,我一个个去点另存为,得点到啥时候啊,迅雷应该有批量下载,只是我没找到,估计是我浏览器把迅雷禁掉了,干脆自己用python写一个实现下载好了,网上早了一下,发现很简单啊

    复制代码代码如下:

    #!/usr/bin/python
    #-*- coding: utf-8 -*-

    from ftplib import FTP

    def ftpconnect():
        """Connect and log in to the NOAA FTP server, returning the FTP object."""
        ftp = FTP()
        ftp.set_debuglevel(2)  # verbose protocol tracing (level 2)
        ftp.connect('ftp3.ncdc.noaa.gov', 21)
        # anonymous login: empty username/password
        ftp.login('', '')
        return ftp

    def downloadfile():
        """Download NOAA weather data for 1911-1930 into ./weatherdata/.

        Each remote file is saved locally as "<year>--<basename>" so the
        files sort chronologically by name.
        """
        ftp = ftpconnect()
        # print ftp.getwelcome()  # server welcome banner, if needed
        datapath = "/pub/data/noaa/"
        year = 1911
        while year <= 1930:
            path = datapath + str(year)
            li = ftp.nlst(path)
            for eachFile in li:
                localpaths = eachFile.split("/")
                localpath = localpaths[len(localpaths) - 1]
                # prefix the year so local files sort by date
                localpath = 'weatherdata/' + str(year) + '--' + localpath
                bufsize = 1024  # transfer buffer size
                # BUG FIX: the original opened a new file every iteration but
                # called fp.close() only once, after both loops — every file
                # except the last leaked (and close() raised NameError when no
                # file was listed).  A context manager closes each file as soon
                # as its download finishes.
                with open(localpath, 'wb') as fp:
                    ftp.retrbinary('RETR ' + eachFile, fp.write, bufsize)
            year = year + 1
        ftp.set_debuglevel(0)  # turn protocol tracing back off
        ftp.quit()


    if __name__=="__main__":
        downloadfile()


    展开全文
  • 这是hadoop权威指南 天气的数据2.
  • Hadoop权威指南程序演示所使用的正确格式的天气数据压缩文件:1901.gz和1902.gz。
  • hadoop权威指南天气测试案例和执行脚本
  • Hadoop-最高温度- Hadoop- 从 Tom White 的书 Hadoop: Definitive Guide (3rd edition) Chapter 2 中探索天气数据以找到最高温度
  • hadoop入门学习 mapreduce求解 天气数据 2002年整年数据的最高气温
  • hadoop权威指南,天气数据,包含1901和1902两年数据。
  • hadoop下c++程序-天气实例

    千次阅读 2014-09-04 15:48:38
    hadoop c++代码天气温度实例实现
  • 1.修改环境变量 ~/.bash_profile export ANDROID_HOME=~/Library/Android/sdk # added by Anaconda3 5.1.0 installer export PATH="/Users/walle/anaconda3/bin:$...export HADOOP_HOME=/usr/local/Cellar/hado...
  • 我的hadoop路径是/Users/chenxun/software/hadoop-2.8.1 所以我在这个建了个自己文件夹myclass目录,把代码放到这个目录下面。如图所示:[chenxun@chen.local 17:21 ~/software/hadoop-2.8.1/myclass]$ll total 64...
  • hadoop-hive查询ncdc天气数据实例

    千次阅读 2015-06-16 09:40:24
    使用hive查询ncdc天气数据 在hive中将ncdc天气数据导入,然后执行查询shell,可以让hive自动生成mapredjob,快速去的想要的数据结果。 1. 在hive中创建ncdc表,这个表用来存放ncdc的数据 create table ncdc ...
  • 今天将Hadoop 权威指南天气数据示例代码在hadoop集群上跑通,记录一下。 之前在百度/Google上怎么也没有找到怎么样将自己的Map-Reduce方法跑在集群上的每一步都具体描述,经过一番痛苦的无头苍蝇式的摸索,成功了,...
  • 我在看《Hadoop权威指南》时,里
  • 下载《Hadoop权威教程》里用到的NCDC天气数据,供后续在此数据基础上跑mapred程序。 操作过程 步骤一、编写简单的shell脚本,下载数据文件到本地文件系统 已知NCDC天气数据父目录地址为ftp://ftp.ncdc.noaa.gov/...
  • 准备天气数据(详情请看hadoop权威指南附录气象数据) 1. 在 http://www.hadoopbook.com/ 下载天气数据文件 1901.gz 1902.gz 2. 上传数据到服务器 3. 创建hadoop添加数据文件夹 hadoop dfs –mkdir /ncdc 4....
  • Hadoop Map-Reduce 天气示例

    千次阅读 2013-09-06 10:34:42
    我们照着Hadoop教程简单的写了一个例子,它可以用于分析天气数据然后找到某年的最高气温。 我们是用hadoop 0.20的新的API写的,具体代码如下: Mapper类: /* */ package com.charles.parseweather; import ...
  • hadoop权威指南,天气数据文件自动下载下载
  • Hadoop天气分析 该项目将下载世界上大多数国家的天气历史数据,并将数据存储到HDFS中。 将数据放入HDFS后,映射器和化简器作业将针对该数据运行,并将分析结果保存到HBase。 该代码是使用Java和Hbase作为NoSQL数据库...

空空如也

空空如也

1 2 3 4 5 ... 20
收藏数 3,445
精华内容 1,378
关键字:

hadoop天气