Wednesday, April 23, 2014

Spring & Hive

Getting Spring & Hive integration working wasn't a breeze, but I got it. Here is what I had to do:

First, a typical BookRepository class:
package com.noushin.spring.ht.dao;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.hadoop.hive.HiveTemplate;
import org.springframework.stereotype.Repository;

import java.util.List;

/**
 * Hive-backed data-access component for the {@code books} table.
 *
 * <p>Every statement is executed through the injected {@link HiveTemplate}.
 *
 * @author nbashir
 */
@Repository
public class BookRepositroy {

   @Autowired
   private HiveTemplate hiveTemplate;

   /**
    * Runs {@code select count(*) from books;} and hands back the result.
    *
    * @return the value produced by the count query
    */
   public Long count() {
      final Long total = hiveTemplate.queryForLong("select count(*) from books;");
      return total;
   }

   /**
    * Runs {@code show tables;} and prints the number of rows returned to standard output.
    */
   public void showTables() {
      final int tableCount = hiveTemplate.query("show tables;").size();
      System.out.println("tables size: " + tableCount);
   }
}
Next, a typical service-layer class:
package com.noushin.spring.ht.service;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import com.noushin.spring.ht.dao.BookRepositroy;

/**
 * Service-layer facade over {@link BookRepositroy}; each method forwards to the repository.
 *
 * @author nbashir
 */
@Component
public class BookService {

   @Autowired
   BookRepositroy bookRepo;

   /** Delegates to {@link BookRepositroy#showTables()}. */
   public void showTables() {
      bookRepo.showTables();
   }

   /**
    * Delegates to {@link BookRepositroy#count()}.
    *
    * @return whatever the repository's count returns
    */
   public Long count() {
      final Long bookCount = bookRepo.count();
      return bookCount;
   }
}

Here is the pom file you need:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Maven coordinates of this sample project; packaged as a plain jar -->
    <groupId>com.noushin.spring</groupId>
    <artifactId>ht</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>ht</name>
    <url>http://maven.apache.org</url>

    <!-- Versions referenced by the dependency declarations below -->
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <spring.hadoop.version>1.0.2.RELEASE</spring.hadoop.version>
        <hadoop.version>1.2.1</hadoop.version>
        <hive.version>0.10.0</hive.version>
        <spring.version>4.0.3.RELEASE</spring.version>
    </properties>

    <dependencies>

        <!-- spring-data-hadoop: supplies org.springframework.data.hadoop.hive.HiveTemplate,
             which the repository class imports.
             NOTE(review): spring-context-support is excluded from its transitive dependencies,
             presumably to avoid a clash with the Spring version declared in this file; confirm. -->
        <dependency>
            <groupId>org.springframework.data</groupId>
            <artifactId>spring-data-hadoop</artifactId>
            <version>${spring.hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.springframework</groupId>
                    <artifactId>spring-context-support</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- NOTE(review): none of the classes shown use spring-jdbc directly; presumably it is
             required at runtime by the Hive support in spring-data-hadoop; confirm. -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-jdbc</artifactId>
            <version>${spring.version}</version>
        </dependency>

        <!-- Spring application context (ClassPathXmlApplicationContext, @Autowired, stereotypes) -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-context</artifactId>
            <version>${spring.version}</version>
        </dependency>

        <!-- Hadoop core library, on the compile classpath -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>${hadoop.version}</version>
            <scope>compile</scope>
        </dependency>

        <!-- Hive built-ins, declared with runtime scope so they are not on the compile classpath -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-builtins</artifactId>
            <version>${hive.version}</version>
            <scope>runtime</scope>
        </dependency>
        
    </dependencies>
    
</project>

Your application-context.xml:
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:context="http://www.springframework.org/schema/context"
    xmlns:hdp="http://www.springframework.org/schema/hadoop" xmlns:c="http://www.springframework.org/schema/c"
    xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
         http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd
         http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd">

    <!-- Resolve the ${hive.host} and ${hive.port} placeholders used below from hive.properties -->
    <context:property-placeholder location="hive.properties" />

    <!-- Activate annotation configured components -->
    <context:annotation-config />

    <!-- Scan components for annotations within the configured package -->
    <context:component-scan base-package="com.noushin.spring.ht" />

    <!-- Hive client factory pointed at the configured host and port; the nested script adds
         books.jar from the Hive lib directory.
         NOTE(review): presumably the script is run whenever a client is created; confirm
         against the Spring for Apache Hadoop reference. -->
    <hdp:hive-client-factory id="hiveClientFactory"
        host="${hive.host}" port="${hive.port}">
        <hdp:script>
            ADD JARS /usr/lib/hive/lib/books.jar;
        </hdp:script>
    </hdp:hive-client-factory>

    <!-- HiveTemplate bean that is autowired into the repository class.
         NOTE(review): no client factory is referenced explicitly, so this presumably relies on
         the default factory bean name "hiveClientFactory"; confirm. -->
    <hdp:hive-template id="hiveTemplate" />

</beans>


Your hive.properties:
# Host and port of the Hive server; read as ${hive.host} and ${hive.port} by the
# hive-client-factory element in application-context.xml.
hive.host=your-hive-server
hive.port=10000
# NOTE(review): hive.url is not referenced by the application-context.xml shown with this
# file; confirm whether anything else still reads it.
hive.url=jdbc:hive://${hive.host}:${hive.port}/default

And finally a main class to start and run your app:
package com.noushin.spring.ht;

import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

import com.noushin.spring.ht.service.BookService;

/**
 * Main class to demonstrate accessing Hive with Spring and HiveTemplate.
 * 
 * @author nbashir
 *
 */
public class HadoopMain {

   /**
    * Loads {@code application-context.xml} from the classpath, asks the {@link BookService}
    * to list the tables and count the books, and prints the count.
    *
    * @param args command-line arguments (unused)
    */
   public static void main(String[] args) {
      ClassPathXmlApplicationContext ctx = null;
      try {
         ctx = new ClassPathXmlApplicationContext("application-context.xml");
         BookService service = ctx.getBean(BookService.class);
         service.showTables();
         Long count = service.count();
         System.out.println("result  : " + count);
      }
      catch (Exception ex) {
         System.out.println("HadoopMain encountered an error and ended.");
         // Report the cause as well; otherwise a failure gives no hint of what went wrong.
         ex.printStackTrace();
      }
      finally {
         // Close the context so Spring can destroy the beans it created.
         if (ctx != null) {
            ctx.close();
         }
      }
   }
}

Saturday, April 12, 2014

Scheduling a task in Java

Using Spring, scheduling a task is quite simple. Just follow these steps:

1. Write a class that defines the scheduled tasks. Use the @Scheduled annotation.
package com.noushin.spring.scheduler;

import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

/**
 * Declares the scheduled tasks; the timing of each task is read from a property placeholder.
 */
@Component
public class TaskScheduler {

   // Runs with a fixed delay taken from the task.execution.time.delayed property.
   // The value is in milliseconds (5000 = 5 seconds) and is the pause between the end of
   // one run and the start of the next.
   @Scheduled(fixedDelayString = "${task.execution.time.delayed}")
   public void delayedTask() {
      System.out.println("Do something meaningful after each delayed period.");
   }

   // Runs on the cron schedule taken from the task.execution.time.every.day property.
   // Spring cron expressions have six fields: second minute hour day-of-month month day-of-week.
   // Example: 0 30 13 * * * means run this task every day at 1:30 pm.
   @Scheduled(cron = "${task.execution.time.every.day}")
   public void dailyTask() {
      System.out.println("Do something meaningful once a day.");
   }

}
2. Write a main class to load Spring's application context.
package com.noushin.spring.scheduler;

import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

/**
 * Entry point that boots the Spring context and then keeps the main thread alive.
 */
public class SechedulerMain {

   static TaskScheduler reportScheduler;

   /**
    * Loads {@code application-context.xml}, stores the {@link TaskScheduler} bean in
    * {@link #reportScheduler}, and then prints a separator line once per second, forever.
    * Any exception ends the loop; a message is printed and the stack trace is dumped.
    *
    * @param args command-line arguments (unused)
    */
   public static void main(String[] args) {
      try {
         ApplicationContext context = new ClassPathXmlApplicationContext("application-context.xml");
         if (context == null) {
            return;
         }
         reportScheduler = context.getBean(TaskScheduler.class);
         // Heartbeat loop: never exits on its own.
         for (;;) {
            System.out.println("main: --------------------------");
            Thread.sleep(1000);
         }
      }
      catch (Exception ex) {
         System.out.println("SchedulerMain encountered an error and ended.");
         ex.printStackTrace();
      }
   }
}
3. Add application-context.xml.
<?xml version="1.0" encoding="UTF-8"?>
<beans 
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
    xmlns="http://www.springframework.org/schema/beans"
    xmlns:context="http://www.springframework.org/schema/context"
    xmlns:task="http://www.springframework.org/schema/task"
    xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
        http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd
        http://www.springframework.org/schema/task http://www.springframework.org/schema/task/spring-task.xsd">

 <!-- Scan components for annotations within the configured package -->
 <context:component-scan base-package="com.noushin.spring" />
 
 <!-- Load task.properties so the ${...} placeholders in the @Scheduled annotations can be resolved -->
 <context:property-placeholder location="task.properties" />
       
 <!-- Activate annotation configured components -->
 <context:annotation-config />
 
 <!-- Enable processing of @Scheduled annotations on the scanned beans -->
 <task:annotation-driven />

</beans>
4. Add a property file to configure the scheduler; let's call it task.properties.
# Scheduling report job
# Fixed delay between runs of delayedTask, in milliseconds (5000 = 5 seconds).
task.execution.time.delayed=5000
# Cron schedule for dailyTask; the key must match the ${task.execution.time.every.day}
# placeholder used in the @Scheduled annotation.
# Spring cron fields: second minute hour day-of-month month day-of-week.
# 0 30 13 * * * = every day at 1:30 pm.
task.execution.time.every.day=0 30 13 * * *

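Note: Spring cron expressions have six space-separated fields (second, minute, hour, day of month, month, day of week), so "0 30 13 * * *" means every day at 1:30 pm. The patterns are defined at: http://docs.spring.io/spring/docs/current/javadoc-api/org/springframework/scheduling/support/CronSequenceGenerator.html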

5. Use the following dependencies in your pom.xml:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Maven coordinates of the scheduler sample project -->
    <groupId>com.noushin.spring</groupId>
    <artifactId>scheduler</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>scheduler</name>
    <description>Demo</description>

    <!-- Versions referenced by the dependency declarations below -->
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <log4j.version>1.2.17</log4j.version>
        <spring.version>4.0.2.RELEASE</spring.version>
    </properties>

    <dependencies>
        <!-- Logging.
             NOTE(review): the sample classes shown only print to System.out; confirm whether
             log4j is actually needed. -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>${log4j.version}</version>
        </dependency>

        <!-- Spring Context: provides the application context and the
             org.springframework.scheduling.annotation.Scheduled annotation the task class imports -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-context</artifactId>
            <version>${spring.version}</version>
        </dependency>
    </dependencies>
</project>