注册 登录  
 加关注
   显示下一条  |  关闭
温馨提示!由于新浪微博认证机制调整,您的新浪微博帐号绑定已过期,请重新绑定!立即重新绑定新浪微博》  |  关闭

千鸟

本blog所有日志均系原创 转载请注明出处

 
 
 

日志

 
 

用dom,dom4j,sax读大xml比较  

2007-03-14 19:39:13|  分类: J2SE |  标签: |举报 |字号 订阅

  下载LOFTER 我的照片书  |

boss要把80M的xml存到oracle里面去,先试DOM,报异常OutOfMemoryError,然后试传说中最好的dom4j,还是OutOfMemoryError,然后祭出传说中最麻烦的SAX,运行正常,3分多钟,生成了14万条sql语句.

 

    先看看DOM的.

/*

 * myxml.java

 *

 * Created on 2007年1月26日, 下午10:00

 *

 * To change this template, choose Tools | Template Manager

 * and open the template in the editor.

 */

package ecom;

import ecom.*;

import java.util.Vector;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.io.InputStream;

import javax.xml.parsers.DocumentBuilder;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

// The following are mainly org.w3c.dom classes, plus SAXException from org.xml.sax

import org.w3c.dom.Document;

import org.w3c.dom.Element;

import org.w3c.dom.Node;

import org.w3c.dom.NodeList;

import org.xml.sax.SAXException;

/**

 *

 * @author jzxl

 */

public class myxml {

   

    /** Creates a new instance of myxml */

    public myxml() {

        DocumentBuilderFactory domfac=DocumentBuilderFactory.newInstance();

        try {

            DocumentBuilder dombuilder=domfac.newDocumentBuilder();

            InputStream is=new FileInputStream("D://scaffold_mddr.xml");

            Document doc=dombuilder.parse(is);

            Element root=doc.getDocumentElement();

            NodeList books=root.getChildNodes();

           

            System.out.print("\n"+books.getLength()+"\n");

           

            Vector vector =getNodeListValue(books);//get all the value of nodelist

           

           

            for(int i=0;i<vector.size();i++){

                Vector v=(Vector)vector.get(i);

                String insertsql="insert into scaffold_mddr(name,smiles,mddr_mol_count, mddr_mw, mddr_clogp,mddr_hba,mddr_hbd,target_s)values(";

                for (int ii=0;ii<v.size()-2;ii++){

                    if(ii!=0){

                        insertsql=insertsql+",";

                    }

                    String s=(String)v.get(ii);

                    insertsql=insertsql+"'"+s+"'";

                   

                }

                insertsql=insertsql+")";

                System.out.print("\n"+insertsql);

                datacon data=new datacon();

                data.insert(insertsql);

            }

           

       

           

        } catch (ParserConfigurationException e) {

           

            e.printStackTrace();

           

        } catch (FileNotFoundException e) {

           

            e.printStackTrace();

           

        } catch (SAXException e) {

           

            e.printStackTrace();

           

        } catch (IOException e) {

           

            e.printStackTrace();

           

        }

    }

    /*

     *   to get the values of the Nodelist

     */

   Vector getNodeListValue(NodeList nodelist){

       Vector vector=new Vector();                 //get all of the values of Nodelist

                        

        if(nodelist!=null){

               

                for(int i=0;i<nodelist.getLength();i++){

                   

                    Node node=nodelist.item(i);

                    if(node.getNodeType()==Node.ELEMENT_NODE){

                       

                        Vector v=new Vector();      // get all of the values of Node

                       

                        for(node=node.getFirstChild();node!=null;node=node.getNextSibling()){

                            if(node.getNodeType()==Node.ELEMENT_NODE){                         

                                

                                String String nodevalue=node.getFirstChild().getNodeValue();

                               

                                v.add(nodevalue);

                            }

                        }

                       vector.add(v);

                    }

                }

            }

    

       

    return vector;

   

    }

    public static void main(String[] args) {

       

        new myxml();

       

    }

   

}

然后是DOM4j

/*

 * mydom4j.java

 *

 * Created on 2007年1月27日, 下午5:20

 *

 * To change this template, choose Tools | Template Manager

 * and open the template in the editor.

 */

package ecom;

import java.io.*;

import java.util.*;

import org.dom4j.*;

import org.dom4j.io.*;

import ecom.datacon;

/**

 *

 * @author jzxl

 */

public class mydom4j {

   

    /** Creates a new instance of mydom4j */

    public mydom4j() {

    }

    public static void main(String arge[]) {

       

        long lasting = System.currentTimeMillis();

       

        try {

           

            File f = new File("D://scaffold_mddr.xml");

            

           

          SAXReader reader = new SAXReader();

           

            Document doc = reader.read(f);

           

          //  Element root = doc.getRootElement();

           

          //  Element foo;

        //    datacon data=new datacon();

           

        /*    for (Iterator i = root.elementIterator("scaffold"); i.hasNext();) {

               

                foo = (Element) i.next();

                String insertsql="insert into scaffold_mddr(name,smiles,mddr_mol_count, mddr_mw, mddr_clogp,mddr_hba,mddr_hbd,target_s) values('";

                insertsql=insertsql+foo.elementText("name");

                insertsql=insertsql+"','";

                insertsql=insertsql+foo.elementText("smiles");

                insertsql=insertsql+"','";

                insertsql=insertsql+foo.elementText("MDDR_Mol_Count");

                insertsql=insertsql+"','";

                insertsql=insertsql+foo.elementText("MDDR_MW");

                insertsql=insertsql+"','";

                insertsql=insertsql+foo.elementText("MDDR_CLOGP");

                insertsql=insertsql+"','";

                insertsql=insertsql+foo.elementText("MDDR_HBA");

                insertsql=insertsql+"','";

                insertsql=insertsql+foo.elementText("MDDR_HBD");

                insertsql=insertsql+"','";

                insertsql=insertsql+foo.elementText("TARGET_S");

                insertsql=insertsql+"')";

                System.out.print("\n"+insertsql);

                data.insert(insertsql);

               

            }*/

//            data.close();

           

        } catch (Exception e) {

           

            e.printStackTrace();

           

        }

    }

   

}

最后上的Sax

package ecom;

import ecom.datacon;

import org.xml.sax.*;

import org.xml.sax.helpers.*;

import javax.xml.parsers.*;

public class XMLBySax extends DefaultHandler {

   

    java.util.Stack tags = new java.util.Stack();

    String insertsql="insert into scaffold_mddr(name,smiles,mddr_mol_count, mddr_mw, mddr_clogp,mddr_hba,mddr_hbd,target_s) values(";

    String sql=insertsql;

    double innumber=0;

    datacon data=new datacon();

    public XMLBySax() {

       

       super();

    }

   

    public static void main(String args[]) {

       

        long lasting = System.currentTimeMillis();

       

        try {

           

            SAXParserFactory sf = SAXParserFactory.newInstance();

           

            SAXParser sp = sf.newSAXParser();

           

            XMLBySax reader = new XMLBySax();

           

            sp.parse(new InputSource("D://scaffold_mddr.xml"), reader);//D://scaffold_mddr.xml //src/java/xmldata.xml

           

        } catch (Exception e) {

           

            e.printStackTrace();

           

        }

        System.out.println("运行时间:" + (System.currentTimeMillis() - lasting) + "毫秒");

    

}

    public void characters(char ch[], int start, int length) throws SAXException {

       

        String tag = (String) tags.peek();

       

        if (tag.equals("name")) {

            getinsertsql(new String(ch, start, length));

        }

        if (tag.equals("smiles")) {

           getinsertsql(new String(ch, start, length));     

        }

         if(tag.equals("MDDR_Mol_Count")){

            getinsertsql(new String(ch, start, length));

        }

        if(tag.equals("MDDR_MW")){

            getinsertsql(new String(ch, start, length));

        }

        if(tag.equals("MDDR_CLOGP")){

            getinsertsql(new String(ch, start, length));

        }

        if(tag.equals("MDDR_HBA")){

            getinsertsql(new String(ch, start, length));

        }

        if(tag.equals("MDDR_HBD")){

           getinsertsql(new String(ch, start, length));

        }

        if(tag.equals("TARGET_S")){

           getinsertsql(new String(ch, start, length),true);          

          

        }

      }

   

   public  void doinsert(String sql){     

       data.insert(sql);

     

    }

   public void getinsertsql(String element){

       if (!element.startsWith("\n")){ //空的value不打印      System.out.println(getIndent()+ " Value: " + s);

         sql=sql+"'"+element+"',";

       }

   }

   public void getinsertsql(String element,boolean last){

      

       if (!element.startsWith("\n")){          

           sql=sql+"'"+element+"'";

        //   System.out.print("\n+++++++++++++++++++++++++++\n");

           String in=sql+")";

           innumber++;

          //System.out.print(innumber);

          

           doinsert(in);

           sql=insertsql;

       }

   }

    public void startElement(String uri,String localName,String qName,Attributes attrs) {

       

        tags.push(qName);

    }

}

///////////////////////////////////////////////////////

//////////////////////////////////////////////////////

///////////////////////////////////////////////////////

//////////////////////////////////////////////////////

 

///////////////////////////////////////////////////////

//////////////////////////////////////////////////////

4.SAX 解析xml新的版本.减少了出错的可能

package ecom;
import ecom.datacon;
import ecom.ThreadPool;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import javax.xml.parsers.*;
//import java.util.LinkedList;
import java.util.logging.Logger;
/**
 *
 * @author jzxl
 */
public class XMLBySax extends DefaultHandler {
   
    java.util.Stack tags = new java.util.Stack();
    String insertsql="insert into scaffold(name,smiles,mddr_mol_count, mddr_mw, mddr_clogp,mddr_hba,mddr_hbd,target_s) values('";
    // 插入到表 scafflod
    String sql_parent_mol="insert into parent_mol_id(name,MDDR_NO) values('";
    //插入到表 parent_mol_id
    String sql_activity_class="insert into activity_class(name,act_INDEX,TARGET,COUNT) values('";
    //插入到表 activity_class
    String sqlmain=insertsql;
    String sqlmol=sql_parent_mol;
    String sqlact=sql_activity_class;
 
    datacon data;
    static int threadnum=10;
    String ;
    static  Logger log;
    static ThreadPool threadPool;
 //   String in=""; //最终插入的sql
    int >    ScaffoldAct act;
    ScaffoldMain smain;
    ScaffoldMol  smol;
    ;

   
   
    /**
     *
     */

    public XMLBySax() {
      
       super();
       data=new datacon();
       log=Logger.getLogger("xmllog.log");
       threadPool = new ThreadPool(10);
      
    }
   
    /**
     *
     * @param args
     */

    public static void main(String args[]) {
     
       
        long lasting = System.currentTimeMillis();     
   
        XMLBySax sax=new XMLBySax();
        log.info("begin:"+lasting);     
       
        try {
           
            SAXParserFactory sf = SAXParserFactory.newInstance();
           
            SAXParser sp = sf.newSAXParser();
           
            XMLBySax reader = new XMLBySax();
           
           // sp.parse(new InputSource("src/java/xmldata.xml"), reader);//E://scaffold_mddr.xml//D:\\xmldata.xml
            sp.parse(new InputSource("E://scaffold_mddr.xml"), reader);
            //D:\scaffold_mddr.xml
        } catch (Exception e) {
           
            e.printStackTrace();
           
        }
        threadPool.join();

        System.out.println("运行时间:" + (System.currentTimeMillis() - lasting) + "毫秒");
        log.info("运行时间:" + (System.currentTimeMillis() - lasting) + "毫秒");
}

    public void characters(char ch[], int start, int length) throws SAXException {
       
        String tag = (String) tags.peek();
        String temp=new String(ch, start, length);
        if(temp.startsWith("\n")||temp.equals("")){
            return;
        }
        if (tag.equals("name")) {              //三个表有相同的字段
           

            this.name=temp;
            smain.name=smain.name+temp;
           
           
        }
        else if (tag.equals("smiles")) {
 
            smain.smiles=smain.smiles+temp;
           
        }
        else if(tag.equals("MDDR_Mol_Count")){
            smain.MDDR_Mol_Count=smain.MDDR_Mol_Count+temp;

        }
        else if(tag.equals("MDDR_MW")){
            smain.MDDR_MW=smain.MDDR_MW+temp;

        }
        else if(tag.equals("MDDR_CLOGP")){
            smain.MDDR_CLOGP=smain.MDDR_CLOGP+temp;
                   

        }
        else if(tag.equals("MDDR_HBA")){
            smain.MDDR_HBA=smain.MDDR_HBA+temp;

        }
        else if(tag.equals("MDDR_HBD")){
            smain.MDDR_HBD=smain.MDDR_HBD +temp;

        }
        else if(tag.equals("TARGET_S")){
            smain.TARGET_S=smain.TARGET_S+temp;           
                
          
          
        }
        else if(tag.equals("MDDR_NO")){
//           getmolsql(new String(ch, start, length),true);
           smol.MDDR_NO=smol.MDDR_NO+temp;
          
        }
        else if(tag.equals("index")){           
//           getactsql(new String(ch, start, length));          
           act.index=act.index+temp;
        }
        else if(tag.equals("target")){
//           getactsql(new String(ch, start, length));          
           act.target=act.target+temp;
        }
        else if(tag.equals("count")){
//           getactsql(new String(ch, start, length),true);          
           act.count=act.count+temp;
        }
       
      }
   

 


  
   /**
    *
    * @return
    */
   public Runnable doinsert() {       
          return new Runnable(){      
             
            public void  run(){
//               NewConn nc=new NewConn();
//                nc.insert(sqlmol);
                    data.insert(sqlmain);

            }
         } ;    
   }
      public Runnable doinsertmol() {       
          return new Runnable(){      
//              NewConn nc=new NewConn();
            public void  run(){                       
//                       nc.insert(sqlmol);
                    data.insert(sqlmol);    
                   
            }
         } ;    
   }
      public Runnable doinsertact() {       
          return new Runnable(){      
             
            public void  run(){
//                NewConn nc=new NewConn();
//                    nc.insert(sqlact);
                    data.insert(sqlact);    
                   
            }
         } ;    
   }

  

  
    public void startElement(String uri,String localName,String qName,Attributes attrs) {       
        tags.push(qName);
        if(qName.equals("scaffold")){
            smain=new ScaffoldMain();
        }else if(qName.equals("MDDR_NO")){
            smol=new ScaffoldMol();           
        }if(qName.equals("index")){
            act=new ScaffoldAct();
        }
    }
   
    public void endElement(String uri,String localName,String qName) throws SAXException {
        tags.pop();
        if(qName.equals("scaffold")){
            getsmainsql();
           // threadPool.runTask(doinsert());
           // data.insert(sqlmain);
          Runnable r=doinsert();
          r.run();          
        }else if(qName.equals("MDDR_NO")){
            getmolsql();
          Runnable r=doinsertmol();
          r.run();           
          //  data.insert(sqlmol);
          //  threadPool.runTask(doinsertmol());
        }if(qName.equals("count")){
           getactsql();
          // data.insert(sqlact);
          Runnable r=doinsertact();
          r.run();
          // threadPool.runTask(doinsertact());
        }
    }
   
    public void getsmainsql(){
        sqlmain=insertsql+smain.name+getf()+smain.smiles+getf()+smain.MDDR_Mol_Count+getf()+smain.MDDR_MW+getf();
        sqlmain=sqlmain+smain.MDDR_CLOGP+getf()+smain.MDDR_HBA+getf()+smain.MDDR_HBD+getf()+smain.TARGET_S+"')";
       
        //return sqlmain;
    }
    public void getmolsql(){
        sqlmol=sql_parent_mol+name+getf()+smol.MDDR_NO+"')";
       // return sqlmol;       
    }
    public void getactsql(){
        sqlact=sql_activity_class+name+getf()+act.index+getf()+act.target+getf()+act.count+"')";
      //  return sqlact;
    }
    public String getf(){
        return "','";
    }
    public void endDocument() throws SAXException {
       
           System.out.print("\n"+tags.size()+"\n is the size of tags.\nall the end");
  }
   
   
   
    class ScaffoldMain{
       
        String ;
       
        String smiles="";
       
        String MDDR_Mol_Count="";
       
        String MDDR_MW="";
       
        String MDDR_CLOGP="";
       
        String MDDR_HBA="";
       
        String MDDR_HBD="";
       
        String TARGET_S="";
        public ScaffoldMain(){
        }
       
    }
    class ScaffoldMol{
        String ;
        String MDDR_NO="";
        public ScaffoldMol(){
        }
    }
    class ScaffoldAct{
        String ;

        String index="";    

        String target="";

        String count="";
        public ScaffoldAct(){
        }
    }
   

}

 

  评论这张
 
阅读(883)| 评论(0)
推荐 转载

历史上的今天

评论

<#--最新日志,群博日志--> <#--推荐日志--> <#--引用记录--> <#--博主推荐--> <#--随机阅读--> <#--首页推荐--> <#--历史上的今天--> <#--被推荐日志--> <#--上一篇,下一篇--> <#-- 热度 --> <#-- 网易新闻广告 --> <#--右边模块结构--> <#--评论模块结构--> <#--引用模块结构--> <#--博主发起的投票-->
 
 
 
 
 
 
 
 
 
 
 
 
 
 

页脚

网易公司版权所有 ©1997-2017