總網頁瀏覽量

2012年9月2日 星期日

Java版本 處理字幕檔

由於中文字幕檔可能採用不同的編碼格式, 用 Java 處理字幕檔有些麻煩: 需先判斷檔案的編碼, 再去做處理, 輸出時也要注意編碼。最好找到編碼格式相同的 srt 檔案, 否則先將檔案轉成相同的編碼後再做擷取, 會比較好實作。

程式內容:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;


/**
 * Command-line entry point: builds a {@link GetSubtitle}, runs the
 * subtitle matching pass, then releases the readers it opened.
 */
public class Subtitle {

    /**
     * Runs one matching pass over the subtitle files configured in
     * {@link GetSubtitle}.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if a subtitle file cannot be opened, read or closed
     */
    public static void main(String[] args) throws IOException {
        GetSubtitle subtitles = new GetSubtitle();

        subtitles.startOperation();
        subtitles.stopOperation();
    }
}

/**
 * Prints English subtitle text next to the Chinese text that shares the same
 * marker line.
 *
 * <p>The English file is read once from top to bottom. Every line containing
 * {@code ':'} (presumably the SRT timestamp line - confirm against the input
 * files) is looked up in the Chinese file; when an identical line is found,
 * the lines following it in both files are printed to {@code System.out} up
 * to and including the next empty line.
 *
 * <p>Both files are decoded as UTF-8.
 */
class GetSubtitle{
    public File fileEng;
    public File fileCht;
    public InputStreamReader readEng;
    public InputStreamReader readCht;

    public BufferedReader brEng;
    public BufferedReader brCht;
    public StringBuilder sbEng;
    public StringBuilder sbCht;
    public String strEng, strCht;

    /**
     * Creates a matcher over the default, hard-coded subtitle files.
     *
     * @throws IOException if the English file cannot be opened
     */
    GetSubtitle() throws IOException{
        initial();
    }

    /**
     * Creates a matcher over caller-supplied subtitle files.
     *
     * @param fileEng English subtitle file (UTF-8)
     * @param fileCht Chinese subtitle file (UTF-8)
     * @throws IOException if the English file cannot be opened
     */
    GetSubtitle(File fileEng, File fileCht) throws IOException{
        this.fileEng = fileEng;
        this.fileCht = fileCht;
        openEnglishReader();
    }

    /**
     * Points the matcher at the default subtitle files and opens the English
     * reader.
     *
     * @throws IOException if the English file cannot be opened
     */
    public void initial() throws IOException{
        fileEng = new File("C:\\Documents and Settings\\kent\\eclipse_work\\Subtitle\\Mission.Impossible_eng.txt");
        fileCht = new File("C:\\Documents and Settings\\kent\\eclipse_work\\Subtitle\\Mission.Impossible_cht.txt");
        openEnglishReader();
    }

    /** Opens {@link #fileEng} as a UTF-8 buffered reader. */
    private void openEnglishReader() throws IOException{
        readEng = new InputStreamReader(new FileInputStream(fileEng), "utf-8");
        brEng = new BufferedReader(readEng);
    }

    /**
     * Walks the English file and, for every line containing {@code ':'},
     * searches the Chinese file for the same line.
     *
     * @throws IOException if either file cannot be read
     */
    public void startOperation() throws IOException{
        while((strEng = brEng.readLine()) != null){
            if(strEng.contains(":")){
                repeatReadCht();
            }
        }
    }

    /**
     * Scans the Chinese file from the beginning for a line equal to the
     * current English line and prints the matching pair when found.
     *
     * <p>A fresh reader is opened for each scan and is always closed before
     * returning, so repeated scans do not accumulate open file handles.
     *
     * @throws IOException if the Chinese file cannot be opened or read
     */
    public void repeatReadCht() throws IOException{
        readCht = new InputStreamReader(new FileInputStream(fileCht), "utf-8");
        brCht = new BufferedReader(readCht);
        try{
            while((strCht = brCht.readLine()) != null){
                if(strEng.equals(strCht)){
                    outputData();
                    break;
                }
            }
        }finally{
            // outputData() is finished with the reader by now; release it here
            // instead of leaving one open reader behind per scan.
            brCht.close();
        }
    }

    /**
     * Prints the lines that follow the matched line in both files.
     *
     * <p>English lines are printed on one row separated by spaces, followed
     * by {@code "\r\n"}; Chinese lines are printed one per row. In both files
     * reading stops after the first empty line (which is itself printed) or
     * at end of file.
     *
     * @throws IOException if either file cannot be read
     */
    public void outputData() throws IOException{
        while(strEng.length() != 0){
            if((strEng = brEng.readLine()) != null){
                System.out.print(strEng + " ");
            }else{
                break;
            }
        }
        System.out.print("\r\n");
        while(strCht.length() != 0){
            if((strCht = brCht.readLine()) != null){
                System.out.println(strCht);
            }else{
                break;
            }
        }
    }

    /**
     * Closes whichever readers were opened. Safe to call when the Chinese
     * file was never scanned (no English line contained {@code ':'}).
     *
     * @throws IOException if closing a reader fails
     */
    public void stopOperation() throws IOException{
        try{
            if(brEng != null){
                brEng.close();
            }
        }finally{
            // brCht stays null until the first scan; closing an already
            // closed BufferedReader has no effect.
            if(brCht != null){
                brCht.close();
            }
        }
    }
}




網路上判斷編碼的方法:

/**
 * Reads a text file whose encoding is guessed from its byte-order mark (BOM).
 *
 * <p>Recognised marks: {@code EF BB BF} (UTF-8), {@code FF FE} (UTF-16
 * little-endian) and {@code FE FF} (UTF-16 big-endian). A file without one of
 * these marks is decoded as GBK.
 */
class Convert{
    Convert(){
    }

    /**
     * Reads the default, hard-coded subtitle file.
     *
     * @return the file content with every line terminated by {@code "\n"};
     *         whatever was read so far (possibly empty) if an I/O error occurs
     */
    public String convertCodeAndGetText() {
        File file = new File("C:\\Documents and Settings\\kent\\eclipse_work\\Subtitle\\Avengers_cht.txt");
        return convertCodeAndGetText(file);
    }

    /**
     * Reads {@code file}, choosing the decoder from the file's BOM, and prints
     * the name of the detected encoding to {@code System.out}.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and do not
     * propagate to the caller.
     *
     * @param file the text file to read
     * @return the file content with every line terminated by {@code "\n"};
     *         whatever was read so far (possibly empty) if an I/O error occurs
     */
    public String convertCodeAndGetText(File file) {
        StringBuilder text = new StringBuilder();
        BufferedInputStream in = null;
        try{
            in = new BufferedInputStream(new FileInputStream(file));
            BufferedReader reader = openReader(in);
            String str = reader.readLine();
            while(str != null){
                text.append(str).append("\n");
                str = reader.readLine();
            }
        }catch(FileNotFoundException e) {
            e.printStackTrace();
        }catch(IOException e) {
            e.printStackTrace();
        }finally{
            // Close on every path, including a failed read.
            if(in != null){
                try{
                    in.close();
                }catch(IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return text.toString();
    }

    /**
     * Inspects the first bytes of {@code in} and returns a reader using the
     * matching decoder, positioned just after the BOM.
     */
    private BufferedReader openReader(BufferedInputStream in) throws IOException{
        byte[] head = new byte[3];
        in.mark(head.length);
        int filled = 0;
        // A single read() may return fewer bytes than requested.
        while(filled < head.length){
            int n = in.read(head, filled, head.length - filled);
            if(n < 0){
                break;
            }
            filled += n;
        }
        in.reset();

        String charset;
        String label;
        int bomLength;
        if(filled >= 3 && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB && head[2] == (byte) 0xBF) {
            charset = "utf-8";
            label = "utf-8";
            bomLength = 3;
        }else if(filled >= 2 && head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
            // FF FE is the little-endian UTF-16 mark; the printed label stays
            // "unicode" as before.
            charset = "utf-16le";
            label = "unicode";
            bomLength = 2;
        }else if(filled >= 2 && head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
            charset = "utf-16be";
            label = "utf-16be";
            bomLength = 2;
        }else{
            // FF FF is not a byte-order mark, so such files land here too.
            charset = "GBK";
            label = "GBK";
            bomLength = 0;
        }

        // Consume the BOM: the explicit-endian UTF-16 decoders and the UTF-8
        // decoder would otherwise hand it back as a leading U+FEFF character.
        for(int i = 0; i < bomLength; i++){
            in.read();
        }

        BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
        System.out.println(label);
        return reader;
    }
}

沒有留言:

張貼留言