当前位置:网站首页>Preview CSV file
Preview CSV file
2022-04-23 14:16:00 【You are my bug forever】
UnicodeReader Tool class
package com.example.file.file;
/** version: 1.1 / 2007-01-25 - changed BOM recognition ordering (longer boms first) Original pseudocode : Thomas Weidenfeller Implementation tweaked: Aki Nieminen http://www.unicode.org/unicode/faq/utf_bom.html BOMs: 00 00 FE FF = UTF-32, big-endian FF FE 00 00 = UTF-32, little-endian EF BB BF = UTF-8, FE FF = UTF-16, big-endian FF FE = UTF-16, little-endian Win2k Notepad: Unicode format = UTF-16LE ***/
import java.io.*;
/** * Generic unicode textreader, which will use BOM mark * to identify the encoding to be used. If BOM is not found * then use a given default or system encoding. */
public class UnicodeReader extends Reader {
PushbackInputStream internalIn;
InputStreamReader internalIn2 = null;
String defaultEnc;
private static final int BOM_SIZE = 4;
/** * @param in inputstream to be read * @param defaultEnc default encoding if stream does not have * BOM marker. Give NULL to use system-level default. */
public UnicodeReader(InputStream in, String defaultEnc) {
internalIn = new PushbackInputStream(in, BOM_SIZE);
this.defaultEnc = defaultEnc;
}
public String getDefaultEncoding() {
return defaultEnc;
}
/** * Get stream encoding or NULL if stream is uninitialized. * Call init() or read() method to initialize it. */
public String getEncoding() {
if (internalIn2 == null) return null;
return internalIn2.getEncoding();
}
/** * Read-ahead four bytes and check for BOM marks. Extra bytes are * unread back to the stream, only BOM bytes are skipped. */
protected void init() throws IOException {
if (internalIn2 != null) return;
String encoding;
byte bom[] = new byte[BOM_SIZE];
int n, unread;
n = internalIn.read(bom, 0, bom.length);
if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) &&
(bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
encoding = "UTF-32BE";
unread = n - 4;
} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) &&
(bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
encoding = "UTF-32LE";
unread = n - 4;
} else if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) &&
(bom[2] == (byte) 0xBF)) {
encoding = "UTF-8";
unread = n - 3;
} else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
encoding = "UTF-16BE";
unread = n - 2;
} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
encoding = "UTF-16LE";
unread = n - 2;
} else {
// Unicode BOM mark not found, unread all bytes
encoding = defaultEnc;
unread = n;
}
//System.out.println("read=" + n + ", unread=" + unread);
if (unread > 0) internalIn.unread(bom, (n - unread), unread);
// Use given encoding
if (encoding == null) {
internalIn2 = new InputStreamReader(internalIn);
} else {
internalIn2 = new InputStreamReader(internalIn, encoding);
}
}
public void close() throws IOException {
init();
internalIn2.close();
}
public int read(char[] cbuf, int off, int len) throws IOException {
init();
return internalIn2.read(cbuf, off, len);
}
}
Main program class
package com.example.file.file;
import com.csvreader.CsvReader;
import java.io.*;
import java.util.ArrayList;
public class PreviewChunkCSV {
public static void main(String[] args) throws IOException {
// Path to file
File file = new File("C:\\Users\\86130\\Desktop\\ Xianqi product Kit \\ workbook 1.csv");
// To hold data
ArrayList<String[]> csvFileList = new ArrayList<>();
// File encoding format
String filecharset = getFilecharset(new FileInputStream(file));
// Define a CSV route
UnicodeReader breader = new UnicodeReader(new FileInputStream(file), filecharset);
CsvReader csvReader = new CsvReader(breader);
// If you only get Data BODY( Bytecode file )
// InputStream inputStream = new //ByteArrayInputStream(fileDescriptor.getBody().toByteArray());
// BufferedReader breader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"), 8192);
// Skip header Header required Ignore this sentence
csvReader.readHeaders();
// obtain Header
String[] headers = csvReader.getHeaders();
ArrayList<String> mRowList = new ArrayList<>();
// obtain most 40 List header 、200 Row data
int headCount = 40;
int rowCount = 200;
if (headers.length < headCount){
headCount = headers.length;
}
// All headers
String[] mHeadArray = new String[headers.length];
// The header of the display
String[] showHeadArray = new String[headCount];
for(int i = 0; i < headers.length; i++){
mHeadArray[i] = headers[i];
}
for (int i = 0; i < headCount; i++){
showHeadArray[i] = headers[i];
}
// Read the data except the header line by line
while (csvReader.readRecord()){
csvFileList.add(csvReader.getValues());
}
csvReader.close();
if (csvFileList.size() < 200){
rowCount = csvFileList.size();
}
// Traversal read CSV file
for (int row = 0; row < rowCount; row++){
String rowItem = "";
for (int i = 0; i < headCount; i++){
// For the first row That's ok The first 0 Columns of data
String cell = "";
if (i < headCount-1){
cell = csvFileList.get(row)[i] + ",";
}else {
cell = csvFileList.get(row)[i];
}
rowItem = rowItem + cell;
}
mRowList.add(rowItem);
}
System.out.println("================== Header ===========================");
System.out.println(mHeadArray);
System.out.println("================== The header of the display ======================");
System.out.println(showHeadArray);
System.out.println("==================mRowList======================");
System.out.println(mRowList);
System.out.println("================== Number of rows displayed ======================");
System.out.println(mRowList.size());
}
private static String getFilecharset(InputStream inputStream) {
// Default GBK
String charset = "GBK";
byte[] first3Bytes = new byte[3];
try (BufferedInputStream bis = new BufferedInputStream(inputStream)) {
bis.mark(0);
int read = bis.read(first3Bytes, 0, 3);
// The file code is ANSI
if (read == -1) {
return charset;
}
// The file code is Unicode
if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {
return "UTF-16LE";
}
// The file code is Unicode big endian
if (first3Bytes[0] == (byte) 0xFE && first3Bytes[1] == (byte) 0xFF) {
return "UTF-16BE";
}
// The file code is UTF-8
if (first3Bytes[0] == (byte) 0xEF && first3Bytes[1] == (byte) 0xBB && first3Bytes[2] == (byte) 0xBF) {
return "UTF-8";
}
bis.reset();
int loc = 0;
while ((read = bis.read()) != -1) {
loc++;
if (read >= 0xF0) {
break;
}
// alone BF Following , It is GBK
if (0x80 <= read && read <= 0xBF) {
break;
}
if (0xC0 <= read && read <= 0xDF) {
read = bis.read();
// Double byte (0xC0 - 0xDF)
if (0x80 <= read && read <= 0xBF) {
// (0x80
// - 0xBF), Or maybe GB In the code
continue;
}
break;
}
// It's also possible to make mistakes , But the odds are small
if (0xE0 <= read && read <= 0xEF) {
read = bis.read();
if (0x80 <= read && read <= 0xBF) {
read = bis.read();
if (0x80 <= read && read <= 0xBF) {
charset = "UTF-8";
}
}
break;
}
}
} catch (Exception e) {
e.printStackTrace();
}
return charset;
}
}
版权声明
本文为[You are my bug forever]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/04/202204231406272141.html
边栏推荐
猜你喜欢
随机推荐
OpenStack命令操作
sql中出现一个变态问题
jsp学习3
文字组合,不重复,做搜索或查询关键字匹配
统信UOS PHP7.2.3升级至PHP7.2.24
连接公司跳板机取别名
STD:: map and STD:: vector memory free
js 递归(1)
教育行业云迁移最佳实践:海云捷迅使用HyperMotion云迁移产品为北京某大学实施渐进式迁移,成功率100%
std::map 和 std::vector 内存释放
获取线程返回值Future接口与FutureTask类使用介绍
mysql 5.1升级到5.611
1到100号的灯开关问题
MySQL数据库讲解(八)
01-NIO基础之ByteBuffer和FileChannel
Five ways of using synchronized to remove clouds and fog are introduced
Quickly understand the three ways of thread implementation
Redis数据库讲解(一)
返回数组排序后下标
预览CSV文件