怎么使用python取出一个目录下面所有文件的指定内容
0
需求:
1.在指定目录下有约500个Java文件;
2.需要提取出每个文件中特定的内容,需要去重;
3.最终生成文本文件或excel文件;
4.提取的内容是从每个方法里面提取出包含有 collection = MongoUtil. 这个的内容;
5.提取出的 collection = MongoUtil. 内容需要与其所在的方法一一对应(即按方法分组输出);
6.500个文件的内容最终输出到一个文件内;
例:
下面这个文件(IndexInfoService.java)需要提取的数据是这个样子的:
IndexInfoService.java
getIndexBasicInfo(方法1)
DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_basicinfo")
collection = MongoUtil.getGGStockBaseCollection("gg_cihdquote")
collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code")
-----------------------------------------------------------------------------------------
getIndustryStockCodes(方法2)
DBCollection collection = MongoUtil.getGGStockBaseCollection("stock_sw_industry")
-----------------------------------------------------------------------------------------
getIndexMarketNews(方法3)
DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code")
-----------------------------------------------------------------------------------------
getConsensusExpecData(方法4)
DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code")
collection = MongoUtil.getGGIndustryCollection("t_hy_consensus")
文件IndexInfoService.java:
package f10service.v1.index.service;
import ggframework.bottom.store.mongodb.AggregationOutput;
import ggframework.bottom.store.mongodb.BasicDBObject;
import ggframework.bottom.store.mongodb.DBCollection;
import ggframework.bottom.store.mongodb.DBCursor;
import ggframework.bottom.store.mongodb.DBObject;
import ggframework.bottom.store.mongodb.GGDBCursor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.Maps;
import org.h2.expression.Aggregate;
import com.google.common.collect.Lists;
import ggf10service.common.DateUtil;
import ggf10service.common.HtmlUtils;
import ggf10service.common.MongoUtil;
/**
 * Static query helpers that assemble index-related data (basic profile, market news and
 * consensus forecasts) from MongoDB collections obtained through {@code MongoUtil}.
 */
public class IndexInfoService {

    /** Divisor (1e8) applied to raw amounts before they are returned. */
    private static final double HUNDRED_MILLION = 100000000d;

    /** Symbol of the CSI 300 index that the requested index is compared against. */
    private static final String HS300_SYMBOL = "000300";

    /** Maximum number of industries used for the news and consensus lookups. */
    private static final int TOP_INDUSTRY_LIMIT = 5;

    /** Maximum number of characters kept in a policy-news summary. */
    private static final int SUMMARY_MAX_LENGTH = 200;

    /**
     * Builds the index overview.
     *
     * <p>The returned map holds three entries: {@code index_basic} (the basic profile, or an
     * empty map when no document matches), {@code index_intensity} (closing prices of the
     * index and of the CSI 300 over the last 12 months) and {@code index_industry} (the
     * constituent industries with their stock counts).
     *
     * @param indexCode index code
     * @return map with the three sections described above
     * @date 2017-09-11 13:42:19
     */
    public static Map<String, Object> getIndexBasicInfo(String indexCode){
        Map<String, Object> result = Maps.newHashMap();
        DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_basicinfo");
        // Query conditions
        DBObject query = new BasicDBObject();
        query.put("status", 1);
        query.put("symbol", indexCode);
        // Projected fields
        DBObject fields = new BasicDBObject();
        fields.put("index_name", 1);// index name
        fields.put("indexsname", 1);// index short name
        fields.put("symbol", 1);// index code
        fields.put("index_ename", 1);// index English name
        fields.put("issuename", 1);// publication method
        fields.put("publishdate", 1);// publication date
        fields.put("cur", 1);// currency
        fields.put("benchdate", 1);// base date
        fields.put("benchnum", 1);// base points
        fields.put("consecurities", 1);// number of securities covered
        fields.put("chgperiod", 1);// change period
        fields.put("index_type", 1);// index category
        fields.put("estclass", 1);// index compilation method
        fields.put("weimode", 1);// index weighting method
        fields.put("mcap", 1);// free-float market value
        fields.put("mcapital", 1);// tradable share capital
        fields.put("_id", 0);
        DBObject obj = collection.findOne(query, fields);
        if (obj != null) {
            obj.put("mcap", obj.getDouble("mcap") / HUNDRED_MILLION);// free-float market value
            obj.put("mcapital", obj.getDouble("mcapital") / HUNDRED_MILLION);// tradable share capital
            result.put("index_basic", obj.toMap());
        } else {
            // No document matched: previously obj.toMap() was called unconditionally and
            // threw a NullPointerException here. Return an empty section instead.
            result.put("index_basic", new HashMap<String, Object>());
        }

        /*
         * Index strength relative to the CSI 300
         */
        collection = MongoUtil.getGGStockBaseCollection("gg_cihdquote");
        query = new BasicDBObject();
        query.append("symbol", new BasicDBObject("$in", Arrays.asList(HS300_SYMBOL, indexCode)));
        query.append("tdate", new BasicDBObject("$gte", DateUtil.dateToInteger(DateUtil.getDateBeforeMonths(new Date(), 12), "yyyyMMdd")));
        fields = new BasicDBObject();
        fields.put("tdate", 1);// trade date
        fields.put("symbol", 1);// index code
        fields.put("tclose", 1);// closing price
        BasicDBObject sort = new BasicDBObject();
        sort.append("tdate", 1);
        Map<Date, Double> maphs = Maps.newLinkedHashMap();
        Map<Date, Double> mapindex = Maps.newLinkedHashMap();
        DBCursor cursor = collection.find(query, fields).sort(sort);
        try {
            while (cursor.hasNext()) {
                DBObject o = cursor.next();
                // Rows of the benchmark go to one series, everything else to the other.
                Map<Date, Double> target = HS300_SYMBOL.equals(o.getString("symbol")) ? maphs : mapindex;
                target.put(DateUtil.integerToDate(o.getInteger("tdate")), o.getDouble("tclose"));
            }
        } finally {
            // Release the cursor even when a row fails to convert.
            cursor.close();
        }
        Map<String, Object> mapIntensity = Maps.newLinkedHashMap();
        mapIntensity.put("hs300", maphs);// CSI 300 index
        mapIntensity.put("currentIndex", mapindex);// requested index
        result.put("index_intensity", mapIntensity);

        /*
         * Constituent industries
         */
        collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code");
        query = new BasicDBObject();
        query.append("status", 1);
        query.append("symbol", indexCode);
        fields = new BasicDBObject();
        fields.put("industrycode", 1);// industry code of the index constituents
        fields.put("industryname", 1);// industry name of the index constituents
        fields.put("indstock_zb", 1);// share of constituents in the industry
        fields.put("_id", 0);
        List<Map<String, Object>> list_industry = Lists.newArrayList();
        List<String> industrys = Lists.newArrayList();
        cursor = collection.find(query, fields);
        try {
            while (cursor.hasNext()) {
                DBObject o = cursor.next();
                industrys.add(o.getString("industrycode"));
                list_industry.add(o.toMap());
            }
        } finally {
            cursor.close();
        }
        Map<String, Integer> map = getIndustryStockCodes(industrys, indexCode);
        for (Map<String, Object> indus : list_industry) {
            Object industrycode = indus.get("industrycode");
            // A row without an industry code simply gets no count instead of failing.
            indus.put("stock_code_count", industrycode == null ? null : map.get(industrycode.toString()));
        }
        result.put("index_industry", list_industry);
        return result;
    }

    /**
     * Counts, per industry code, the stocks of the given index that belong to that industry.
     *
     * @param industrys industry codes to match against {@code sw_second_code}
     * @param indexCode index code whose constituent stocks are considered
     * @return map from industry code to the number of matching stocks
     * @date 2017-09-22 15:29:55
     */
    public static Map<String, Integer> getIndustryStockCodes(List<String> industrys, String indexCode){
        Map<String, Integer> map = Maps.newHashMap();
        List<String> indexCodes = IndexCapitalService.getStockByIndex(indexCode);
        DBCollection collection = MongoUtil.getGGStockBaseCollection("stock_sw_industry");
        // $match: industry is one of the requested codes AND the stock belongs to the index.
        BasicDBObject criteria = new BasicDBObject("sw_second_code", new BasicDBObject("$in", industrys.toArray()))
                .append("stock_code", new BasicDBObject("$in", indexCodes.toArray()));
        BasicDBObject match = new BasicDBObject();
        match.append("$match", criteria);
        // $group: one row per industry code with the number of matched stocks.
        BasicDBObject grouping = new BasicDBObject("_id", "$sw_second_code")
                .append("count", new BasicDBObject("$sum", 1));
        BasicDBObject group = new BasicDBObject();
        group.append("$group", grouping);
        AggregationOutput out = collection.aggregate(match, group);
        Iterator<DBObject> it = out.results().iterator();
        while (it.hasNext()) {
            DBObject obj = it.next();
            map.put(obj.getString("_id"), obj.getInteger("count"));
        }
        return map;
    }

    /**
     * Collects the market news of an index.
     *
     * <p>The returned map holds {@code news_report} (every industry row, sorted by its share
     * of the total report count, descending), {@code news_industry} (latest news for the top
     * industries) and {@code news_policy} (latest policy news).
     *
     * @param indexCode index code
     * @return map with the three sections described above
     * @date 2017-09-11 17:41:13
     */
    public static Map<String, Object> getIndexMarketNews(String indexCode){
        Map<String, Object> map = Maps.newHashMap();
        DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code");
        BasicDBObject query = new BasicDBObject();
        query.append("status", 1);
        query.append("symbol", indexCode);
        BasicDBObject fields = new BasicDBObject();
        fields.put("industrycode", 1);// industry code of the index constituents
        fields.put("industryname", 1);// industry name of the index constituents
        fields.put("reportcnt", 1);// monthly report count of the industry
        fields.put("industryname1", 1);// Shenwan level-1 industry name
        fields.put("_id", 0);
        List<Map<String, Object>> list_report = Lists.newArrayList();
        double reportNum = 0;// total number of research reports for the index
        DBCursor cursor = collection.find(query, fields);
        try {
            while (cursor.hasNext()) {
                DBObject o = cursor.next();
                reportNum += o.getInteger("reportcnt");
                list_report.add(o.toMap());
            }
        } finally {
            // Release the cursor even when a row fails to convert.
            cursor.close();
        }
        for (Map<String, Object> report : list_report) {
            if (reportNum != 0) {
                report.put("report_rate", Double.parseDouble(report.get("reportcnt").toString()) / reportNum);
            } else {
                report.put("report_rate", 0);
            }
        }
        // The full list is published; it is sorted in place right below, so the published
        // section is sorted too, while the top-N view taken afterwards is not published.
        map.put("news_report", list_report);
        Collections.sort(list_report, new Comparator<Map<String, Object>>() {
            @Override
            public int compare(Map<String, Object> o1, Map<String, Object> o2) {
                double rate1 = Double.parseDouble(o1.get("report_rate").toString());
                double rate2 = Double.parseDouble(o2.get("report_rate").toString());
                // Descending by rate.
                return Double.compare(rate2, rate1);
            }
        });
        if (list_report.size() > TOP_INDUSTRY_LIMIT) {
            list_report = list_report.subList(0, TOP_INDUSTRY_LIMIT);
        }
        List<String> industrys = Lists.newArrayList();
        for (Map<String, Object> report : list_report) {
            Object levelOneName = report.get("industryname1");
            // Skip rows without a level-1 industry name instead of failing on toString().
            if (levelOneName != null) {
                industrys.add(levelOneName.toString());
            }
        }
        DBCollection urlcontents = MongoUtil.getGGStockCollection("urlcontents");
        // Industry news
        map.put("news_industry", getIndustryNews(urlcontents, industrys));
        // Policy updates
        map.put("news_policy", getPolicyNew(urlcontents));
        return map;
    }

    /**
     * Fetches the most recent news item with non-empty content for each industry name,
     * dropping items whose {@code ir_hkey} was already returned for an earlier industry.
     *
     * @param urlcontents collection holding the news documents
     * @param industrys industry names matched against {@code ir_groupname}
     * @return at most one news document per industry, as maps
     * @date 2017-09-12 13:39:03
     */
    public static List<Map<String, Object>> getIndustryNews(DBCollection urlcontents, List<String> industrys){
        List<Map<String, Object>> list = Lists.newArrayList();
        List<String> key = Lists.newArrayList();
        BasicDBObject newestFirst = new BasicDBObject("ir_urltime", -1);
        for (String industry : industrys) {
            BasicDBObject criteria = new BasicDBObject("ir_groupname", industry)
                    .append("ir_urlcontent", new BasicDBObject("$nin", Arrays.asList(null, "")));
            BasicDBObject projection = new BasicDBObject("ir_groupname", 1)
                    .append("ir_urlcontent", 1)
                    .append("ir_urltitle", 1)
                    .append("ir_srcname", 1)
                    .append("ir_urltime", 1)
                    .append("ir_hkey", 1)
                    .append("_id", 0);
            DBCursor cursor = urlcontents.find(criteria, projection).sort(newestFirst).limit(1);
            try {
                if (cursor.hasNext()) {
                    DBObject obj = cursor.next();
                    String ir_hkey = obj.getString("ir_hkey");
                    if (!key.contains(ir_hkey)) {
                        key.add(ir_hkey);
                        list.add(obj.toMap());
                    }
                }
            } finally {
                // Release the cursor even when the row fails to convert.
                cursor.close();
            }
        }
        return list;
    }

    /**
     * Fetches the latest policy news and reduces each item to a short plain-text summary.
     *
     * <p>The five newest documents are read; those with blank content are skipped, so fewer
     * than five items may be returned.
     *
     * @param urlcontents collection holding the news documents
     * @return list of maps with the keys {@code summary}, {@code title}, {@code source},
     *         {@code date} and {@code id}
     * @date 2017-09-12 10:59:00
     */
    public static List<Map<String, Object>> getPolicyNew(DBCollection urlcontents) {
        BasicDBObject urlQuery = new BasicDBObject();
        urlQuery.append("ir_groupname", "政策动态");
        BasicDBObject fields = new BasicDBObject();
        fields.append("_id", 0);
        fields.append("ir_urlcontent", 1);// news content
        fields.append("ir_urltitle", 1);// title
        fields.append("ir_srcname", 1);// source
        fields.append("ir_urltime", 1);// time
        fields.append("ir_hkey", 1);// primary key
        List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
        DBCursor urlCursor = urlcontents.find(urlQuery, fields).sort(new BasicDBObject("ir_urltime", -1)).limit(5);
        try {
            while (urlCursor.hasNext()) {
                DBObject o = urlCursor.next();
                Object content = o.get("ir_urlcontent");
                if (content == null || StringUtils.isBlank(content.toString())) {
                    continue;
                }
                // Strip line breaks and markup, then cap the length.
                String summary = content.toString().replaceAll("\r\n", "");
                summary = HtmlUtils.trimHtml(summary);
                if (summary.length() > SUMMARY_MAX_LENGTH) {
                    summary = summary.substring(0, SUMMARY_MAX_LENGTH);
                }
                Map<String, Object> map = new HashMap<String, Object>();
                map.put("summary", summary);// summary
                map.put("title", o.get("ir_urltitle"));
                map.put("source", o.get("ir_srcname"));
                map.put("date", o.get("ir_urltime"));
                map.put("id", o.get("ir_hkey"));
                result.add(map);
            }
        } finally {
            // Release the cursor even when a row fails to convert.
            urlCursor.close();
        }
        return result;
    }

    /**
     * Collects consensus forecast data for the first industries of an index.
     *
     * @param indexCode index code
     * @return map from industry name to the forecast rows returned by
     *         {@link #getExpected(DBCollection, String)} for that industry
     * @date 2017-09-12 17:34:10
     */
    public static Map<String, Object> getConsensusExpecData(String indexCode){
        Map<String, Object> map = Maps.newHashMap();
        DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code");
        BasicDBObject query = new BasicDBObject();
        query.append("status", 1);
        query.append("symbol", indexCode);
        BasicDBObject fields = new BasicDBObject();
        fields.put("industrycode1", 1);// Shenwan level-1 industry code
        fields.put("industryname1", 1);// Shenwan level-1 industry name
        fields.put("industryname", 1);// Shenwan level-2 industry name
        fields.put("industrycode", 1);// Shenwan level-2 industry code
        fields.put("_id", 0);
        List<Map<String, Object>> list_report = Lists.newArrayList();
        DBCursor cursor = collection.find(query, fields);
        try {
            while (cursor.hasNext()) {
                DBObject o = cursor.next();
                list_report.add(o.toMap());
            }
        } finally {
            // Release the cursor even when a row fails to convert.
            cursor.close();
        }
        if (list_report.size() > TOP_INDUSTRY_LIMIT) {
            list_report = list_report.subList(0, TOP_INDUSTRY_LIMIT);
        }
        collection = MongoUtil.getGGIndustryCollection("t_hy_consensus");
        for (Map<String, Object> report : list_report) {
            String industryName = report.get("industryname").toString();
            String industryCode = report.get("industrycode").toString();
            map.put(industryName, getExpected(collection, industryCode));
        }
        return map;
    }

    /**
     * Loads the consensus forecast rows of one industry, one row per forecast year.
     *
     * <p>The forecast years come from {@code IndustryUtil.getDmYear}. Years without a stored
     * row are filled with a placeholder whose metrics are all {@code null}.
     *
     * @param collection collection holding the consensus documents
     * @param industry industry code matched against {@code industrycode}
     * @return one row per forecast year in ascending year order, or {@code null} when the
     *         query returns no rows at all
     * @date 2017-09-12 17:40:23
     */
    public static List<Map<String, Object>> getExpected(DBCollection collection, String industry){
        // Base year plus the year before it and the three years after it.
        Map<String, Object> baseYear = IndustryUtil.getDmYear(new Date());
        List<Integer> yearList = new ArrayList<Integer>();// forecast years
        for (Object value : baseYear.values()) {
            yearList.add(Integer.parseInt(value.toString()));
        }
        Collections.sort(yearList);

        BasicDBObject query = new BasicDBObject();
        query.put("industrycode", industry);
        DBObject field = new BasicDBObject();
        field.put("_id", 0);
        field.put("eps", 1);// consensus EPS
        field.put("eps_tb", 1);// consensus EPS, year over year
        field.put("profit", 1);// consensus net profit (10k yuan)
        field.put("profit_tb", 1);// consensus net profit, year over year
        field.put("pe", 1);// consensus PE (multiple)
        field.put("pb", 1);// consensus PB (multiple)
        field.put("forecast_income", 1);// consensus operating revenue (10k yuan)
        field.put("income_tb", 1);// consensus operating revenue, year over year
        field.put("ps", 1);// consensus PS
        field.put("time_year", 1);// forecast year
        DBCursor cursor = collection.find(query, field);
        // NOTE(review): the mapping presumably renames profit_tb to profittb in the rows,
        // while the placeholder rows below use profit_tb - confirm which key callers expect.
        List<Map<String, Object>> result = GGDBCursor.find(cursor, ImmutableMap.<String, String>of("profit_tb", "profittb"), 0, 0);
        if (result == null || result.isEmpty()) {
            return null;
        }

        // Index the stored rows by forecast year after scaling their amounts.
        Map<String, Map<String, Object>> rowsByYear = new HashMap<String, Map<String, Object>>();
        for (Map<String, Object> row : result) {
            Object incomeGrowth = row.get("income_tb");
            if (incomeGrowth != null) {
                row.put("income_tb", Double.parseDouble(incomeGrowth.toString()) * 100);
            }
            row.put("profit", scaleDown(row.get("profit")));// consensus net profit
            row.put("forecast_income", scaleDown(row.get("forecast_income")));// consensus operating revenue
            rowsByYear.put(row.get("time_year").toString(), row);
        }

        List<Map<String, Object>> resultList = new ArrayList<Map<String, Object>>();// returned rows
        for (Integer year : yearList) {
            Map<String, Object> stored = rowsByYear.get(year.toString());
            resultList.add(stored != null ? stored : emptyForecastRow(year));
        }
        return resultList;
    }

    /** Divides a raw amount by 1e8, passing {@code null} through unchanged. */
    private static Double scaleDown(Object raw) {
        return raw == null ? null : Double.valueOf(Double.parseDouble(raw.toString()) / HUNDRED_MILLION);
    }

    /** Creates the placeholder row for a forecast year that has no stored data. */
    private static Map<String, Object> emptyForecastRow(Integer year) {
        Map<String, Object> row = new LinkedHashMap<String, Object>();
        row.put("eps", null);
        row.put("eps_tb", null);
        row.put("profit", null);
        row.put("profit_tb", null);
        row.put("pe", null);
        row.put("pb", null);
        row.put("forecast_income", null);
        row.put("income_tb", null);
        row.put("ps", null);
        row.put("time_year", year);
        return row;
    }
}
1.在指定目录下有约500个Java文件;
2.需要提取出每个文件中特定的内容,需要去重;
3.最终生成文本文件或excel文件;
4.提取的内容是从每个方法里面提取出包含有 collection = MongoUtil. 这个的内容;
5.提取出的 collection = MongoUtil. 内容需要与其所在的方法一一对应(即按方法分组输出);
6.500个文件的内容最终输出到一个文件内;
例:
下面这个文件(IndexInfoService.java)需要提取的数据是这个样子的:
IndexInfoService.java
getIndexBasicInfo(方法1)
DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_basicinfo")
collection = MongoUtil.getGGStockBaseCollection("gg_cihdquote")
collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code")
-----------------------------------------------------------------------------------------
getIndustryStockCodes(方法2)
DBCollection collection = MongoUtil.getGGStockBaseCollection("stock_sw_industry")
-----------------------------------------------------------------------------------------
getIndexMarketNews(方法3)
DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code")
-----------------------------------------------------------------------------------------
getConsensusExpecData(方法4)
DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code")
collection = MongoUtil.getGGIndustryCollection("t_hy_consensus")
文件IndexInfoService.java:
package f10service.v1.index.service;
import ggframework.bottom.store.mongodb.AggregationOutput;
import ggframework.bottom.store.mongodb.BasicDBObject;
import ggframework.bottom.store.mongodb.DBCollection;
import ggframework.bottom.store.mongodb.DBCursor;
import ggframework.bottom.store.mongodb.DBObject;
import ggframework.bottom.store.mongodb.GGDBCursor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.Maps;
import org.h2.expression.Aggregate;
import com.google.common.collect.Lists;
import ggf10service.common.DateUtil;
import ggf10service.common.HtmlUtils;
import ggf10service.common.MongoUtil;
/**
 * Static query helpers that assemble index-related data (basic profile, market news and
 * consensus forecasts) from MongoDB collections obtained through {@code MongoUtil}.
 */
public class IndexInfoService {

    /** Divisor (1e8) applied to raw amounts before they are returned. */
    private static final double HUNDRED_MILLION = 100000000d;

    /** Symbol of the CSI 300 index that the requested index is compared against. */
    private static final String HS300_SYMBOL = "000300";

    /** Maximum number of industries used for the news and consensus lookups. */
    private static final int TOP_INDUSTRY_LIMIT = 5;

    /** Maximum number of characters kept in a policy-news summary. */
    private static final int SUMMARY_MAX_LENGTH = 200;

    /**
     * Builds the index overview.
     *
     * <p>The returned map holds three entries: {@code index_basic} (the basic profile, or an
     * empty map when no document matches), {@code index_intensity} (closing prices of the
     * index and of the CSI 300 over the last 12 months) and {@code index_industry} (the
     * constituent industries with their stock counts).
     *
     * @param indexCode index code
     * @return map with the three sections described above
     * @date 2017-09-11 13:42:19
     */
    public static Map<String, Object> getIndexBasicInfo(String indexCode){
        Map<String, Object> result = Maps.newHashMap();
        DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_basicinfo");
        // Query conditions
        DBObject query = new BasicDBObject();
        query.put("status", 1);
        query.put("symbol", indexCode);
        // Projected fields
        DBObject fields = new BasicDBObject();
        fields.put("index_name", 1);// index name
        fields.put("indexsname", 1);// index short name
        fields.put("symbol", 1);// index code
        fields.put("index_ename", 1);// index English name
        fields.put("issuename", 1);// publication method
        fields.put("publishdate", 1);// publication date
        fields.put("cur", 1);// currency
        fields.put("benchdate", 1);// base date
        fields.put("benchnum", 1);// base points
        fields.put("consecurities", 1);// number of securities covered
        fields.put("chgperiod", 1);// change period
        fields.put("index_type", 1);// index category
        fields.put("estclass", 1);// index compilation method
        fields.put("weimode", 1);// index weighting method
        fields.put("mcap", 1);// free-float market value
        fields.put("mcapital", 1);// tradable share capital
        fields.put("_id", 0);
        DBObject obj = collection.findOne(query, fields);
        if (obj != null) {
            obj.put("mcap", obj.getDouble("mcap") / HUNDRED_MILLION);// free-float market value
            obj.put("mcapital", obj.getDouble("mcapital") / HUNDRED_MILLION);// tradable share capital
            result.put("index_basic", obj.toMap());
        } else {
            // No document matched: previously obj.toMap() was called unconditionally and
            // threw a NullPointerException here. Return an empty section instead.
            result.put("index_basic", new HashMap<String, Object>());
        }

        /*
         * Index strength relative to the CSI 300
         */
        collection = MongoUtil.getGGStockBaseCollection("gg_cihdquote");
        query = new BasicDBObject();
        query.append("symbol", new BasicDBObject("$in", Arrays.asList(HS300_SYMBOL, indexCode)));
        query.append("tdate", new BasicDBObject("$gte", DateUtil.dateToInteger(DateUtil.getDateBeforeMonths(new Date(), 12), "yyyyMMdd")));
        fields = new BasicDBObject();
        fields.put("tdate", 1);// trade date
        fields.put("symbol", 1);// index code
        fields.put("tclose", 1);// closing price
        BasicDBObject sort = new BasicDBObject();
        sort.append("tdate", 1);
        Map<Date, Double> maphs = Maps.newLinkedHashMap();
        Map<Date, Double> mapindex = Maps.newLinkedHashMap();
        DBCursor cursor = collection.find(query, fields).sort(sort);
        try {
            while (cursor.hasNext()) {
                DBObject o = cursor.next();
                // Rows of the benchmark go to one series, everything else to the other.
                Map<Date, Double> target = HS300_SYMBOL.equals(o.getString("symbol")) ? maphs : mapindex;
                target.put(DateUtil.integerToDate(o.getInteger("tdate")), o.getDouble("tclose"));
            }
        } finally {
            // Release the cursor even when a row fails to convert.
            cursor.close();
        }
        Map<String, Object> mapIntensity = Maps.newLinkedHashMap();
        mapIntensity.put("hs300", maphs);// CSI 300 index
        mapIntensity.put("currentIndex", mapindex);// requested index
        result.put("index_intensity", mapIntensity);

        /*
         * Constituent industries
         */
        collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code");
        query = new BasicDBObject();
        query.append("status", 1);
        query.append("symbol", indexCode);
        fields = new BasicDBObject();
        fields.put("industrycode", 1);// industry code of the index constituents
        fields.put("industryname", 1);// industry name of the index constituents
        fields.put("indstock_zb", 1);// share of constituents in the industry
        fields.put("_id", 0);
        List<Map<String, Object>> list_industry = Lists.newArrayList();
        List<String> industrys = Lists.newArrayList();
        cursor = collection.find(query, fields);
        try {
            while (cursor.hasNext()) {
                DBObject o = cursor.next();
                industrys.add(o.getString("industrycode"));
                list_industry.add(o.toMap());
            }
        } finally {
            cursor.close();
        }
        Map<String, Integer> map = getIndustryStockCodes(industrys, indexCode);
        for (Map<String, Object> indus : list_industry) {
            Object industrycode = indus.get("industrycode");
            // A row without an industry code simply gets no count instead of failing.
            indus.put("stock_code_count", industrycode == null ? null : map.get(industrycode.toString()));
        }
        result.put("index_industry", list_industry);
        return result;
    }

    /**
     * Counts, per industry code, the stocks of the given index that belong to that industry.
     *
     * @param industrys industry codes to match against {@code sw_second_code}
     * @param indexCode index code whose constituent stocks are considered
     * @return map from industry code to the number of matching stocks
     * @date 2017-09-22 15:29:55
     */
    public static Map<String, Integer> getIndustryStockCodes(List<String> industrys, String indexCode){
        Map<String, Integer> map = Maps.newHashMap();
        List<String> indexCodes = IndexCapitalService.getStockByIndex(indexCode);
        DBCollection collection = MongoUtil.getGGStockBaseCollection("stock_sw_industry");
        // $match: industry is one of the requested codes AND the stock belongs to the index.
        BasicDBObject criteria = new BasicDBObject("sw_second_code", new BasicDBObject("$in", industrys.toArray()))
                .append("stock_code", new BasicDBObject("$in", indexCodes.toArray()));
        BasicDBObject match = new BasicDBObject();
        match.append("$match", criteria);
        // $group: one row per industry code with the number of matched stocks.
        BasicDBObject grouping = new BasicDBObject("_id", "$sw_second_code")
                .append("count", new BasicDBObject("$sum", 1));
        BasicDBObject group = new BasicDBObject();
        group.append("$group", grouping);
        AggregationOutput out = collection.aggregate(match, group);
        Iterator<DBObject> it = out.results().iterator();
        while (it.hasNext()) {
            DBObject obj = it.next();
            map.put(obj.getString("_id"), obj.getInteger("count"));
        }
        return map;
    }

    /**
     * Collects the market news of an index.
     *
     * <p>The returned map holds {@code news_report} (every industry row, sorted by its share
     * of the total report count, descending), {@code news_industry} (latest news for the top
     * industries) and {@code news_policy} (latest policy news).
     *
     * @param indexCode index code
     * @return map with the three sections described above
     * @date 2017-09-11 17:41:13
     */
    public static Map<String, Object> getIndexMarketNews(String indexCode){
        Map<String, Object> map = Maps.newHashMap();
        DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code");
        BasicDBObject query = new BasicDBObject();
        query.append("status", 1);
        query.append("symbol", indexCode);
        BasicDBObject fields = new BasicDBObject();
        fields.put("industrycode", 1);// industry code of the index constituents
        fields.put("industryname", 1);// industry name of the index constituents
        fields.put("reportcnt", 1);// monthly report count of the industry
        fields.put("industryname1", 1);// Shenwan level-1 industry name
        fields.put("_id", 0);
        List<Map<String, Object>> list_report = Lists.newArrayList();
        double reportNum = 0;// total number of research reports for the index
        DBCursor cursor = collection.find(query, fields);
        try {
            while (cursor.hasNext()) {
                DBObject o = cursor.next();
                reportNum += o.getInteger("reportcnt");
                list_report.add(o.toMap());
            }
        } finally {
            // Release the cursor even when a row fails to convert.
            cursor.close();
        }
        for (Map<String, Object> report : list_report) {
            if (reportNum != 0) {
                report.put("report_rate", Double.parseDouble(report.get("reportcnt").toString()) / reportNum);
            } else {
                report.put("report_rate", 0);
            }
        }
        // The full list is published; it is sorted in place right below, so the published
        // section is sorted too, while the top-N view taken afterwards is not published.
        map.put("news_report", list_report);
        Collections.sort(list_report, new Comparator<Map<String, Object>>() {
            @Override
            public int compare(Map<String, Object> o1, Map<String, Object> o2) {
                double rate1 = Double.parseDouble(o1.get("report_rate").toString());
                double rate2 = Double.parseDouble(o2.get("report_rate").toString());
                // Descending by rate.
                return Double.compare(rate2, rate1);
            }
        });
        if (list_report.size() > TOP_INDUSTRY_LIMIT) {
            list_report = list_report.subList(0, TOP_INDUSTRY_LIMIT);
        }
        List<String> industrys = Lists.newArrayList();
        for (Map<String, Object> report : list_report) {
            Object levelOneName = report.get("industryname1");
            // Skip rows without a level-1 industry name instead of failing on toString().
            if (levelOneName != null) {
                industrys.add(levelOneName.toString());
            }
        }
        DBCollection urlcontents = MongoUtil.getGGStockCollection("urlcontents");
        // Industry news
        map.put("news_industry", getIndustryNews(urlcontents, industrys));
        // Policy updates
        map.put("news_policy", getPolicyNew(urlcontents));
        return map;
    }

    /**
     * Fetches the most recent news item with non-empty content for each industry name,
     * dropping items whose {@code ir_hkey} was already returned for an earlier industry.
     *
     * @param urlcontents collection holding the news documents
     * @param industrys industry names matched against {@code ir_groupname}
     * @return at most one news document per industry, as maps
     * @date 2017-09-12 13:39:03
     */
    public static List<Map<String, Object>> getIndustryNews(DBCollection urlcontents, List<String> industrys){
        List<Map<String, Object>> list = Lists.newArrayList();
        List<String> key = Lists.newArrayList();
        BasicDBObject newestFirst = new BasicDBObject("ir_urltime", -1);
        for (String industry : industrys) {
            BasicDBObject criteria = new BasicDBObject("ir_groupname", industry)
                    .append("ir_urlcontent", new BasicDBObject("$nin", Arrays.asList(null, "")));
            BasicDBObject projection = new BasicDBObject("ir_groupname", 1)
                    .append("ir_urlcontent", 1)
                    .append("ir_urltitle", 1)
                    .append("ir_srcname", 1)
                    .append("ir_urltime", 1)
                    .append("ir_hkey", 1)
                    .append("_id", 0);
            DBCursor cursor = urlcontents.find(criteria, projection).sort(newestFirst).limit(1);
            try {
                if (cursor.hasNext()) {
                    DBObject obj = cursor.next();
                    String ir_hkey = obj.getString("ir_hkey");
                    if (!key.contains(ir_hkey)) {
                        key.add(ir_hkey);
                        list.add(obj.toMap());
                    }
                }
            } finally {
                // Release the cursor even when the row fails to convert.
                cursor.close();
            }
        }
        return list;
    }

    /**
     * Fetches the latest policy news and reduces each item to a short plain-text summary.
     *
     * <p>The five newest documents are read; those with blank content are skipped, so fewer
     * than five items may be returned.
     *
     * @param urlcontents collection holding the news documents
     * @return list of maps with the keys {@code summary}, {@code title}, {@code source},
     *         {@code date} and {@code id}
     * @date 2017-09-12 10:59:00
     */
    public static List<Map<String, Object>> getPolicyNew(DBCollection urlcontents) {
        BasicDBObject urlQuery = new BasicDBObject();
        urlQuery.append("ir_groupname", "政策动态");
        BasicDBObject fields = new BasicDBObject();
        fields.append("_id", 0);
        fields.append("ir_urlcontent", 1);// news content
        fields.append("ir_urltitle", 1);// title
        fields.append("ir_srcname", 1);// source
        fields.append("ir_urltime", 1);// time
        fields.append("ir_hkey", 1);// primary key
        List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
        DBCursor urlCursor = urlcontents.find(urlQuery, fields).sort(new BasicDBObject("ir_urltime", -1)).limit(5);
        try {
            while (urlCursor.hasNext()) {
                DBObject o = urlCursor.next();
                Object content = o.get("ir_urlcontent");
                if (content == null || StringUtils.isBlank(content.toString())) {
                    continue;
                }
                // Strip line breaks and markup, then cap the length.
                String summary = content.toString().replaceAll("\r\n", "");
                summary = HtmlUtils.trimHtml(summary);
                if (summary.length() > SUMMARY_MAX_LENGTH) {
                    summary = summary.substring(0, SUMMARY_MAX_LENGTH);
                }
                Map<String, Object> map = new HashMap<String, Object>();
                map.put("summary", summary);// summary
                map.put("title", o.get("ir_urltitle"));
                map.put("source", o.get("ir_srcname"));
                map.put("date", o.get("ir_urltime"));
                map.put("id", o.get("ir_hkey"));
                result.add(map);
            }
        } finally {
            // Release the cursor even when a row fails to convert.
            urlCursor.close();
        }
        return result;
    }

    /**
     * Collects consensus forecast data for the first industries of an index.
     *
     * @param indexCode index code
     * @return map from industry name to the forecast rows returned by
     *         {@link #getExpected(DBCollection, String)} for that industry
     * @date 2017-09-12 17:34:10
     */
    public static Map<String, Object> getConsensusExpecData(String indexCode){
        Map<String, Object> map = Maps.newHashMap();
        DBCollection collection = MongoUtil.getGGf10dbCollection("t_f10_index_industry_code");
        BasicDBObject query = new BasicDBObject();
        query.append("status", 1);
        query.append("symbol", indexCode);
        BasicDBObject fields = new BasicDBObject();
        fields.put("industrycode1", 1);// Shenwan level-1 industry code
        fields.put("industryname1", 1);// Shenwan level-1 industry name
        fields.put("industryname", 1);// Shenwan level-2 industry name
        fields.put("industrycode", 1);// Shenwan level-2 industry code
        fields.put("_id", 0);
        List<Map<String, Object>> list_report = Lists.newArrayList();
        DBCursor cursor = collection.find(query, fields);
        try {
            while (cursor.hasNext()) {
                DBObject o = cursor.next();
                list_report.add(o.toMap());
            }
        } finally {
            // Release the cursor even when a row fails to convert.
            cursor.close();
        }
        if (list_report.size() > TOP_INDUSTRY_LIMIT) {
            list_report = list_report.subList(0, TOP_INDUSTRY_LIMIT);
        }
        collection = MongoUtil.getGGIndustryCollection("t_hy_consensus");
        for (Map<String, Object> report : list_report) {
            String industryName = report.get("industryname").toString();
            String industryCode = report.get("industrycode").toString();
            map.put(industryName, getExpected(collection, industryCode));
        }
        return map;
    }

    /**
     * Loads the consensus forecast rows of one industry, one row per forecast year.
     *
     * <p>The forecast years come from {@code IndustryUtil.getDmYear}. Years without a stored
     * row are filled with a placeholder whose metrics are all {@code null}.
     *
     * @param collection collection holding the consensus documents
     * @param industry industry code matched against {@code industrycode}
     * @return one row per forecast year in ascending year order, or {@code null} when the
     *         query returns no rows at all
     * @date 2017-09-12 17:40:23
     */
    public static List<Map<String, Object>> getExpected(DBCollection collection, String industry){
        // Base year plus the year before it and the three years after it.
        Map<String, Object> baseYear = IndustryUtil.getDmYear(new Date());
        List<Integer> yearList = new ArrayList<Integer>();// forecast years
        for (Object value : baseYear.values()) {
            yearList.add(Integer.parseInt(value.toString()));
        }
        Collections.sort(yearList);

        BasicDBObject query = new BasicDBObject();
        query.put("industrycode", industry);
        DBObject field = new BasicDBObject();
        field.put("_id", 0);
        field.put("eps", 1);// consensus EPS
        field.put("eps_tb", 1);// consensus EPS, year over year
        field.put("profit", 1);// consensus net profit (10k yuan)
        field.put("profit_tb", 1);// consensus net profit, year over year
        field.put("pe", 1);// consensus PE (multiple)
        field.put("pb", 1);// consensus PB (multiple)
        field.put("forecast_income", 1);// consensus operating revenue (10k yuan)
        field.put("income_tb", 1);// consensus operating revenue, year over year
        field.put("ps", 1);// consensus PS
        field.put("time_year", 1);// forecast year
        DBCursor cursor = collection.find(query, field);
        // NOTE(review): the mapping presumably renames profit_tb to profittb in the rows,
        // while the placeholder rows below use profit_tb - confirm which key callers expect.
        List<Map<String, Object>> result = GGDBCursor.find(cursor, ImmutableMap.<String, String>of("profit_tb", "profittb"), 0, 0);
        if (result == null || result.isEmpty()) {
            return null;
        }

        // Index the stored rows by forecast year after scaling their amounts.
        Map<String, Map<String, Object>> rowsByYear = new HashMap<String, Map<String, Object>>();
        for (Map<String, Object> row : result) {
            Object incomeGrowth = row.get("income_tb");
            if (incomeGrowth != null) {
                row.put("income_tb", Double.parseDouble(incomeGrowth.toString()) * 100);
            }
            row.put("profit", scaleDown(row.get("profit")));// consensus net profit
            row.put("forecast_income", scaleDown(row.get("forecast_income")));// consensus operating revenue
            rowsByYear.put(row.get("time_year").toString(), row);
        }

        List<Map<String, Object>> resultList = new ArrayList<Map<String, Object>>();// returned rows
        for (Integer year : yearList) {
            Map<String, Object> stored = rowsByYear.get(year.toString());
            resultList.add(stored != null ? stored : emptyForecastRow(year));
        }
        return resultList;
    }

    /** Divides a raw amount by 1e8, passing {@code null} through unchanged. */
    private static Double scaleDown(Object raw) {
        return raw == null ? null : Double.valueOf(Double.parseDouble(raw.toString()) / HUNDRED_MILLION);
    }

    /** Creates the placeholder row for a forecast year that has no stored data. */
    private static Map<String, Object> emptyForecastRow(Integer year) {
        Map<String, Object> row = new LinkedHashMap<String, Object>();
        row.put("eps", null);
        row.put("eps_tb", null);
        row.put("profit", null);
        row.put("profit_tb", null);
        row.put("pe", null);
        row.put("pb", null);
        row.put("forecast_income", null);
        row.put("income_tb", null);
        row.put("ps", null);
        row.put("time_year", year);
        return row;
    }
}
没有找到相关结果
重要提示:提问者不能发表回复,可以通过评论与回答者沟通,沟通后可以通过编辑功能完善问题描述,以便后续其他人能够更容易理解问题.
1 个回复
seng - 从事BI、大数据、数据分析工作 2018-06-09 回答
赞同来自: deng111
1.每一行整理成from aa bb from aa 的格式 cat aa.txt |grep 'from'|sed 's/\w*\sfrom/from/'
2.from aa bb from cc提取 aa cc, 这个我行末解决不了 ,写了2个 sed 's/from\s\(\w*\s\)\w*/\1/g'| sed 's/from\s\(\w*\)$/\1/g'
3.转成多行,去空格,排序,去重 sed 's/\b/\n/g'|sed 's/ //g'|sort -u
总的就是
cat *.txt |grep 'from'|sed 's/\w*\sfrom/from/'|sed 's/from\s\(\w*\s\)\w*/\1/g'| sed 's/from\s\(\w*\)$/\1/g'|sed 's/\b/\n/g'|sed 's/ //g'|sort -u