导入股票列表
This commit is contained in:
@ -25,16 +25,16 @@ def main():
|
||||
success = wind_fetcher.process_and_save_data("000001.SZ", "2024-12-31")
|
||||
print(f"处理结果: {'成功' if success else '失败'}")
|
||||
|
||||
# # 批量处理示例
|
||||
# print("\n=== 批量处理多个股票 ===")
|
||||
# stock_list = [
|
||||
# "600519.SZ", # 贵州茅台
|
||||
# "000002.SZ", # 万科A
|
||||
# "600036.SH", # 招商银行
|
||||
# "601318.SH", # 中国平安
|
||||
# ]
|
||||
# 批量处理示例
|
||||
print("\n=== 批量处理多个股票 ===")
|
||||
stock_list = [
|
||||
"600519.SH",
|
||||
"600436.SH",
|
||||
"600887.SH",
|
||||
"000858.SZ",
|
||||
]
|
||||
|
||||
# results = wind_fetcher.batch_fetch_stocks(stock_list, "2024-12-31")
|
||||
results = wind_fetcher.batch_fetch_stocks(stock_list, "2024-12-31")
|
||||
|
||||
# # 打印结果摘要
|
||||
# print("\n=== 处理结果摘要 ===")
|
||||
|
||||
188
finance/excel_stock_importer.py
Normal file
188
finance/excel_stock_importer.py
Normal file
@ -0,0 +1,188 @@
|
||||
import pandas as pd
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
from financial_data_manager import FinancialDataManager
|
||||
|
||||
class ExcelStockImporter:
    """Import stock master data from an Excel workbook into the database.

    The workbook must contain the columns listed in ``REQUIRED_COLUMNS``.
    Each row is converted to a dict that is passed to
    ``FinancialDataManager.insert_stock``.
    """

    # Columns that must exist in the workbook for it to be accepted.
    REQUIRED_COLUMNS = ['Wind代码', 'Wind一级行业代码', 'Wind一级行业名称', '上市地国家(地区)代码', '交易所']

    # Optional output field -> source column. A missing cell becomes None.
    _OPTIONAL_FIELD_COLUMNS = {
        'wind_industry_code': 'Wind一级行业代码',
        'wind_industry_name': 'Wind一级行业名称',
        'country_code': '上市地国家(地区)代码',
        'exchange': '交易所',
    }

    def __init__(self, db_config: Dict[str, Any]):
        """
        Create an importer.

        Args:
            db_config: Keyword arguments forwarded to ``FinancialDataManager``
                when the import opens its database connection.
        """
        self.db_config = db_config
        self.logger = self._setup_logger()

    def _setup_logger(self) -> logging.Logger:
        """Return the shared 'ExcelStockImporter' logger, configured once."""
        logger = logging.getLogger('ExcelStockImporter')
        logger.setLevel(logging.INFO)

        # The logger is process-global; only attach a handler the first time
        # so repeated instantiation does not duplicate every message.
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        return logger

    @staticmethod
    def _clean_text(value: Any) -> Optional[str]:
        """Return ``value`` as a stripped string, or None for a missing cell.

        ``str(float('nan'))`` is the truthy string ``'nan'``, so missing cells
        must be detected before the value is converted to text.
        """
        if pd.isna(value):
            return None
        return str(value).strip()

    def read_excel_file(self, file_path: str) -> Optional[pd.DataFrame]:
        """
        Read the workbook and check that the required columns are present.

        Args:
            file_path: Path of the Excel file.

        Returns:
            The loaded DataFrame, or None if the file is missing, cannot be
            read, or lacks one of ``REQUIRED_COLUMNS``.
        """
        try:
            self.logger.info(f"开始读取Excel文件: {file_path}")

            df = pd.read_excel(file_path)

            missing_columns = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]

            if missing_columns:
                self.logger.error(f"Excel文件中缺少必要的列: {missing_columns}")
                self.logger.info(f"Excel文件中的列: {df.columns.tolist()}")
                return None

            self.logger.info(f"成功读取Excel文件,共 {len(df)} 行数据")
            self.logger.debug(f"数据前5行:\n{df.head()}")

            return df

        except FileNotFoundError:
            self.logger.error(f"文件不存在: {file_path}")
            return None
        except Exception as e:
            # Best-effort boundary: any parser/engine failure is reported as
            # "could not read" instead of propagating to the caller.
            self.logger.error(f"读取Excel文件失败: {e}")
            return None

    def process_stock_data(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
        """
        Convert workbook rows into the dict format expected by the database.

        Rows whose stock code is missing or blank are skipped with a warning.
        Optional fields are set to None when their cell is missing.

        Args:
            df: DataFrame as returned by ``read_excel_file``.

        Returns:
            One dict per valid row with the keys ``stock_id``,
            ``wind_industry_code``, ``wind_industry_name``, ``country_code``
            and ``exchange``.
        """
        processed_data = []

        # Number rows by position: index labels are not guaranteed to be
        # integers, so they cannot be used for "row N" messages.
        for row_number, (_, row) in enumerate(df.iterrows(), start=1):
            try:
                stock_id = self._clean_text(row['Wind代码'])

                # Covers both a missing cell (None) and a whitespace-only cell.
                if not stock_id:
                    self.logger.warning(f"第 {row_number} 行股票代码为空,跳过")
                    continue

                stock_data = {'stock_id': stock_id}
                for field, column in self._OPTIONAL_FIELD_COLUMNS.items():
                    stock_data[field] = self._clean_text(row[column])

                processed_data.append(stock_data)

            except Exception as e:
                # Best-effort: one bad row must not abort the whole import.
                self.logger.warning(f"处理第 {row_number} 行数据时出错: {e}")
                continue

        self.logger.info(f"成功处理 {len(processed_data)} 条股票数据")
        return processed_data

    def import_to_database(self, file_path: str) -> bool:
        """
        Read the workbook and insert every valid row into the database.

        Args:
            file_path: Path of the Excel file.

        Returns:
            True if at least one row was inserted; False if the file could
            not be read, contained no valid rows, or every insert failed.
        """
        try:
            df = self.read_excel_file(file_path)
            if df is None or df.empty:
                self.logger.error("读取Excel文件失败或文件为空")
                return False

            stock_data_list = self.process_stock_data(df)
            if not stock_data_list:
                self.logger.error("没有有效的股票数据需要导入")
                return False

            with FinancialDataManager(**self.db_config) as db_manager:
                success_count = 0
                total_count = len(stock_data_list)

                for stock_data in stock_data_list:
                    try:
                        if db_manager.insert_stock(stock_data):
                            success_count += 1
                        else:
                            self.logger.warning(f"插入股票数据失败: {stock_data['stock_id']}")
                    except Exception as e:
                        # Keep going: a failure on one stock should not stop
                        # the remaining inserts.
                        self.logger.error(f"插入股票 {stock_data['stock_id']} 时出错: {e}")

                self.logger.info(f"导入完成: 成功 {success_count}/{total_count}")
                return success_count > 0

        except Exception as e:
            # Top-level boundary (e.g. the database connection failed).
            self.logger.error(f"导入过程失败: {e}")
            return False

    def validate_data(self, file_path: str) -> Dict[str, Any]:
        """
        Summarise the workbook and flag duplicate stock codes.

        Args:
            file_path: Path of the Excel file.

        Returns:
            On a readable file: a dict with ``valid``, ``total_rows``,
            ``stock_codes``, ``industries``, ``exchanges`` and
            ``duplicate_stocks``; ``valid`` is False when duplicates exist.
            On failure: ``{'valid': False, 'message': <reason>}``.
        """
        try:
            df = self.read_excel_file(file_path)
            if df is None:
                return {'valid': False, 'message': '文件读取失败'}

            result = {
                'valid': True,
                'total_rows': len(df),
                'stock_codes': df['Wind代码'].tolist(),
                'industries': df['Wind一级行业名称'].unique().tolist(),
                'exchanges': df['交易所'].unique().tolist(),
                'duplicate_stocks': df[df.duplicated('Wind代码')]['Wind代码'].tolist()
            }

            if result['duplicate_stocks']:
                self.logger.warning(f"发现重复的股票代码: {result['duplicate_stocks']}")
                result['valid'] = False

            return result

        except Exception as e:
            self.logger.error(f"数据验证失败: {e}")
            return {'valid': False, 'message': str(e)}
|
||||
@ -127,31 +127,31 @@ class FinancialDataManager:
|
||||
|
||||
def insert_stock(self, stock_data: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
插入或更新股票基本信息
|
||||
插入或更新股票基本信息 - 更新版本
|
||||
"""
|
||||
required_fields = ['stock_id', 'stock_name']
|
||||
required_fields = ['stock_id']
|
||||
if not all(field in stock_data for field in required_fields):
|
||||
self.logger.error(f"缺少必需字段: {required_fields}")
|
||||
return False
|
||||
|
||||
query = """
|
||||
INSERT INTO stocks (stock_id, stock_name, exchange, industry, market_cap, listing_date)
|
||||
INSERT INTO stocks (stock_id, wind_industry_code, wind_industry_name, country_code, exchange, listing_date)
|
||||
VALUES (%s, %s, %s, %s, %s, %s)
|
||||
ON DUPLICATE KEY UPDATE
|
||||
stock_name = VALUES(stock_name),
|
||||
wind_industry_code = VALUES(wind_industry_code),
|
||||
wind_industry_name = VALUES(wind_industry_name),
|
||||
country_code = VALUES(country_code),
|
||||
exchange = VALUES(exchange),
|
||||
industry = VALUES(industry),
|
||||
market_cap = VALUES(market_cap),
|
||||
listing_date = VALUES(listing_date),
|
||||
updated_at = CURRENT_TIMESTAMP
|
||||
"""
|
||||
|
||||
success = self._execute_query(query, (
|
||||
stock_data['stock_id'],
|
||||
stock_data['stock_name'],
|
||||
stock_data.get('wind_industry_code'),
|
||||
stock_data.get('wind_industry_name'),
|
||||
stock_data.get('country_code'),
|
||||
stock_data.get('exchange'),
|
||||
stock_data.get('industry'),
|
||||
stock_data.get('market_cap'),
|
||||
stock_data.get('listing_date')
|
||||
))
|
||||
|
||||
@ -159,6 +159,7 @@ class FinancialDataManager:
|
||||
self.logger.info(f"成功处理股票: {stock_data['stock_id']}")
|
||||
return success
|
||||
|
||||
|
||||
def batch_insert_stocks(self, stocks_list: List[Dict[str, Any]]) -> bool:
|
||||
"""批量插入股票数据"""
|
||||
success_count = 0
|
||||
|
||||
70
finance/import_stocks.py
Normal file
70
finance/import_stocks.py
Normal file
@ -0,0 +1,70 @@
|
||||
import os
|
||||
import logging
|
||||
from excel_stock_importer import ExcelStockImporter
|
||||
|
||||
# 设置日志
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
def main():
    """Locate stock_list.xlsx, validate it, and import it into the database.

    The workbook is searched for relative to the current working directory
    and, as a last resort, next to this script, so the import also works
    when the script is launched from another directory.
    """
    # NOTE(review): connection settings (including the password) are
    # hard-coded here; consider loading them from configuration instead.
    db_config = {
        'host': '127.0.0.1',
        'database': 'fintech',
        'user': 'root',
        'password': 'secret',
        'port': 3306
    }

    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Candidate locations, tried in order.
    excel_files = [
        "stock_list.xlsx",  # current working directory
        "./finance/stock_list.xlsx",  # finance/ under the working directory
        os.path.join(os.getcwd(), "./finance/stock_list.xlsx"),  # same, absolute
        os.path.join(script_dir, "stock_list.xlsx"),  # next to this script
    ]

    print("=== 当前工作目录 ===")
    print(f"工作目录: {os.getcwd()}")
    print(f"目录内容: {os.listdir('.')}")

    importer = ExcelStockImporter(db_config)

    found_file = None
    for excel_file in excel_files:
        if os.path.exists(excel_file):
            found_file = excel_file
            print(f"找到文件: {excel_file}")
            break
        print(f"文件不存在: {excel_file}")

    if not found_file:
        print("未找到stock_list.xlsx文件,请检查:")
        print("1. 文件名是否正确(包括扩展名)")
        print("2. 文件是否在当前目录")
        print("3. 文件权限是否正常")
        return

    print("\n=== 验证数据 ===")
    validation_result = importer.validate_data(found_file)
    print(f"数据验证结果: {'通过' if validation_result['valid'] else '失败'}")

    # Summary keys are only present when the file could be read.
    if 'total_rows' in validation_result:
        print(f"总行数: {validation_result['total_rows']}")
        print(f"行业列表: {validation_result.get('industries', [])}")
        print(f"交易所列表: {validation_result.get('exchanges', [])}")

        if validation_result.get('duplicate_stocks'):
            print(f"重复股票代码: {validation_result['duplicate_stocks']}")

    if validation_result.get('valid'):
        print("\n=== 导入数据 ===")
        success = importer.import_to_database(found_file)
        print(f"导入结果: {'成功' if success else '失败'}")
    else:
        # A duplicate-code failure carries no 'message' key; name the real
        # reason instead of reporting an unknown error.
        if validation_result.get('duplicate_stocks'):
            reason = validation_result.get('message', '存在重复的股票代码')
        else:
            reason = validation_result.get('message', '未知错误')
        print(f"数据验证失败: {reason}")


if __name__ == "__main__":
    main()
|
||||
BIN
finance/stock_list.xlsx
Normal file
BIN
finance/stock_list.xlsx
Normal file
Binary file not shown.
Reference in New Issue
Block a user