Commit e4c10062 authored by Francisco Arcila Salamanca
Browse files

add provisional 10X expression parser with pandas

parent 045a27d9
......@@ -22,7 +22,8 @@ def safe_create_schema(db: MySQLcon, project_name: str) -> None:
if project_name in pd.read_sql("show databases", db).Database.tolist():
db.close()
logger.error(f"{project_name} is already in the database")
raise NameError(f"Database `{project_name}` already exists")
db.close()
# raise NameError(f"Database `{project_name}` already exists")
else:
# Create database scheme
c.execute(f"CREATE SCHEMA IF NOT EXISTS {project_name};")
......@@ -566,14 +567,70 @@ def import_repertoires(
db.commit()
def import_expression(flow_hdf: pd.DataFrame, project_name: str, db: MySQLcon) -> None:
def load_normalize_expression(path_to_sparse_mx: str) -> pd.DataFrame:
    """
    Load a 10X expression matrix and reshape it into long form.

    The file is read with its first column as the row index and is then
    transposed, so the file's column headers end up in the ``CELL`` column
    and the first-column entries end up in the ``GENE`` column.

    :param path_to_sparse_mx: path to 10X gzipped TSV sparse matrix
    :return: dataframe with the columns ``CELL``, ``GENE`` and ``LEVEL``,
        restricted to the rows whose ``LEVEL`` is not 0
    """
    expression_mx = pd.read_csv(
        path_to_sparse_mx, compression="gzip", sep="\t", index_col=0
    ).T
    # Name the two index levels explicitly rather than renaming the
    # auto-generated "level_0"/"level_1" columns afterwards: those defaults
    # only exist when the index levels are unnamed, so a header on the first
    # TSV column would otherwise leak through as the gene column's name.
    stacked_mx = (
        expression_mx.stack()
        .rename_axis(["CELL", "GENE"])
        .reset_index(name="LEVEL")
    )
    return stacked_mx[stacked_mx.LEVEL != 0]
def import_10X_expression(
    path_to_sparse_mx: str, project_name: str, db: MySQLcon
) -> None:
    """
    Import 10X expression data into the ``expression`` table of a project db.

    The matrix is loaded with ``load_normalize_expression`` and its rows are
    inserted with ``INSERT IGNORE`` in batches; the transaction is committed
    once after all batches have been sent.

    :param path_to_sparse_mx: path to 10X gzipped TSV sparse matrix
    :param project_name: name of the db where data will be imported
    :param db: database connection object
    :return: imports 10X expression data into the `project_name` db
    """
    expression_df = load_normalize_expression(path_to_sparse_mx)
    logger.info("10X expression import started, this might take a while...")

    # The statement is the same for every row, so build it once. Only the
    # values are bound as parameters; an identifier cannot be bound.
    # NOTE(review): `project_name` is interpolated as an identifier here --
    # confirm that callers only pass validated schema names.
    sql = (
        f"INSERT IGNORE INTO `{project_name}`.expression "
        "(`CELL_ID`, `GENE`, `LEVEL`) VALUES (%s,%s,%s)"
    )
    # Send rows in bounded batches so a large matrix is neither inserted one
    # round trip per row nor packed into a single oversized statement.
    batch_size = 10_000

    c = db.cursor()
    try:
        for start in range(0, len(expression_df), batch_size):
            chunk = expression_df.iloc[start : start + batch_size]
            # itertuples yields plain tuples in column order, which is the
            # positional shape the placeholders expect.
            c.executemany(sql, list(chunk.itertuples(index=False, name=None)))
        db.commit()
    finally:
        # Release the cursor whether or not the import succeeded.
        c.close()

    logger.info(
        f"[SUCCESS] 10X expression data have been imported into the `{project_name}` database"
    )
def import_FACS_expression(
path_to_flow_tsv: str, project_name: str, db: MySQLcon
) -> None:
"""
:param path_to_flow_tsv: path to FACS expression file
:param project_name: name of the db where data will be imported
:param db: database connection object
:return: imports flow data into the `project_name` db
"""
flow_hdf = pd.read_csv(path_to_flow_tsv, sep="\t")
# set cursor
c = db.cursor()
flow_hdf.set_index("cell_id", inplace=True)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment