您的位置:首页 > 科技 > 能源 > 做网站哪家便宜_广东网页空间租用平台_电子商务网络营销_云南网站建设快速优化

做网站哪家便宜_广东网页空间租用平台_电子商务网络营销_云南网站建设快速优化

2025/1/8 20:33:24 来源:https://blog.csdn.net/weixin_44874487/article/details/144916179  浏览:    关键词:做网站哪家便宜_广东网页空间租用平台_电子商务网络营销_云南网站建设快速优化
做网站哪家便宜_广东网页空间租用平台_电子商务网络营销_云南网站建设快速优化

embl2fa.py 的用法(一行命令):

 python embl2fa.py X.embl > X.fa

embl2fa.py 

# embl2fa.py - convert the records of an EMBL flat file into FASTA on stdout.
#
# Usage: python embl2fa.py X.embl > X.fa
#
# Each record yields one FASTA entry whose header has the form
#   >ID#Type[/SubType] @species... [S:search,stages] description
# followed by the sequence wrapped at FASTA_LINE_WIDTH characters.  Records
# that declare buffer stages additionally yield one "#buffer" entry per
# distinct region.

import re
import sys
from typing import Dict, List, Optional

# Number of residues written per FASTA sequence line.
FASTA_LINE_WIDTH = 50

# Buffer stage restricted to a sub-range: "<stage>[<start>-<end>]".
_RANGED_STAGE_RE = re.compile(r"(\d+)\[(\d+)\-(\d+)\]")
# Buffer stage that applies to the whole sequence: "<stage>".
_PLAIN_STAGE_RE = re.compile(r"(\d+)")
# Everything in an EMBL sequence line that is not a residue: the blanks
# between the groups of ten and the running position counter at line end.
_NON_RESIDUE_RE = re.compile(r"[\s\d]+")


class EMBL:
    """Eagerly parsed collection of the records in one EMBL flat file.

    Args:
        file_name: Path of the EMBL file to read.

    Raises:
        FileNotFoundError: If ``file_name`` does not exist (raised by ``open``).
    """

    def __init__(self, file_name: str) -> None:
        self.file_name = file_name
        self.records = self._parse_embl_file()

    def _parse_embl_file(self) -> List["EMBLRecord"]:
        """Split the file on ``//`` terminator lines and parse each chunk."""
        records: List[EMBLRecord] = []
        chunk: List[str] = []
        with open(self.file_name, "r") as handle:
            for line in handle:
                if line.startswith("//"):
                    self._append_record(records, chunk)
                    chunk = []
                else:
                    chunk.append(line)
        # A final record that is not closed by "//" is still kept.
        self._append_record(records, chunk)
        return records

    def _append_record(self, records: List["EMBLRecord"], chunk: List[str]) -> None:
        """Parse ``chunk`` into a record unless it holds only blank lines.

        Blank lines after the last ``//`` would otherwise become an empty
        record and be written out as a header with no ID and no sequence.
        """
        if any(line.strip() for line in chunk):
            records.append(self._create_record(chunk))

    def _create_record(self, record_data: List[str]) -> "EMBLRecord":
        """Build an ``EMBLRecord`` from the lines of one record.

        Only the ``ID``, ``DE`` and ``SQ`` line types are interpreted; every
        line after the ``SQ`` header is treated as sequence data.
        """
        record = EMBLRecord()
        description_parts: List[str] = []
        for index, line in enumerate(record_data):
            if line.startswith("ID"):
                fields = line.split()
                # Tolerate an ID line without a name instead of raising
                # IndexError, and drop the ";" that ends the name field.
                if len(fields) > 1:
                    record.id = fields[1].rstrip(";")
            elif line.startswith("DE"):
                text = line[2:].strip()
                if text:
                    description_parts.append(text)
            elif line.startswith("SQ"):
                raw_sequence = "".join(record_data[index + 1:])
                record.sequence = _NON_RESIDUE_RE.sub("", raw_sequence)
                break  # everything after SQ has been consumed as sequence
        # A description may span several DE lines; keep all of them.
        record.description = " ".join(description_parts)
        return record

    def get_record_count(self) -> int:
        """Return the number of parsed records."""
        return len(self.records)

    def get_record(self, index: int) -> "EMBLRecord":
        """Return the record at position ``index`` (0-based)."""
        return self.records[index]


class EMBLRecord:
    """One EMBL record: identifier, description and residue sequence.

    The ``get_rm_*`` accessors are placeholders that return fixed values;
    no RepeatMasker-specific annotation is parsed from the file yet.
    """

    def __init__(self) -> None:
        self.id = ""
        self.description = ""
        self.sequence = ""

    def get_id(self) -> str:
        """Return the record identifier taken from the ID line."""
        return self.id

    def get_description(self) -> str:
        """Return the text of the DE line(s)."""
        return self.description

    def get_sequence(self) -> str:
        """Return the sequence with whitespace and counters removed."""
        return self.sequence

    def get_rm_type(self) -> str:
        return "RMType"  # Placeholder: Add actual logic

    def get_rm_sub_type(self) -> str:
        return ""  # Placeholder: Add actual logic

    def get_rm_species_array(self) -> List[str]:
        return []  # Placeholder: Add actual logic

    def get_rm_search_stages_array(self) -> List[str]:
        return []  # Placeholder: Add actual logic

    def get_rm_buffer_stages_array(self) -> List[str]:
        return []  # Placeholder: Add actual logic


def _wrap_sequence(sequence: str) -> str:
    """Return ``sequence`` split into fixed-width lines, newline-terminated.

    An empty sequence yields a single empty line.
    """
    lines = [
        sequence[offset:offset + FASTA_LINE_WIDTH]
        for offset in range(0, len(sequence), FASTA_LINE_WIDTH)
    ]
    return "\n".join(lines) + "\n"


def _group_buffer_stages(stages: List[str]) -> Dict[str, List[str]]:
    """Group buffer stage numbers by region.

    Keys are ``"<start>-<end>"`` for ranged stages and ``"full"`` for stages
    that cover the whole sequence.  Stages that match neither form are
    reported on stderr and skipped.
    """
    grouped: Dict[str, List[str]] = {}
    for stage in stages:
        ranged = _RANGED_STAGE_RE.match(stage)
        if ranged:
            region = f"{ranged.group(2)}-{ranged.group(3)}"
            grouped.setdefault(region, []).append(ranged.group(1))
        elif _PLAIN_STAGE_RE.match(stage):
            grouped.setdefault("full", []).append(stage)
        else:
            print(f"Warning: Buffer stage {stage} not understood!", file=sys.stderr)
    return grouped


def _write_record(record: EMBLRecord) -> None:
    """Print the FASTA entry for ``record`` plus one entry per buffer region."""
    record_id = record.get_id()
    record_type = f"#{record.get_rm_type()}"
    sub_type = record.get_rm_sub_type()
    if sub_type:
        record_type += f"/{sub_type}"
    description = record.get_description()
    species_list = " ".join(f"@{name}" for name in record.get_rm_species_array())
    search_stages = "[S:" + ",".join(record.get_rm_search_stages_array()) + "]"
    search_stages = re.sub(r",\]", "]", search_stages)
    sequence = record.get_sequence()

    print(f">{record_id}{record_type} {species_list} {search_stages} {description}")
    # The wrapped text already ends with a newline; printing it with the
    # default terminator would leave a blank line after every entry.
    print(_wrap_sequence(sequence), end="")

    grouped = _group_buffer_stages(record.get_rm_buffer_stages_array())
    for region, stage_ids in grouped.items():
        buffer_stages = "[S:" + ",".join(stage_ids) + "]"
        if region == "full":
            buffer_sequence = sequence
            buffer_type = "#buffer"
        else:
            start, end = map(int, region.split("-"))
            # Coordinates are 1-based inclusive; clamp so that a start of 0
            # does not turn into a negative (from-the-end) slice index.
            buffer_sequence = sequence[max(start - 1, 0):end]
            buffer_type = f"_{start}_{end}#buffer"
        print(f">{record_id}{buffer_type} {species_list} {buffer_stages} {description}")
        print(_wrap_sequence(buffer_sequence), end="")


def main(argv: Optional[List[str]] = None) -> int:
    """Convert the EMBL file named by the first argument to FASTA on stdout.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when the argument is missing or the file is not found.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print("Usage: python X.py X.embl > X.fa", file=sys.stderr)
        return 1

    in_file = args[0]
    try:
        db = EMBL(file_name=in_file)
    except FileNotFoundError:
        print(f"Error: File {in_file} not found!", file=sys.stderr)
        return 1

    for record in db.records:
        _write_record(record)
    return 0


if __name__ == "__main__":
    sys.exit(main())

参考:RepeatMasker 自带的 Perl 脚本 RepeatMasker/util/buildRMLibFromEMBL.pl

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com