{ "cells": [ { "cell_type": "code", "execution_count": 55, "id": "6ee294dd-020e-417f-97a1-628ab8be56c6", "metadata": {}, "outputs": [], "source": [ "import rpy2\n", "import pdb,sys,os\n", "from junlib.File import *\n", "import pandas as pd\n", "import pyreadr\n", "import numpy as np\n", "from junlib.BioUtils import BioList\n", "import scipy.io,scipy.sparse\n", "from scipy.sparse import csc_matrix" ] }, { "cell_type": "code", "execution_count": 2, "id": "b65982ed-74e8-459f-9447-ca0e4022422c", "metadata": {}, "outputs": [], "source": [ "# Read the tab-delimited table treutlein2016_2.\n", "# NOTE(review): TabFile is not imported by name; presumably it comes from the junlib.File star import - confirm.\n", "ex=TabFile(\"treutlein2016_2\").read(\"\\t\")" ] }, { "cell_type": "code", "execution_count": 18, "id": "bebed5d6-4e30-491f-a349-6ab51aff6c34", "metadata": {}, "outputs": [], "source": [ "# ex[0] is the header row and every later row describes one cell.\n", "# The first three fields of a row are its metadata; the remaining fields are the per-gene values.\n", "header=ex[0]\n", "rows=ex[1:]\n", "Genes=header[3:]\n", "Cells=[row[:3] for row in rows]\n", "Matrix=[row[3:] for row in rows]" ] }, { "cell_type": "code", "execution_count": 4, "id": "f63ab962-4b4d-414e-8915-1c03ee8f80b5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['1_iN2_C02', '0', 'MEF']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Peek at the metadata triple of the first cell.\n", "Cells[0]" ] }, { "cell_type": "code", "execution_count": 5, "id": "9b193544-9221-4800-921f-1aed6b0c9afe", "metadata": {}, "outputs": [], "source": [ "# One metadata row per cell, indexed by the identifier stored in field 0.\n", "cell_ids=[meta[0] for meta in Cells]\n", "df_Cells=pd.DataFrame(data=Cells,index=cell_ids,columns=['Cell','Time','Label'])" ] }, { "cell_type": "code", "execution_count": null, "id": "75647449-ab11-4474-96e4-a66fcfa2588d", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 6, "id": "f30e5236-dea6-45ec-9aec-c631e60aa7ea", "metadata": {}, "outputs": [], "source": [ "# Save the cell metadata table as an RDS file.\n", "pyreadr.write_rds(\"Cells.Rds\", df_Cells)" ] }, { "cell_type": "code", "execution_count": 7, "id": "82957d3c-18f3-4cef-8d5b-98077904c60a", "metadata": {}, "outputs": [], "source": [ "# Single-column gene table whose index is the gene name itself.\n", "df_Genes=pd.DataFrame({'Gene':Genes},index=Genes)" ] }, { "cell_type": "code", "execution_count": 8, "id": "982de120-b220-4cbd-96e0-0f764d7765ea", "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "11835" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of values in the first row of Matrix (rows are still cells at this point).\n", "len(Matrix[0])" ] }, { "cell_type": "code", "execution_count": 9, "id": "bdef33fc-77b9-4d6e-848c-9263b582480d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Gene
0610007C21Rik0610007C21Rik
0610007L01Rik0610007L01Rik
0610007N19Rik0610007N19Rik
0610007P08Rik0610007P08Rik
0610007P14Rik0610007P14Rik
......
ZyxZyx
Zzef1Zzef1
Zzz3Zzz3
l7Rn6l7Rn6
rtTArtTA
\n", "

11835 rows × 1 columns

\n", "
" ], "text/plain": [ " Gene\n", "0610007C21Rik 0610007C21Rik\n", "0610007L01Rik 0610007L01Rik\n", "0610007N19Rik 0610007N19Rik\n", "0610007P08Rik 0610007P08Rik\n", "0610007P14Rik 0610007P14Rik\n", "... ...\n", "Zyx Zyx\n", "Zzef1 Zzef1\n", "Zzz3 Zzz3\n", "l7Rn6 l7Rn6\n", "rtTA rtTA\n", "\n", "[11835 rows x 1 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_Genes" ] }, { "cell_type": "code", "execution_count": 10, "id": "c6be4ab7-a1fe-4abf-ae46-cf91c5d670c8", "metadata": {}, "outputs": [], "source": [ "# Save the gene table as an RDS file.\n", "pyreadr.write_rds(\"Genes.Rds\",df_Genes)" ] }, { "cell_type": "code", "execution_count": 27, "id": "12e9e40d-5b73-4ddb-af28-fc40cbf544af", "metadata": {}, "outputs": [], "source": [ "# Reorient the values from cells x genes to genes x cells.\n", "# The array is rebuilt from ex (same slicing as the cell that first defines Matrix)\n", "# instead of from Matrix itself, so re-running this cell cannot transpose it back.\n", "Matrix=np.array([item[3:] for item in ex[1:]]).transpose()\n", "# Fail early if the header and the data rows disagree on the number of fields.\n", "assert Matrix.shape==(len(Genes),len(Cells))" ] }, { "cell_type": "code", "execution_count": 28, "id": "c443c5ff-6a30-4d0d-bd33-f2cd91867671", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11835" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Row count after the transpose (one row per gene).\n", "len(Matrix)" ] }, { "cell_type": "code", "execution_count": 29, "id": "55a0b974-d4e5-40fc-8233-26723f2ab3c1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "252" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Column count after the transpose (one column per cell).\n", "len(Matrix[0])" ] }, { "cell_type": "code", "execution_count": 30, "id": "ec3b7b99-cc8c-483f-a0ad-68ddbe8df7fd", "metadata": {}, "outputs": [], "source": [ "# Back to nested Python lists for the exports below.\n", "# np.asarray keeps this cell re-runnable: a plain list has no tolist method.\n", "Matrix=np.asarray(Matrix).tolist()" ] }, { "cell_type": "code", "execution_count": 33, "id": "acef98fb-9641-4eb6-8fc9-5f74e9c67eaa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11835" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(Matrix)" ] }, { "cell_type": "code", "execution_count": 42, "id": "c9c3282f-672b-4f71-a927-05832bfbff88", "metadata": {}, "outputs": [], "source": [ "# Genes x cells table labelled with gene names (rows) and cell identifiers (columns).\n", "# No numeric cast happens here: the values stay exactly as they were read into ex.\n", "dMatrix=pd.DataFrame(data=Matrix,columns=[item[0] for item in Cells],index=Genes)" ] }, { 
"cell_type": "code", "execution_count": 43, "id": "b81497bc-b124-4838-85ae-e6ed46633f5b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
1_iN2_C021_iN2_C051_iN2_C071_iN2_C081_iN2_C091_iN2_C121_iN2_C131_iN2_C141_iN2_C151_iN2_C16...1ggM_iN4_C761ggM_iN4_C831ggM_iN4_C851ggS_iN4_C101ggS_iN4_C131ggS_iN4_C671gL_iN4_C021gL_iN4_C311gL_iN4_C391gM_iN4_C42
0610007C21Rik6.341676335931644.9599446922216504.697531054191496.609152110041536.7494643471286804.896860597740157.150976207941783.27141502017117...5.781296470627595.972230324198064.747790044183846.485284144890478.552965067868422.415375255746616.489036752342397.409228965323554.12870631207056.61612668319865
0610007L01Rik06.2314316605630503.062911938762946.6279758472515805.586616487408541.316935017154752.412010704121514.70269246804867...0.75435962624328504.1902934165008402.500044379185146.725155872741124.404424166456755.1077107404908400
0610007N19Rik003.8889908630582505.5598679800271901.556088748970961.4833557782516903.55424048563973...4.99616229084016003.85089831294737004.184277130319164.0090058656345604.2097714690844
0610007P08Rik000.04136835629246603.24737025967400000...000.1429249850493150002.3112007267365005.13145556233943
0610007P14Rik5.690382656545896.055018725890755.530216473969390.6937638831193216.673741672214026.191660344267336.226035832150957.230031916898676.407416162274047.31172597492533...04.159596563971443.821584669186840004.459129992743423.430740494976964.580871937173667.49525945442237
..................................................................
Zyx1.682170818089195.838476428503742.614466066551157.464195678927847.933591751292723.944000397848296.832367070030293.809923991177427.563686772912494.35961428372074...6.896435686173554.5377717270082704.624376027624112.01896082761210.5398204377202344.965916677625437.190140752661986.099715451578646.06130415583606
Zzef11.1392916159751102.145845639451673.407588122695541.933924241860950.73883162522301300.64106100877031802.3670933216363...001.436602860517221.264164179155753.8651050364883700.0192430133830156003.27570987739765
Zzz303.642170309024253.86499003785313002.584838264118443.121611441396840.2835459882721312.268629305699973.15643281728014...2.31685707660686002.91576112647274.5938957290220500.97510575169318401.623088126792314.26004685209986
l7Rn62.065228103297435.092430413606461.989719876950613.995451345515513.627951492213426.551059537173056.416119311309286.113691196261576.348139326754486.12916286074585...2.536711732423994.988359139137515.2166662515891505.782106972938504.752770324411835.6814820144664704.54152040770032
rtTA0000000000...6.210589609128567.612386967959786.480159650835575.405732439221867.848706123493045.95816207481279005.621044044527780
\n", "

11835 rows × 252 columns

\n", "
" ], "text/plain": [ " 1_iN2_C02 1_iN2_C05 1_iN2_C07 \\\n", "0610007C21Rik 6.34167633593164 4.95994469222165 0 \n", "0610007L01Rik 0 6.23143166056305 0 \n", "0610007N19Rik 0 0 3.88899086305825 \n", "0610007P08Rik 0 0 0.041368356292466 \n", "0610007P14Rik 5.69038265654589 6.05501872589075 5.53021647396939 \n", "... ... ... ... \n", "Zyx 1.68217081808919 5.83847642850374 2.61446606655115 \n", "Zzef1 1.13929161597511 0 2.14584563945167 \n", "Zzz3 0 3.64217030902425 3.86499003785313 \n", "l7Rn6 2.06522810329743 5.09243041360646 1.98971987695061 \n", "rtTA 0 0 0 \n", "\n", " 1_iN2_C08 1_iN2_C09 1_iN2_C12 \\\n", "0610007C21Rik 4.69753105419149 6.60915211004153 6.74946434712868 \n", "0610007L01Rik 3.06291193876294 6.62797584725158 0 \n", "0610007N19Rik 0 5.55986798002719 0 \n", "0610007P08Rik 0 3.247370259674 0 \n", "0610007P14Rik 0.693763883119321 6.67374167221402 6.19166034426733 \n", "... ... ... ... \n", "Zyx 7.46419567892784 7.93359175129272 3.94400039784829 \n", "Zzef1 3.40758812269554 1.93392424186095 0.738831625223013 \n", "Zzz3 0 0 2.58483826411844 \n", "l7Rn6 3.99545134551551 3.62795149221342 6.55105953717305 \n", "rtTA 0 0 0 \n", "\n", " 1_iN2_C13 1_iN2_C14 1_iN2_C15 \\\n", "0610007C21Rik 0 4.89686059774015 7.15097620794178 \n", "0610007L01Rik 5.58661648740854 1.31693501715475 2.41201070412151 \n", "0610007N19Rik 1.55608874897096 1.48335577825169 0 \n", "0610007P08Rik 0 0 0 \n", "0610007P14Rik 6.22603583215095 7.23003191689867 6.40741616227404 \n", "... ... ... ... \n", "Zyx 6.83236707003029 3.80992399117742 7.56368677291249 \n", "Zzef1 0 0.641061008770318 0 \n", "Zzz3 3.12161144139684 0.283545988272131 2.26862930569997 \n", "l7Rn6 6.41611931130928 6.11369119626157 6.34813932675448 \n", "rtTA 0 0 0 \n", "\n", " 1_iN2_C16 ... 1ggM_iN4_C76 1ggM_iN4_C83 \\\n", "0610007C21Rik 3.27141502017117 ... 5.78129647062759 5.97223032419806 \n", "0610007L01Rik 4.70269246804867 ... 0.754359626243285 0 \n", "0610007N19Rik 3.55424048563973 ... 
4.99616229084016 0 \n", "0610007P08Rik 0 ... 0 0 \n", "0610007P14Rik 7.31172597492533 ... 0 4.15959656397144 \n", "... ... ... ... ... \n", "Zyx 4.35961428372074 ... 6.89643568617355 4.53777172700827 \n", "Zzef1 2.3670933216363 ... 0 0 \n", "Zzz3 3.15643281728014 ... 2.31685707660686 0 \n", "l7Rn6 6.12916286074585 ... 2.53671173242399 4.98835913913751 \n", "rtTA 0 ... 6.21058960912856 7.61238696795978 \n", "\n", " 1ggM_iN4_C85 1ggS_iN4_C10 1ggS_iN4_C13 \\\n", "0610007C21Rik 4.74779004418384 6.48528414489047 8.55296506786842 \n", "0610007L01Rik 4.19029341650084 0 2.50004437918514 \n", "0610007N19Rik 0 3.85089831294737 0 \n", "0610007P08Rik 0.142924985049315 0 0 \n", "0610007P14Rik 3.82158466918684 0 0 \n", "... ... ... ... \n", "Zyx 0 4.62437602762411 2.0189608276121 \n", "Zzef1 1.43660286051722 1.26416417915575 3.86510503648837 \n", "Zzz3 0 2.9157611264727 4.59389572902205 \n", "l7Rn6 5.21666625158915 0 5.7821069729385 \n", "rtTA 6.48015965083557 5.40573243922186 7.84870612349304 \n", "\n", " 1ggS_iN4_C67 1gL_iN4_C02 1gL_iN4_C31 \\\n", "0610007C21Rik 2.41537525574661 6.48903675234239 7.40922896532355 \n", "0610007L01Rik 6.72515587274112 4.40442416645675 5.10771074049084 \n", "0610007N19Rik 0 4.18427713031916 4.00900586563456 \n", "0610007P08Rik 0 2.3112007267365 0 \n", "0610007P14Rik 0 4.45912999274342 3.43074049497696 \n", "... ... ... ... \n", "Zyx 0.539820437720234 4.96591667762543 7.19014075266198 \n", "Zzef1 0 0.0192430133830156 0 \n", "Zzz3 0 0.975105751693184 0 \n", "l7Rn6 0 4.75277032441183 5.68148201446647 \n", "rtTA 5.95816207481279 0 0 \n", "\n", " 1gL_iN4_C39 1gM_iN4_C42 \n", "0610007C21Rik 4.1287063120705 6.61612668319865 \n", "0610007L01Rik 0 0 \n", "0610007N19Rik 0 4.2097714690844 \n", "0610007P08Rik 0 5.13145556233943 \n", "0610007P14Rik 4.58087193717366 7.49525945442237 \n", "... ... ... 
\n", "Zyx 6.09971545157864 6.06130415583606 \n", "Zzef1 0 3.27570987739765 \n", "Zzz3 1.62308812679231 4.26004685209986 \n", "l7Rn6 0 4.54152040770032 \n", "rtTA 5.62104404452778 0 \n", "\n", "[11835 rows x 252 columns]" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dMatrix" ] }, { "cell_type": "code", "execution_count": 34, "id": "44f52fe9-c058-48b2-9c5c-054e2ea9674e", "metadata": {}, "outputs": [], "source": [ "# Export the genes x cells values as a tab-separated file.\n", "BioList(Matrix).ex2File(\"Matrix.tsv\",\"\\t\")" ] }, { "cell_type": "code", "execution_count": 44, "id": "3e790921-c762-4685-9829-57e059b6e085", "metadata": {}, "outputs": [], "source": [ "# dMatrix still holds the text values read from the input file, so cast to float\n", "# before writing; otherwise the RDS file is built from string columns.\n", "# The cast is applied to a copy, so dMatrix itself is left unchanged.\n", "pyreadr.write_rds(\"ExprMat.Rds\",dMatrix.astype(float))" ] }, { "cell_type": "code", "execution_count": 45, "id": "263acc3f-aab7-4538-9ee5-d3c0a8a132d2", "metadata": {}, "outputs": [], "source": [ "# Export the per-cell metadata rows as a tab-separated file.\n", "BioList(Cells).ex2File(\"Cells.tsv\",\"\\t\")" ] }, { "cell_type": "code", "execution_count": 46, "id": "8ce66160-a33e-4366-9ed5-979e6a7fd862", "metadata": {}, "outputs": [], "source": [ "# Export the gene names as a tab-separated file.\n", "BioList(Genes).ex2File(\"Genes.tsv\",\"\\t\")" ] }, { "cell_type": "code", "execution_count": 56, "id": "1a60e32f-dba8-42be-961d-7102c5d0c1fd", "metadata": {}, "outputs": [], "source": [ "# Numeric, column-compressed copy of the values for the Matrix Market export.\n", "# It gets its own name so that Matrix keeps its nested-list form and this cell\n", "# gives the same result every time it is run.\n", "sMatrix=csc_matrix(np.array(Matrix,dtype=float))" ] }, { "cell_type": "code", "execution_count": 57, "id": "dedc68b5-6384-4ee3-a932-a8c36cfc8fe4", "metadata": {}, "outputs": [], "source": [ "# Write the sparse matrix in Matrix Market format.\n", "scipy.io.mmwrite(\"Matrix.mtx\",sMatrix)" ] }, { "cell_type": "code", "execution_count": null, "id": "d6f972f4-5b3d-4bb2-bd1e-13df27bea059", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 5 }