网易云课堂 Linux内核分析(三)

    xiaoxiao2021-03-25  64

    寇亚飞 + 原创作品转载请注明出处 + 《Linux内核分析》MOOC课程http://mooc.study.163.com/course/USTC-1000029000

    实验内容

    跟踪分析Linux内核的启动过程

    使用gdb跟踪调试内核从start_kernel到init进程启动

    实验步骤及结果

    参见实验报告

    实验分析

    本次实验是使用gdb调试跟踪内核启动代码。通过前几次课了解到,计算机是通过执行指令来完成一系列操作,这其中很重要的一点就是eip寄存器,它给cpu指明了下一条要执行的指令是谁。因此,x86计算机启动的第一个动作就是从硬盘固定位置读取BIOS程序位置到eip中,然后BIOS例行程序检测完硬件并完成相应的初始化之后就会寻找可引导介质,找到后把引导程序加载到指定内存区域后,就把控制权交给了引导程序。这里一般是把硬盘的第一个扇区MBR和活动分区的引导程序加载到内存(即加载BootLoader),加载完整后把控制权交给BootLoader。引导程序BootLoader开始负责操作系统初始化,然后起动操作系统。启动操作系统时一般会指定kernel、initrd和root所在的分区和目录,比如root (hd0,0),kernel (hd0,0)/bzImage root=/dev/ram init=/bin/ash,initrd (hd0,0)/myinitrd4M.img 内核启动过程包括start_kernel之前和之后,之前全部是做初始化的汇编指令,之后开始C代码的操作系统初始化,最后执行第一个用户态进程init。本次实验就是调试从start_kernel开始的代码。

    在start_kernel中

    set_task_stack_end_magic(&init_task);

    完成了创建0号进程的任务,即idle进程。它是系统创建的第一个进程。 在start_kernel中,完成了许多初始化的工作。如trap_init()完成了中断初始化,mm_init()完成了内存管理初始化,sched_init()完成了调度初始化等等。start_kernel代码如下:

    asmlinkage __visible void __init start_kernel(void) { char *command_line; char *after_dashes; set_task_stack_end_magic(&init_task); smp_setup_processor_id(); debug_objects_early_init(); /* * Set up the the initial canary ASAP: */ boot_init_stack_canary(); cgroup_init_early(); local_irq_disable(); early_boot_irqs_disabled = true; /* * Interrupts are still disabled. Do necessary setups, then * enable them */ boot_cpu_init(); page_address_init(); pr_notice("%s", linux_banner); setup_arch(&command_line); mm_init_cpumask(&init_mm); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); boot_cpu_state_init(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ build_all_zonelists(NULL, NULL); page_alloc_init(); pr_notice("Kernel command line: %s\n", boot_command_line); parse_early_param(); after_dashes = parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, -1, -1, NULL, &unknown_bootoption); if (!IS_ERR_OR_NULL(after_dashes)) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, NULL, set_init_arg); jump_label_init(); /* * These use large bootmem allocations and must precede * kmem_cache_init() */ setup_log_buf(0); pidhash_init(); vfs_caches_init_early(); sort_main_extable(); trap_init(); mm_init(); /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() * time - but meanwhile we still have a functioning scheduler. */ sched_init(); /* * Disable preemption - early bootup scheduling is extremely * fragile until we cpu_idle() for the first time. */ preempt_disable(); if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); idr_init_cache(); /* * Allow workqueue creation and work item queueing/cancelling * early. Work item execution depends on kthreads and starts after * workqueue_init(). */ workqueue_init_early(); rcu_init(); /* trace_printk() and trace points may be used after this */ trace_init(); context_tracking_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); tick_init(); rcu_init_nohz(); init_timers(); hrtimers_init(); softirq_init(); timekeeping_init(); time_init(); sched_clock_postinit(); printk_safe_init(); perf_event_init(); profile_init(); call_function_init(); WARN(!irqs_disabled(), "Interrupts were enabled early\n"); early_boot_irqs_disabled = false; local_irq_enable(); kmem_cache_init_late(); /* * HACK ALERT! This is early. We're enabling the console before * we've done PCI setups etc, and console_init() must be aware of * this. But we do want output early, in case something goes wrong. */ console_init(); if (panic_later) panic("Too many boot %s vars at `%s'", panic_later, panic_param); lockdep_info(); /* * Need to run this when irqs are enabled, because it wants * to self-test [hard/soft]-irqs on/off lock inversion bugs * too: */ locking_selftest(); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { pr_crit("initrd overwritten (0xlx < 0xlx) - disabling it.\n", page_to_pfn(virt_to_page((void *)initrd_start)), min_low_pfn); initrd_start = 0; } #endif page_ext_init(); debug_objects_mem_init(); kmemleak_init(); setup_per_cpu_pageset(); numa_policy_init(); if (late_time_init) late_time_init(); calibrate_delay(); pidmap_init(); anon_vma_init(); acpi_early_init(); #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); #endif #ifdef CONFIG_X86_ESPFIX64 /* Should be run before the first non-init thread is created */ init_espfix_bsp(); #endif thread_stack_cache_init(); cred_init(); fork_init(); proc_caches_init(); buffer_init(); key_init(); security_init(); dbg_late_init(); vfs_caches_init(); pagecache_init(); signals_init(); proc_root_init(); nsfs_init(); cpuset_init(); cgroup_init(); taskstats_init_early(); delayacct_init(); check_bugs(); acpi_subsystem_init(); arch_post_acpi_subsys_init(); sfi_init_late(); if (efi_enabled(EFI_RUNTIME_SERVICES)) { efi_free_boot_services(); } ftrace_init(); /* Do the rest non-__init'ed, we're now alive */ rest_init(); }

    在实验中,将断点设置在start_kernel的最后一行,即rest_init(),它完成了剩余部分的初始化。 在rest_init()中

    kernel_thread(kernel_init, NULL, CLONE_FS)

    创建了1号进程,它是所有用户态进程的祖先,也就是init进程。 然后通过

    pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);

    创建的kthreadd2号进程,它是所有内核态线程的祖先。

    static noinline void __ref rest_init(void) { int pid; rcu_scheduler_starting(); /* * We need to spawn init first so that it obtains pid 1, however * the init task will end up wanting to create kthreads, which, if * we schedule it before we create kthreadd, will OOPS. */ kernel_thread(kernel_init, NULL, CLONE_FS); numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); rcu_read_lock(); kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); rcu_read_unlock(); complete(&kthreadd_done); /* * The boot idle thread must execute schedule() * at least once to get things moving: */ init_idle_bootup_task(current); schedule_preempt_disabled(); /* Call into cpu_idle with preempt disabled */ cpu_startup_entry(CPUHP_ONLINE); }

    在rest_init的最后

    void cpu_startup_entry(enum cpuhp_state state) { /* * This #ifdef needs to die, but it's too late in the cycle to * make this generic (arm and sh have never invoked the canary * init for the non boot cpus!). Will be fixed in 3.11 */ #ifdef CONFIG_X86 /* * If we're the non-boot CPU, nothing set the stack canary up * for us. The boot CPU already has it initialized but no harm * in doing it again. This is a good place for updating it, as * we wont ever return from this function (so the invalid * canaries already on the stack wont ever trigger). */ boot_init_stack_canary(); #endif arch_cpu_idle_prepare(); cpu_idle_loop(); }

    cpu_startup_entry()调用了cpu_idle_loop(),在cpu_idle_loop()中,是一个while(1)循环,它是在系统没有别的进程执行时就调用idle进程即0号进程。 更具体的关于0号,1号,2号进程的分析参考这个人的博客

    总结

    计算机在启动后,执行到start_kernel后,创建了第一个进程即0号进程,然后完成了一系列的内核初始化工作;然后0号进程通过kernel_thread创建了init进程,即1号进程;通过kernel_thread创建 kthreadd进程即2号进程。idle进程真的没意义吗?这么看来它是无比的有意义。

    转载请注明原文地址: https://ju.6miu.com/read-17801.html

    最新回复(0)