Node B is in the dominance frontier of A if A does not strictly dominate B but does dominate one of B's predecessors.
Pseudocode for computing dominance frontiers by walking up the dominator tree from each join point:
for each node b
    if the number of immediate predecessors of b ≥ 2
        for each p in immediate predecessors of b
            runner := p
            while runner ≠ idom(b)
                add b to runner's dominance frontier set
                runner := idom(runner)
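To make the walk concrete, here is a minimal Go sketch of the same runner-based algorithm. The representation (predecessor lists and an idom map keyed by made-up integer node IDs) is chosen purely for illustration.

```go
package main

import "fmt"

// computeDF computes dominance frontiers from immediate dominators and
// predecessor lists, following the runner-based pseudocode above.
// idom[n] is the immediate dominator of n; the entry node is its own idom.
func computeDF(preds map[int][]int, idom map[int]int) map[int]map[int]bool {
    df := make(map[int]map[int]bool)
    for b, ps := range preds {
        if len(ps) < 2 {
            continue // only join points contribute
        }
        for _, p := range ps {
            runner := p
            for runner != idom[b] {
                if df[runner] == nil {
                    df[runner] = make(map[int]bool)
                }
                df[runner][b] = true // b is in runner's dominance frontier
                runner = idom[runner]
            }
        }
    }
    return df
}

func main() {
    // A diamond CFG: 1 -> 2, 1 -> 3, 2 -> 4, 3 -> 4.
    preds := map[int][]int{2: {1}, 3: {1}, 4: {2, 3}}
    idom := map[int]int{1: 1, 2: 1, 3: 1, 4: 1}
    fmt.Println(computeDF(preds, idom)) // DF(2) = DF(3) = {4}
}
```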
/*
 * This is the ISR for the vortex series chips.
 * full_bus_master_tx == 0 && full_bus_master_rx == 0
 */
static irqreturn_t vortex_interrupt(int irq, void *dev_id)
{
    struct net_device *dev = dev_id;
    struct vortex_private *vp = netdev_priv(dev);
    void __iomem *ioaddr;
    int status;
    int work_done = max_interrupt_work;
    int handled = 0;
    unsigned int bytes_compl = 0, pkts_compl = 0;

    ioaddr = vp->ioaddr;
    spin_lock(&vp->lock);

    status = ioread16(ioaddr + EL3_STATUS); // read the NIC status from the ioremapped address; this register offset is defined by the device

    if (vortex_debug > 6)
        pr_debug("vortex_interrupt(). status=0x%4x\n", status);
    // While this handler runs the device has its interrupts masked, so this
    // status word tells us whether the device actually has an interrupt pending.
    if ((status & IntLatch) == 0)   // there was an interrupt to handle, but another handler on the shared IRQ may already have dealt with it
        goto handler_exit;          /* No interrupt: shared IRQs cause this */
    handled = 1;

    if (status & IntReq) {          // interrupt request: fold any deferred status bits back in
        status |= vp->deferred;
        vp->deferred = 0;
    }

    if (status == 0xffff)           /* h/w no longer present (hotplug)? */
        goto handler_exit;

    if (vortex_debug > 4)
        pr_debug("%s: interrupt, status %4.4x, latency %d ticks.\n",
                 dev->name, status, ioread8(ioaddr + Timer));

    spin_lock(&vp->window_lock);
    window_set(vp, 7);
    do {
        if (vortex_debug > 5)
            pr_debug("%s: In interrupt loop, status %4.4x.\n", dev->name, status);

        if (status & RxComplete)    // receive complete: hand the frames to vortex_rx
            vortex_rx(dev);

        if (status & TxAvailable) { // the NIC signals that it can accept more data to transmit
            if (vortex_debug > 5)
                pr_debug(" TX room bit was handled.\n");
            /* There's room in the FIFO for a full-sized packet. */
            iowrite16(AckIntr | TxAvailable, ioaddr + EL3_CMD);
            netif_wake_queue(dev);
        }
        if (status & DMADone) {     // DMA transmit finished: the sk_buff can be released now
            if (ioread16(ioaddr + Wn7_MasterStatus) & 0x1000) {
                iowrite16(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */
                pci_unmap_single(VORTEX_PCI(vp), vp->tx_skb_dma,
                                 (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE);
                pkts_compl++;
                bytes_compl += vp->tx_skb->len;
                dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
                if (ioread16(ioaddr + TxFree) > 1536) {
                    /*
                     * AKPM: FIXME: I don't think we need this. If the queue was stopped due to
                     * insufficient FIFO room, the TxAvailable test will succeed and call
                     * netif_wake_queue()
                     */
                    netif_wake_queue(dev);
                } else { /* Interrupt when FIFO has room for max-sized packet. */
                    iowrite16(SetTxThreshold + (1536 >> 2), ioaddr + EL3_CMD);
                    netif_stop_queue(dev);
                }
            }
        }

        /* Check for all uncommon interrupts at once. */
        if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) {
            if (status == 0xffff)
                break;
            if (status & RxEarly)
                vortex_rx(dev);
            spin_unlock(&vp->window_lock);
            vortex_error(dev, status);
            spin_lock(&vp->window_lock);
            window_set(vp, 7);
        }
        if (--work_done < 0) {      // handle at most max_interrupt_work frames per interrupt
            pr_warn("%s: Too much work in interrupt, status %4.4x\n", dev->name, status);
            /* Disable all pending interrupts. */
            do {
                vp->deferred |= status;     // save the current status so it can be handled on the next interrupt
                iowrite16(SetStatusEnb | (~vp->deferred & vp->status_enable), ioaddr + EL3_CMD);
                iowrite16(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD);
            } while ((status = ioread16(ioaddr + EL3_CMD)) & IntLatch); // ack until nothing is latched?
            /* The timer will reenable interrupts. */
            mod_timer(&vp->timer, jiffies + 1*HZ);
            break;
        }
        /* Acknowledge the IRQ. */
        iowrite16(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD);
    } while ((status = ioread16(ioaddr + EL3_STATUS)) & (IntLatch | RxComplete)); // keep looping while an interrupt is still latched or a receive has completed
/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
 */
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
                              unsigned int *qtail)
{
    struct softnet_data *sd;
    unsigned long flags;
    unsigned int qlen;
    rps_lock(sd);
    if (!netif_running(skb->dev))   // the device is no longer running: drop the frame
        goto drop;
    qlen = skb_queue_len(&sd->input_pkt_queue);     // current length of softnet_data's sk_buff input queue
    if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { // below the backlog limit and not flow-limited
        if (qlen) {
enqueue:
            __skb_queue_tail(&sd->input_pkt_queue, skb);
            input_queue_tail_incr_save(sd, qtail);
            rps_unlock(sd);
            local_irq_restore(flags);               // frame queued; re-enable interrupts
            return NET_RX_SUCCESS;
        }

        // The queue was empty, so schedule the backlog NAPI device first,
        // then jump back and enqueue the frame.
        /* Schedule NAPI for backlog device
         * We can use non atomic operation since we own the queue lock
         */
        if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
            if (!rps_ipi_queued(sd))
                ____napi_schedule(sd, &sd->backlog);
        }
        goto enqueue;
    }
    /* If softirq window is exhausted then punt.
     * Allow this to run for 2 jiffies since which will allow
     * an average latency of 1.5/HZ.
     */
    if (unlikely(budget <= 0 ||     // stop once the budget is used up or we have been running for two jiffies
                 time_after_eq(jiffies, time_limit))) {
        sd->time_squeeze++;
        break;
    }
}
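The same "bounded work per round" idea is easy to state outside the kernel. Below is a small Go analogy (not kernel code; pollWithBudget and its parameters are invented for illustration) that stops draining a work queue once either the budget or the time slice is exhausted, just as net_rx_action punts when its budget runs out or two jiffies have elapsed.

```go
package main

import (
    "fmt"
    "time"
)

// pollWithBudget drains work items but yields once either the work budget
// or the time slice is used up, mirroring the budget/time_limit check above.
func pollWithBudget(work <-chan int, budget int, slice time.Duration) (done int) {
    deadline := time.Now().Add(slice)
    for budget > 0 && time.Now().Before(deadline) {
        select {
        case item := <-work:
            _ = item // process one frame's worth of work here
            budget--
            done++
        default:
            return done // nothing pending
        }
    }
    return done // budget or time slice exhausted: punt to the next round
}

func main() {
    work := make(chan int, 16)
    for i := 0; i < 16; i++ {
        work <- i
    }
    fmt.Println(pollWithBudget(work, 8, 2*time.Millisecond)) // 8
}
```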
static int process_backlog(struct napi_struct *napi, int quota)
{
    int work = 0;
    struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);

    /* Check if we have pending ipi, its better to send them now,
     * not waiting net_rx_action() end.
     */
    if (sd_has_rps_ipi_waiting(sd)) {
        local_irq_disable();
        net_rps_action_and_irq_enable(sd);
    }

    rps_lock(sd);
    if (skb_queue_empty(&sd->input_pkt_queue)) {
        /*
         * Inline a custom version of __napi_complete().
         * only current cpu owns and manipulates this napi,
         * and NAPI_STATE_SCHED is the only possible flag set
         * on backlog.
         * We can use a plain write instead of clear_bit(),
         * and we dont need an smp_mb() memory barrier.
         */
        napi->state = 0;
        rps_unlock(sd);
if (status & TxAvailable) {     // the NIC signals that it can accept more data to transmit
    if (vortex_debug > 5)
        pr_debug(" TX room bit was handled.\n");
    /* There's room in the FIFO for a full-sized packet. */
    iowrite16(AckIntr | TxAvailable, ioaddr + EL3_CMD);
    netif_wake_queue(dev);
}
In other words, when the status says there is room to transmit, the driver calls netif_wake_queue to kick the transmit path and get frames sent.
Let's first look at how the transmit queue of the device is chosen.
/**
 * netif_wake_queue - restart transmit
 * @dev: network device
 *
 * Allow upper layers to call the device hard_start_xmit routine.
 * Used for flow control when transmit resources are available.
 */
static inline void netif_wake_queue(struct net_device *dev)
{
    netif_tx_wake_queue(netdev_get_tx_queue(dev, 0));
}
/*
 * Transmit possibly several skbs, and handle the return status as
 * required. Holding the __QDISC___STATE_RUNNING bit guarantees that
 * only one CPU can execute this function.
 *
 * Returns to the caller:
 *  0  - queue is empty or throttled.
 *  >0 - queue is not empty.
 */
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
                    struct net_device *dev, struct netdev_queue *txq,
                    spinlock_t *root_lock, bool validate)
{
    int ret = NETDEV_TX_BUSY;

    /* And release qdisc */
    spin_unlock(root_lock);

    /* Note that we validate skb (GSO, checksum, ...) outside of locks */
    if (validate)
        skb = validate_xmit_skb_list(skb, dev);

    if (likely(skb)) {
        HARD_TX_LOCK(dev, txq, smp_processor_id());
        if (!netif_xmit_frozen_or_stopped(txq))
            skb = dev_hard_start_xmit(skb, dev, txq, &ret);
/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
                                         unsigned long align,
                                         unsigned long vstart, unsigned long vend,
                                         int node, gfp_t gfp_mask)
{
    struct vmap_area *va;
    struct rb_node *n;
    unsigned long addr;
    int purged = 0;
    struct vmap_area *first;

    va = kmalloc_node(sizeof(struct vmap_area),
                      gfp_mask & GFP_RECLAIM_MASK, node);
    if (unlikely(!va))
        return ERR_PTR(-ENOMEM);

    /*
     * Only scan the relevant parts containing pointers to other objects
     * to avoid false negatives.
     */
    kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
retry:
    spin_lock(&vmap_area_lock);
    /*
     * Invalidate cache if we have more permissive parameters.
     * cached_hole_size notes the largest hole noticed _below_
     * the vmap_area cached in free_vmap_cache: if size fits
     * into that hole, we want to scan from vstart to reuse
     * the hole instead of allocating above free_vmap_cache.
     * Note that __free_vmap_area may update free_vmap_cache
     * without updating cached_hole_size or cached_align.
     */
    if (!free_vmap_cache ||             /* no cached result; also skip the cache when any check below holds */
        size < cached_hole_size ||      /* at least one hole below the cache could fit us: search from vstart to reuse it */
        vstart < cached_vstart ||       /* requested start is below the cached start */
        align < cached_align) {         /* requested alignment is smaller than the cached one */
nocache:
        cached_hole_size = 0;
        free_vmap_cache = NULL;
    }
    /* record if we encounter less permissive parameters */
    cached_vstart = vstart;
    cached_align = align;

    /* find starting point for our search */
    if (free_vmap_cache) {              /* reuse the previous search result, cached here after every successful lookup */
        first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
        addr = ALIGN(first->va_end, align);
        if (addr < vstart)              /* the aligned end of the cached area falls outside the range: search from scratch */
            goto nocache;
        if (addr + size < addr)         /* integer overflow */
            goto overflow;

        while (n) {                     /* find the node whose [va_start, va_end] brackets the aligned start address */
            struct vmap_area *tmp;
            tmp = rb_entry(n, struct vmap_area, rb_node);
            if (tmp->va_end >= addr) {
                first = tmp;
                if (tmp->va_start <= addr)
                    break;
                n = n->rb_left;
            } else
                n = n->rb_right;
        }

        if (!first)
            goto found;
    }

    /* from the starting point, walk areas until a suitable hole is found */
    while (addr + size > first->va_start && addr + size <= vend) {
        if (addr + cached_hole_size < first->va_start)
            cached_hole_size = first->va_start - addr;  /* remember the largest hole seen so far */
        addr = ALIGN(first->va_end, align);
        if (addr + size < addr)
            goto overflow;
        if (list_is_last(&first->list, &vmap_area_list))
            goto found;

    if (gfpflags_allow_blocking(gfp_mask)) {
        unsigned long freed = 0;
        blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
        if (freed > 0) {
            purged = 0;
            goto retry;
        }
    }

    if (printk_ratelimit())
        pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
                size);
    kfree(va);
    return ERR_PTR(-EBUSY);
}
/*
 * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
 * will have pfns corresponding to the "pages" array.
 * Here "kva" means kernel virtual address.
 * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
 */
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
                                   pgprot_t prot, struct page **pages)
{
    pgd_t *pgd;
    unsigned long next;
    unsigned long addr = start;
    int err = 0;
    int nr = 0;
/*
 * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
 * costly to service. That is between allocation orders which should
 * coalesce naturally under reasonable reclaim pressure and those which
 * will not.
 */
/* i.e. the kernel considers anything larger than 8 pages (order 3) a large allocation */
#define PAGE_ALLOC_COSTLY_ORDER 3
enum {
    MIGRATE_UNMOVABLE,
    MIGRATE_MOVABLE,
    MIGRATE_RECLAIMABLE,
    MIGRATE_PCPTYPES,   /* the number of types on the pcp lists */
    MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
    /*
     * MIGRATE_CMA migration type is designed to mimic the way
     * ZONE_MOVABLE works. Only movable pages can be allocated
     * from MIGRATE_CMA pageblocks and page allocator never
     * implicitly change migration type of MIGRATE_CMA pageblock.
     *
     * The way to use it is to change migratetype of a range of
     * pageblocks to MIGRATE_CMA which can be done by
     * __free_pageblock_cma() function. What is important though
     * is that a range of pageblocks must be aligned to
     * MAX_ORDER_NR_PAGES should biggest page be bigger then
     * a single pageblock.
     */
    MIGRATE_CMA,
#endif
#ifdef CONFIG_MEMORY_ISOLATION
    MIGRATE_ISOLATE,    /* can't allocate from here */
#endif
    MIGRATE_TYPES
};
/*
 * Do the hard work of removing an element from the buddy allocator.
 * Call me with the zone->lock already held.
 */
static struct page *__rmqueue(struct zone *zone, unsigned int order,
                              int migratetype)
{
    struct page *page;

    page = __rmqueue_smallest(zone, order, migratetype);
    if (unlikely(!page)) {
        if (migratetype == MIGRATE_MOVABLE)
            page = __rmqueue_cma_fallback(zone, order); /* a failed MOVABLE request falls back to CMA first */
/*
 * Go through the free lists for the given migratetype and remove
 * the smallest available page from the freelists
 */
static inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
                                int migratetype)
{
    unsigned int current_order;
    struct free_area *area;
    struct page *page;

    /* Find a page of the appropriate size in the preferred list */
    for (current_order = order; current_order < MAX_ORDER; ++current_order) {
        area = &(zone->free_area[current_order]);
        page = list_first_entry_or_null(&area->free_list[migratetype],
                                        struct page, lru);
        if (!page)
            continue;                       /* nothing at this order, try a bigger block */
        list_del(&page->lru);               /* lru is context dependent; here it links the page into free_list */
        rmv_page_order(page);               /* clear the buddy flag/order */
        area->nr_free--;                    /* one fewer free block at this order */
        expand(zone, page, order, current_order, area, migratetype);
        set_pcppage_migratetype(page, migratetype); /* record the page's migratetype */
        return page;
    }

    return NULL;
}
/*
 * The order of subdivision here is critical for the IO subsystem.
 * Please do not alter this order without good reasons and regression
 * testing. Specifically, as large blocks of memory are subdivided,
 * the order in which smaller blocks are delivered depends on the order
 * they're subdivided in this function. This is the primary factor
 * influencing the order in which pages are delivered to the IO
 * subsystem according to empirical testing, and this is also justified
 * by considering the behavior of a buddy system containing a single
 * large block of memory acted on by a series of small allocations.
 * This behavior is a critical factor in sglist merging's success.
 *
 * -- nyc
 */
static inline void expand(struct zone *zone, struct page *page,
    int low, int high, struct free_area *area,
    int migratetype)
{
    unsigned long size = 1 << high;

    while (high > low) {            /* iterate from the higher order down to the requested one */
        area--;
        high--;
        size >>= 1;
        VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);

        /* guard pages, TODO */
        if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
            debug_guardpage_enabled() &&
            high < debug_guardpage_minorder()) {
            /*
             * Mark as guard pages (or page), that will allow to
             * merge back to allocator when buddy will be freed.
             * Corresponding page table entries will not be touched,
             * pages will stay not present in virtual address space
             */
            set_page_guard(zone, &page[size], high, migratetype);
            continue;
        }
        list_add(&page[size].lru, &area->free_list[migratetype]);
        /* put the upper half on this order's free_list: `page` is used like an
         * array here, e.g. an 8-page block is split into 4+4, the second half
         * stays on the free list while the first half keeps being split */
        area->nr_free++;            /* one more free block at this order */
        set_page_order(&page[size], high);  /* roughly page->private = high: this block now belongs to order `high` */
    }
}
/*
 * This array describes the order lists are fallen back to when
 * the free lists for the desirable migrate type are depleted
 */
static int fallbacks[MIGRATE_TYPES][4] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_TYPES },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_TYPES },
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
#ifdef CONFIG_CMA
    [MIGRATE_CMA]         = { MIGRATE_TYPES }, /* Never used */
#endif
#ifdef CONFIG_MEMORY_ISOLATION
    [MIGRATE_ISOLATE]     = { MIGRATE_TYPES }, /* Never used */
#endif
};
/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
{
    struct free_area *area;
    unsigned int current_order;
    struct page *page;
    int fallback_mt;
    bool can_steal;

    /* Find the largest possible block of pages in the other list */
    for (current_order = MAX_ORDER-1;
         current_order >= order && current_order <= MAX_ORDER-1;
         --current_order) {
        area = &(zone->free_area[current_order]);
        /* Walk the fallback table to find a suitable fallback migratetype;
         * this weighs whether that type has enough free pages (above a
         * threshold) and whether pages may be stolen from it. */
        fallback_mt = find_suitable_fallback(area, current_order,
                                             start_migratetype, false, &can_steal);
        if (fallback_mt == -1)
            continue;
        /* Remove the page from the freelists */
        area->nr_free--;
        list_del(&page->lru);
        rmv_page_order(page);

        expand(zone, page, order, current_order, area, start_migratetype);
        /*
         * The pcppage_migratetype may differ from pageblock's
         * migratetype depending on the decisions in
         * find_suitable_fallback(). This is OK as long as it does not
         * differ for MIGRATE_CMA pageblocks. Those can be used as
         * fallback only via special __rmqueue_cma_fallback() function
         */
        set_pcppage_migratetype(page, start_migratetype);
/*
 * Structure for holding the mostly immutable allocation parameters passed
 * between functions involved in allocations, including the alloc_pages*
 * family of functions.
 *
 * nodemask, migratetype and high_zoneidx are initialized only once in
 * __alloc_pages_nodemask() and then never change.
 *
 * zonelist, preferred_zone and classzone_idx are set first in
 * __alloc_pages_nodemask() for the fast path, and might be later changed
 * in __alloc_pages_slowpath(). All other functions pass the whole structure
 * by a const pointer.
 */
struct alloc_context {
    struct zonelist *zonelist;
    nodemask_t *nodemask;
    struct zoneref *preferred_zoneref;
    int migratetype;
    enum zone_type high_zoneidx;
    bool spread_dirty_pages;
};
struct per_cpu_pages {
    int count;      /* number of pages in the list */
    int high;       /* high watermark, emptying needed */
    int batch;      /* chunk size for buddy add/remove */

    /* Lists of pages, one per migrate type stored on the pcp-lists */
    struct list_head lists[MIGRATE_PCPTYPES];
};

/*
 * Freeing function for a buddy system allocator.
 *
 * The concept of a buddy system is to maintain direct-mapped table
 * (containing bit values) for memory blocks of various "orders".
 * The bottom level table contains the map for the smallest allocatable
 * units of memory (here, pages), and each level above it describes
 * pairs of units from the levels below, hence, "buddies".
 * At a high level, all that happens here is marking the table entry
 * at the bottom level available, and propagating the changes upward
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep a list of pages, which are heads of continuous
 * free pages of length of (1 << order) and marked with _mapcount
 * PAGE_BUDDY_MAPCOUNT_VALUE. Page's order is recorded in page_private(page)
 * field.
 * So when we are allocating or freeing one, we can derive the state of the
 * other. That is, if we allocate a small block, and both were
 * free, the remainder of the region must be split into blocks.
 * If a block is freed, and its buddy is also free, then this
 * triggers coalescing into a block of larger size.
 *
 * -- nyc
 */
static inline void __free_one_page(struct page *page,  /* put the page back on the free list */
        unsigned long pfn,
        struct zone *zone, unsigned int order,
        int migratetype)
{
    unsigned long page_idx;
    unsigned long combined_idx;
    unsigned long uninitialized_var(buddy_idx);
    struct page *buddy;
    unsigned int max_order;

continue_merging:
    while (order < max_order - 1) {
        buddy_idx = __find_buddy_index(page_idx, order);    /* index of the buddy block */
        buddy = page + (buddy_idx - page_idx);              /* buddy page, reached via the relative distance */
        if (!page_is_buddy(page, buddy, order))             /* not a free buddy: stop merging */
            goto done_merging;
        /*
         * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
         * merge with it and move up one order.
         */
        if (page_is_guard(buddy)) {
            clear_page_guard(zone, buddy, order, migratetype);
        } else {
            /* take the buddy off the free_area */
            list_del(&buddy->lru);
            zone->free_area[order].nr_free--;
            rmv_page_order(buddy);
        }
        combined_idx = buddy_idx & page_idx;
        page = page + (combined_idx - page_idx);
        page_idx = combined_idx;
        order++;                                            /* keep merging upwards */
    }
    if (max_order < MAX_ORDER) {
        /* If we are here, it means order is >= pageblock_order.
         * We want to prevent merge between freepages on isolate
         * pageblock and normal pageblock. Without this, pageblock
         * isolation could cause incorrect freepage or CMA accounting.
         *
         * We don't want to hit this code for the more frequent
         * low-order merging.
         */
        /* Getting here means we have gone past the order of a pageblock and
         * may be at the boundary between pageblocks of different migratetypes.
         * Check once more whether an isolated pageblock (pages that must not
         * migrate) is involved; if so, stop the merge here, otherwise keep
         * merging upwards. */
        if (unlikely(has_isolate_pageblock(zone))) {
            int buddy_mt;
    /*
     * If this is not the largest possible page, check if the buddy
     * of the next-highest order is free. If it is, it's possible
     * that pages are being freed that will coalesce soon. In case,
     * that is happening, add the free page to the tail of the list
     * so it's less likely to be used soon and more likely to be merged
     * as a higher order page
     */
    if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
        /* We stopped short of the largest block. The kernel expects that this
         * block is likely to merge with other free blocks soon, so to reduce
         * fragmentation it prefers the tail of the list: the block stays free
         * longer and has a better chance of being coalesced into a bigger one. */
        struct page *higher_page, *higher_buddy;
        combined_idx = buddy_idx & page_idx;                        /* index after the merge */
        higher_page = page + (combined_idx - page_idx);             /* page after the merge */
        buddy_idx = __find_buddy_index(combined_idx, order + 1);    /* look for the buddy one order up */
        higher_buddy = higher_page + (buddy_idx - combined_idx);
        if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
            list_add_tail(&page->lru,
                          &zone->free_area[order].free_list[migratetype]);
            goto out;
        }
    }
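The index arithmetic in the merge path is compact, so here is just that part as a small Go sketch (function names invented for illustration): the buddy of a block differs from it only in bit `order` of its index, and the merged block starts at the lower of the two indices, which is their bitwise AND.

```go
package main

import "fmt"

// buddyIndex mirrors __find_buddy_index(): flip bit `order` of the index.
func buddyIndex(pageIdx uint64, order uint) uint64 {
    return pageIdx ^ (1 << order)
}

// combinedIndex mirrors `combined_idx = buddy_idx & page_idx` above.
func combinedIndex(pageIdx, buddyIdx uint64) uint64 {
    return pageIdx & buddyIdx
}

func main() {
    // An order-1 block at index 6: its buddy is at index 4, and merging the
    // two yields an order-2 block that starts at index 4.
    b := buddyIndex(6, 1)
    fmt.Println(b, combinedIndex(6, b)) // 4 4
}
```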
// All node types implement the Node interface.
type Node interface {
    Pos() token.Pos // position of first character belonging to the node
    End() token.Pos // position of first character immediately after the node
}

// All expression nodes implement the Expr interface.
type Expr interface {
    Node
    exprNode()
}

// All statement nodes implement the Stmt interface.
type Stmt interface {
    Node
    stmtNode()
}

// All declaration nodes implement the Decl interface.
type Decl interface {
    Node
    declNode()
}
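A quick way to see these interfaces in action is to parse a small file and classify the nodes visited during a walk. This uses nothing beyond the standard go/parser and go/ast packages:

```go
package main

import (
    "fmt"
    "go/ast"
    "go/parser"
    "go/token"
)

func main() {
    src := `package p
func add(a, b int) int { return a + b }`

    fset := token.NewFileSet()
    f, err := parser.ParseFile(fset, "p.go", src, 0)
    if err != nil {
        panic(err)
    }
    // Report which interface each concrete node type satisfies.
    ast.Inspect(f, func(n ast.Node) bool {
        switch n.(type) {
        case ast.Decl:
            fmt.Printf("decl: %T\n", n)
        case ast.Stmt:
            fmt.Printf("stmt: %T\n", n)
        case ast.Expr:
            fmt.Printf("expr: %T\n", n)
        }
        return true
    })
}
```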
// The parser structure holds the parser's internal state.
type parser struct {
    file    *token.File
    errors  scanner.ErrorList // errors collected while parsing
    scanner scanner.Scanner   // the lexer

    // Tracing/debugging
    mode   Mode // parsing mode
    trace  bool // == (mode & Trace != 0)
    indent int  // indentation used for tracing output

    // Comments
    comments    []*ast.CommentGroup
    leadComment *ast.CommentGroup // last lead comment
    lineComment *ast.CommentGroup // last line comment

    // Next token
    pos token.Pos   // token position
    tok token.Token // one token look-ahead
    lit string      // token literal

    // Error recovery
    // (used to limit the number of calls to syncXXX functions
    // w/o making scanning progress - avoids potential endless
    // loops across multiple parser functions during error recovery)
    syncPos token.Pos // last synchronization position for error recovery
    syncCnt int       // number of calls to syncXXX without progress

    // Non-syntactic parser control
    exprLev int  // < 0: in control clause, >= 0: in expression
    inRhs   bool // if set, the parser is parsing a rhs expression

    // Ordinary identifier scopes
    pkgScope   *ast.Scope        // pkgScope.Outer == nil
    topScope   *ast.Scope        // top-most scope; may be pkgScope
    unresolved []*ast.Ident      // unresolved identifiers
    imports    []*ast.ImportSpec // list of imports

    // Label scopes
    // (maintained by open/close LabelScope)
    labelScope  *ast.Scope     // label scope for current function
    targetStack [][]*ast.Ident // stack of unresolved labels
}
func (p *parser) parseStmt() (s ast.Stmt) {
    if p.trace {
        defer un(trace(p, "Statement"))
    }

    switch p.tok {
    case token.CONST, token.TYPE, token.VAR:
        s = &ast.DeclStmt{Decl: p.parseDecl(syncStmt)}
    case
        // tokens that may start an expression
        token.IDENT, token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING, token.FUNC, token.LPAREN, // operands
        token.LBRACK, token.STRUCT, token.MAP, token.CHAN, token.INTERFACE, // composite types
        token.ADD, token.SUB, token.MUL, token.AND, token.XOR, token.ARROW, token.NOT: // unary operators
        s, _ = p.parseSimpleStmt(labelOk)
        // because of the required look-ahead, labeled statements are
        // parsed by parseSimpleStmt - don't expect a semicolon after
        // them
        if _, isLabeledStmt := s.(*ast.LabeledStmt); !isLabeledStmt {
            p.expectSemi()
        }
    case token.GO:
        s = p.parseGoStmt()
    case token.DEFER:
        s = p.parseDeferStmt()
    case token.RETURN:
        s = p.parseReturnStmt()
    case token.BREAK, token.CONTINUE, token.GOTO, token.FALLTHROUGH:
        s = p.parseBranchStmt(p.tok)
    case token.LBRACE:
        s = p.parseBlockStmt()
    // ... remaining cases omitted
    // Parse the left-hand side list: typically l := r, l1, l2 = r1, r2,
    // l <- r, or l++.
    x := p.parseLhsList()
    switch p.tok {
    case
        token.DEFINE, token.ASSIGN, token.ADD_ASSIGN,
        token.SUB_ASSIGN, token.MUL_ASSIGN, token.QUO_ASSIGN,
        token.REM_ASSIGN, token.AND_ASSIGN, token.OR_ASSIGN,
        token.XOR_ASSIGN, token.SHL_ASSIGN, token.SHR_ASSIGN, token.AND_NOT_ASSIGN:
        // If a range keyword follows, range is treated as an operator and the
        // right-hand side is parsed as `range rhs`; otherwise this is a normal
        // assignment parsed as `lhs op rhs`, where op is one of the tokens
        // listed above.
        pos, tok := p.pos, p.tok
        p.next()
        var y []ast.Expr
        isRange := false
        if mode == rangeOk && p.tok == token.RANGE && (tok == token.DEFINE || tok == token.ASSIGN) {
            pos := p.pos
            p.next()
            y = []ast.Expr{&ast.UnaryExpr{OpPos: pos, Op: token.RANGE, X: p.parseRhs()}}
            isRange = true
        } else {
            y = p.parseRhsList()
        }
        as := &ast.AssignStmt{Lhs: x, TokPos: pos, Tok: tok, Rhs: y}

    // On ":" take the preceding identifier and build a labeled statement
    // such as `label: stmt` (the targets of goto/break/continue).
    case token.COLON:
        colon := p.pos
        p.next()
        if label, isIdent := x[0].(*ast.Ident); mode == labelOk && isIdent {
            // Go spec: The scope of a label is the body of the function
            // in which it is declared and excludes the body of any nested
            // function.
            stmt := &ast.LabeledStmt{Label: label, Colon: colon, Stmt: p.parseStmt()}
            p.declare(stmt, nil, p.labelScope, ast.Lbl, label)
            return stmt, false
        }

    // On "<-" build a send statement: chan <- rhs.
    case token.ARROW:
        // send statement
        arrow := p.pos
        p.next()
        y := p.parseRhs()
        return &ast.SendStmt{Chan: x[0], Arrow: arrow, Value: y}, false

    // On "++" or "--" build a stand-alone increment/decrement statement.
    case token.INC, token.DEC:
        // increment or decrement
        s := &ast.IncDecStmt{X: x[0], TokPos: p.pos, Tok: p.tok}
        p.next()
        return s, false
    }
// Position describes an arbitrary source position
// including the file, line, and column location.
// A Position is valid if the line number is > 0.
//
type Position struct {
    Filename string // filename, if any
    Offset   int    // offset, starting at 0
    Line     int    // line number, starting at 1
    Column   int    // column number, starting at 1 (byte count)
}
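A token.Pos is just a compact offset into a FileSet; converting it back into the Position above is a one-line call. A small usage example:

```go
package main

import (
    "fmt"
    "go/parser"
    "go/token"
)

func main() {
    fset := token.NewFileSet()
    f, err := parser.ParseFile(fset, "p.go", "package p\nvar x int\n", 0)
    if err != nil {
        panic(err)
    }
    decl := f.Decls[0]
    pos := fset.Position(decl.Pos()) // expand the Pos into Filename/Line/Column
    fmt.Println(pos)                 // p.go:2:1
}
```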
Older IEs serialize html uppercased, but IE9 does not... Would be better to expect case insensitive, unfortunately jasmine does not allow to use regexps for throw expectations.
Closes #392
Breaks foo.bar api, foo.baz should be used instead
GNU style
Author: Aleksa Sarai <asarai@suse.de> Date: Sun Mar 13 04:53:20 2016 +1100
libcontainer: add pids.max to PidsStats
In order to allow nice usage statistics (in terms of percentages and other such data), add the value of pids.max to the PidsStats struct returned from the pids cgroup controller.
Signed-off-by: Aleksa Sarai <asarai@suse.de>
Or:
Author: Alexander Morozov <lk4d4@docker.com> Date: Fri Mar 27 10:50:32 2015 -0700
Use syscall.Kill instead of p.cmd.Process.Kill
We need this to unmask syscall.ESRCH error, which handled in docker and can be handled by other clients.
Closes #457
Signed-off-by: Alexander Morozov <lk4d4@docker.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Michal Hocko <mhocko@suse.com>
// func now() (sec int64, nsec int32)
TEXT time·now(SB),NOSPLIT,$16
    // Be careful. We're calling a function with gcc calling convention here.
    // We're guaranteed 128 bytes on entry, and we've taken 16, and the
    // call uses another 8.
    // That leaves 104 for the gettime code to use. Hope that's enough!
    // In other words only 104 bytes of stack are guaranteed when gettime is
    // called: 128 on entry, minus 16 for the return/frame area, minus 8 used
    // by the call itself.
    MOVQ    runtime·__vdso_clock_gettime_sym(SB), AX
    CMPQ    AX, $0
    JEQ fallback
    MOVL    $0, DI // CLOCK_REALTIME
    LEAQ    0(SP), SI
    CALL    AX
    MOVQ    0(SP), AX   // sec
    MOVQ    8(SP), DX   // nsec
    MOVQ    AX, sec+0(FP)
    MOVL    DX, nsec+8(FP)
    RET
fallback:
    LEAQ    0(SP), DI
    MOVQ    $0, SI
    MOVQ    runtime·__vdso_gettimeofday_sym(SB), AX
    CALL    AX
    MOVQ    0(SP), AX   // sec
    MOVL    8(SP), DX   // usec
    IMULQ   $1000, DX
    MOVQ    AX, sec+0(FP)
    MOVL    DX, nsec+8(FP)
    RET
The first line, TEXT time·now(SB),NOSPLIT,$16: time·now(SB) is the symbol (address) of the function now, NOSPLIT means the function is assembled without the stack-split (stack growth) check, and $16 declares a 16-byte local stack frame, the 16 bytes the comment above accounts for. See the Go asm documentation [1] for the details of Go's assembly syntax.
The code first loads the address stored in __vdso_clock_gettime_sym(SB) (a symbol that points at the clock_gettime function; see man clock_gettime for the glibc definition). If the symbol is non-zero, the address of the top of the stack is computed into SI with the LEA instruction. DI and SI hold the first two arguments of the call, so this amounts to calling clock_gettime(0, &ret). The fallback branch handles the case where the vDSO symbol has not been initialized; it falls back to calling gettimeofday instead (see man gettimeofday for the libc definition).
A Go function call is guaranteed at least 128 bytes of stack headroom (note: headroom, not the goroutine's whole stack; see _StackSmall in runtime/stack.go). Once execution enters the corresponding C function, however, stack growth is no longer under Go's control, so the remaining 104 bytes have to be enough to keep that call from overflowing. In practice they are, because these two time-query functions are simple.
One way of obtaining a fast gettimeofday() is by writing the current time in a fixed place, on a page mapped into the memory of all applications, and updating this location on each clock interrupt. These applications could then read this fixed location with a single instruction - no system call required. Concerning gettimeofday(), a vsyscall version for the x86-64 is already part of the vanilla kernel. Patches for i386 exist. (An example of the kind of timing differences: John Stultz reports on an experiment where he measures gettimeofday() and finds 1.67 us for the int 0x80 way, 1.24 us for the sysenter way, and 0.88 us for the vsyscall.)
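On Linux/amd64, time.Now() goes through the vDSO path shown above rather than a real system call, and a rough micro-benchmark makes the cost easy to feel (numbers vary a lot by machine; this is only a probe, not a rigorous measurement):

```go
package main

import (
    "fmt"
    "testing"
    "time"
)

func main() {
    // Benchmark a bare time.Now() call; with the vDSO fast path this is
    // typically on the order of tens of nanoseconds per call.
    res := testing.Benchmark(func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            _ = time.Now()
        }
    })
    fmt.Println(res)
}
```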
// Package time knows the layout of this structure.
// If this struct changes, adjust ../time/sleep.go:/runtimeTimer.
// For GOOS=nacl, package syscall knows the layout of this structure.
// If this struct changes, adjust ../syscall/net_nacl.go:/runtimeTimer.
type timer struct {
    i int // heap index

    // Timer wakes up at when, and then at when+period, ... (period > 0 only)
    // each time calling f(now, arg) in the timer goroutine, so f must be
    // a well-behaved function and not block.
    when   int64
    period int64
    f      func(interface{}, uintptr)
    arg    interface{}
    seq    uintptr
}
Timers are managed as a heap. The heap is a complete tree that can be stored in a flat array, and i is the timer's index in that array. when is the time at which the waiting goroutine should be woken, and period is the interval between wakeups: the next one fires at when+period, and so on. On each firing f(now, arg) is called, where now is the current timestamp.
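A minimal sketch of such an array-backed timer heap, assuming an ordinary binary heap ordered by when (the runtime's own siftupTimer plays the same role on its heap; this is not the runtime's code):

```go
package main

import "fmt"

type timer struct {
    when int64
    i    int // index in the heap slice
}

type timerHeap []*timer

// add appends a timer and restores the heap order, like addtimerLocked plus
// siftupTimer: the timer with the earliest `when` ends up at index 0.
func (h *timerHeap) add(t *timer) {
    t.i = len(*h)
    *h = append(*h, t)
    h.siftUp(t.i)
}

func (h timerHeap) siftUp(i int) {
    for i > 0 {
        p := (i - 1) / 2 // parent in a binary heap
        if h[p].when <= h[i].when {
            break
        }
        h[p], h[i] = h[i], h[p]
        h[p].i, h[i].i = p, i
        i = p
    }
}

func main() {
    var h timerHeap
    for _, w := range []int64{50, 20, 30, 10} {
        h.add(&timer{when: w})
    }
    fmt.Println(h[0].when) // 10: the earliest deadline sits at the top
}
```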
var timers struct {
    lock         mutex
    gp           *g
    created      bool
    sleeping     bool
    rescheduling bool
    waitnote     note
    t            []*timer
}
// Add a timer to the heap and start or kick the timer proc.
// If the new timer is earlier than any of the others.
// Timers are locked.
func addtimerLocked(t *timer) {
    // when must never be negative; otherwise timerproc will overflow
    // during its delta calculation and never expire other runtime·timers.
    if t.when < 0 {
        t.when = 1<<63 - 1
    }
    t.i = len(timers.t)
    timers.t = append(timers.t, t)
    siftupTimer(t.i)
    if t.i == 0 {
        // siftup moved to top: new earliest deadline.
        if timers.sleeping {
            timers.sleeping = false
            notewakeup(&timers.waitnote)
        }
        if timers.rescheduling {
            timers.rescheduling = false
            goready(timers.gp, 0)
        }
    }
    if !timers.created {
        timers.created = true
        go timerproc()
    }
}
Start from the if !timers.created branch at the bottom: if the timer goroutine has not been created yet, a timerproc goroutine is started. timerproc is defined as follows.
// Timerproc runs the time-driven events.
// It sleeps until the next event in the timers heap.
// If addtimer inserts a new earlier event, addtimer1 wakes timerproc early.
func timerproc() {
    timers.gp = getg()
    for {
        lock(&timers.lock)
        timers.sleeping = false
        now := nanotime()
        delta := int64(-1)
        for {
            if len(timers.t) == 0 {
                delta = -1
                break
            }
            t := timers.t[0]
            delta = t.when - now
            if delta > 0 {
                break
            }
            if t.period > 0 {
                // leave in heap but adjust next time to fire
                t.when += t.period * (1 + -delta/t.period)
                siftdownTimer(0)
            } else {
                // remove from heap
                last := len(timers.t) - 1
                if last > 0 {
                    timers.t[0] = timers.t[last]
                    timers.t[0].i = 0
                }
                timers.t[last] = nil
                timers.t = timers.t[:last]
                if last > 0 {
                    siftdownTimer(0)
                }
                t.i = -1 // mark as removed
            }
            f := t.f
            arg := t.arg
            seq := t.seq
            unlock(&timers.lock)
            if raceenabled {
                raceacquire(unsafe.Pointer(t))
            }
            f(arg, seq)
            lock(&timers.lock)
        }
        if delta < 0 || faketime > 0 {
            // No timers left - put goroutine to sleep.
            timers.rescheduling = true
            // With no timers left, put the G to sleep: goparkunlock detaches
            // the G from its M, the goroutine sleeps and the M goes off to
            // find the next runnable G.
            goparkunlock(&timers.lock, "timer goroutine (idle)", traceEvGoBlock, 1)
            continue
        }
        // At least one timer pending. Sleep until then.
        timers.sleeping = true
        // Reset waitnote before sleeping on it.
        noteclear(&timers.waitnote)
        unlock(&timers.lock)
        // Sleep on the M's OS-dependent, per-thread semaphore; the M is woken
        // only when the timeout expires or when the waitnote is signalled.
        notetsleepg(&timers.waitnote, delta)
    }
}
Having followed those cases, go back to addtimerLocked: when a new timer is added, the checks run in order, so if the newly inserted timer lands at the top of the heap, the sleeping timer goroutine is woken to scan the heap for expired timers and run them. The wakeups here match the two kinds of sleep above: timers.sleeping means the M is sleeping on its OS semaphore, while timers.rescheduling means the G is parked in the scheduler (the M is not asleep) and is simply made runnable again. Expiring deadlines and newly added timers together are what drive the timer machinery at run time.
Having seen how package time is implemented, we can return to the question posed at the start of the article: how accurate can a timer be? Two factors matter. One is the granularity of the operating system itself: clock resolution is generally at the microsecond level while the time base is updated at millisecond intervals, so timestamps can be accurate to microseconds. The other is the scheduling of the timer goroutine itself: if the runtime or the operating system is heavily loaded, the timer goroutine may not respond promptly and timers fire late, so a 20 ms timer and a 30 ms timer can appear to fire concurrently. This is exactly the problem the author ran into, especially in containers whose CPU time is tightly restricted by cgroups. So we should not rely too heavily on timer ordering for program correctness. The doc comment on NewTimer stresses the same point: "NewTimer creates a new Timer that will send the current time on its channel after at least duration d." Nobody can guarantee that a timer fires exactly on time; of course, if your intervals are far larger than these effects, you can safely ignore them :)
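A quick experiment along these lines: ask for a 20 ms timer and measure how late it actually fires. On an idle machine the overshoot is usually well under a millisecond; under heavy load or tight cgroup CPU limits it can be much larger, which is exactly the effect described above.

```go
package main

import (
    "fmt"
    "time"
)

func main() {
    const d = 20 * time.Millisecond
    start := time.Now()
    <-time.After(d) // fires "after at least duration d", not exactly at d
    elapsed := time.Since(start)
    fmt.Printf("asked for %v, woke up after %v (overshoot %v)\n",
        d, elapsed, elapsed-d)
}
```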