Author: Lars Hjemli <hjemli@gmail.com>
Add caching infrastructure This enables internal caching of page output. Page requests are split into four groups: 1) repo listing (front page) 2) repo summary 3) repo pages w/symbolic references in query string 4) repo pages w/constant sha1's in query string Each group has a TTL specified in minutes. When a page is requested, a cached filename is stat(2)'ed and st_mtime is compared to time(2). If the TTL has expired (or the file doesn't exist), the cached file is regenerated. When generating a cached file, locking is used to avoid parallel processing of the request. If multiple processes try to acquire the same lock, the ones that fail to get the lock serve the (expired) cached file. If the cached file doesn't exist, the process instead calls sched_yield(2) before restarting the request processing. Signed-off-by: Lars Hjemli <hjemli@gmail.com>
.gitignore | 1 Makefile | 6 + README | 54 +++++++++++++++++++++++++ cache.c | 86 +++++++++++++++++++++++++++++++++++++++++ cgit.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++------- cgit.h | 47 +++++++++++++++++++++ config.c | 4 git.h | 60 ++++++++++++++++++++++++++-- html.c | 6 +
diff --git a/.gitignore b/.gitignore index 4eaec977feff4abb7e283ca3a4c68bec93c16ebc..c4c9ac3ffa5af077a3a5ffd349a413df1f6e703b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ # Files I don't care to see in git-status/commit cgit *.o +*~ diff --git a/Makefile b/Makefile index 4e72b07e70d5369b8f568421c2c77dc22f65c8b5..243f59051423db9024066caf44e371f1fc7ae114 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,9 @@ INSTALL_BIN = /var/www/htdocs/cgit.cgi INSTALL_CSS = /var/www/htdocs/cgit.css EXTLIBS = ../git/libgit.a ../git/xdiff/lib.a -lz -lcrypto -OBJECTS = cgit.o config.o html.o +OBJECTS = cgit.o config.o html.o cache.o + +CFLAGS += -Wall all: cgit @@ -15,6 +17,6 @@ clean: rm -f cgit *.o cgit: $(OBJECTS) - $(CC) -o cgit $(OBJECTS) $(EXTLIBS) + $(CC) $(CFLAGS) -o cgit $(OBJECTS) $(EXTLIBS) $(OBJECTS): cgit.h git.h diff --git a/README b/README new file mode 100644 index 0000000000000000000000000000000000000000..5917c37ce30b3f0a374c9fa376955f51f1d7bfbf --- /dev/null +++ b/README @@ -0,0 +1,54 @@ +Cache algorithm +=============== + +Cgit normally returns cached pages when invoked. If there is no cache file, or +the cache file has expired, it is regenerated. Finally, the cache file is +printed on stdout. + +When it is decided that a cache file needs to be regenerated, an attempt is +made to create a corresponding lockfile. If this fails, the process gives up +and uses the expired cache file instead. + +When there is no cache file for a request, an attempt is made to create a +corresponding lockfile. If this fails, the process calls sched_yield(2) before +restarting the request handling. 
+ +In pseudocode: + + name = generate_cache_name(request); +top: + if (!exists(name)) { + if (lock_cache(name)) { + generate_cache(request, name); + unlock_cache(name); + } else { + sched_yield(); + goto top; + } + } else if (expired(name)) { + if (lock_cache(name)) { + generate_cache(request, name); + unlock_cache(name); + } + } + print_file(name); + + +The following options can be set in /etc/cgitrc to control cache behaviour: + cache-root: root directory for cache files + cache-root-ttl: TTL for the repo listing page + cache-repo-ttl: TTL for any repos summary page + cache-dynamic-ttl: TTL for pages with symbolic references (not SHA1) + cache-static-ttl: TTL for pages with sha1 references + +TTL is specified in minutes, -1 meaning "infinite caching". + + +Naming of cache files +--------------------- +Repository listing: <cachedir>/index.html +Repository summary: <cachedir>/<repo>/index.html +Repository subpage: <cachedir>/<repo>/<page>/<querystring>.html + +The corresponding lock files have a ".lock" suffix. 
+ diff --git a/cache.c b/cache.c new file mode 100644 index 0000000000000000000000000000000000000000..1be1ea4393d50266ec623e7ff5bb38d9e7008795 --- /dev/null +++ b/cache.c @@ -0,0 +1,86 @@ +#include "cgit.h" + +const int NOLOCK = -1; + +int cache_lookup(struct cacheitem *item) +{ + if (!cgit_query_repo) { + item->name = xstrdup(fmt("%s/index.html", cgit_cache_root)); + item->ttl = cgit_cache_root_ttl; + } else if (!cgit_query_page) { + item->name = xstrdup(fmt("%s/%s/index.html", cgit_cache_root, + cgit_query_repo)); + item->ttl = cgit_cache_repo_ttl; + } else { + item->name = xstrdup(fmt("%s/%s/%s/%s.html", cgit_cache_root, + cgit_query_repo, cgit_query_page, + cgit_querystring)); + if (cgit_query_has_symref) + item->ttl = cgit_cache_dynamic_ttl; + else if (cgit_query_has_sha1) + item->ttl = cgit_cache_static_ttl; + else + item->ttl = cgit_cache_repo_ttl; + } + if (stat(item->name, &item->st)) { + item->st.st_mtime = 0; + return 0; + } + return 1; +} + +int cache_create_dirs() +{ + char *path; + + if (!cgit_query_repo) + return 0; + + path = fmt("%s/%s", cgit_cache_root, cgit_query_repo); + if (mkdir(path, S_IRWXU) && errno!=EEXIST) + return 0; + + if (cgit_query_page) { + path = fmt("%s/%s/%s", cgit_cache_root, cgit_query_repo, + cgit_query_page); + if (mkdir(path, S_IRWXU) && errno!=EEXIST) + return 0; + } + return 1; +} + +int cache_lock(struct cacheitem *item) +{ + int ret; + char *lockfile = fmt("%s.lock", item->name); + + top: + item->fd = open(lockfile, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR|S_IWUSR); + if (item->fd == NOLOCK && errno == ENOENT && cache_create_dirs()) + goto top; + if (item->fd == NOLOCK && errno == EEXIST) { + struct stat st; + time_t t; + if (stat(lockfile, &st)) + return ret; + t = time(NULL); + if (t-st.st_mtime > cgit_cache_max_create_time && + !unlink(lockfile)) + goto top; + return 0; + } + return (item->fd > 0); +} + +int cache_unlock(struct cacheitem *item) +{ + close(item->fd); + return (rename(fmt("%s.lock", item->name), item->name) 
== 0); +} + +int cache_expired(struct cacheitem *item) +{ + if (item->ttl < 0) + return 0; + return item->st.st_mtime + item->ttl * 60 < time(NULL); +} diff --git a/cgit.c b/cgit.c index 4c14f7746ad34052c761bd547ae2d6c5d23fec1b..09c857ca5f18ac23c63b9235a975f4f5ba0872af 100644 --- a/cgit.c +++ b/cgit.c @@ -10,29 +10,47 @@ static const char cgit_lib_error[] = "<div class='error'>%s: %s</div>"; +int htmlfd = 0; -char *cgit_root = "/var/git"; +char *cgit_root = "/usr/src/git"; char *cgit_root_title = "Git repository browser"; char *cgit_css = "/cgit.css"; char *cgit_logo = "/git-logo.png"; char *cgit_logo_link = "http://www.kernel.org/pub/software/scm/git/docs/"; char *cgit_virtual_root = NULL; +char *cgit_cache_root = "/var/cache/cgit"; + +int cgit_cache_root_ttl = 5; +int cgit_cache_repo_ttl = 5; +int cgit_cache_dynamic_ttl = 5; +int cgit_cache_static_ttl = -1; +int cgit_cache_max_create_time = 5; + char *cgit_repo_name = NULL; char *cgit_repo_desc = NULL; char *cgit_repo_owner = NULL; +int cgit_query_has_symref = 0; +int cgit_query_has_sha1 = 0; + +char *cgit_querystring = NULL; char *cgit_query_repo = NULL; char *cgit_query_page = NULL; char *cgit_query_head = NULL; +char *cgit_query_sha1 = NULL; + +struct cacheitem cacheitem; int cgit_parse_query(char *txt, configfn fn) { - char *t = txt, *value = NULL, c; + char *t, *value = NULL, c; if (!txt) return 0; + t = txt = xstrdup(txt); + while((c=*t) != '\0') { if (c=='=') { *t = '\0'; @@ -82,8 +100,13 @@ if (!strcmp(name,"r")) cgit_query_repo = xstrdup(value); else if (!strcmp(name, "p")) cgit_query_page = xstrdup(value); - else if (!strcmp(name, "h")) + else if (!strcmp(name, "h")) { cgit_query_head = xstrdup(value); + cgit_query_has_symref = 1; + } else if (!strcmp(name, "id")) { + cgit_query_sha1 = xstrdup(value); + cgit_query_has_sha1 = 1; + } } char *cgit_repourl(const char *reponame) @@ -136,9 +159,32 @@ } return 0; } +/* Sun, 06 Nov 1994 08:49:37 GMT */ +static char *http_date(time_t t) +{ + static char day[][4] 
= {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; + static char month[][4] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Now", "Dec"}; + struct tm *tm = gmtime(&t); + return fmt("%s, %02d %s %04d %02d:%02d:%02d GMT", day[tm->tm_wday], + tm->tm_mday, month[tm->tm_mon], 1900+tm->tm_year, + tm->tm_hour, tm->tm_min, tm->tm_sec); +} + +static int ttl_seconds(int ttl) +{ + if (ttl<0) + return 60 * 60 * 24 * 365; + else + return ttl * 60; +} + static void cgit_print_docstart(char *title) { html("Content-Type: text/html; charset=utf-8\n"); + htmlf("Last-Modified: %s\n", http_date(cacheitem.st.st_mtime)); + htmlf("Expires: %s\n", http_date(cacheitem.st.st_mtime + + ttl_seconds(cacheitem.ttl))); html("\n"); html(cgit_doctype); html("<html>\n"); @@ -175,6 +221,7 @@ struct dirent *de; struct stat st; char *name; + chdir(cgit_root); cgit_print_docstart(cgit_root_title); cgit_print_pageheader(cgit_root_title); @@ -197,7 +244,7 @@ if (!S_ISDIR(st.st_mode)) continue; cgit_repo_name = cgit_repo_desc = cgit_repo_owner = NULL; - name = fmt("%s/.git/info/cgit", de->d_name); + name = fmt("%s/info/cgit", de->d_name); if (cgit_read_config(name, cgit_repo_config_cb)) continue; @@ -291,7 +338,7 @@ html(""); strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", time); html_txt(buf); html("</td><td>"); - char *qry = fmt("h=%s", sha1_to_hex(commit->object.sha1)); + char *qry = fmt("id=%s", sha1_to_hex(commit->object.sha1)); char *url = cgit_pageurl(cgit_query_repo, "view", qry); html_link_open(url, NULL, NULL); html_txt(subject); @@ -371,8 +418,8 @@ } static void cgit_print_repo_page() { - if (chdir(cgit_query_repo) || - cgit_read_config(".git/info/cgit", cgit_repo_config_cb)) { + if (chdir(fmt("%s/%s", cgit_root, cgit_query_repo)) || + cgit_read_config("info/cgit", cgit_repo_config_cb)) { char *title = fmt("%s - %s", cgit_root_title, "Bad request"); cgit_print_docstart(title); cgit_print_pageheader(title); @@ -381,7 +428,7 @@ strerror(errno)); cgit_print_docend(); 
return; } - + setenv("GIT_DIR", fmt("%s/%s", cgit_root, cgit_query_repo), 1); char *title = fmt("%s - %s", cgit_repo_name, cgit_repo_desc); cgit_print_docstart(title); cgit_print_pageheader(title); @@ -390,21 +437,61 @@ cgit_print_repo_summary(); else if (!strcmp(cgit_query_page, "log")) { cgit_print_log(cgit_query_head, 0, 100); } else if (!strcmp(cgit_query_page, "view")) { - cgit_print_object(cgit_query_head); + cgit_print_object(cgit_query_sha1); } cgit_print_docend(); } -int main(int argc, const char **argv) +static void cgit_fill_cache(struct cacheitem *item) { - if (cgit_read_config("/etc/cgitrc", cgit_global_config_cb)) - die("Error reading config: %d %s", errno, strerror(errno)); - - chdir(cgit_root); - cgit_parse_query(getenv("QUERY_STRING"), cgit_querystring_cb); + htmlfd = item->fd; + item->st.st_mtime = time(NULL); if (cgit_query_repo) cgit_print_repo_page(); else cgit_print_repolist(); +} + +static void cgit_refresh_cache(struct cacheitem *item) +{ + top: + if (!cache_lookup(item)) { + if (cache_lock(item)) { + cgit_fill_cache(item); + cache_unlock(item); + } else { + sched_yield(); + goto top; + } + } else if (cache_expired(item)) { + if (cache_lock(item)) { + cgit_fill_cache(item); + cache_unlock(item); + } + } +} + +static void cgit_print_cache(struct cacheitem *item) +{ + static char buf[4096]; + ssize_t i; + + int fd = open(item->name, O_RDONLY); + if (fd<0) + die("Unable to open cached file %s", item->name); + + while((i=read(fd, buf, sizeof(buf))) > 0) + write(STDOUT_FILENO, buf, i); + + close(fd); +} + +int main(int argc, const char **argv) +{ + cgit_read_config("/etc/cgitrc", cgit_global_config_cb); + cgit_querystring = xstrdup(getenv("QUERY_STRING")); + cgit_parse_query(cgit_querystring, cgit_querystring_cb); + cgit_refresh_cache(&cacheitem); + cgit_print_cache(&cacheitem); return 0; } diff --git a/cgit.h b/cgit.h index 19f7ba73354dd5fb7eff950fc3c2a29106825c1a..1e084d4cb02f743d2b4c014ce69b9a8cacad491f 100644 --- a/cgit.h +++ b/cgit.h @@ -3,6 
+3,46 @@ #define CGIT_H #include "git.h" #include <openssl/sha.h> +#include <ctype.h> +#include <sched.h> + +typedef void (*configfn)(const char *name, const char *value); + +struct cacheitem { + char *name; + struct stat st; + int ttl; + int fd; +}; + +extern char *cgit_root; +extern char *cgit_root_title; +extern char *cgit_css; +extern char *cgit_logo; +extern char *cgit_logo_link; +extern char *cgit_virtual_root; +extern char *cgit_cache_root; + +extern int cgit_cache_root_ttl; +extern int cgit_cache_repo_ttl; +extern int cgit_cache_dynamic_ttl; +extern int cgit_cache_static_ttl; +extern int cgit_cache_max_create_time; + +extern char *cgit_repo_name; +extern char *cgit_repo_desc; +extern char *cgit_repo_owner; + +extern int cgit_query_has_symref; +extern int cgit_query_has_sha1; + +extern char *cgit_querystring; +extern char *cgit_query_repo; +extern char *cgit_query_page; +extern char *cgit_query_head; +extern char *cgit_query_sha1; + +extern int htmlfd; extern char *fmt(const char *format,...); @@ -10,12 +50,15 @@ extern void html(const char *txt); extern void htmlf(const char *format,...); extern void html_txt(char *txt); extern void html_attr(char *txt); - extern void html_link_open(char *url, char *title, char *class); extern void html_link_close(void); -typedef void (*configfn)(const char *name, const char *value); extern int cgit_read_config(const char *filename, configfn fn); + +extern int cache_lookup(struct cacheitem *item); +extern int cache_lock(struct cacheitem *item); +extern int cache_unlock(struct cacheitem *item); +extern int cache_expired(struct cacheitem *item); #endif /* CGIT_H */ diff --git a/config.c b/config.c index 858ab699a181f47a4463aa771a80f902c2d5760d..ee49b626defec9413699607b895121100de53dbd 100644 --- a/config.c +++ b/config.c @@ -32,7 +32,7 @@ if (!isname && (c=='#' || c==';')) { skip_line(f); continue; } - if (!isname && isblank(c)) + if (!isname && isspace(c)) continue; if (c=='=' && !*value) { @@ -64,7 +64,7 @@ if (!f) return 
-1; - while(len = read_config_line(f, line, &value, sizeof(line))) + while((len = read_config_line(f, line, &value, sizeof(line))) > 0) (*fn)(line, value); fclose(f); diff --git a/git.h b/git.h index 443f216f658c23bed45c211950868daccb94131d..dfa3542256bbbfa963f83670dc2f8b49d0b960ff 100644 --- a/git.h +++ b/git.h @@ -33,6 +33,26 @@ #include #include <time.h> +/* On most systems <limits.h> would have given us this, but + * not on some systems (e.g. GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#ifdef __GNUC__ +#define NORETURN __attribute__((__noreturn__)) +#else +#define NORETURN +#ifndef __attribute__ +#define __attribute__(x) +#endif +#endif + + +extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); + + static inline char* xstrdup(const char *str) { char *ret = strdup(str); @@ -108,9 +128,13 @@ /* Convert to/from hex/sha1 representation */ #define MINIMUM_ABBREV 4 #define DEFAULT_ABBREV 7 +extern int sha1_object_info(const unsigned char *, char *, unsigned long *); extern void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size); +extern int get_sha1(const char *str, unsigned char *sha1); +extern int get_sha1_hex(const char *hex, unsigned char *sha1); +extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! 
*/ @@ -183,6 +207,21 @@ char *buffer; }; +struct commit *lookup_commit(const unsigned char *sha1); +struct commit *lookup_commit_reference(const unsigned char *sha1); +struct commit *lookup_commit_reference_gently(const unsigned char *sha1, + int quiet); + +int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size); +int parse_commit(struct commit *item); + +struct commit_list * commit_list_insert(struct commit *item, struct commit_list **list_p); +struct commit_list * insert_by_date(struct commit *item, struct commit_list **list); + +void free_commit_list(struct commit_list *list); + +void sort_by_date(struct commit_list **list); + /* Commit formats */ enum cmit_fmt { CMIT_FMT_RAW, @@ -197,12 +236,8 @@ CMIT_FMT_UNSPECIFIED, }; - +extern unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *, unsigned long len, char *buf, unsigned long space, int abbrev, const char *subject, const char *after_subject, int relative_date); -struct commit *lookup_commit(const unsigned char *sha1); -struct commit *lookup_commit_reference(const unsigned char *sha1); -struct commit *lookup_commit_reference_gently(const unsigned char *sha1, - int quiet); typedef void (*topo_sort_set_fn_t)(struct commit*, void *data); typedef void* (*topo_sort_get_fn_t)(struct commit*); @@ -306,6 +341,16 @@ +/* + * from git:refs.g + */ + +typedef int each_ref_fn(const char *refname, const unsigned char *sha1, int flags, void *cb_data); +extern int head_ref(each_ref_fn, void *); +extern int for_each_ref(each_ref_fn, void *); +extern int for_each_tag_ref(each_ref_fn, void *); +extern int for_each_branch_ref(each_ref_fn, void *); +extern int for_each_remote_ref(each_ref_fn, void *); @@ -391,6 +436,11 @@ topo_sort_get_fn_t topo_getter; }; +extern void init_revisions(struct rev_info *revs, const char *prefix); +extern int setup_revisions(int argc, const char **argv, struct rev_info *revs, const char *def); +extern int handle_revision_arg(const char *arg, struct rev_info 
*revs,int flags,int cant_be_filename); + +extern void prepare_revision_walk(struct rev_info *revs); extern struct commit *get_revision(struct rev_info *revs); diff --git a/html.c b/html.c index 5780dc179e6c22070e0f3a509cafbddfabb89e13..bf1490ff28439f1853cfee7436d4442d12c00a9f 100644 --- a/html.c +++ b/html.c @@ -20,16 +20,18 @@ } void html(const char *txt) { - fputs(txt, stdout); + write(htmlfd, txt, strlen(txt)); } void htmlf(const char *format, ...) { + static char buf[65536]; va_list args; va_start(args, format); - vprintf(format, args); + vsnprintf(buf, sizeof(buf), format, args); va_end(args); + html(buf); } void html_txt(char *txt)