1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#![feature(seek_convenience)]
#![allow(unused_variables, dead_code)]

#![doc(html_logo_url = "http://www.catb.org/~esr/reposurgeon/reposturgeon.png")]

#[macro_use] extern crate bitflags;
#[macro_use] extern crate failure;
#[macro_use] extern crate failure_derive;
extern crate tinf;
#[macro_use] extern crate lazy_static;
extern crate regex;

use std::collections::{HashMap, HashSet};

pub mod branchify;
mod byteslice;
#[macro_use] pub mod logging;
pub mod pathmap;
pub mod sectionreader;
pub mod svnreader;

pub use branchify::Branchify;
pub use sectionreader::SectionReader;

// Short index types save space in very large arrays of repository structures.
// But they're mainly here to avoid strings, which are expensive (16 bytes) in Go.
type MarkIdx = u32; // Mark indices
type BlobIdx = u32; // Blob indices. Should not be narrower than mark indices.
type RevIdx = u32;  // Revision indices
// Large repositories can have more than 65536 nodes within a
// revision, especially after expansion of SVN directory copies, so it
// is not safe for this to be a 16-bit type (u16 here, uint16 in Go).
type NodeIdx = u32;

// No user-serviceable parts below this line

// Blob represents a detached blob of data referenced by a mark.
//
// The 'stream lifetime is the one carried by the borrowed data inside
// BlobContent.  Because Default is derived, a default Blob has empty
// strings, an empty pathlistmap, no content (None) and a blobseq of 0.
// The commented-out fields are still written with Go-style types and
// appear not to have been ported yet.
#[derive(Clone, Default, Debug)]
pub struct Blob<'stream> {
    // Mark that refers to this blob.
    mark: String,
    // NOTE(review): presumably the path of the file backing this blob;
    // nothing in this part of the file reads or writes it -- confirm.
    abspath: String,
    //cookie: Cookie, // CVS/SVN cookie analyzed out of this file
    //repo: *Repository,
    //oplist: Vec<FileOp>, // In-repo paths associated with this blob
    pathlistmap: HashMap<String, bool>, // optimisation for the above, kept in sync
    // Source of the blob's data, or None when no content has been set.
    content: Option<BlobContent<'stream>>,
    //_expungehook: *Blob,
    // NOTE(review): presumably this blob's sequence number among all blobs
    // (it uses the blob index type) -- confirm against the code that sets it.
    blobseq: BlobIdx,
    //colors: colorSet, // Scratch space for graph-coloring algorithms
}

// BlobContent says where the data of a Blob comes from.  Both variants
// hold data tied to the 'stream lifetime.
#[derive(Clone, Debug)]
enum BlobContent<'stream> {
    // Content read through a SectionReader.
    // NOTE(review): presumably a window onto the repository's seek
    // stream -- confirm against the sectionreader module.
    Section(SectionReader<'stream>),
    // Content read from a borrowed, already-open file.
    File(&'stream std::fs::File)
}

impl<'stream> Blob<'stream> {
    // has_file answers the question: "Does this blob have its own file?"
    //
    // As written, the answer is yes only when the content is the Section
    // variant; File content and a blob with no content both answer no.
    // NOTE(review): answering "no" for the File variant looks inverted
    // relative to the question above -- confirm the intended meaning of
    // the BlobContent variants before relying on this.
    fn has_file(&self) -> bool {
        self.content.as_ref().map_or(false, |source| match source {
            BlobContent::Section(_) => true,
            BlobContent::File(_) => false,
        })
    }

    // set_content replaces whatever content the blob currently holds with
    // the given value; passing None leaves the blob without content.
    //
    // Nothing is written to disk here.  The Go-syntax body kept below as a
    // comment wrote the blob text to a blob file (optionally gzip-compressed)
    // when hasfile() was true; it refers to `text`, which is not a parameter
    // of this function, and has not been ported.
    fn set_content(&mut self, content: Option<BlobContent<'stream>>) {
        self.content = content;
        //if b.hasfile() {
        //    file, err := os.OpenFile(b.getBlobfile(true),
        //        os.O_WRONLY|os.O_CREATE|os.O_TRUNC, userReadWriteMode)
        //    if err != nil {
        //        panic(fmt.Errorf("Blob write: %v", err))
        //    }
        //    defer file.Close()
        //    if control.flagOptions["compressblobs"] {
        //        output := gzip.NewWriter(file)
        //
        //        defer output.Close()
        //        _, err = output.Write(text)
        //    } else {
        //        _, err = file.Write(text)
        //    }
        //    if err != nil {
        //        panic(fmt.Errorf("Blob writer: %v", err))
        //    }
        //}
    }
}

// Callout is a stub object for callout marks in incomplete repository segments.
//
// Default is derived, so a default Callout has empty strings, no child
// nodes and deleteme == false.
#[derive(Default, Debug)]
struct Callout {
    // The callout mark this stub stands in for.
    mark:        String,
    // NOTE(review): presumably the branch name, as in Commit.branch -- confirm.
    branch:      String,
    // NOTE(review): presumably identifiers of child nodes, stored as
    // strings -- confirm what kind of identifier is stored here.
    _child_nodes: Vec<String>,
    //color:       colorType,
    // NOTE(review): presumably the same deletion flag as Commit.deleteme -- confirm.
    deleteme:    bool,
}

// Commit represents a commit event in a fast-export stream
//
// Default is derived, so a default Commit has empty strings and both
// flags false.  The commented-out fields below appear not to have been
// ported yet; several still carry Go-style names.
#[derive(Default, Debug)]
struct Commit {
    legacy_id: String,        // Commit's ID in an alien system
    mark: String,        // Mark name of commit (may transiently be "")
    comment: String,        // Commit comment
    branch: String,        // branch name
    //authors:        Vec<Attribution>, // Authors of commit
    //committer:      Attribution,   // Person responsible for committing it.
    //fileops:        Vec<FileOp>,     // blob and file operation list
    //_manifest:      *PathMap,      // efficient map of *Fileop values
    //repo:           &Repository,
    //properties:     &HashMap<String, String>,  // commit properties (extension)
    //attachments:    Vec<Event>,      // Tags and Resets pointing at this commit
    //_parentNodes:   Vec<CommitLike>, // list of parent nodes
    //_childNodes:    Vec<CommitLike>, // list of child nodes
    //_expungehook:   &Commit,
    //color:          colorType, // Scratch storage for graph-coloring
    deleteme: bool,      // Flag used during deletion operations
    implicit_parent: bool,      // Whether the first parent was implicit
}

//enum Event {
//    Callout(Callout),
//    Commit(Commit),
//}

// Event is an operation in a repository's time sequence of modifications.
//
// Only the method signatures are declared here; the notes below are
// inferred from names and signatures and should be confirmed against
// the implementations.
trait Event {
    // Returns an identifying string for this event.
    // NOTE(review): the format of the string is not defined here.
    fn id_me(&self) -> String;
    // Returns the event's mark as an owned String.
    fn get_mark(&self) -> String;
    // Returns the event's comment as an owned String.
    fn get_comment(&self) -> String;
    //fn save(&self, writer: impl std::io::Write) -> std::io::Result<()>;
    // NOTE(review): presumably re-homes the event into `destination`; both
    // the event and the repository are borrowed immutably here, so an
    // implementation would need interior mutability to change either.
    fn moveto(&self, destination: &Repository);
    // NOTE(review): presumably reads the deletion flag (e.g. a `deleteme`
    // field) -- confirm.
    fn get_del_flag(&self) -> bool;
    // NOTE(review): presumably sets the deletion flag to `state` -- confirm.
    fn set_del_flag(&mut self, state: bool);
}

// Repository is the entire state of a version-control repository
//
// Default is derived, so a default Repository has empty strings and
// collections, zero counters, false flags, and None for readtime and
// seekstream.  The commented-out fields appear not to have been ported
// yet; several still use Go syntax.
#[derive(Default, Debug)]
pub struct Repository {
    name: String,
    // NOTE(review): presumably when the repository was read in; None if
    // that has not been recorded -- confirm.
    readtime: Option<std::time::SystemTime>,
    //vcs: &VCS,
    stronghint: bool,
    //hintlist: Vec<Hint>,
    //sourcedir: &'a std::path::Path,
    // Open file owned by the repository, if any.
    // NOTE(review): presumably the input stream that blob content is
    // sought in -- confirm.
    seekstream: Option<std::fs::File>,
    //events: Vec<Box<dyn Event>>, // A list of the events encountered, in order
    // Mutex-guarded map from a mark string to a usize index.
    // NOTE(review): presumably an index into the (not yet ported) events list.
    _mark_to_index: std::sync::Mutex<HashMap<String, usize>>,
    _mark_to_index_len: usize, // Cache is valid for events[:_markToIndexLen]
    _mark_to_index_saw_n: bool, // whether we saw a null mark blob/commit when caching
    _namecache: HashMap<String, Vec<isize>>,
    preserve_set: HashSet<String>,
    //basedir: &'a std::path::Path,
    uuid: String,
    write_legacy: bool,
    //dollarMap: std::sync::Mutex<HashMap<String, usize>>,
    //dollarOnce sync.Once,
    legacy_map: HashMap<String, HashSet<isize>>,
    legacy_count: RevIdx,
    inlines: isize,
    uniqueness: String, // "committer_date", "committer_stamp", or ""
    markseq: isize,
    //authormap: HashMap<String, Contributor>,
    //tzmap: map[string]*time.Location // most recent email address to timezone
    //aliases: HashMap<ContributorID, ContributorID>,
    // Write control - set, if required, before each dump
    //preferred: &VCS,                       // overrides vcs slot for writes
    realized: HashMap<String, bool>,         // clear and remake this before each dump
    branch_position: HashMap<String, Commit>, // clear and remake this before each dump
    write_options: HashSet<String>,           // options requested on this write
    internals: HashSet<String>,              // export code computes this itself
}